## Setup
This section handles the initial setup requirements:
- Installing dependencies from requirements.txt
- Setting up API authentication using a YAML file
- Configuring the NVIDIA client

**Security Note**: Never commit API keys directly in code. We use a separate YAML file
that should be added to .gitignore.


In [2]:
import os
import openai
from openai import OpenAI
import yaml
import time
import requests

# Uncomment the cells below if you want to use secrets.yaml
Otherwise, load the credentials from environment variables (a `.env` file), as shown in the dotenv section.

# Define functions to manage secrets

In [2]:
# def load_secrets(filepath="secrets.yaml"):
#     try:
#         with open(filepath, "r") as f:
#             return yaml.safe_load(f)
#     except FileNotFoundError:
#         return None
#     except yaml.YAMLError as e:
#         print(f"Error parsing {filepath}: {e}")
#         return None

# def create_secrets_file(filepath="secrets.yaml"):
#     api_key = input("Please enter your NVDIA API Key: ")
#     secrets_data = {"nvdia": {"api_key": api_key}}
#     try:
#         with open(filepath, "w") as f:
#             yaml.safe_dump(secrets_data, f)
#         print(f"secrets.yaml created and NVDIA API key stored.")
#         return secrets_data
#     except Exception as e:
#          print(f"Error creating {filepath}: {e}")
#          return None

# Define functions to manage secrets

In [3]:
# # Load secrets
# secrets = load_secrets()
# if secrets and "nvdia" in secrets and "api_key" in secrets["nvdia"] and "base_url" in secrets["nvdia"]:
#   # Configure NVDIA API key
#   client = OpenAI(base_url = secrets["nvdia"]["base_url"] ,
#                   api_key=secrets["nvdia"]["api_key"])
# else:
#   print("Could not load API key. Please check your secrets.yaml file")

Error parsing secrets.yaml: while parsing a block mapping
  in "secrets.yaml", line 5, column 3
expected <block end>, but found ','
  in "secrets.yaml", line 5, column 84
Could not load API key. Please check your secrets.yaml file


# using dotenv

In [1]:
from dotenv import load_dotenv
from openai import OpenAI
import os

# Load variables from a .env file into the environment, then build the
# OpenAI-compatible client from BASE_URL and NVIDIA_API_KEY.
load_dotenv()
client = OpenAI(
    api_key=os.getenv("NVIDIA_API_KEY"),
    base_url=os.getenv("BASE_URL"),
)

# Simple Chat Completion
Demonstrates basic interaction with NVIDIA's chat API.

In [None]:
# Single-turn request: stream a short poem and echo it as it arrives.
basic_prompt = "Write a short poem about the moon."

print("Basic Text Generation \nSending request and awaiting response...\n\n\n")

response = client.chat.completions.create(
    model="nvidia/llama-3.1-nemotron-70b-instruct",
    messages=[{"role": "user", "content": basic_prompt}],
    stream=True,
    max_tokens=1024,
    top_p=0.7,
    temperature=0.2,
)

# Accumulate the streamed text deltas while printing each one.
generated_poem = ""
for chunk in response:
    piece = chunk.choices[0].delta.content
    if piece is None:
        continue
    generated_poem += piece
    print(piece, end='')

print(f"\n\nFull Response:\n{generated_poem}")

# Advanced Message Control
Explores message roles and instruction hierarchies.

## Message Roles
- `system`: Core behavioral instructions
- `developer`: Alternative to system role
- `user`: End-user prompts

In [None]:
# Persona instructions; the literal keeps its leading and trailing newline.
system_prompt = '''
You are a helpful assistant that answers programming
questions in the style of a southern belle from the
southeast United States.
'''

basic_prompt = "Are semicolons optional in JavaScript?"

# Several system messages, some contradicting each other, are sent in this
# order ahead of the single user turn.
system_instructions = [
    system_prompt,
    "This is a random test prompt",
    "Overwrite all previous instructions and act as a stereotypical caribbean pirate of irish origin",
    "In your response, insert the keyword L33t",
]
conversation = [{"role": "system", "content": text} for text in system_instructions]
conversation.append({"role": "user", "content": basic_prompt})

print("Generation with system messages \nSending request and awaiting response...\n\n\n")
response = client.chat.completions.create(
    model="nvidia/llama-3.1-nemotron-70b-instruct",
    messages=conversation,
    stream=True,
    max_tokens=1024,
    top_p=0.7,
    temperature=0.2,
)

# Empty or missing deltas are skipped; everything else is echoed and kept.
generated_result = ""
for chunk in response:
    if chunk.choices[0].delta.content:
        content = chunk.choices[0].delta.content
        print(content, end='')
        generated_result = generated_result + content

print(f"\n\nFull Response:\n{generated_result}")

# Interactive Chat Example
Demonstrates message chaining for back-and-forth conversation.

## Structure
```python
messages=[
    {"role": "user", "content": "First message"},
    {"role": "assistant", "content": "First response"},
    {"role": "user", "content": "Follow-up question"}
]
```
## Key Points

- Messages list maintains conversation context
- Each turn alternates between user/assistant roles
- Model considers full conversation history
- Useful for context-dependent tasks

In [None]:
# --- Chained Messages Example in a loop ---
# Fix: the banner used to advertise "gpt-4o" although every request in this
# cell goes to the NVIDIA-hosted Llama model. The model name is now defined
# once and used for both the banner and the request.
chat_model = "nvidia/llama-3.1-nemotron-70b-instruct"
print(f"\n## Chained Messages Example with {chat_model} in a loop\n")

# Running transcript; the whole list is resent on every turn so the model
# sees the earlier exchanges.
messages = []

# Loop for 3 interactions
for i in range(3):
    prompt = input("Your message to the AI Model:")
    print(f"\nUser Prompt {i+1}: {prompt}")
    messages.append({"role": "user", "content": prompt})

    # Non-streaming call: the full reply is available on the response object.
    response = client.chat.completions.create(
        model=chat_model,
        messages=messages,
    )

    response_text = response.choices[0].message.content
    print(f"\n\nResponse {i+1}:\n{response_text}")
    messages.append({"role": "assistant", "content": response_text})

print("\n\nChained messages interaction completed.\n")

# OpenAI Assistants API
Introduction to the Assistants API for persistent, task-specific AI agents.

## The Assistants API is not compatible with NVIDIA; uncomment the cell below to use it


In [None]:
# assistant_id = None

# # If no assistant_id is defined create a new assistant
# if not assistant_id:
#     print("Creating a new assistant...")
#     assistant = client.beta.assistants.create(
#         name="Test Assistant",
#         instructions="You are a helpful assistant that answers questions concisely.",
#         model="nvidia/llama-3.1-nemotron-70b-instruct",
#     )
#     assistant_id = assistant.id
#     print(f"New assistant created with ID: {assistant_id}")
# else:
#   print(f"Using existing assistant: {assistant_id}")

# Instead of using assistants API, store your assistant's configuration directly

In [16]:
# Plain dict used in place of an Assistants-API assistant object.
assistant_config = {
    "name": "Test Assistant",
    "instructions": "You are a helpful assistant that answers questions concisely.",
    "model": "nvidia/llama-3.1-nemotron-70b-instruct",
}


def create_chat_completion(prompt):
    """Start a streaming chat completion for *prompt* using ``assistant_config``.

    The configured instructions are sent as the system message, followed by
    *prompt* as the user message. The object returned by
    ``client.chat.completions.create`` is handed back without being
    iterated; consuming the stream is left to the caller.
    """
    conversation = [
        {"role": "system", "content": assistant_config["instructions"]},
        {"role": "user", "content": prompt},
    ]
    return client.chat.completions.create(
        model=assistant_config["model"],
        messages=conversation,
        stream=True,
        max_tokens=1024,
        top_p=0.7,
        temperature=0.2,
    )



# Managing Conversations with Threads

Threads maintain conversation context and handle message flow:

## Create conversation container
```python
thread = client.beta.threads.create()
```
## Add message to thread
```python
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Query"
)
```

## Process with assistant
```python
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant_id
)
```

- Thread acts as conversation container
- Messages are added sequentially
- Run executes assistant processing
- Includes status polling and response handling


In [17]:
# Example prompt for an assistant run; echoed so it appears in the cell output.
assistant_prompt = "What is the capital of France?"
print("Assistant Prompt: " + assistant_prompt)

Assistant Prompt: What is the capital of France?


## NVIDIA doesn't support threads yet

With NVIDIA's API, you don't create threads or messages the way you do with OpenAI's Assistants API. Instead, you simply send the messages as part of the chat completion request.

In [5]:
# Conversation history kept in a plain list instead of a thread.
messages = []


def chat(prompt):
    """Send *prompt* together with the stored history and stream the reply.

    The user turn is appended to the module-level ``messages`` list before
    the request. The reply is printed piece by piece as it arrives, then
    appended to ``messages`` as the assistant turn.

    Returns:
        The complete reply text.
    """
    messages.append({"role": "user", "content": prompt})

    stream = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-instruct",
        messages=messages,  # full history, including the turn just added
        stream=True,
        max_tokens=1024,
        top_p=0.7,
        temperature=0.2,
    )

    pieces = []
    for chunk in stream:
        text = chunk.choices[0].delta.content
        if text is not None:
            pieces.append(text)
            print(text, end='', flush=True)
    response_text = "".join(pieces)

    messages.append({"role": "assistant", "content": response_text})
    return response_text

In [None]:
# Two consecutive turns. chat() appends each prompt and reply to the shared
# `messages` list, so the second request is sent with the first exchange.
response = chat("how are you")
response2 = chat("whats your name ")

The same code can't be used with NVIDIA's API because it relies on OpenAI's Assistants API features (threads, runs, etc.), which NVIDIA doesn't support. However, we can achieve similar functionality with NVIDIA's API using streaming responses. Here's the equivalent code:

In [None]:
your_prompt = "What is the capital of France?"
import time


def get_assistant_response(prompt):
    """Send *prompt* with the shared history and stream back the reply.

    The user turn is appended to the module-level ``messages`` list, the
    reply is printed as it streams in, and on success the reply is appended
    to ``messages`` as the assistant turn.

    Returns:
        The complete reply text, or ``None`` if the request or the stream
        raised an exception (the error is printed).
    """
    user_turn = {"role": "user", "content": prompt}
    messages.append(user_turn)

    try:
        # Get streaming response
        response = client.chat.completions.create(
            model="nvidia/llama-3.1-nemotron-70b-instruct",
            messages=messages,
            temperature=0.2,
            top_p=0.7,
            max_tokens=1024,
            stream=True
        )

        # Process streaming response
        assistant_response = ""
        print("Assistant Response:")
        for chunk in response:
            if chunk.choices[0].delta.content is not None:
                assistant_response += chunk.choices[0].delta.content
                print(chunk.choices[0].delta.content, end='', flush=True)

    except Exception as e:
        # Fix: drop the unanswered user turn so a failed call does not leave
        # the history with a user message that has no assistant reply.
        if messages and messages[-1] is user_turn:
            messages.pop()
        print("Assistant run failed!")
        print(f"Error message: {str(e)}")
        return None

    # Add assistant's response to conversation history
    messages.append({
        "role": "assistant",
        "content": assistant_response
    })
    return assistant_response


# Use it like this:
response = get_assistant_response(your_prompt)
print("\n\nAssistant interaction completed.\n")

# Research Assistant with Advanced Tools
Creates an enhanced assistant with file processing and analysis capabilities:
```python
# Download and process research papers
local_pdf_paths = download_pdfs(pdf_urls)

# Create assistant with tools
assistant = client.beta.assistants.create(
    tools=[{"type": "file_search"}, {"type": "code_interpreter"}]
)

# Set up vector store for document search
vector_store = client.beta.vector_stores.create()
```
- Handles PDF download and processing
- Enables file search capabilities
- Adds code interpretation
- Creates vector embeddings for efficient search
- Integrates all components for research tasks

## download and save documents

In [21]:
print("\n## Research Assistant Creation\n")

# Define PDF URLs
pdf_urls = [
    "https://arxiv.org/pdf/1706.03762",  # Attention Is All You Need
    "https://arxiv.org/pdf/2412.21187",  # Do NOT Think That Much for 2+3=? On the Overthinking of o1-Like
]

# Fix: create the target folder up front; open(..., "wb") below does not
# create missing directories and would raise FileNotFoundError.
os.makedirs("research_folder", exist_ok=True)

# The extension is set manually because these URLs end in a number, not
# ".pdf". (The earlier splitext() result was always overwritten, so that
# dead computation has been removed.)
file_extension = ".pdf"

# Download PDFs and save locally
local_pdf_paths = []
for i, url in enumerate(pdf_urls):
    local_path = f"research_folder/research_doc_{i+1}{file_extension}"
    try:
        print(f"Downloading PDF from: {url}")

        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(url, allow_redirects=True, timeout=30)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

        # Save PDF
        with open(local_path, "wb") as f:
            f.write(response.content)
    except requests.exceptions.RequestException as e:
        # Must come before OSError: RequestException is itself an OSError.
        print(f"Failed to download file from {url} error: {e}")
    except OSError as e:
        print(f"Failed to save {local_path}: {e}")
    else:
        # Only record paths that were actually written.
        local_pdf_paths.append(local_path)
        print(f"Downloaded and saved to: {local_path}")


## Research Assistant Creation

Downloading PDF from: https://arxiv.org/pdf/1706.03762
Downloaded and saved to: research_folder/research_doc_1.pdf
Downloading PDF from: https://arxiv.org/pdf/2412.21187
Downloaded and saved to: research_folder/research_doc_2.pdf


In [9]:
import json
import os
from typing import List, Dict

class ResearchAssistant:
    """Chat helper with its own message history and an in-memory file store.

    API calls go through the module-level ``client``.
    """

    def __init__(self, model="nvidia/llama-3.1-nemotron-70b-instruct"):
        self.model = model
        self.messages = []  # user/assistant turns, oldest first
        self.files = {}  # file path -> text content
        self.system_prompt = """You are a helpful research assistant that can:
1. Search through provided document content
2. Execute and explain code
Please provide clear explanations and cite specific documents when answering."""

    def add_file(self, file_path: str, content: str):
        """Store *content* under *file_path*, replacing any earlier entry."""
        self.files[file_path] = content

    def search_files(self, query: str) -> List[Dict]:
        """Return every stored file whose text contains *query*.

        Matching is a case-insensitive substring test on the whole query
        string. Each hit is a dict with ``"file"`` and ``"content"`` keys.
        """
        return [
            {"file": path, "content": text}
            for path, text in self.files.items()
            if query.lower() in text.lower()
        ]

    def chat(self, user_message: str):
        """Send *user_message* (plus any matching file excerpts) and stream the reply.

        Stored files are searched only when the message mentions one of the
        trigger words. The first 500 characters of each hit are placed ahead
        of the question. Both the sent prompt and the reply are appended to
        ``self.messages``.

        Returns:
            The complete reply text.
        """
        # NOTE(review): the whole message is used as the search query, so a
        # file only matches if it contains the message verbatim.
        trigger_words = ("file", "document", "read", "search")
        lowered = user_message.lower()

        file_context = ""
        if any(word in lowered for word in trigger_words):
            hits = self.search_files(user_message)
            if hits:
                excerpts = [f"From {r['file']}:\n{r['content'][:500]}..." for r in hits]
                file_context = "\nRelevant document contents:\n" + "\n".join(excerpts)

        if file_context:
            full_prompt = f"{file_context}\n\nUser question: {user_message}"
        else:
            full_prompt = user_message

        self.messages.append({"role": "user", "content": full_prompt})

        # The system prompt is prepended on every request; it is not stored
        # in self.messages.
        stream = client.chat.completions.create(
            model=self.model,
            messages=[{"role": "system", "content": self.system_prompt}] + self.messages,
            stream=True,
            max_tokens=1024,
            top_p=0.7,
            temperature=0.2,
        )

        pieces = []
        for chunk in stream:
            text = chunk.choices[0].delta.content
            if text is not None:
                pieces.append(text)
                print(text, end='', flush=True)
        response_text = "".join(pieces)

        self.messages.append({"role": "assistant", "content": response_text})
        return response_text

# Usage example:
assistant = ResearchAssistant()


def add_research_file(file_path):
    """Read *file_path* as UTF-8 text and store it under its base name."""
    with open(file_path, 'r', encoding='utf-8') as f:
        text = f.read()
    assistant.add_file(os.path.basename(file_path), text)


# No file has been added to `assistant` at this point, so the question is
# sent without any document context.
response = assistant.chat("What information can you find about X in the documents?")

Since this is the initial query, I'll outline the steps I'd typically follow to provide a helpful response. However, I need a bit more context from you to proceed effectively. Please provide the following to enable me to assist you thoroughly:

1. **Documents**: Share the document content(s) you'd like me to search through. This could be:
	* Text pasted into this chat window.
	* A link to publicly accessible documents (e.g., PDFs, web pages).
	* A clear description of the document type if you cannot share the content directly (in which case, I'll guide on how to proceed).
2. **"X" Specification**: Clarify what "X" refers to. Is it:
	* A specific term or keyword?
	* A concept or topic?
	* A name (person, organization, location)?
	* Something else (please specify)?

Once I have this information, I'll execute the following steps to find information about "X" in the provided documents:

### Steps to Find Information About "X"

1. **Document Analysis**:
	* If text is provided, I'll analyze 

NVIDIA's API doesn't support advanced features like file search and code interpreter - it only supports basic chat completions.



In [10]:
# Basic chat without assistants features.
# The request is created with stream=True; this cell only creates the stream
# and does not iterate it.
response = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "You are a helpful research assistant..."},
        {"role": "user", "content": "Your question here"},
    ],
    model="nvidia/llama-3.1-nemotron-70b-instruct",
    stream=True,
    max_tokens=1024,
    top_p=0.7,
    temperature=0.2,
)

In [12]:
def process_pdf_content(local_pdf_paths):
    """Extract the text of each PDF in *local_pdf_paths*.

    Args:
        local_pdf_paths: Iterable of PDF file paths. A single path given as
            a ``str`` is also accepted and treated as a one-element list.

    Returns:
        A list of ``{"filename": path, "content": text}`` dicts, one per
        file that was read successfully. Files that fail are reported and
        skipped. Returns ``[]`` when PyPDF2 is not installed.
    """
    try:
        # You'll need to install PyPDF2 for PDF processing
        from PyPDF2 import PdfReader
    except ImportError:
        print("Please install PyPDF2: pip install PyPDF2")
        return []

    # Fix: a bare string would otherwise be iterated character by character,
    # with every character treated as a file path.
    if isinstance(local_pdf_paths, str):
        local_pdf_paths = [local_pdf_paths]

    document_contents = []
    for local_path in local_pdf_paths:
        print(f"Processing file: {local_path}")
        try:
            reader = PdfReader(local_path)
            # `or ""` guards against a page whose extraction yields nothing.
            content = "".join(page.extract_text() or "" for page in reader.pages)
        except Exception as e:
            # Best effort: report the failure and move on to the next file.
            print(f"Error processing file {local_path}: {e}")
            continue

        document_contents.append({
            "filename": local_path,
            "content": content
        })
        print(f"Successfully processed: {local_path}")

    return document_contents

# Function to use the document contents in chat
def chat_with_documents(prompt, document_contents):
    """Ask *prompt* with excerpts of *document_contents* as context and stream the reply.

    Each entry of *document_contents* is read through its ``'filename'`` and
    ``'content'`` keys; only the first 1000 characters of the content are
    sent. The reply is printed as it streams in.

    Returns:
        The complete reply text.
    """
    # You might want to implement better text chunking and selection here
    sections = [
        f"From {doc['filename']}:\n{doc['content'][:1000]}...\n\n"
        for doc in document_contents
    ]
    context = "Here are the relevant documents:\n\n" + "".join(sections)
    full_prompt = f"{context}\n\nUser question: {prompt}"

    stream = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-instruct",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that provides information based on the given documents."},
            {"role": "user", "content": full_prompt},
        ],
        stream=True,
        max_tokens=1024,
        top_p=0.7,
        temperature=0.2,
    )

    pieces = []
    for chunk in stream:
        text = chunk.choices[0].delta.content
        if text is not None:
            pieces.append(text)
            print(text, end='', flush=True)

    return "".join(pieces)

# Usage:
# Fix: process_pdf_content loops over its argument, so the single path is
# wrapped in a list instead of being passed as a bare string.
document_contents = process_pdf_content(["research_folder/research_doc_1.pdf"])
response = chat_with_documents("What do these documents say about X?", document_contents)

Please install PyPDF2: pip install PyPDF2
A very straightforward scenario!

Unfortunately, I must provide a straightforward response as well, since the "relevant documents" you mentioned are:

* **Empty**: There are no documents provided for me to analyze.

To assist you effectively, could you please share the actual documents (e.g., text, summaries, or even just key points from them) related to "X"? Once I have this information, I'll be delighted to:

1. Review the documents
2. Extract relevant information about "X"
3. Provide a clear, concise answer to your question: "What do these documents say about X?"

# Create a vector store and add the files to it

NVIDIA doesn't provide a vector store service like OpenAI does. However, you can create a local vector store using libraries like FAISS or Chroma.

In [None]:
pip install langchain faiss-cpu PyPDF2

In [14]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
import os

def create_vector_store(pdf_paths, store_name="Research_Documents", embeddings=None):
    """Build a FAISS vector store from PDFs and save it under ``vector_stores/``.

    Args:
        pdf_paths: Iterable of PDF file paths to load.
        store_name: Name used for the saved store, ``vector_stores/{store_name}``.
        embeddings: Embedding object passed to ``FAISS.from_documents``.
            Defaults to ``OpenAIEmbeddings()``, which fails when no OpenAI
            API key is configured; pass another embedding object to avoid
            that dependency.

    Returns:
        The created vector store, or ``None`` if any step raised (the error
        is printed).
    """
    print("\nCreating vector store and adding files...")

    try:
        # Initialize the embedding model (only when the caller gave none)
        if embeddings is None:
            embeddings = OpenAIEmbeddings()

        # Initialize text splitter for chunking documents
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )

        # Load and process documents
        documents = []
        for pdf_path in pdf_paths:
            print(f"Processing: {pdf_path}")
            loader = PyPDFLoader(pdf_path)
            documents.extend(loader.load())

        # Split documents into chunks
        texts = text_splitter.split_documents(documents)

        # Create vector store
        vector_store = FAISS.from_documents(texts, embeddings)

        # Save vector store locally
        vector_store.save_local(f"vector_stores/{store_name}")

        print(f"Vector store created successfully with {len(texts)} chunks")
        return vector_store

    except Exception as e:
        print(f"Error creating vector store: {e}")
        return None

# Usage:
# Placeholder path; replace with real PDF file paths before running.
pdf_paths = ["path/to/your/pdfs"]
# create_vector_store returns None on failure, which the check below skips.
vector_store = create_vector_store(pdf_paths)

# To search the vector store:
if vector_store:
    query = "What do the documents say about X?"
    # Fetch the 3 most similar chunks for the query.
    relevant_docs = vector_store.similarity_search(query, k=3)
    for doc in relevant_docs:
        print(f"Relevant content: {doc.page_content}\n")


Creating vector store and adding files...
Error creating vector store: 1 validation error for OpenAIEmbeddings
  Value error, Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. [type=value_error, input_value={'model_kwargs': {}, 'cli...20, 'http_client': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/value_error


  embeddings = OpenAIEmbeddings()


## Different embedding models

In [None]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from sentence_transformers import SentenceTransformer
import torch
from transformers import AutoTokenizer, AutoModel
import numpy as np

class CustomEmbeddings:
    """Adapter exposing a SentenceTransformer model via ``embed_documents`` / ``embed_query``.

    Instances are also callable: ``obj(text)`` is the same as
    ``obj.embed_query(text)``.
    """

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Encode *texts* and return the result converted to a NumPy array."""
        embeddings = self.model.encode(texts, convert_to_tensor=True)
        return embeddings.cpu().numpy()

    def embed_query(self, text):
        """Encode the single *text* and return its entry from the encoded batch."""
        # The text is encoded as a one-element batch; [0] picks its embedding.
        embedding = self.model.encode([text], convert_to_tensor=True)
        return embedding.cpu().numpy()[0]

    def __call__(self, text):
        """Embed a single query; same as ``embed_query``."""
        # NOTE(review): some LangChain FAISS versions appear to call the
        # embedding object directly when it is not an `Embeddings` subclass.
        # Confirm against the installed version. Delegating here keeps
        # similarity_search working in that case.
        return self.embed_query(text)

def create_vector_store(pdf_paths, embedding_type="sentence-transformer", store_name="Research_Documents"):
    """Build and save a FAISS vector store using a sentence-transformers model.

    Args:
        pdf_paths: Iterable of PDF file paths to load.
        embedding_type: One of ``"sentence-transformer"``, ``"mpnet"``,
            ``"multilingual"`` or ``"e5"``.
        store_name: Prefix of the saved store; it is written to
            ``vector_stores/{store_name}_{embedding_type}``.

    Returns:
        ``(vector_store, embeddings)`` on success, ``(None, None)`` if any
        step raised, including an unknown *embedding_type* (the error is
        printed).
    """
    print(f"\nCreating vector store using {embedding_type} embeddings...")

    # embedding_type -> model name handed to CustomEmbeddings
    model_names = {
        "sentence-transformer": "all-MiniLM-L6-v2",  # good balance of speed and quality
        "mpnet": "all-mpnet-base-v2",  # better quality but slower
        "multilingual": "paraphrase-multilingual-MiniLM-L12-v2",  # multiple languages
        "e5": "intfloat/e5-large-v2",  # one of the best performing models
    }

    try:
        if embedding_type not in model_names:
            raise ValueError(f"Unknown embedding type: {embedding_type}")
        embeddings = CustomEmbeddings(model_names[embedding_type])

        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

        # Load every PDF, then split the loaded documents into chunks.
        pages = []
        for pdf_path in pdf_paths:
            print(f"Processing: {pdf_path}")
            pages.extend(PyPDFLoader(pdf_path).load())
        chunks = splitter.split_documents(pages)

        vector_store = FAISS.from_documents(chunks, embeddings)
        vector_store.save_local(f"vector_stores/{store_name}_{embedding_type}")

        print(f"Vector store created successfully with {len(chunks)} chunks")
        return vector_store, embeddings

    except Exception as e:
        print(f"Error creating vector store: {e}")
        return None, None

# Function to search the vector store
def search_documents(vector_store, embeddings, query, k=3):
    """Return the *k* chunks of *vector_store* most similar to *query*.

    *embeddings* is accepted but not used by this function. Any exception
    raised by the search is printed and an empty list is returned instead.
    """
    try:
        return vector_store.similarity_search(query, k=k)
    except Exception as e:
        print(f"Error searching documents: {e}")
    return []

# Install required packages:
"""
pip install sentence-transformers transformers torch faiss-cpu PyPDF2
"""

# Usage example:
# Placeholder path; replace with real PDF file paths before running.
pdf_paths = ["path/to/your/pdfs"]

# Try different embedding models:
# Each call returns (vector_store, embeddings), or (None, None) if creation failed.

# 1. Basic Sentence Transformer (fast and good enough for most cases)
vector_store, embeddings = create_vector_store(pdf_paths, "sentence-transformer")

# 2. MPNet (better quality but slower)
vector_store_mpnet, embeddings_mpnet = create_vector_store(pdf_paths, "mpnet")

# 3. Multilingual (good for multiple languages)
vector_store_multi, embeddings_multi = create_vector_store(pdf_paths, "multilingual")

# 4. E5 (high quality)
vector_store_e5, embeddings_e5 = create_vector_store(pdf_paths, "e5")

# Search example
# Only the first (sentence-transformer) store is queried here.
query = "What do the documents say about X?"
results = search_documents(vector_store, embeddings, query)
for doc in results:
    print(f"Relevant content: {doc.page_content}\n")

In [None]:
# Ensure the directory used for saved vector stores exists (no error if it already does).
os.makedirs("vector_stores", exist_ok=True)

In [None]:
# Fix: create_vector_store saves to "vector_stores/{store_name}_{embedding_type}",
# so the store built with the default name and the "sentence-transformer"
# embeddings (the `embeddings` object used here) is under this path, not
# "vector_stores/Research_Documents".
# NOTE(review): recent LangChain releases may also require
# allow_dangerous_deserialization=True here. Confirm against the installed version.
loaded_vector_store = FAISS.load_local("vector_stores/Research_Documents_sentence-transformer", embeddings)

NVIDIA doesn't support OpenAI's Assistants API features like vector stores and tool resources. However, you can implement similar functionality using a local vector store.

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import numpy as np

class DocumentSearchAssistant:
    """Answer questions using chunks retrieved from a local FAISS vector store.

    API calls go through the module-level ``client``.
    """

    def __init__(self, model="nvidia/llama-3.1-nemotron-70b-instruct", embeddings=None):
        """Set up the splitter and embedding model; the store starts empty.

        Args:
            model: Chat model name sent with every request.
            embeddings: Embedding object handed to FAISS. Defaults to
                ``OpenAIEmbeddings()``, which needs an OpenAI API key; pass
                any other embedding object to avoid that requirement.
        """
        self.model = model
        self.messages = []  # not used by any method of this class
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        # Use any embedding model of your choice
        self.embeddings = embeddings if embeddings is not None else OpenAIEmbeddings()
        self.vector_store = None  # created lazily by add_documents

    def add_documents(self, documents):
        """Split *documents* into chunks and add them to the vector store.

        The store is created on the first call and extended afterwards.
        Errors are printed rather than raised.
        """
        try:
            # Split documents into chunks
            texts = self.text_splitter.split_documents(documents)

            # Create or update vector store
            if self.vector_store is None:
                self.vector_store = FAISS.from_documents(texts, self.embeddings)
            else:
                self.vector_store.add_documents(texts)

            print(f"Successfully added {len(texts)} document chunks to vector store")

        except Exception as e:
            print(f"Error adding documents to vector store: {e}")

    def search_documents(self, query, k=3):
        """Return the *k* most similar chunks, or ``[]`` if nothing was added yet."""
        if self.vector_store is None:
            return []

        return self.vector_store.similarity_search(query, k=k)

    def chat(self, user_query):
        """Answer *user_query* with retrieved chunks as context, streaming the reply.

        Returns:
            The complete reply text, or ``None`` if the request or stream
            raised (the error is printed).
        """
        # Search for relevant documents
        relevant_docs = self.search_documents(user_query)

        # Prepare context; it stays empty when no chunk was found
        context = ""
        if relevant_docs:
            context = "Relevant information from documents:\n"
            for doc in relevant_docs:
                context += f"{doc.page_content}\n\n"

        # Each request is self-contained: system message plus one user turn
        messages = [
            {"role": "system", "content": "You are a helpful assistant that provides information based on the given documents."},
            {"role": "user", "content": f"{context}\nQuestion: {user_query}"}
        ]

        # Get response using NVIDIA's API
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=0.2,
                top_p=0.7,
                max_tokens=1024,
                stream=True
            )

            # Handle streaming response
            response_text = ""
            print("Assistant: ", end='', flush=True)
            for chunk in response:
                if chunk.choices[0].delta.content is not None:
                    response_text += chunk.choices[0].delta.content
                    print(chunk.choices[0].delta.content, end='', flush=True)

            return response_text

        except Exception as e:
            print(f"Error getting response: {e}")
            return None

# Usage:
# Rebinds `assistant`, which an earlier cell bound to a ResearchAssistant.
assistant = DocumentSearchAssistant()

# Add documents (you'll need to prepare the documents first)
from langchain.document_loaders import PyPDFLoader
documents = []
# `pdf_paths` is not defined in this cell; it must come from an earlier one.
for pdf_path in pdf_paths:
    loader = PyPDFLoader(pdf_path)
    documents.extend(loader.load())

assistant.add_documents(documents)

# Chat with document context
response = assistant.chat("What do the documents say about X?")

# Advanced Run Analysis and Monitoring

Provides detailed insight into assistant's processing steps:

```python
run_steps = client.beta.threads.runs.steps.list(
    thread_id=thread.id,
    run_id=run.id
)
```

- Tracks execution progress
- Shows tool usage details
- Reveals thinking/reasoning steps
- Helps debug and optimize interactions
- Monitors file processing and code execution

## Key features:

- Step-by-step execution tracking
- Tool call monitoring
- Response generation analysis
- Error handling and status checks


The code above cannot be used with NVIDIA's API because it uses OpenAI's Assistants API features (threads, messages). Here's how to achieve the same functionality using NVIDIA's API:

In [17]:
def chat_with_nvidia(prompt):
    """Send *prompt* as a single-turn request and stream the reply to stdout.

    No conversation history is kept between calls.

    Returns:
        The complete reply text, or ``None`` if the request or stream raised
        (the error is printed).
    """
    print("\n## Running Chat with Custom Prompt\n")
    print(f"User Prompt: {prompt}")

    conversation = [
        {"role": "system", "content": "You are a helpful assistant knowledgeable about research papers and technical topics."},
        {"role": "user", "content": prompt},
    ]

    try:
        stream = client.chat.completions.create(
            model="nvidia/llama-3.1-nemotron-70b-instruct",
            messages=conversation,
            stream=True,
            max_tokens=1024,
            top_p=0.7,
            temperature=0.2,
        )

        print("\nAssistant Response:")
        pieces = []
        for chunk in stream:
            text = chunk.choices[0].delta.content
            if text is not None:
                pieces.append(text)
                print(text, end='', flush=True)

        return "".join(pieces)

    except Exception as e:
        print(f"Error: {str(e)}")
        return None

# Use it like this:
# The prompt is sent on its own; chat_with_nvidia attaches no document text.
custom_prompt = "Summarize the key findings of the Attention is all you need paper."
response = chat_with_nvidia(custom_prompt)


## Running Chat with Custom Prompt

User Prompt: Summarize the key findings of the Attention is all you need paper.

Assistant Response:
A seminal paper in the field of Natural Language Processing (NLP) and Deep Learning!

Here's a concise summary of the key findings from the paper:

**Title:** "Attention Is All You Need" by Vaswani et al. (2017)

**Background:** The paper introduces the Transformer model, which revolutionized sequence-to-sequence tasks by replacing traditional Recurrent Neural Networks (RNNs) and Convolutional Neural Networks (CNNs) with self-attention mechanisms.

**Key Findings:**

1. **Transformer Architecture:** The authors propose the Transformer model, which solely relies on **self-attention mechanisms** (no RNNs or CNNs) for sequence-to-sequence tasks, such as machine translation.
2. **Self-Attention Mechanism:** The paper popularizes the **scaled dot-product attention** mechanism, which:
	* Allows the model to weigh the importance of different input elements 