In [12]:
import ollama
import PyPDF2
import numpy as np

ollama.list()

{'models': [{'name': 'nomic-embed-text:latest',
   'model': 'nomic-embed-text:latest',
   'modified_at': '2024-11-16T13:50:03.4460443+01:00',
   'size': 274302450,
   'digest': '0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f',
   'details': {'parent_model': '',
    'format': 'gguf',
    'family': 'nomic-bert',
    'families': ['nomic-bert'],
    'parameter_size': '137M',
    'quantization_level': 'F16'}},
  {'name': 'llama3.1:8b-instruct-q4_0',
   'model': 'llama3.1:8b-instruct-q4_0',
   'modified_at': '2024-11-16T13:48:48.5783556+01:00',
   'size': 4661230766,
   'digest': '42182419e9508c30c4b1fe55015f06b65f4ca4b9e28a744be55008d21998a093',
   'details': {'parent_model': '',
    'format': 'gguf',
    'family': 'llama',
    'families': ['llama'],
    'parameter_size': '8.0B',
    'quantization_level': 'Q4_0'}}]}

In [13]:
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file_path: str):
    text = ""
    with open(pdf_file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        for page in reader.pages:
            text += page.extract_text()
    return text

In [14]:
# Function to chunk the extracted text
def chunk_text(text: str, chunk_size=500):
    # Split text into chunks of specified size
    words = text.split()
    chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
    return chunks

In [15]:
# Function to calculate cosine similarity between two vectors
def cosine_similarity(vec1, vec2):
    # Compute dot product
    dot_product = np.dot(vec1, vec2)
    
    # Compute magnitude of the vectors
    magnitude_vec1 = np.sqrt(np.dot(vec1, vec1))
    magnitude_vec2 = np.sqrt(np.dot(vec2, vec2))
    
    # Avoid division by zero in case of zero vectors
    if magnitude_vec1 == 0 or magnitude_vec2 == 0:
        return 0.0
    
    # Compute cosine similarity
    cosine_similarity = dot_product / (magnitude_vec1 * magnitude_vec2)
    return cosine_similarity

In [16]:
# Function to calculate cosine similarity and find the top N matching chunks
def find_top_n_chunks(question_embedding, chunk_embeddings, top_n=3):
    # Calculate cosine similarities manually
    similarities = [cosine_similarity(question_embedding, chunk_embedding) for chunk_embedding in chunk_embeddings]
    
    # Get the indices of the top N chunks with the highest cosine similarities
    top_n_indices = np.argsort(similarities)[-top_n:][::-1]
    
    # Return the indices and corresponding similarity scores
    top_n_chunks = [(idx, similarities[idx]) for idx in top_n_indices]
    return top_n_chunks


In [17]:
# Main function to handle RAG
def rag_ollama(pdf_file_path: str, questions: list, chunk_size: int, top_n: int,  model_name: str="nomic-embed-text:latest"):
    # Step 1: Extract text from the PDF
    pdf_text = extract_text_from_pdf(pdf_file_path)
    
    # Step 2: Chunk the PDF text
    pdf_chunks = chunk_text(pdf_text, chunk_size)
    
    # Step 3: Embed the PDF chunks
    chunk_embeddings = ollama.embed(model=model_name, input=pdf_chunks)["embeddings"]
    
    # Step 4: Embed the questions
    question_embeddings = ollama.embed(model=model_name, input=questions)["embeddings"]
    
    # Step 5: Find the top N matching chunks for each question
    results = []
    for i, question_embedding in enumerate(question_embeddings):
        top_n_chunks = find_top_n_chunks(question_embedding, chunk_embeddings, top_n)
        results.append({
            'question': questions[i],
            'top_chunks': [(pdf_chunks[idx], similarity) for idx, similarity in top_n_chunks]
        })
    
    return results

In [18]:
def convert_to_llm_conversation(question: str, top_chunks: list[str]):
    messages = []
    chunks = "\n\n".join([x[0] for x in top_chunks])
    content = f"""You will always answer based on these chunks. If the user ask for a document, these chunks are the document:\n{chunks}\n\n{question}"""
    messages.append({"role": "user", "content": content})
    
    return messages

In [19]:
def generate(question: str, top_chunks: list[str], model: str = "llama3.1:8b-instruct-q4_0"):
    messages = convert_to_llm_conversation(question, top_chunks)
    
    return ollama.chat(model=model, messages=messages)

In [20]:
# Example usage:
filename = "./media/AgentQ.pdf"
questions = [
    "What is the main topic discussed in the document?",
    "Can you explain the methodology used?",
    "Large Language Models (LLMs) have shown remarkable capabilities in natural language tasks requiring complex reasoning, yet their application in agentic, multi-step reasoning within interactive environments remains a difficult challenge. "
]

# Get the answers
answers = rag_ollama(filename, questions, chunk_size=1000, top_n=3)

for answer in answers:
    print(f"Question: {answer['question']}")
    for idx, (chunk, similarity) in enumerate(answer['top_chunks'], 1):
        print(f"Top {idx} matching chunk (Score: {similarity:.4f}):\n{chunk}\n")
    
    llm_response = generate(answer['question'], answer['top_chunks'])
    print(llm_response["message"]["content"])
    print("-"*50)

Question: What is the main topic discussed in the document?
Top 1 matching chunk (Score: 0.4559):
suitable. Recent works such as Gandhi et al. (2024); Lehnert et al. (2024) have even suggested directly learning to optimally search and explore in reasoning tasks using meta-reinforcement learning. We believe this is a promising research direction for autonomous agents, which we will pursue in further work. Discrepancy between zero-shot vs search results. Similar to some recent works that focus on code and reasoning, we observe significant gap between zero-shot agent performance and performance of the agent equipped with search capabilities Brown et al. (2024); Snell et al. (2024). Investigating these trade-offs at scale and the potential effect of different search/optimization approaches. Online safety and interaction. The design of agent Q allows for largely autonomous exploration, self-evaluation and improvement with limited human intervention. However, the agent might make a significa

In [21]:
from pymongo import MongoClient

In [60]:
username = "mongoadmin"  
password = "super-save-password"  
host = "localhost"          
port = 27017                
database_name = "vector_store"  

# Construct the MongoDB URI with authentication
connection_string = f"mongodb://{username}:{password}@{host}:{port}"

# Connect to MongoDB
client = MongoClient(connection_string, connect=True)
database = client.get_database(database_name)
collection = database.get_collection("lecture_embeddings")

In [61]:
import pymongo.collection

def insert_embedding(embedding: list[float], text: str, collection: pymongo.collection.Collection):

    # Document to insert
    document = {
        "vector": embedding,  # The vector field
        "magnitude": sum(x**2 for x in embedding)**0.5,  # Optional: precompute the vector's magnitude
        "text": text
    }

    # Insert the document
    result = collection.insert_one(document)

    # Print the result
    print(f"Inserted document with ID: {result.inserted_id}")

In [97]:
filename = "./media/AgentQ.pdf"
chunk_size = 1000
model_name: str="nomic-embed-text:latest"

pdf_text = extract_text_from_pdf(filename)
    
    # Step 2: Chunk the PDF text
pdf_chunks = chunk_text(pdf_text, chunk_size)

# Step 3: Embed the PDF chunks
chunk_embeddings = ollama.embed(model=model_name, input=pdf_chunks)["embeddings"]

for idx, chunk_embedding in enumerate(chunk_embeddings):
    insert_embedding(chunk_embedding, pdf_chunks[idx], collection)

Inserted document with ID: 6738af3bcfefad7a72de04ec
Inserted document with ID: 6738af3bcfefad7a72de04ed
Inserted document with ID: 6738af3bcfefad7a72de04ee
Inserted document with ID: 6738af3bcfefad7a72de04ef
Inserted document with ID: 6738af3bcfefad7a72de04f0
Inserted document with ID: 6738af3ccfefad7a72de04f1
Inserted document with ID: 6738af3ccfefad7a72de04f2
Inserted document with ID: 6738af3ccfefad7a72de04f3
Inserted document with ID: 6738af3ccfefad7a72de04f4
Inserted document with ID: 6738af3ccfefad7a72de04f5
Inserted document with ID: 6738af3ccfefad7a72de04f6


In [None]:
def fetch_chunks_from_mongo(question: str, collection: pymongo.collection.Collection, model_name: str = "nomic-embed-text:latest", k_entries: int = 5):
    question_embedding = ollama.embed(model=model_name, input=question)["embeddings"][0]
    query_magnitude = np.sqrt(np.sum(np.square(question_embedding)))
    
    pipeline =  [
        {
            # Compute the dot product
            "$addFields": {
                "dot_product": {
                    "$sum": {
                        "$map": {
                            "input": {"$range": [0, {"$size": "$vector"}]},  # Iterate over indices
                            "as": "index",
                            "in": {
                                "$multiply": [
                                    {"$arrayElemAt": ["$vector", "$$index"]},
                                    {"$arrayElemAt": [question_embedding, "$$index"]}
                                ]
                            }
                        }
                    }
                }
            }
        },
        {
            # Compute magnitude of stored vectors dynamically if not precomputed
            "$addFields": {
                "vector_magnitude": {
                    "$sqrt": {
                        "$sum": {
                            "$map": {
                                "input": "$vector",
                                "as": "x",
                                "in": {"$multiply": ["$$x", "$$x"]}
                            }
                        }
                    }
                }
            }
        },
        {
            # Compute cosine similarity
            "$addFields": {
                "cosine_similarity": {
                    "$cond": {
                        "if": {"$and": [{"$gt": ["$vector_magnitude", 0]}, {"$gt": [query_magnitude, 0]}]},
                        "then": {
                            "$divide": [
                                "$dot_product",
                                {"$multiply": ["$vector_magnitude", query_magnitude]}
                            ]
                        },
                        "else": 0
                    }
                }
            }
        },
        {
            # Sort by cosine similarity in descending order
            "$sort": {"cosine_similarity": -1}
        },
        {
            # Limit the number of results
            "$limit": k_entries
        },
        {
            # Project the fields you want in the output
            "$project": {"_id": 1, "cosine_similarity": 1, "text": 1}
        }
    ]
    
    results = list(collection.aggregate(pipeline))

    return results

In [98]:
question = "Who wrote the paper 'attention is all you need'?"

chunks = fetch_chunks_from_mongo(question, collection)
prepped_chunks = [(x["text"], x["cosine_similarity"]) for x in chunks]

In [99]:

print("From DB Pipeline")
for chunk, score in prepped_chunks:
    print(chunk[:10], f"score: {score}")
print("-"*50)
print("\nFrom Script")
for answer in rag_ollama(filename, [question], chunk_size=1000, top_n=5):
    for idx, (chunk, score) in enumerate(answer['top_chunks'], 1):
        print(chunk[:10], f"score: {score}")

From DB Pipeline
is auto-re score: 0.5308237258832028
for LSTM n score: 0.5285202409229166
and observ score: 0.497049528147839
the layers score: 0.47391614027930395
English-Fr score: 0.4373282603842479
--------------------------------------------------

From Script
Zhou, and  score: 0.4321475263448718
Agent Q: A score: 0.41545025954904324
suitable.  score: 0.41537031717508704
successful score: 0.40572579791169966
tree from  score: 0.3973562382018639


In [94]:
question = "Who wrote the paper 'attention is all you need'?"

llm_response = generate(question, [(x["text"], x["cosine_similarity"]) for x in fetch_chunks_from_mongo(question, collection)])
print(llm_response["message"]["content"])

The authors of the paper "Attention Is All You Need" are:

* Vaswani, Ashish
* Shazeer, Noam
* Parmar, Niki
* Uszkoreit, Jakob
* Jones, Llion
* Gomez, Ajay
* Kaiser, Luke
* Polosukhin, Ilya

This paper was published in 2017 and introduced the Transformer architecture, which is the core of the model described in the text you provided.
