In [1]:
import os
os.environ['USER_AGENT'] = 'chrome'

import re
import json
import pandas as pd

from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

from ragas import evaluate


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from datetime import datetime
# Run-level timestamp (day-month-year_hour-minute-second), captured once when
# this cell executes; a later cell embeds it in the name of the saved CSV.
timestamp = format(datetime.now(), "%d-%m-%Y_%H-%M-%S")

In [3]:
from datasets import Dataset

In [None]:
# Source pages to ingest into the vector store.
# They are only downloaded when the Chroma collection is found to be empty
# (see the ingestion guard in the vector-store setup cell).
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]


In [5]:
# Step 1: set up the embedding model and the persisted Chroma store.
persist_directory = "./chroma_db"

# Embedding model served through Ollama (progress bar enabled).
ollama_embeddings = OllamaEmbeddings(model="all-minilm", show_progress=True)

# LangChain Chroma vector store backed by the directory above.
chroma_store = Chroma(
    embedding_function=ollama_embeddings,
    persist_directory=persist_directory,
)

# Step 2: count what is already stored so the ingestion below runs only when
# the collection is empty. NOTE(review): this reads the private `_collection`
# attribute of the store.
existing_documents = chroma_store._collection.count()

if existing_documents != 0:
    print(f"ChromaDB already populated with {existing_documents} documents, skipping embedding generation.")
else:
    print("ChromaDB is empty, loading documents and generating embeddings...")

    # One loader call per URL; each call yields a list of documents,
    # which is then flattened into a single list.
    docs = [WebBaseLoader(url).load() for url in urls]
    docs_list = [item for sublist in docs for item in sublist]

    # Token-based splitter: 350-token chunks with a 75-token overlap.
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=350,
        chunk_overlap=75,
    )

    # Chunk every loaded document.
    doc_splits = text_splitter.split_documents(docs_list)

    print(f"Number of document chunks: {len(doc_splits)}")

    # Store the chunks in the Chroma collection.
    chroma_store.add_documents(doc_splits)

  ollama_embeddings = OllamaEmbeddings(model="all-minilm", show_progress=True)


ChromaDB already populated with 30 documents, skipping embedding generation.


In [None]:
# Number of chunks to retrieve per query.
K = 4

# The result count has to be passed through `search_kwargs`. A bare `k=`
# keyword to `as_retriever` is not a retriever setting, so changing K would
# otherwise have no effect on how many chunks come back.
retriever = chroma_store.as_retriever(search_kwargs={"k": K})

In [None]:
# Prompt for the answer-generation step. It takes two variables, {question}
# and {documents}; the template text (including its leading indentation) is
# kept exactly as written.
prompt = PromptTemplate(
    input_variables=["question", "documents"],
    template="""You are an assistant for question-answering tasks.
    Use the following documents to answer the question.
    Keep your response strictly limited to the information provided
    in the documents. Do not add any additional or related information
    to the question outside of what is in the documents. Limit your 
    response to 3 to 6 lines maximum.

    Question: {question}
    Documents: {documents}
    Answer:
    """,
)

In [None]:
# Chat model used for answer generation (llama3.2:3b via Ollama).
# temperature=0 and a fixed seed are set, presumably to keep generations
# repeatable between runs.
llm = ChatOllama(model="llama3.2:3b", temperature=0, seed=42)

# Pipeline: prompt template -> chat model -> string output parser.
rag_chain = prompt | llm | StrOutputParser()

# Define the RAG application class
class RAGApplication:
    """Minimal retrieval-augmented QA pipeline.

    NOTE(review): a later cell in this notebook defines `RAGApplication`
    again (with context tracking); once that cell runs it shadows this one.

    Args:
        retriever: object exposing ``invoke(question)`` that returns an
            iterable of documents, each with a ``page_content`` attribute.
        rag_chain: object exposing ``invoke(mapping)`` that accepts the keys
            ``"question"`` and ``"documents"`` and returns the answer.
    """

    def __init__(self, retriever, rag_chain):
        self.retriever = retriever
        self.rag_chain = rag_chain

    def run(self, question):
        """Retrieve documents for ``question`` and return the chain's answer."""
        # Retrieve relevant documents
        documents = self.retriever.invoke(question)
        # Join chunk texts with a real newline. The previous separator was the
        # two-character sequence backslash + 'n', which glued chunks together
        # with a visible "\n" token in the prompt instead of a line break.
        doc_texts = "\n".join(doc.page_content for doc in documents)
        # Get the answer from the language model
        return self.rag_chain.invoke({"question": question, "documents": doc_texts})

In [9]:
# Smoke test: ask a single question end to end and show the result.
question = "What is tree of thoughts?"

# Wire the retriever and the generation chain together.
rag_application = RAGApplication(retriever, rag_chain)
answer = rag_application.run(question)

print("Question:", question)
print("Answer:", answer)

OllamaEmbeddings: 100%|██████████| 1/1 [00:03<00:00,  3.07s/it]


Question: What is tree of thoughts?
Answer: The "Chain of Thought" is a method to repeatedly prompt the model to ask following-up questions to construct the thought process iteratively, as described in Self-Ask (Press et al. 2022).


In [10]:
# Load the evaluation test set from a path relative to this notebook.
# Later cells read its 'Query' column (questions to run through the pipeline)
# and its 'Answer' column (used as the reference answer).
test_set = pd.read_csv('../data/test_set_prompt_eng.csv')

In [None]:
class RAGApplication:
    """Retrieval-augmented QA pipeline that remembers the last retrieved context.

    Args:
        retriever: object exposing ``invoke(question)`` that returns an
            iterable of documents, each with a ``page_content`` attribute.
        rag_chain: object exposing ``invoke(mapping)`` that accepts the keys
            ``"question"`` and ``"documents"`` and returns the answer.

    Attributes:
        retrieved_context: joined text of the documents retrieved for the
            most recent question, or ``None`` before the first retrieval.
    """

    def __init__(self, retriever, rag_chain):
        self.retriever = retriever
        self.rag_chain = rag_chain
        self.retrieved_context = None

    def retrieve(self, question):
        """Fetch documents for ``question`` and store their joined text.

        Returns:
            The joined context string (also kept on ``self.retrieved_context``
            so existing callers that read the attribute keep working).
        """
        documents = self.retriever.invoke(question)
        # Join chunk texts with a real newline. The previous separator was the
        # two-character sequence backslash + 'n', which put a visible "\n"
        # token between chunks instead of a line break.
        self.retrieved_context = "\n".join(doc.page_content for doc in documents)
        return self.retrieved_context

    def run(self, question):
        """Retrieve context for ``question`` and return the chain's answer."""
        context = self.retrieve(question)
        return self.rag_chain.invoke({"question": question, "documents": context})

In [14]:
# Re-create the application so it is an instance of the RAGApplication class
# defined in the preceding cell (the one that records `retrieved_context`).
rag_application = RAGApplication(retriever, rag_chain)

In [16]:
# Preview the first rows of the test set.
test_set.head()

Unnamed: 0,Query,Answer
0,What is prompt engineering?,"Prompt Engineering, also known as In-Context P..."
1,What are the basic approaches for prompting a ...,Zero-shot and few-shot learning are two of the...
2,What are the issues in few-shot learning that ...,(1) Majority label bias exists if the distribu...
3,What is Chain-of-Thought (CoT) prompting?,Chain-of-thought (CoT) prompting generates a s...
4,What are the types of Chain-of-Thought prompts?,Two main types of CoT prompting:\n\nFew-shot C...


In [17]:
# Run every test query through the pipeline, keeping both the generated
# answer and the context that was retrieved for it. The context is read from
# the application right after each run, so the two lists stay aligned.
answers, contexts = [], []
for query in test_set['Query']:
    answers.append(rag_application.run(query))
    contexts.append(rag_application.retrieved_context)

OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.33s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.32s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.31s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.21s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.08s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.07s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.09s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.11s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.06s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.16s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.06s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.11s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.09s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.14s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.13s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [

In [None]:
# Assemble the evaluation table, one row per test query: the question, the
# generated answer, the retrieved context and the reference answer.
data = dict(
    question=test_set['Query'],
    answer=answers,
    retrieved_context=contexts,
    reference=test_set['Answer'],
)

dataset = pd.DataFrame(data)

In [19]:
# Preview the first rows of the assembled evaluation table.
dataset.head()

Unnamed: 0,question,answer,retrieved_context,reference
0,What is prompt engineering?,Prompt engineering is the process of designing...,OpenAI Cookbook has many in-depth examples for...,"Prompt Engineering, also known as In-Context P..."
1,What are the basic approaches for prompting a ...,The basic approaches for prompting a language ...,OpenAI Cookbook has many in-depth examples for...,Zero-shot and few-shot learning are two of the...
2,What are the issues in few-shot learning that ...,The issues in few-shot learning that lead to p...,Zero-Shot#\nZero-shot learning is to simply fe...,(1) Majority label bias exists if the distribu...
3,What is Chain-of-Thought (CoT) prompting?,Chain-of-Thought (CoT) prompting is a techniqu...,Definition: Determine which category the quest...,Chain-of-thought (CoT) prompting generates a s...
4,What are the types of Chain-of-Thought prompts?,"The question asks for the category ""Quantity"" ...",References#\n[1] Zhao et al. “Calibrate Before...,Two main types of CoT prompting:\n\nFew-shot C...


In [20]:
# Save the evaluation table to the current working directory; the file name
# carries the timestamp captured near the top of the notebook.
# NOTE(review): `index=False` is not passed, so the row index is written as an
# unnamed first column — confirm downstream readers expect that.
dataset.to_csv(f'eval_set_{timestamp}.csv')

In [21]:
# Same preview as before the save; `dataset` is not modified in between.
dataset.head()

Unnamed: 0,question,answer,retrieved_context,reference
0,What is prompt engineering?,Prompt engineering is the process of designing...,OpenAI Cookbook has many in-depth examples for...,"Prompt Engineering, also known as In-Context P..."
1,What are the basic approaches for prompting a ...,The basic approaches for prompting a language ...,OpenAI Cookbook has many in-depth examples for...,Zero-shot and few-shot learning are two of the...
2,What are the issues in few-shot learning that ...,The issues in few-shot learning that lead to p...,Zero-Shot#\nZero-shot learning is to simply fe...,(1) Majority label bias exists if the distribu...
3,What is Chain-of-Thought (CoT) prompting?,Chain-of-Thought (CoT) prompting is a techniqu...,Definition: Determine which category the quest...,Chain-of-thought (CoT) prompting generates a s...
4,What are the types of Chain-of-Thought prompts?,"The question asks for the category ""Quantity"" ...",References#\n[1] Zhao et al. “Calibrate Before...,Two main types of CoT prompting:\n\nFew-shot C...
