In [1]:
import os
from langchain_community.retrievers import WikipediaRetriever
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
import torch
import random
import numpy as np
import pandas as pd
import ast
from datasets import load_dataset
import random
from evaluation import EvaluationMetrics
import time

os.environ["OPENAI_API_KEY"] = "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"




In [2]:
# This is for normal RAG pipeline
'''

from langchain_openai import OpenAIEmbeddings
from scipy.spatial.distance import cosine

class CustomWikipediaRetriever:
    def __init__(self, k=3, top_m=10, temperature=1.0, std_error=0.0, embedding_model=None):
        self.retriever = WikipediaRetriever(top_k_results=top_m)
        self.embedding_model = embedding_model or OpenAIEmbeddings()
        self.k = k  # Number of documents to sample
        self.temperature = temperature  # Temperature scaling for softmax
        self.std_error = std_error

    def softmax(self, similarities, temperature):
        """Apply softmax with temperature to convert similarities into probabilities."""
        e_similarities = np.exp(np.array(similarities) / temperature)
        return e_similarities / np.sum(e_similarities)

    def add_noise_to_probabilities(self, probabilities):
        """Add normal distributed noise to the probabilities."""
        noise = np.random.normal(0, self.std_error, len(probabilities))
        noisy_probabilities = probabilities + noise
        
        # Ensure all probabilities are non-negative
        noisy_probabilities = np.maximum(noisy_probabilities, 0)
        
        # Normalize the probabilities so they sum to 1
        return noisy_probabilities / np.sum(noisy_probabilities)

    def retrieve_with_similarity(self, query):
        # Get top-m documents from the retriever
        docs = self.retriever.get_relevant_documents(query)
        
        # Calculate the similarity scores
        query_embedding = self.embedding_model.embed_query(query)
        doc_embeddings = [self.embedding_model.embed_documents([doc.page_content])[0] for doc in docs]
        
        # Calculate cosine similarity scores
        similarities = [1 - cosine(query_embedding, doc_emb) for doc_emb in doc_embeddings]
        
        # Convert similarities to probabilities
        probabilities = self.softmax(similarities, self.temperature)

        noisy_probabilities = self.add_noise_to_probabilities(probabilities)

        # Randomly sample k documents from the top-m using probabilities
        sampled_indices = np.random.choice(len(docs), size=self.k, replace=False, p=noisy_probabilities)
        results = [docs[i] for i in sampled_indices]

        return results


class QAChain:
    def __init__(self, k: int = 3, top_m: int = 10, temperature=1.0, std_error=0.0, embedding_model=None):
        """
        Initialize the QAChain class with the desired number of retrieved documents (k).

        Parameters:
        - k: Number of documents to retrieve from Wikipedia for context (default is 3).
        - embedding_model: The embedding model to use (default is OpenAIEmbeddings).
        """
        self.retriever = CustomWikipediaRetriever(k=k, top_m=top_m, temperature=temperature, std_error=std_error, embedding_model=embedding_model)

        # Define the prompt template
        self.prompt = ChatPromptTemplate.from_template(
            """Answer the question based only on the context provided as short as possible.

            Context: {context}

            Question: {question}"""
        )
        
        self.llm = ChatOpenAI(model="gpt-3.5-turbo")

        # Create the full chain
        self.chain = (
            {"context": self.retrieve_docs,  # Fixed method reference
             "question": RunnablePassthrough()}
            | self.prompt
            | self.llm
            | StrOutputParser()
        )

    def retrieve_docs(self, query):
        """
        Retrieve and format documents from Wikipedia for the chain.
        """
        docs = self.retriever.retrieve_with_similarity(query)
        return "\n\n".join(doc.page_content for doc in docs)
    
    def answer(self, question: str):
        """
        Answer a given question based on the context retrieved from Wikipedia.

        Parameters:
        - question: The question to ask.
        """
        return self.chain.invoke(question)


# Instantiate the QAChain without explicitly passing an embedding model
qa_chain = QAChain(k=3)

# Retrieve documents with similarity scores and provide the answer
query = "What is the capital of France?"
answer = qa_chain.answer(query)

# Print the answer
print(answer)
'''


'\n\nfrom langchain_openai import OpenAIEmbeddings\nfrom scipy.spatial.distance import cosine\n\nclass CustomWikipediaRetriever:\n    def __init__(self, k=3, top_m=10, temperature=1.0, std_error=0.0, embedding_model=None):\n        self.retriever = WikipediaRetriever(top_k_results=top_m)\n        self.embedding_model = embedding_model or OpenAIEmbeddings()\n        self.k = k  # Number of documents to sample\n        self.temperature = temperature  # Temperature scaling for softmax\n        self.std_error = std_error\n\n    def softmax(self, similarities, temperature):\n        """Apply softmax with temperature to convert similarities into probabilities."""\n        e_similarities = np.exp(np.array(similarities) / temperature)\n        return e_similarities / np.sum(e_similarities)\n\n    def add_noise_to_probabilities(self, probabilities):\n        """Add normal distributed noise to the probabilities."""\n        noise = np.random.normal(0, self.std_error, len(probabilities))\n       

In [3]:
# This is for evaluation only


from langchain_openai import OpenAIEmbeddings
from scipy.spatial.distance import cosine

class CustomWikipediaRetriever:
    def __init__(self, max_m=10, embedding_model=None):
        self.retriever = WikipediaRetriever(top_k_results=max_m)
        self.embedding_model = embedding_model or OpenAIEmbeddings()
        self.docs = []
        self.similarities = []

    def softmax(self, temperature, similarities):
        """Apply softmax with temperature to convert similarities into probabilities."""
        e_similarities = np.exp(np.array(similarities) / temperature)
        return e_similarities / np.sum(e_similarities)

    def add_noise_to_probabilities(self, probabilities, std_error):
        """Add normal distributed noise to the probabilities."""
        noise = np.random.normal(0, std_error, len(probabilities))
        noisy_probabilities = probabilities + noise
        
        # Ensure all probabilities are non-negative
        noisy_probabilities = np.maximum(noisy_probabilities, 0)
        
        # Normalize the probabilities so they sum to 1
        return noisy_probabilities / np.sum(noisy_probabilities)

    def retrieve_with_similarity(self, query):
        # Get top-m documents from the retriever
        docs = self.retriever.get_relevant_documents(query)
        
        # Calculate the similarity scores
        query_embedding = self.embedding_model.embed_query(query)
        doc_embeddings = [self.embedding_model.embed_documents([doc.page_content])[0] for doc in docs]
        
        # Calculate cosine similarity scores
        similarities = [1 - cosine(query_embedding, doc_emb) for doc_emb in doc_embeddings]
        
        sorted_indices = np.argsort(similarities)[::-1]
        sorted_docs = [docs[i] for i in sorted_indices]
        sorted_similarities = np.array(similarities)[sorted_indices]
        
        self.docs = sorted_docs
        self.similarities = sorted_similarities

    def retrieve(self, temperature=1.0, std_error=0.0, k=3, top_m=10):
        # Convert similarities to probabilities

        similarities = self.similarities[:min(len(self.docs),top_m)]
        
        probabilities = self.softmax(temperature, similarities)
        
        noisy_probabilities = self.add_noise_to_probabilities(probabilities, std_error)

        # Randomly sample k documents from the top-m using probabilities
        sampled_indices = np.random.choice(min(len(self.docs),top_m), size=k, replace=False, p=noisy_probabilities)
        results = [self.docs[i] for i in sampled_indices]

        return results


class QAChain:
    def __init__(self, docs):
        """
        Initialize the QAChain class with the desired number of retrieved documents (k).

        Parameters:
        - k: Number of documents to retrieve from Wikipedia for context (default is 3).
        - embedding_model: The embedding model to use (default is OpenAIEmbeddings).
        """
        
        self.docs = docs

        # Define the prompt template
        self.prompt = ChatPromptTemplate.from_template(
            """Answer the question based only on the context provided as short as possible.

            Context: {context}

            Question: {question}"""
        )
        
        self.llm = ChatOpenAI(model="gpt-3.5-turbo")

        # Create the full chain
        self.chain = (
            {"context": self.retrieve_docs,  # Fixed method reference
             "question": RunnablePassthrough()}
            | self.prompt
            | self.llm
            | StrOutputParser()
        )

    def retrieve_docs(self, query):
        """
        Retrieve and format documents from Wikipedia for the chain.
        """
        
        return "\n\n".join(doc.page_content for doc in self.docs)
    
    def answer(self, question: str):
        """
        Answer a given question based on the context retrieved from Wikipedia.

        Parameters:
        - question: The question to ask.
        """
        return self.chain.invoke(question)


retriever = CustomWikipediaRetriever()
query = "who is the founder of quantum physics?"
retriever.retrieve_with_similarity(query)

for i in range(3):
    docs = retriever.retrieve()
    print(docs[0].page_content[:100])
    qa_chain = QAChain(docs=docs)
    answer = qa_chain.answer(query)
    print(answer)



  docs = self.retriever.get_relevant_documents(query)


Nikolay Storonsky (born 21 July 1984) is a Russian-born British entrepreneur. He is best known as th
Pyotr Kapitsa
Fotini G. Markopoulou-Kalamara (Greek: Φωτεινή Μαρκοπούλου-Καλαμαρά; born April 3, 1971) is a Greek 
Yakov Frenkel and Matvei Petrovich Bronstein are notable founders of quantum physics.
Guillaume Verdon-Akzam, also known as Guillaume Verdon, or Gill Verdon is a Canadian mathematical ph
The founder of quantum physics is not mentioned in the provided context.


In [4]:
m_test = [5, 8, 12, 16, 20, 25]
k_test = [1, 2, 3, 4, 5, 6]
t_test = [0.1, 0.5, 1.0, 2.0, 10.0]
e_test = [0.05, 0.1, 0.15, 0.2]
num_sample = 500
num_times = 3
rng = 42


In [5]:
eval = EvaluationMetrics()


In [6]:
from get_dataset import get_nq, get_tqa, get_squad, get_asqa

nq = get_nq()
tqa = get_tqa()
squad = get_squad()
asqa = get_asqa()


In [7]:
name_to_ds = {"NQ": nq, "TriviaQA": tqa, "SQuAD": squad, "ASQA": asqa}

def evaluate(name):
    ds = name_to_ds[name]
    os.makedirs("top-m_results/vary_m", exist_ok=True)
    os.makedirs("top-m_results/vary_k", exist_ok=True)
    os.makedirs("top-m_results/temperature", exist_ok=True)
    os.makedirs("top-m_results/std_error", exist_ok=True)

    
    random.seed(rng)
    selected_idx = random.sample(range(ds.shape[0]), num_sample)

    max_m = max(m_test)

    references = ds.loc[selected_idx, "answer"].values.tolist()
    
    m_candidates = [[] for _ in range(len(m_test))]
    m_results = []
    k_candidates = [[] for _ in range(len(k_test))]
    k_results = []
    t_candidates = [[] for _ in range(len(t_test))]
    t_results = []
    e_candidates = [[] for _ in range(len(e_test))]
    e_results = []
    
    for i in selected_idx:
        q = ds.loc[i, "question"]
        a = ds.loc[i, "answer"]
        retriever = CustomWikipediaRetriever(max_m=max_m)
        retriever.retrieve_with_similarity(q)
        
        for j in range(len(m_test)):
            top_m = m_test[j]
            c = []
            for _ in range(num_times):
                docs = retriever.retrieve(top_m=top_m)
                qa_chain = QAChain(docs=docs)
                answer = qa_chain.answer(q)
                c.append(answer)
                time.sleep(0.03)
                m_results.append({"idx in original dataset":i, "question":q , "top-m": top_m, "num_times":_, "retrieved docs": docs, "generated answer": answer, "standard answer": a})
            m_candidates[j].append(c)
            
        for j in range(len(k_test)):
            k = k_test[j]
            c = []
            for _ in range(num_times):
                docs = retriever.retrieve(k=k)
                qa_chain = QAChain(docs=docs)
                answer = qa_chain.answer(q)
                c.append(answer)
                time.sleep(0.03)
                k_results.append({"idx in original dataset":i, "question":q , "k": k, "num_times":_, "retrieved docs": docs, "generated answer": answer, "standard answer": a})
            k_candidates[j].append(c)
            
        for j in range(len(t_test)):
            t = t_test[j]
            c = []
            for _ in range(num_times):
                docs = retriever.retrieve(temperature=t)
                qa_chain = QAChain(docs=docs)
                answer = qa_chain.answer(q)
                c.append(answer)
                time.sleep(0.03)
                t_results.append({"idx in original dataset":i, "question":q , "temperature": t, "num_times":_, "retrieved docs": docs, "generated answer": answer, "standard answer": a})
            t_candidates[j].append(c)
            
        for j in range(len(e_test)):
            e = e_test[j]
            c = []
            for _ in range(num_times):
                docs = retriever.retrieve(std_error=e)
                qa_chain = QAChain(docs=docs)
                answer = qa_chain.answer(q)
                c.append(answer)
                time.sleep(0.03)
                e_results.append({"idx in original dataset":i, "question":q , "standard error": e, "num_times":_, "retrieved docs": docs, "generated answer": answer, "standard answer": a})
            e_candidates[j].append(c)

    m_results = pd.DataFrame(m_results)
    m_results.to_csv(f'top-m_results/vary_m/{name}.csv')
    
    k_results = pd.DataFrame(k_results)
    k_results.to_csv(f'top-m_results/vary_k/{name}.csv')
    
    t_results = pd.DataFrame(t_results)
    t_results.to_csv(f'top-m_results/temperature/{name}.csv')
    
    e_results = pd.DataFrame(e_results)
    e_results.to_csv(f'top-m_results/std_error/{name}.csv')
    
    
    m_rougeL, m_diversity = eval.cal_scores(m_candidates, references)
    with open(f'top-m_results/vary_m/{name}.txt', 'w') as file:
        sys.stdout = file
        print("The top-m docs m are:", m_test)
        print("rougeL score for different m is:", m_rougeL)
        print("diversity score for different m is:", m_diversity)
    sys.stdout = sys.__stdout__

    
    k_rougeL, k_diversity = eval.cal_scores(k_candidates, references) 
    with open(f'top-m_results/vary_k/{name}.txt', 'w') as file:
        sys.stdout = file
        print("The sampled k docs k are:", k_test)
        print("rougeL score for different k is:", k_rougeL)
        print("diversity score for different k is:", k_diversity)
    sys.stdout = sys.__stdout__
    
    
    t_rougeL, t_diversity = eval.cal_scores(t_candidates, references)
    with open(f'top-m_results/temperature/{name}.txt', 'w') as file:
        sys.stdout = file
        print("The temperature t are:", t_test)
        print("rougeL score for different t is:", t_rougeL)
        print("diversity score for different t is:", t_diversity)
    sys.stdout = sys.__stdout__

    
    e_rougeL, e_diversity = eval.cal_scores(e_candidates, references)
    with open(f'top-m_results/std_error/{name}.txt', 'w') as file:
        sys.stdout = file
        print("The standard error e are:", e_test)
        print("rougeL score for different e is:", e_rougeL)
        print("diversity score for different e is:", e_diversity)
    sys.stdout = sys.__stdout__
        
    

    

In [None]:
datasets = ["NQ", "TriviaQA", "SQuAD", "ASQA"]

for ds in datasets:
    evaluate(ds)