In [1]:
import os
import openai
from getpass import getpass

openai.api_key = getpass("Please provide your OpenAI Key: ")
os.environ["OPENAI_API_KEY"] = openai.api_key

In [31]:
from langchain_community.document_loaders import PyMuPDFLoader

loader = PyMuPDFLoader(
    "data/meta10k.pdf",
)

documents = loader.load()

In [32]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Note: we can play around with chunksize here....

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 600,
    chunk_overlap = 50
)

documents = text_splitter.split_documents(documents)
len(documents)

1101

In [5]:
from langchain_openai import OpenAIEmbeddings

# we can also play around with the embeddings here... 
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002"
)

In [5]:
from langchain_community.vectorstores import Qdrant

qdrant_vector_store = Qdrant.from_documents(
    documents,
    embeddings,
    location=":memory:",
    collection_name="Meta10k",
)

In [6]:
retriever = qdrant_vector_store.as_retriever()

In [9]:
from langchain.prompts import ChatPromptTemplate

template = """Answer the question based only on the following context. 
When answer about financial information, look for content only in table form.
If you cannot answer the question with the context, please respond with 'I don't know':

Context:
{context}

Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [10]:
from operator import itemgetter

from langchain_openai import ChatOpenAI


primary_qa_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [13]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

retrieval_augmented_qa_chain = (
    # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
    # "question" : populated by getting the value of the "question" key
    # "context"  : populated by getting the value of the "question" key and chaining it into the base_retriever
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    # "context"  : is assigned to a RunnablePassthrough object (will not be called or considered in the next step)
    #              by getting the value of the "context" key from the previous step
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # "response" : the "context" and "question" values are used to format our prompt object and then piped
    #              into the LLM and stored in a key called "response"
    # "context"  : populated by getting the value of the "context" key from the previous step
    | {"response": prompt | primary_qa_llm, "context": itemgetter("context")}
)

In [15]:
import pandas as pd

# load testset from csv
testset = pd.read_csv("testset1.csv")
testset.head()

Unnamed: 0.1,Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,0,How does the use of a common identifier help i...,['mobile browser on a given day. We do not req...,,simple,"[{'source': 'data/meta10k.pdf', 'file_path': '...",True
1,1,How is net income used in the computation of d...,['The numerators and denominators of the basic...,The net income is used as the numerator in the...,simple,"[{'source': 'data/meta10k.pdf', 'file_path': '...",True
2,2,How does tax effects from share-based compensa...,['Effective Tax Rate Items. Our effective tax ...,The tax effects from share-based compensation ...,simple,"[{'source': 'data/meta10k.pdf', 'file_path': '...",True
3,3,How is earnings per share computed using the t...,['Table of Contents\nNote\xa04. Earnings per S...,Earnings per share (EPS) using the two-class m...,simple,"[{'source': 'data/meta10k.pdf', 'file_path': '...",True
4,4,How does a decrease in user engagement affect ...,['reduce our ability to effectively target ads...,A decrease in user engagement can adversely af...,multi_context,"[{'source': 'data/meta10k.pdf', 'file_path': '...",True


In [16]:
test_questions = testset["question"].values.tolist()
test_groundtruths = testset["ground_truth"].values.tolist()

In [17]:
answers = []
contexts = []

for question in test_questions:
  response = retrieval_augmented_qa_chain.invoke({"question" : question})
  answers.append(response["response"].content)
  contexts.append([context.page_content for context in response["context"]])

In [22]:
test_groundtruths[0]=''

In [23]:
from datasets import Dataset

response_dataset = Dataset.from_dict({
    "question" : test_questions,
    "answer" : answers,
    "contexts" : contexts,
    "ground_truth" : test_groundtruths
})

In [24]:
response_dataset[0]

{'question': 'How does the use of a common identifier help in attributing multiple user accounts within and across products?',
 'answer': "I don't know",
 'contexts': ['mobile browser on a given day. We do not require people to use a common identifier or link their accounts to use multiple products in our Family, and\ntherefore must seek to attribute multiple user accounts within and across products to individual people. Our calculations of DAP rely upon complex\ntechniques, algorithms, and machine learning models that seek to estimate the underlying number of unique people using one or more of these\nproducts, including by matching user accounts within an individual product and across multiple products when we believe they are attributable to a\nsingle person, and counting such group of accounts as one person. As these techniques and models require significant judgment, are developed based\non internal reviews of limited samples of user accounts, and are calibrated against user survey

In [25]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

metrics = [
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    answer_correctness,
]

In [26]:
results = evaluate(response_dataset, metrics)

Evaluating:   2%|▏         | 1/60 [00:02<02:18,  2.35s/it]No statements were generated from the answer.
Evaluating:  37%|███▋      | 22/60 [00:06<00:07,  5.28it/s]No statements were generated from the answer.
Evaluating:  43%|████▎     | 26/60 [00:08<00:09,  3.71it/s]No statements were generated from the answer.
Evaluating:  47%|████▋     | 28/60 [00:09<00:10,  3.10it/s]No statements were generated from the answer.
Evaluating:  60%|██████    | 36/60 [00:10<00:04,  5.21it/s]No statements were generated from the answer.
Evaluating:  73%|███████▎  | 44/60 [00:12<00:03,  4.03it/s]No statements were generated from the answer.
Evaluating: 100%|██████████| 60/60 [00:18<00:00,  3.31it/s]


In [27]:
results_df = results.to_pandas()
results_df

Unnamed: 0,question,answer,contexts,ground_truth,faithfulness,answer_relevancy,context_recall,context_precision,answer_correctness
0,How does the use of a common identifier help i...,I don't know,[mobile browser on a given day. We do not requ...,,,0.0,1.0,1.0,0.177729
1,How is net income used in the computation of d...,Net income for diluted EPS is adjusted by the ...,"[stock, while the diluted EPS of Class B commo...",The net income is used as the numerator in the...,1.0,0.922074,1.0,1.0,0.724957
2,How does tax effects from share-based compensa...,I don't know,"[that period, our effective tax rate may be in...",The tax effects from share-based compensation ...,,0.0,1.0,1.0,0.171742
3,How is earnings per share computed using the t...,Earnings per share is computed using the two-c...,[Table of Contents\nNote 4. Earnings per Share...,Earnings per share (EPS) using the two-class m...,1.0,0.92742,1.0,1.0,0.617972
4,How does a decrease in user engagement affect ...,I don't know,"[•\ndecreases in user engagement, including ti...",A decrease in user engagement can adversely af...,,0.0,1.0,1.0,0.182496
5,What is the role of a PCAOB-registered public ...,I don't know,"[We also have audited, in accordance with the ...",The role of a PCAOB-registered public accounti...,,0.0,1.0,1.0,0.172857
6,How are foreign currency transaction gains and...,Foreign currency transaction gains and losses ...,"[cumulative translation losses, net of tax of ...",Foreign currency transaction gains and losses ...,1.0,0.978235,1.0,0.805556,0.740551
7,How does acting in the best interests of stock...,I don't know,[jurisdictions remain ongoing and could subjec...,"Mr. Zuckerberg, as a stockholder, is entitled ...",,0.0,1.0,0.5,0.182571
8,"How are dilutive securities, such as restricte...","Dilutive securities, such as restricted stock ...","[stock, while the diluted EPS of Class B commo...","Dilutive securities, such as restricted stock ...",1.0,0.942772,1.0,1.0,0.49704
9,What documents are included in the Consolidate...,"Consolidated Balance Sheets, Consolidated Stat...",[Table of Contents\nPART IV\nItem 15. Exhibit ...,"Consolidated Balance Sheets, Consolidated Stat...",1.0,0.953695,1.0,1.0,1.0


In [30]:
from langchain.retrievers import MultiQueryRetriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain import hub



advanced_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=primary_qa_llm)
document_chain = create_stuff_documents_chain(primary_qa_llm, retrieval_qa_prompt)
retrieval_chain = create_retrieval_chain(advanced_retriever, document_chain)

In [31]:
answers = []
contexts = []

for question in test_questions:
  response = retrieval_chain.invoke({"input" : question})
  answers.append(response["answer"])
  contexts.append([context.page_content for context in response["context"]])

In [32]:
response_dataset_advanced_retrieval = Dataset.from_dict({
    "question" : test_questions,
    "answer" : answers,
    "contexts" : contexts,
    "ground_truth" : test_groundtruths
})

In [33]:
advanced_retrieval_results = evaluate(response_dataset_advanced_retrieval, metrics)

Evaluating: 100%|██████████| 60/60 [00:27<00:00,  2.21it/s]


In [34]:
advanced_retrieval_results_df = advanced_retrieval_results.to_pandas()
advanced_retrieval_results_df

Unnamed: 0,question,answer,contexts,ground_truth,faithfulness,answer_relevancy,context_recall,context_precision,answer_correctness
0,How does the use of a common identifier help i...,The use of a common identifier is not required...,[mobile browser on a given day. We do not requ...,,1.0,0.900714,1.0,1.0,0.16578
1,How is net income used in the computation of d...,Net income is used in the computation of dilut...,[Class\nA\nClass\nB\nClass\nA\nClass\nB\nClass...,The net income is used as the numerator in the...,1.0,0.999999,1.0,1.0,0.829077
2,How does tax effects from share-based compensa...,Tax effects from share-based compensation can ...,"[that period, our effective tax rate may be in...",The tax effects from share-based compensation ...,1.0,0.996214,1.0,1.0,0.983491
3,How is earnings per share computed using the t...,Earnings per share (EPS) is computed using the...,[Table of Contents\nNote 4. Earnings per Share...,Earnings per share (EPS) using the two-class m...,1.0,0.913181,1.0,1.0,0.807227
4,How does a decrease in user engagement affect ...,A decrease in user engagement can adversely im...,"[•\ndecreases in user engagement, including ti...",A decrease in user engagement can adversely af...,1.0,0.919927,1.0,1.0,0.615741
5,What is the role of a PCAOB-registered public ...,The role of a PCAOB-registered public accounti...,"[We also have audited, in accordance with the ...",The role of a PCAOB-registered public accounti...,1.0,0.954333,1.0,1.0,0.580379
6,How are foreign currency transaction gains and...,Foreign currency transaction gains and losses ...,"[cumulative translation losses, net of tax of ...",Foreign currency transaction gains and losses ...,1.0,0.990724,1.0,0.8875,0.749499
7,How does acting in the best interests of stock...,Acting in the best interests of stockholders i...,[jurisdictions remain ongoing and could subjec...,"Mr. Zuckerberg, as a stockholder, is entitled ...",1.0,0.872917,1.0,0.5,0.906111
8,"How are dilutive securities, such as restricte...","In the calculation of diluted EPS, net income ...","[stock, while the diluted EPS of Class B commo...","Dilutive securities, such as restricted stock ...",1.0,0.888579,1.0,1.0,0.696153
9,What documents are included in the Consolidate...,The documents included in the Consolidated Fin...,[Table of Contents\nPART IV\nItem 15. Exhibit ...,"Consolidated Balance Sheets, Consolidated Stat...",1.0,1.0,1.0,1.0,0.980326


In [35]:
import pandas as pd

df_original = pd.DataFrame(list(results.items()), columns=['Metric', 'Baseline'])
df_comparison = pd.DataFrame(list(advanced_retrieval_results.items()), columns=['Metric', 'MultiQueryRetriever with Document Stuffing'])

df_merged = pd.merge(df_original, df_comparison, on='Metric')

df_merged['Delta'] = df_merged['MultiQueryRetriever with Document Stuffing'] - df_merged['Baseline']

df_merged

Unnamed: 0,Metric,Baseline,MultiQueryRetriever with Document Stuffing,Delta
0,faithfulness,1.0,1.0,0.0
1,answer_relevancy,0.477016,0.948467,0.47145
2,context_recall,1.0,1.0,0.0
3,context_precision,0.94213,0.948958,0.006829
4,answer_correctness,0.438678,0.722512,0.283835


In [6]:
new_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
from langchain_community.vectorstores import Qdrant

qdrant_vector_store = Qdrant.from_documents(
    documents,
    new_embeddings,
    location=":memory:",
    collection_name="Meta10k",
)

In [33]:
from langchain.retrievers import MultiQueryRetriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain import hub
from langchain_openai import ChatOpenAI


primary_qa_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
retrieval_qa_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
document_chain = create_stuff_documents_chain(primary_qa_llm, retrieval_qa_prompt)

new_retriever = qdrant_vector_store.as_retriever()
new_advanced_retriever = MultiQueryRetriever.from_llm(retriever=new_retriever, llm=primary_qa_llm)
new_retrieval_chain = create_retrieval_chain(new_advanced_retriever, document_chain)


In [34]:
response = new_retrieval_chain.invoke({"input" : "who are the board of directors of meta?"})
answer = response["answer"]
print(answer)

The board of directors of Meta Platforms, Inc. includes:
1. Robert M. Kimmitt
2. Sheryl K. Sandberg
3. Tracey T. Travis
4. Tony Xu


In [30]:
retrieved_documents = new_retriever.invoke("Who are Meta's 'Directors' (i.e., members of the Board of Directors)?")
for doc in retrieved_documents:
  print(doc.page_content)
  print('-----------------')

workforce.
We want our products to work for people around the world and we need to grow and keep the best talent in order to do that. We also remain committed
to having a skilled, inclusive and diverse workforce because we believe cognitive diversity fuels innovation. To aid in this effort, we have taken steps to reduce
bias from our hiring processes and performance management systems, as well as offering learning and development courses for our employees.
Corporate Information
We were incorporated in Delaware in July 2004. We completed our initial public offering in May 2012 and our Class A common stock is currently listed
on the Nasdaq Global Select Market under the symbol "META." Our principal executive offices are located at 1 Meta Way, Menlo Park, California 94025, and
our telephone number is (650) 543-4800.
Meta, the Meta logo, Meta Quest, Meta Horizon, Facebook, FB, Instagram, Oculus, WhatsApp, Reels, and our other registered or common law
-----------------
Listing
Our Class A c

In [16]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate

template = """Answer the question based only on the following context. 
When answer about financial information, look for content only in table form.
If you cannot answer the question with the context, please respond with 'I don't know':

Context:
{context}

Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(template)
retrieval_augmented_qa_chain = (
    # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
    # "question" : populated by getting the value of the "question" key
    # "context"  : populated by getting the value of the "question" key and chaining it into the base_retriever
    {"context": itemgetter("question") | new_retriever, "question": itemgetter("question")}
    # "context"  : is assigned to a RunnablePassthrough object (will not be called or considered in the next step)
    #              by getting the value of the "context" key from the previous step
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # "response" : the "context" and "question" values are used to format our prompt object and then piped
    #              into the LLM and stored in a key called "response"
    # "context"  : populated by getting the value of the "context" key from the previous step
    | {"response": prompt | primary_qa_llm, "context": itemgetter("context")}
)

In [27]:
result = retrieval_augmented_qa_chain.invoke({"question" : "who are the board of directors of meta?"})
answer = result["response"].content
print(answer)

I don't know


In [24]:
question = "Who are the board of directors of meta?"

result = retrieval_augmented_qa_chain.invoke({"question" : question})

print(result["response"].content)
print(result["response"].answer)

I don't know.


AttributeError: 'AIMessage' object has no attribute 'answer'

In [38]:
new_retriever = qdrant_vector_store.as_retriever()
new_advanced_retriever = MultiQueryRetriever.from_llm(retriever=new_retriever, llm=primary_qa_llm)
new_retrieval_chain = create_retrieval_chain(new_advanced_retriever, document_chain)

answers = []
contexts = []

for question in test_questions:
  response = new_retrieval_chain.invoke({"input" : question})
  answers.append(response["answer"])
  contexts.append([context.page_content for context in response["context"]])

In [40]:
new_response_dataset_advanced_retrieval = Dataset.from_dict({
    "question" : test_questions,
    "answer" : answers,
    "contexts" : contexts,
    "ground_truth" : test_groundtruths
})
new_advanced_retrieval_results = evaluate(new_response_dataset_advanced_retrieval, metrics)

Evaluating: 100%|██████████| 60/60 [00:24<00:00,  2.42it/s]


In [41]:
df_baseline = pd.DataFrame(list(results.items()), columns=['Metric', 'Baseline'])
df_original = pd.DataFrame(list(advanced_retrieval_results.items()), columns=['Metric', 'ADA'])
df_comparison = pd.DataFrame(list(new_advanced_retrieval_results.items()), columns=['Metric', 'Text Embedding 3'])

df_merged = pd.merge(df_original, df_comparison, on='Metric')
df_merged = pd.merge(df_baseline, df_merged, on="Metric")

df_merged['Delta - TE3 -> ADA'] = df_merged['Text Embedding 3'] - df_merged['ADA']
df_merged['Delta - TE3 -> Baseline'] = df_merged['Text Embedding 3'] - df_merged['Baseline']

df_merged

Unnamed: 0,Metric,Baseline,ADA,Text Embedding 3,Delta - TE3 -> ADA,Delta - TE3 -> Baseline
0,faithfulness,1.0,1.0,1.0,0.0,0.0
1,answer_relevancy,0.477016,0.948467,0.940713,-0.007754,0.463696
2,context_recall,1.0,1.0,0.944444,-0.055556,-0.055556
3,context_precision,0.94213,0.948958,0.983681,0.034722,0.041551
4,answer_correctness,0.438678,0.722512,0.680868,-0.041645,0.24219


In [9]:


results_df.to_csv("results.csv")
advanced_retrieval_results_df.to_csv("advanced_retrieval_results.csv")


NameError: name 'results_df' is not defined

In [45]:
new_advanced_retrieval_results_df = new_advanced_retrieval_results.to_pandas()
new_advanced_retrieval_results_df.to_csv("new_advanced_retrieval_results.csv")