<a target="_blank" href="https://colab.research.google.com/github/gox6/colab-demos/blob/main/rags/evaluate-rags-rigorously-or-perish.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Project Setup

In [2]:
# Installing Python packages & hiding
!pip install --quiet \
  chromadb \
  datasets \
  langchain \
  langchain_chroma \
  plotly \
  polars \
  ragas

In [3]:
# Importing the packages
from functools import reduce
import os

from datasets import load_dataset
from getpass import getpass
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import plotly.express as px
import polars as pl
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context, conditional


In [4]:
# Managing secrets
# - If using Colab please use Colab Secrets
# - If running outside Colab please provide secrets as environmental variables
COLAB = os.getenv("COLAB_RELEASE_TAG") is not None

if COLAB:
  from google.colab import userdata, data_table
  # Secrets
  OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
  os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
  runtime_info = "Colab runtime"

  # Enabling Colab's data formatter for pandas
  data_table.enable_dataframe_formatter()
elif OPENAI_API_KEY := os.environ.get('OPENAI_API_KEY'):
  # Secrets
  runtime_info = "Non Colab runtime"
else:
  OPENAI_API_KEY = getpass("OPENAI_API_KEY")
  os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
  runtime_info = "Non Colab runtime"

print(runtime_info)

Colab runtime


#Exploring Different Types of Question Evolution in RAGAs




In [5]:
# Getting the batch article
the_batch_newsletter_loader = WebBaseLoader("https://www.deeplearning.ai/the-batch/data-points-issue-245/")
the_batch_newsletter = the_batch_newsletter_loader.load()


the_batch_newsletter

[Document(page_content='Big Updates for GPT-4 Turbo, Gemini 1.5, Mixtral, and More\uf8ffüåü New Course! Enroll in Quantization Fundamentals With Hugging FaceCoursesShort CoursesSpecializationsAI NewsletterThe BatchAndrew\'s LetterData PointsML ResearchBlogCommunityForumEventsAmbassadorsAmbassador SpotlightResourcesCompanyAboutCareersContactStart LearningWeekly IssuesAndrew\'s LettersData PointsML ResearchBusinessScienceAI & SocietyCultureHardwareAI CareersAboutSubscribeThe BatchData PointsArticleBig Updates for GPT-4 Turbo, Gemini 1.5, Mixtral, and More Plus, AI Helps Rebuild Lost MemoriesData PointsPublishedApr 17, 2024Reading time6 min readShareThis week\'s top AI news and research stories\xa0featured\xa0Google\'s Vertex AI Agent Builder, security holes in generated code, a series of policy violations in the GPT Store, and RA-DIT, a fine-tuning procedure that trains an LLM and retrieval model together to improve the LLM‚Äôs ability to capitalize on retrieved content. But first: U.S. 

In [6]:
# Examining question evolution types evailable in ragas library
llm = ChatOpenAI(model="gpt-3.5-turbo")
generator_llm = llm
critic_llm = llm
embeddings = OpenAIEmbeddings()

example_generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

# Change resulting question type distribution
distributions = [{simple: 1}, {reasoning: 1}, {multi_context: 1}, {conditional: 1}]

In [7]:
# This step COSTS $$$ ...
# question_evolution_types = list(map(lambda x: example_generator.generate_with_langchain_docs(the_batch_newsletter, 1, x), distributions))

In [8]:
# Displaying examples
question_evolution_types_pd = reduce(lambda x, y: pd.concat([x, y], axis=0), [x.to_pandas() for x in question_evolution_types])
examples = question_evolution_types_pd.loc[:, ["evolution_type", "question", "ground_truth"]]
examples

NameError: name 'question_evolution_types' is not defined

In [9]:
# There is randomness in generating evaluation sets in ragas, which stems for the ragas sampling as well as from LLMs indeterminacy.
# As the result the above generated examples may be different than the ones described in the Medium blog post, which are displayed below.

original_medium_examples_pd = pl.read_csv(
    "https://gist.github.com/gox6/31f66ff936be445a9d16836a79f640a9/raw/example-question-evolution-types-in-ragas.csv",
    separator=",",
).to_pandas()
display(original_medium_examples_pd)

Unnamed: 0,index,evolution_type,question,ground_truth
0,0,simple,How has generative AI adoption impacted artist...,Generative AI adoption has positively impacted...
1,0,reasoning,How does Spotify's AI playlist generator work ...,Spotify's AI Playlist feature allows users to ...
2,0,multi_context,What sets GPT-4 Turbo apart from Mixtral 8x22B?,GPT-4 Turbo stands out from Mixtral 8x22B due ...
3,0,conditional,How does generative AI impact artists' product...,Generative AI adoption boosts artists' product...


#Data: CNN and Daily Mail news articles


In [11]:
# Loading small sample of article from CNN and Daily Mail news dateset on HF: https://huggingface.co/datasets/cnn_dailymail
# - Not directly via LangChain with HuggingFaceDatasetLoader class because, it doesn't have split argument

interim = load_dataset(path="cnn_dailymail", name='1.0.0', split='train[:100]')
news_pl = (pl.from_arrow(interim.data.table)
           .with_columns([pl.col("article").str.split(' ').list.len().alias("word_count")]))
news_pd = news_pl.to_pandas()
from langchain_community.document_loaders import PolarsDataFrameLoader
loader = PolarsDataFrameLoader(news_pl, page_content_column="article")
news = loader.load()


In [12]:
# Distribution of artciles by word count
fig = px.histogram(news_pl, x="word_count", marginal="rug")
fig.update_layout(
    title_text="Distribution of articles by word count", # title of plot
    xaxis_title_text='Word Count', # xaxis label
    yaxis_title_text='# Articles', # yaxis label
)
fig.show()

In [13]:
# Seeing news data
if COLAB:
  display(data_table.DataTable(news_pd, include_index=True, num_rows_per_page=5))
else:
  display(news_pd.head(5))

Unnamed: 0,article,highlights,id,word_count
0,"LONDON, England (Reuters) -- Harry Potter star...",Harry Potter star Daniel Radcliffe gets £20M f...,42c027e4ff9730fbb3de84c1af0d2c506e41c3e4,456
1,Editor's note: In our Behind the Scenes series...,Mentally ill inmates in Miami are housed on th...,ee8871b15c50d0db17b0179a6d2beab35065f1e9,700
2,"MINNEAPOLIS, Minnesota (CNN) -- Drivers who we...","NEW: ""I thought I was going to die,"" driver sa...",06352019a19ae31e527f37f7571c6dd7f0c5da37,746
3,WASHINGTON (CNN) -- Doctors removed five small...,"Five small polyps found during procedure; ""non...",24521a2abb2e1f5e34e6824e0f9e56904a2b0e88,415
4,(CNN) -- The National Football League has ind...,"NEW: NFL chief, Atlanta Falcons owner critical...",7fe70cc8b12fab2d0a258fababf7d9c6b5e1262a,977
...,...,...,...,...
95,"DENVER, Colorado -- A Colorado man terrorized ...",Some witnesses say Colorado does nothing to pr...,f70a7abb6c5b0ef383ea12a4d9ca046a5bd854e5,844
96,"LONDON, England (CNN) -- Previously unseen foo...",NEW: Jury shown new footage of Diana taken hou...,a3dd38ec7bc9d7e8423b96d8fd0641a2a5d5c984,659
97,WASHINGTON (CNN) -- Republicans reacted with s...,"Republican Sen. Lindsey Graham: ""I am astounde...",654c6b29b96d2a5a818d91400c20f838b0e8b6df,721
98,"ST. PETERSBURG, Florida (CNN) -- The acrimony ...","YouTube questions address taxes, the Bible, ab...",764d9ce99a1e3f79d95fbc4b68adbce14e7f8bcd,1161


#Generating Synthetic Evaluation Set

In [14]:
# Examining question evolution types evailable in ragas library
llm = ChatOpenAI(model="gpt-3.5-turbo")
generator_llm = llm
critic_llm = llm
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

# Set question type distribution
distributions = {simple: 0.25, reasoning: 0.25, multi_context: 0.25,conditional: 0.25}

In [None]:
# This costs some real $$$
synthetic_evaluation_set = generator.generate_with_langchain_docs(documents=news, test_size=20, distributions=distributions)

In [None]:
synthetic_evaluation_set

In [15]:
# Seeing news data
synthetic_evaluation_set_pd = synthetic_evaluation_set.to_pandas()
if COLAB:
  display(data_table.DataTable(synthetic_evaluation_set_pd, include_index=True, num_rows_per_page=5))
else:
  display(synthetic_evaluation_set_pd.head(5))

NameError: name 'synthetic_evaluation_set' is not defined

In [16]:
def concat_pl_dfs(paths: list[str]):
  dfs = pl.DataFrame()
  for path in paths:
    df = pl.read_csv(path, separator=",")
    dfs = pl.concat([dfs, df], how="vertical")

  return dfs

urls = ["https://gist.github.com/gox6/20f8332dc4f0071e81b1e6e0ed15f14e/raw/synthetic2.csv",
        "https://gist.github.com/gox6/16a8578440b5fdc5d2304192a64ca721/raw/synthetic3.csv",
        "https://gist.github.com/gox6/f3d0f4b6cd9d8bd7d4c0d481f3d94a22/raw/synthetic4.csv"]

eval_set = concat_pl_dfs(urls).drop("index")



In [17]:
eval_set = eval_set.sort("contexts").with_columns([pl.col("contexts").cum_count().over("contexts").alias("cnt")])
eval_set.filter(pl.col("cnt") == pl.lit(0))
eval_set = eval_set.sort("evolution_type").with_columns([pl.col("evolution_type").cum_count().over("evolution_type").alias("cnt")])
eval_set.filter(pl.col("cnt") < pl.lit(8))


question,contexts,ground_truth,evolution_type,metadata,episode_done,cnt
str,str,str,str,str,bool,u32
"""What actions a…","""(CNN) -- A gir…","""Law enforcemen…","""conditional""","""{'highlights':…",true,0
"""How many John …","""(CNN) -- The p…","""The partnershi…","""conditional""","""{'highlights':…",true,1
"""Who hosts ""Win…","""(LiveWire) -- …","""nan""","""conditional""","""{'highlights':…",true,2
"""How has the po…","""AMMAN, Jordan …","""The policy of …","""conditional""","""{'highlights':…",true,3
"""How would Barc…","""MADRID, Spain …","""If Messi didn'…","""conditional""","""{'highlights':…",true,4
"""How did Interp…","""PARIS, France …","""Interpol used …","""conditional""","""{'highlights':…",true,5
"""What did Uru s…","""uru, one of th…","""Uru saw a woma…","""conditional""","""{'highlights':…",true,6
"""What are the m…",""" identified on…","""The main compo…","""multi_context""","""{'highlights':…",true,0
"""How did Beckha…","""(CNN) -- Footb…","""Beckham's move…","""multi_context""","""{'highlights':…",true,1
"""How did Warren…","""(CNN) -- Polyg…","""If Jeffs disav…","""multi_context""","""{'highlights':…",true,2


# Setup Vector Database: ChromaDB

In [47]:
import chromadb
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

embedding = OpenAIEmbeddings()
chroma_client = chromadb.EphemeralClient()


def get_vectordb_collection(chroma_client, documents,  chunk_size=None, overlap_size=0):

    if chunk_size is None:
      collection_name = "full_text"
    else:
      collection_name = f"chunk_size{chunk_size}_overlap_size{overlap_size}"

    langchain_chroma = Chroma(client=chroma_client,
                              collection_name=collection_name,
                              embedding_function=embeddings,
                              )

    existing_collections = [collection.name for collection in chroma_client.list_collections()]
    if collection_name not in existing_collections:
      langchain_chroma.from_documents(collection_name=collection_name,
                                      documents=documents,
                                      embedding=embedding)
    return langchain_chroma


In [44]:
chroma_client.list_collections()

[Collection(name=collection_name),
 Collection(name=langchain),
 Collection(name=full_text),
 Collection(name=full_text1)]

In [None]:
# vectordb_chunk300_overlap_0
# vectordb_chunk300_overlap_100
# vectordb_chunk300_overlap_200
# vectordb_chunk600_overlap_0
# vectordb_chunk600_overlap_100
# vectordb_chunk300_overlap_200

In [89]:
def get_chain(chroma_client, documents, chunk_size=None, overlap_size=0, top_k=4, lambda_mult=0.25):

    vectordb_collection = get_vectordb_collection(chroma_client=chroma_client, documents=documents, chunk_size=chunk_size, overlap_size=overlap_size)
    retriever = vectordb_collection.as_retriever(top_k=top_k, lambda_mult=lambda_mult)

    template = """Answer the question based only on the following context.
    If the context doesn't contain entities present in the question say you don't know.

    {context}

    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(template)
    model = ChatOpenAI(model="gpt-3.5-turbo")


    def format_docs(docs):
        return "\n\n".join([doc.page_content for doc in docs])


    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )

    return chain




In [85]:
help(get_vectordb_collection(chroma_client, news).as_retriever)

Help on method as_retriever in module langchain_core.vectorstores:

as_retriever(**kwargs: 'Any') -> 'VectorStoreRetriever' method of langchain_community.vectorstores.chroma.Chroma instance
    Return VectorStoreRetriever initialized from this VectorStore.
    
    Args:
        search_type (Optional[str]): Defines the type of search that
            the Retriever should perform.
            Can be "similarity" (default), "mmr", or
            "similarity_score_threshold".
        search_kwargs (Optional[Dict]): Keyword arguments to pass to the
            search function. Can include things like:
                k: Amount of documents to return (Default: 4)
                score_threshold: Minimum relevance threshold
                    for similarity_score_threshold
                fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
                lambda_mult: Diversity of results returned by MMR;
                    1 for minimum diversity and 0 for maximum. (Defaul

In [84]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


template = """Answer the question based only on the following context.
If the context doesn't contain entities present in the question say you don't know.

{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI(model="gpt-3.5-turbo")


def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])


chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

chain.invoke("Who is Lech Walesa?")

"I don't know."

In [70]:
news_pl

article,highlights,id,word_count
str,str,str,u32
"""LONDON, Englan…","""Harry Potter s…","""42c027e4ff9730…",456
"""Editor's note:…","""Mentally ill i…","""ee8871b15c50d0…",700
"""MINNEAPOLIS, M…","""NEW: ""I though…","""06352019a19ae3…",746
"""WASHINGTON (CN…","""Five small pol…","""24521a2abb2e1f…",415
"""(CNN) -- The …","""NEW: NFL chief…","""7fe70cc8b12fab…",977
"""BAGHDAD, Iraq …","""Parents beam w…","""a1ebb8bb4d370a…",707
"""BAGHDAD, Iraq …","""Aid workers: V…","""7c0e61ac829a3b…",899
"""BOGOTA, Colomb…","""Tomas Medina C…","""f0d73bdab71176…",340
"""WASHINGTON (CN…","""President Bush…","""5e22bbfc723241…",526
"""(CNN) -- Polic…","""Empty anti-tan…","""613d6311ec2c19…",349


In [75]:
news_pl.filter(pl.col("article").str.contains("Walesa"))

article,highlights,id,word_count
str,str,str,u32


In [None]:
help(vectordb.add_collection)

# Evaluation of RAG

# Optimisation of RAG

# New section

In [None]:
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    answer_correctness
)

result = evaluate(
    ts_ds,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
        answer_correctness
    ],
)

In [None]:
result

In [None]:
from langchain.vectorstores import Chroma


client = chromadb.EphemeralClient()
full_text_collection = client.get_or_create_collection(name="full_text")
full_text_collection.add(
    documents=,
    metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...],
    ids=["id1", "id2", "id3", ...]
)









In [None]:
# import
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import CharacterTextSplitter


# split it into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(docs)

# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# load it into Chroma
db = Chroma.from_documents(docs, embedding_function)

# query it
query = "What did the president say about Ketanji Brown Jackson"
docs = db.similarity_search(query)

# print results
print(docs[0].page_content)

In [None]:
embeddings = OpenAIEmbeddings()
new_client = chromadb.EphemeralClient()
openai_lc_client = Chroma.from_documents(
    bbc_news, embeddings, client=new_client, collection_name="no_split"
)



query = "What did the president say about Ketanji Brown Jackson"
docs = openai_lc_client.similarity_search(query)
print(docs[0].page_content)

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter


text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(bbc_news)
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(texts, embeddings)

In [None]:

from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import OpenAIEmbeddings

vectorstore = DocArrayInMemorySearch.from_texts(
    ["harrison worked at kensho", "bears like to eat honey"],
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | model | output_parser

chain.invoke("where did harrison work?")