<a target="_blank" href="https://colab.research.google.com/github/gox6/colab-demos/blob/main/rags/evaluate-rags-rigorously-or-perish.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [7]:
# Installing Python packages & hiding
!pip install --quiet \
  arxiv \
  chromadb \
  langchain \
  polars \
  pymupdf \
  pytube \
  ragas xmltodict \
  youtube-transcript-api

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.5/3.5 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.8/30.8 MB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Importing the packages
from collections import Counter, defaultdict
import os

import chromadb
from langchain_core.documents.base import Document
from langchain_community.document_loaders import YoutubeLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import polars as pl
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

In [3]:
# Managing secrets
# - If using Colab please use Colab Secrets
# - If running outside Colab please provide secrets as environmental variables

COLAB = os.getenv("COLAB_RELEASE_TAG") is not None

if COLAB:
  from google.colab import userdata, data_table
  # Secrets
  OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
  os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
  runtime_info = "Colab runtime"

  # Enabling Colab's data formatter for pandas
  data_table.enable_dataframe_formatter()
else:
  # Secrets
  OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
  runtime_info = "Non Colab runtime"

print(runtime_info)

Colab runtime


In [11]:
from datasets import load_dataset
dataset = load_dataset("cnn_dailymail", '1.0.0')

Downloading data:   0%|          | 0.00/256M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/257M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/259M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

In [26]:
from langchain_community.document_loaders.hugging_face_dataset import (
    HuggingFaceDatasetLoader,
)
loader = HuggingFaceDatasetLoader("cnn_dailymail", "article", Config='1.0.0')
news = loader.load()

TypeError: BaseLoader.load() got an unexpected keyword argument 'Config'

In [34]:
cnn_pd = dataset["train"][0:100]
cnn_pd.set_format("pandas")

AttributeError: 'dict' object has no attribute 'set_format'

In [30]:
dataset["train"][ "highlights"]

["Harry Potter star Daniel Radcliffe gets £20M fortune as he turns 18 Monday . Young actor says he has no plans to fritter his cash away . Radcliffe's earnings from first five Potter films have been held in trust fund .",
 'Mentally ill inmates in Miami are housed on the "forgotten floor" Judge Steven Leifman says most are there as a result of "avoidable felonies" While CNN tours facility, patient shouts: "I am the son of the president" Leifman says the system is unjust and he\'s fighting for change .',
 'NEW: "I thought I was going to die," driver says . Man says pickup truck was folded in half; he just has cut on face . Driver: "I probably had a 30-, 35-foot free fall" Minnesota bridge collapsed during rush hour Wednesday .',
 'Five small polyps found during procedure; "none worrisome," spokesman says . President reclaims powers transferred to vice president . Bush undergoes routine colonoscopy at Camp David .',
 "NEW: NFL chief, Atlanta Falcons owner critical of Michael Vick's condu

In [81]:
# 30 Youtube videos from BBC News

youtube_bbc_news_videos = ["https://youtu.be/YEHA-u8b43A", "https://youtu.be/TlFfHjOMSXQ", "https://youtu.be/-kZolk9EoMA", "https://youtu.be/JQMZkrz6X08",
                           "https://youtu.be/D1iMZaLjBU4", "https://youtu.be/Sqcv9lCADxE", "https://youtu.be/Sshl0SFO4ZI", "https://youtu.be/mpOyMOZWEcU",
                           "https://youtu.be/Cg3YMWcjLI4", "https://youtu.be/yQIMSv9Luw4", "https://youtu.be/5LYGnqoCLGk", "https://youtu.be/KCepbsLLUMY",
                           "https://youtu.be/lHggWT2iLdo", "https://youtu.be/TTUsxD62398", "https://youtu.be/3YMleRGjeqE", "https://youtu.be/h-5dqQMZTZQ",
                           "https://youtu.be/4GBcZJpw8yI", "https://youtu.be/QOUGlWEpwL4", "https://youtu.be/T05I-SBhXoI", "https://youtu.be/Iz-XY6XfXjk",
                           "https://youtu.be/yyLFQrb--pw", "https://youtu.be/wMqJbMPNM6A", "https://youtu.be/kOcuwLBPBP8", "https://youtu.be/7goRcrFKs3U",
                           "https://youtu.be/FoBOSLofM3E", "https://youtu.be/97nEBjiQI1M", "https://youtu.be/bjmK4lGKNqY", "https://youtu.be/XWLA5A6bpwE",
                           "https://youtu.be/DkmrhVpCmac", "https://youtu.be/Dxar1d1aTUo"
                          ]


count = Counter(youtube_bbc_news_videos)
assert len(count) == 30

In [101]:
# Collecting transcripts of Youtube videos
# Long running cell: around 60s

def get_youtube_transcripts(urls: list[str]) -> list[Document]:

  docs = list(map(lambda url: YoutubeLoader.from_youtube_url(url, add_video_info=True).load()[0], urls))

  return docs

bbc_news = get_youtube_transcripts(youtube_bbc_news_videos)


In [169]:
bbc_

[Document(page_content="well the Tesla now and a whistle blow whistleblower who's battled Elon Musk and the car maker through the courts for a decade has been speaking to the BBC Christina Balon says she's still seeking a public apology for how she was treated after raising safety concerns about Tesla vehicles until 2014 Miss Balon was a rising star at the car maker in the US she spoke to our technology editor Zoe Kleinman I was the only woman in the team like engineering them and uh in the beginning was great in the early days Christina Balon was doing so well at Tesla her initials were engraved on the car's batteries but she says it wasn't long before things took a turn for the worse everything went South when I realized that they were hiding some critical safety issues she claims they pretended that they didn't know and you decided to go right to the top you went to Elon Musk himself I tried to and what exactly was the safety concern they realized that they make a design and gening 

In [167]:
# Transforming list of LangChain documents into dataframe to review data conveniently

def to_pl(list_of_docs: list[Document]) -> pl.DataFrame:
  data = defaultdict(list)

  for doc in list_of_docs:
    doc_dict = doc.dict()
    for key in doc_dict.keys():
      if key != 'metadata':
        data[key].append(doc_dict[key])
      else:
        metadata = doc_dict['metadata']
        for subkey in metadata.keys():
          data[subkey].append(metadata[subkey])

  df_pl = pl.DataFrame(data).rename({"length": "length_in_seconds"})
  df_pl = df_pl.with_columns([(pl.lit('https://youtu.be/') + pl.col('source')).alias('video_url'),
                              (pl.col('page_content').str.split(by=' ').list.len()).alias("length_in_words")])

  df_pl = df_pl.select([ 'publish_date', 'author', 'title',  'video_url', 'view_count', 'length_in_seconds', 'length_in_words', 'page_content' ])
  return df_pl


df_pl = to_pl(bbc_news)
df_pd = df_pl.to_pandas()   # Switching to Pandas from Polars dataframe as it is then better displayed in Colab

# Displaying data with Colab formatter, or just in pandas
display(df_pd)

Unnamed: 0,publish_date,author,title,video_url,view_count,length_in_seconds,length_in_words,page_content
0,2024-04-16 00:00:00,BBC News,Tesla whistleblower says she wants an Elon Mus...,https://youtu.be/YEHA-u8b43A,48723,218,531,well the Tesla now and a whistle blow whistleb...
1,2024-04-16 00:00:00,BBC News,UK smoking ban: MPs to vote on banning young p...,https://youtu.be/TlFfHjOMSXQ,26558,192,513,smoking is the single biggest cause of prevent...
2,2024-04-16 00:00:00,BBC News,Right-wing event in Brussels told to shut down...,https://youtu.be/-kZolk9EoMA,24412,181,592,the organizers of a conference in Brussels whi...
3,2024-04-16 00:00:00,BBC News,Israel demands sanctions on Iranian missile pr...,https://youtu.be/JQMZkrz6X08,150330,458,1074,and we begin with events in the Middle East wh...
4,2024-04-16 00:00:00,BBC News,Olympic flame lit in Greece's ancient Olympia ...,https://youtu.be/D1iMZaLjBU4,40224,695,1062,now I want to take you back to Greece and to O...
5,2024-04-11 00:00:00,BBC News,Elon Musk’s AI chatbot generated disinformatio...,https://youtu.be/Sqcv9lCADxE,55637,895,2875,you're watching the context it's time for our ...
6,2024-04-04 00:00:00,BBC News,Taiwan earthquake: More than 600 stranded a da...,https://youtu.be/Sshl0SFO4ZI,159263,445,1243,let's turn to Taiwan rescue efforts are underw...
7,2024-04-03 00:00:00,BBC News,Uganda’s top appeals court rejects petition to...,https://youtu.be/mpOyMOZWEcU,31330,219,596,to Uganda next and the country's constitutiona...
8,2024-04-11 00:00:00,BBC News,US President Joe Biden vows 'ironclad' support...,https://youtu.be/Cg3YMWcjLI4,111440,353,1005,hello I'm Nikki schill welcome to the program ...
9,2024-04-09 00:00:00,BBC News,Lord Cameron meets Antony Blinken after 'priva...,https://youtu.be/yQIMSv9Luw4,44282,484,1440,right we're going to look now at UK US diploma...


In [213]:
# Example LangChain Document object
bbc_news[19]

Document(page_content="3 months after a door plug blew off a Boeing plane mid-flight the US Airline maker is facing increased scrutiny well now the Federal Aviation Administration is investigating a whistleblower's claims that the company took shortcuts that impacted quality and safety our North America business correspondent Aon Delmore has the latest a former Boeing Employee who worked on the 787 accused the company of cutting Corners during production and overlooking engineering problems that could weak in the plane's structural Integrity Boeing dismissed the claims as inaccurate and reiterated confidence in the 787 Dreamliner meanwhile the company delivered its lowest number of planes in the first quarter of the year since 2021 just 83 planes down from 157 the quarter prior Boeing Executives say the company is slowing down production so that it can improve quality control but delivery delays are sparking criticism from customers who have little choice to fulfill their needs Boeing 

In [None]:
client = chromadb.Client()


In [202]:
base_docs = bbc_news[19:20]

# generator with openai models
llm = ChatOpenAI(model="gpt-3.5-turbo")
generator_llm = llm
critic_llm = llm
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

# Change resulting question type distribution
distributions = {
    simple: 1,
}




# use generator.generate_with_llamaindex_docs if you use llama-index as document loader
testset = generator.generate_with_langchain_docs(base_docs, 1, distributions)
testset.to_pandas()

embedding nodes:   0%|          | 0/2 [00:00<?, ?it/s]



Generating:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What are the recent challenges faced by Boeing...,[3 months after a door plug blew off a Boeing ...,The recent challenges faced by Boeing in relat...,simple,"[{'source': 'Iz-XY6XfXjk', 'title': 'Boeing hi...",True


In [221]:
ts_ds = testset.to_dataset()
ts_ds = ts_ds.add_column("answer", ts_ds["ground_truth"])

In [224]:
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    answer_correctness
)

result = evaluate(
    ts_ds,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
        answer_correctness
    ],
)

Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

In [225]:
result

{'context_precision': 1.0000, 'faithfulness': 1.0000, 'answer_relevancy': 0.9852, 'context_recall': 1.0000, 'answer_correctness': 1.0000}

In [236]:
from langchain.vectorstores import Chroma


client = chromadb.EphemeralClient()
full_text_collection = client.get_or_create_collection(name="full_text")
full_text_collection.add(
    documents=,
    metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...],
    ids=["id1", "id2", "id3", ...]
)









In [183]:
# import
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import CharacterTextSplitter


# split it into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(docs)

# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# load it into Chroma
db = Chroma.from_documents(docs, embedding_function)

# query it
query = "What did the president say about Ketanji Brown Jackson"
docs = db.similarity_search(query)

# print results
print(docs[0].page_content)

ImportError: Could not import sentence_transformers python package. Please install it with `pip install sentence-transformers`.

In [186]:
embeddings = OpenAIEmbeddings()
new_client = chromadb.EphemeralClient()
openai_lc_client = Chroma.from_documents(
    bbc_news, embeddings, client=new_client, collection_name="no_split"
)



query = "What did the president say about Ketanji Brown Jackson"
docs = openai_lc_client.similarity_search(query)
print(docs[0].page_content)

hello I'm Nikki schill welcome to the program we start this hour with the latest developments in the Middle East President Biden has promised Israel Ironclad United States support amid fears that Iran could launch reprisals for an attack that killed senior Iranians Us Media are reporting that an American General visit Israel on Thursday to discuss Washington's fears President Biden warned at Iran is threatening to launch a significant attack after Israel struck the Iranian Consulate in Syria 10 days ago Mr Biden was speaking hours after the Iranian supreme leader again said Israel would be punished for the strike on its consulate meanwhile the Hamas political leader ishmail hania says there'll be no change in the group's demands for a permanent ceas fight in Gaza despite the killing of three of his sons in an Israeli air strike Israeli media is reporting that the Prime Minister Benjamin Netanyahu had not been briefed in advance of the attack this video is believed to show Ismael hania 

In [233]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter


text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(bbc_news)
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(texts, embeddings)

ImportError: Could not import faiss python package. Please install it with `pip install faiss-gpu` (for CUDA supported GPU) or `pip install faiss-cpu` (depending on Python version).

In [None]:

from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import OpenAIEmbeddings

vectorstore = DocArrayInMemorySearch.from_texts(
    ["harrison worked at kensho", "bears like to eat honey"],
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | model | output_parser

chain.invoke("where did harrison work?")