In [18]:
%%time
!pip install -q -q -r requirements.txt

CPU times: user 340 ms, sys: 57.9 ms, total: 398 ms
Wall time: 7.06 s


In [15]:
from llama_index.core.llama_dataset import download_llama_dataset, LabelledRagDataset
from dotenv import load_dotenv
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
import os
from llama_index.core import VectorStoreIndex, Settings
from ragas.metrics import (
    Faithfulness,
    ContextPrecision,
    ContextRecall
)
from ragas.llms import LlamaIndexLLMWrapper
from ragas.embeddings import LlamaIndexEmbeddingsWrapper
from ragas.dataset_schema import SingleTurnSample, EvaluationDataset
from ragas.evaluation import evaluate
from ragas.run_config import RunConfig
import pandas as pd

In [3]:
# Download the "PaulGrahamEssayDataset" llama dataset into ./data.
# The call yields two objects: the labelled RAG dataset (queries plus
# reference contexts/answers) and the source documents that are indexed later.
rag_dataset, documents = download_llama_dataset(
    llama_dataset_class="PaulGrahamEssayDataset",
    download_dir="./data",
    show_progress=True,
)

100%|██████████| 1/1 [00:00<00:00,  4.43it/s]
Loading files: 100%|██████████| 1/1 [00:00<00:00, 63.10file/s]


In [4]:
# Preview the downloaded dataset as a DataFrame; as the last expression of the
# cell it is rendered as the cell output.
rag_dataset.to_pandas()

Unnamed: 0,query,reference_contexts,reference_answer,reference_answer_by,query_by
0,"In the essay, the author mentions his early ex...",[What I Worked On\n\nFebruary 2021\n\nBefore c...,The first computer the author used for program...,ai (gpt-4),ai (gpt-4)
1,The author switched his major from philosophy ...,[What I Worked On\n\nFebruary 2021\n\nBefore c...,The two specific influences that led the autho...,ai (gpt-4),ai (gpt-4)
2,"In the essay, the author discusses his initial...",[I couldn't have put this into words when I wa...,The two main influences that initially drew th...,ai (gpt-4),ai (gpt-4)
3,The author mentions his shift of interest towa...,[I couldn't have put this into words when I wa...,The author shifted his interest towards Lisp a...,ai (gpt-4),ai (gpt-4)
4,"In the essay, the author mentions his interest...",[So I looked around to see what I could salvag...,"The author in the essay is Paul Graham, who wa...",ai (gpt-4),ai (gpt-4)
5,The author discusses his decision to write a b...,[So I looked around to see what I could salvag...,The author decided to write a book on Lisp hac...,ai (gpt-4),ai (gpt-4)
6,"In the essay, the author mentions a quick deci...","[I didn't want to drop out of grad school, but...",The author decided to attempt writing his diss...,ai (gpt-4),ai (gpt-4)
7,The author describes the atmosphere and practi...,"[I didn't want to drop out of grad school, but...","According to the author's account, the student...",ai (gpt-4),ai (gpt-4)
8,"In the essay, the author discusses his experie...","[We actually had one of those little stoves, f...","In the essay, the author explains that paintin...",ai (gpt-4),ai (gpt-4)
9,The author shares his work experience at a com...,"[We actually had one of those little stoves, f...","Interleaf, the company where the author worked...",ai (gpt-4),ai (gpt-4)


In [5]:
# Load variables from a local .env file (if one exists) into the process
# environment. Without this call the os.environ[...] lookups below raise
# KeyError on a fresh kernel unless the variables were exported in the shell.
# By default load_dotenv() does not override variables that are already set.
load_dotenv()

# Embedding model used both for indexing the documents and by the evaluator.
embed_model = AzureOpenAIEmbedding(
    model='text-embedding-3-small', # Update with the embeddings deployment name
    api_key=os.environ['OPENAI_API_KEY'],
    api_version=os.environ['OPENAI_API_VERSION'],
    azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT']
)

# Chat model used for answering queries and, later, as the evaluation judge.
# temperature=0.0 keeps generations as repeatable as the service allows.
llm = AzureOpenAI(
    engine="gpt-4o", # Update with the language model deployment name
    model="gpt-4o", # Update with the language model name
    temperature=0.0,
    api_key=os.environ['OPENAI_API_KEY'],
    api_version=os.environ['OPENAI_API_VERSION'],
    azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT']
)

In [6]:
# Register the Azure-backed models on llama_index's global Settings object.
# NOTE(review): presumably the index and query engine built later pick these
# up as defaults, since neither model is passed to them explicitly — confirm
# against the llama_index Settings documentation.
Settings.embed_model = embed_model
Settings.llm = llm

In [7]:
# Build a vector index over the downloaded documents; the progress bars show
# the node-parsing and embedding-generation steps.
index = VectorStoreIndex.from_documents(documents=documents, show_progress=True)

# Query engine with default options over that index; it is the predictor used
# to answer the dataset queries further down.
query_engine = index.as_query_engine()

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/22 [00:00<?, ?it/s]

In [8]:
# Number of leading examples to keep; a small sample keeps the prediction and
# evaluation cells short.
sample_size = 5

# New labelled dataset holding only the first `sample_size` examples.
sub_dataset = LabelledRagDataset(
    examples=rag_dataset.examples[0:sample_size],
)
sub_dataset.to_pandas()

Unnamed: 0,query,reference_contexts,reference_answer,reference_answer_by,query_by
0,"In the essay, the author mentions his early ex...",[What I Worked On\n\nFebruary 2021\n\nBefore c...,The first computer the author used for program...,ai (gpt-4),ai (gpt-4)
1,The author switched his major from philosophy ...,[What I Worked On\n\nFebruary 2021\n\nBefore c...,The two specific influences that led the autho...,ai (gpt-4),ai (gpt-4)
2,"In the essay, the author discusses his initial...",[I couldn't have put this into words when I wa...,The two main influences that initially drew th...,ai (gpt-4),ai (gpt-4)
3,The author mentions his shift of interest towa...,[I couldn't have put this into words when I wa...,The author shifted his interest towards Lisp a...,ai (gpt-4),ai (gpt-4)
4,"In the essay, the author mentions his interest...",[So I looked around to see what I could salvag...,"The author in the essay is Paul Graham, who wa...",ai (gpt-4),ai (gpt-4)


In [9]:
%%time
predictions = sub_dataset.make_predictions_with(
    predictor = query_engine,
    show_progress = True
)

100%|██████████| 5/5 [01:05<00:00, 13.03s/it]


In [10]:
# Convert each (dataset example, prediction) pair into a ragas SingleTurnSample.
examples = sub_dataset.examples
rag_predictions = predictions.predictions

# Examples and predictions are matched purely by position, so a length
# mismatch would misalign questions and answers; fail loudly instead.
if len(examples) != len(rag_predictions):
    raise ValueError(
        f"Expected one prediction per example, got {len(rag_predictions)} "
        f"predictions for {len(examples)} examples."
    )

list_of_samples = [
    SingleTurnSample(
        user_input=example.query,                # question posed to the RAG pipeline
        reference=example.reference_answer,      # ground-truth answer from the dataset
        response=prediction.response,            # answer generated by the query engine
        retrieved_contexts=prediction.contexts,  # contexts recorded with the prediction
    )
    for example, prediction in zip(examples, rag_predictions)
]

ragas_evaluation_dataset = EvaluationDataset(list_of_samples)
ragas_evaluation_dataset.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference
0,"In the essay, the author mentions his early ex...",[What I Worked On\n\nFebruary 2021\n\nBefore c...,The first computer the author used for program...,The first computer the author used for program...
1,The author switched his major from philosophy ...,[All that seemed left for philosophy were edge...,The author developed an interest in AI due to ...,The two specific influences that led the autho...
2,"In the essay, the author discusses his initial...",[All that seemed left for philosophy were edge...,The author's initial interest in AI was influe...,The two main influences that initially drew th...
3,The author mentions his shift of interest towa...,"[The stroke destroyed her balance, and she was...",The author shifted his interest towards Lisp b...,The author shifted his interest towards Lisp a...
4,"In the essay, the author mentions his interest...","[Its brokenness did, as so often happens, gene...","During his time in grad school, the author att...","The author in the essay is Paul Graham, who wa..."


In [11]:
# Wrap the llama_index LLM and embedding model in ragas' LlamaIndex adapter
# classes; the wrapped objects are what gets handed to the ragas metrics and
# to evaluate() below.
evaluator_llm = LlamaIndexLLMWrapper(llm)
evaluator_embeddings = LlamaIndexEmbeddingsWrapper(embed_model)

In [12]:
%%time

metrics = [
    Faithfulness(llm=evaluator_llm),
    ContextPrecision(llm=evaluator_llm),
    ContextRecall(llm=evaluator_llm)
]
ragas_evaluation_result = evaluate(
    dataset=ragas_evaluation_dataset,
    metrics=metrics,
    llm=evaluator_llm,
    embeddings=evaluator_embeddings,
    run_config=RunConfig(timeout=1800, max_wait=180, max_retries=20),
    show_progress=True,
    batch_size=5
)

Evaluating:   0%|          | 0/15 [00:00<?, ?it/s]

Batch 1/3:   0%|          | 0/5 [00:00<?, ?it/s]

CPU times: user 771 ms, sys: 90.5 ms, total: 862 ms
Wall time: 7min 32s


In [13]:
# Flatten the evaluation result into a DataFrame: one row per sample, with the
# sample fields followed by one score column per metric (see the output below).
df_ragas_result = ragas_evaluation_result.to_pandas()
df_ragas_result

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,context_precision,context_recall
0,"In the essay, the author mentions his early ex...",[What I Worked On\n\nFebruary 2021\n\nBefore c...,The first computer the author used for program...,The first computer the author used for program...,1.0,1.0,1.0
1,The author switched his major from philosophy ...,[All that seemed left for philosophy were edge...,The author developed an interest in AI due to ...,The two specific influences that led the autho...,0.9,1.0,1.0
2,"In the essay, the author discusses his initial...",[All that seemed left for philosophy were edge...,The author's initial interest in AI was influe...,The two main influences that initially drew th...,1.0,1.0,1.0
3,The author mentions his shift of interest towa...,"[The stroke destroyed her balance, and she was...",The author shifted his interest towards Lisp b...,The author shifted his interest towards Lisp a...,1.0,0.5,1.0
4,"In the essay, the author mentions his interest...","[Its brokenness did, as so often happens, gene...","During his time in grad school, the author att...","The author in the essay is Paul Graham, who wa...",1.0,1.0,0.866667


In [16]:
# Load a records-oriented JSON file of evaluation rows from disk.
# NOTE(review): presumably a previously saved ragas run over the 10 examples —
# confirm where test-dataset.json is produced.
test_dataset_path = './test-dataset.json'
df_test_dataset = pd.read_json(test_dataset_path, orient='records')
df_test_dataset

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,context_precision,context_recall
0,"In the essay, the author mentions his early ex...",[What I Worked On\n\nFebruary 2021\n\nBefore c...,The first computer the author used for program...,The first computer the author used for program...,1.0,1.0,1.0
1,The author switched his major from philosophy ...,[I couldn't have put this into words when I wa...,The author developed an interest in AI due to ...,The two specific influences that led the autho...,0.9,1.0,1.0
2,"In the essay, the author discusses his initial...",[I couldn't have put this into words when I wa...,The author was initially drawn to AI by two ma...,The two main influences that initially drew th...,1.0,1.0,1.0
3,The author mentions his shift of interest towa...,[I couldn't have put this into words when I wa...,The author shifted his interest towards Lisp b...,The author shifted his interest towards Lisp a...,0.9,1.0,1.0
4,"In the essay, the author mentions his interest...",[So I looked around to see what I could salvag...,"During his time in grad school, the author att...","The author in the essay is Paul Graham, who wa...",0.846154,1.0,1.0
5,The author discusses his decision to write a b...,[I couldn't have put this into words when I wa...,The author decided to write a book on Lisp hac...,The author decided to write a book on Lisp hac...,0.5,1.0,1.0
6,"In the essay, the author mentions a quick deci...",[So I looked around to see what I could salvag...,The author made a quick decision to attempt to...,The author decided to attempt writing his diss...,1.0,1.0,1.0
7,The author describes the atmosphere and practi...,"[I didn't want to drop out of grad school, but...",The author describes the atmosphere at the Acc...,"According to the author's account, the student...",1.0,1.0,1.0
8,"In the essay, the author discusses his experie...","[We actually had one of those little stoves, f...",The author describes painting still lives as d...,"In the essay, the author explains that paintin...",0.923077,1.0,1.0
9,The author shares his work experience at a com...,"[We actually had one of those little stoves, f...",Interleaf had added a unique feature to their ...,"Interleaf, the company where the author worked...",0.8125,1.0,0.857143
