In [1]:
import pandas as pd

In [10]:
# Location of the results CSV on GitHub; the "?raw=true" query string asks
# for the file contents instead of the rendered page.
github_url = "https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv"
url = github_url + "?raw=true"

# Read the file and keep only the first 300 records for the evaluation below.
df = pd.read_csv(url).iloc[:300]
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp


## Q1. Getting the embeddings model

In [13]:
from sentence_transformers import SentenceTransformer
# Name of the pretrained sentence-embedding checkpoint to load.
model_name = "multi-qa-mpnet-base-dot-v1"

# Creating the model downloads its files on first use (per the original note,
# into the Hugging Face cache, e.g. C:\Users\<user>\.cache\huggingface\hub).
embedding_model = SentenceTransformer(model_name)

You try to use a model that was created with version 3.0.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





model.safetensors:  31%|###1      | 136M/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [14]:
# LLM-generated answer of the record at position 0.
first_record = df.iloc[0]
answer_llm = first_record["answer_llm"]

In [16]:
# Embed the selected answer and show the first component of the result.
answer_vector = embedding_model.encode(answer_llm)
print(answer_vector[0])

-0.42244676


## Q2. Dot product between LLM and original answers

In [17]:
import numpy as np

In [27]:
# Dot-product similarity between the embedding of each LLM answer and the
# embedding of the corresponding original answer, one value per record.
evaluations = []

for orig_text, llm_text in zip(df['answer_orig'], df['answer_llm']):
    orig_vec = embedding_model.encode(orig_text)
    llm_vec = embedding_model.encode(llm_text)
    evaluations.append(llm_vec.dot(orig_vec))

In [28]:
# 75th percentile of the dot-product scores.
dot_p75 = np.percentile(evaluations, 75)
print(dot_p75)

31.674309730529785


## Q3. Cosine similarity (dot product of normalized embeddings)

In [29]:
def normalize(v):
    """Scale a vector to unit Euclidean (L2) length.

    Parameters
    ----------
    v : array-like
        Numeric vector. It is converted with ``np.asarray`` so plain
        Python sequences are accepted as well as NumPy arrays.

    Returns
    -------
    numpy.ndarray
        ``v`` divided by its L2 norm. An all-zero input is returned as
        zeros rather than NaNs, so a single degenerate vector cannot
        poison downstream aggregates such as ``np.percentile``.
    """
    v = np.asarray(v)
    norm = np.sqrt((v * v).sum())
    # A zero norm means there is no direction to preserve: dividing by 1
    # leaves the zeros intact and avoids the 0/0 -> NaN (and RuntimeWarning).
    divisor = 1.0 if norm == 0 else norm
    return v / divisor

In [32]:
# Same comparison as the dot-product loop, but each embedding is passed
# through normalize() first, so every score is the dot product of the two
# normalized vectors.
evaluations_norm = []

for orig_text, llm_text in zip(df['answer_orig'], df['answer_llm']):
    orig_unit = normalize(embedding_model.encode(orig_text))
    llm_unit = normalize(embedding_model.encode(llm_text))
    evaluations_norm.append(llm_unit.dot(orig_unit))

In [33]:
# 75th percentile of the normalized (cosine) scores.
cosine_p75 = np.percentile(evaluations_norm, 75)
print(cosine_p75)

0.8362347632646561


## Q4. ROUGE scores for the record at index 10

In [None]:
# Requires the `rouge` package; if it is missing, install it into the kernel's environment with: %pip install rouge

In [35]:
from rouge import Rouge
# One scorer instance, reused by the later ROUGE cells.
rouge_scorer = Rouge()

# Record at position 10, converted to a plain dict keyed by column name.
r = df.iloc[10].to_dict()

# get_scores returns a list; with a single pair its first entry holds the
# per-metric results for that pair.
hypothesis, reference = r['answer_llm'], r['answer_orig']
scores = rouge_scorer.get_scores(hypothesis, reference)[0]

In [36]:
# F-score of the ROUGE-1 entry for the record scored above.
rouge_1 = scores['rouge-1']
print(rouge_1['f'])

0.45454544954545456


## Q5. Average of the ROUGE-1, ROUGE-2 and ROUGE-L F-scores

In [38]:
# Mean of the F-scores of the three ROUGE variants held in `scores`.
rouge_variants = ('rouge-1', 'rouge-2', 'rouge-l')
sum(scores[variant]['f'] for variant in rouge_variants) / 3

0.35490034990035496

## Q6. Mean ROUGE-2 F-score over all records

In [40]:
# Mean ROUGE-2 F-score across every record in df.
# NOTE: this rebinds `scores` (until now the per-metric dict of a single
# record) to a list of floats, so the earlier cells that index
# scores['rouge-1'] etc. must not be re-run after this one without first
# re-running the cell that builds that dict.
scores = [
    rouge_scorer.get_scores(llm_text, orig_text)[0]['rouge-2']['f']
    for llm_text, orig_text in zip(df['answer_llm'], df['answer_orig'])
]

print(np.mean(scores))

0.20696501983423318
