In [23]:
import pandas as pd
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
import numpy as np


# GitHub "blob" page URL of the results CSV; the load cell appends "?raw=1" to it before reading.
github_url = "https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv"
# Model name passed to SentenceTransformer(...) to build the embedding model.
model_name = "multi-qa-mpnet-base-dot-v1"


In [5]:
# "?raw=1" asks GitHub for the raw file rather than the HTML blob page.
url = github_url + '?raw=1'
df = pd.read_csv(url)

In [6]:
df

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp
...,...,...,...,...,...
1825,Some suggested titles for listing the Machine ...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,What are some suggested titles for listing the...,machine-learning-zoomcamp
1826,It is best advised that you do not list the Ma...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,Should I list the Machine Learning Zoomcamp ex...,machine-learning-zoomcamp
1827,You can incorporate your Machine Learning Zoom...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,In which LinkedIn sections can I incorporate m...,machine-learning-zoomcamp
1828,The advice on including a project link in a CV...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,Who gave advice on including a project link in...,machine-learning-zoomcamp


## Q1: Getting the embeddings model: What's the first value of the resulting vector?

In [10]:
# Build the sentence-embedding model named by `model_name`; reused by every similarity cell below.
embedding_model = SentenceTransformer(model_name)

In [11]:
# LLM answer of the first row (positional index 0).
answer_llm = df["answer_llm"].iloc[0]


In [12]:
answer_llm

'You can sign up for the course by visiting the course page at [http://mlzoomcamp.com/](http://mlzoomcamp.com/).'

In [15]:
# Embed the first LLM answer and show the first component of the resulting vector (Q1).
embeddings = embedding_model.encode(answer_llm)
first_value = embeddings[0]
print(first_value)

-0.4224468


## Q2. Computing the dot product: What's the 75th percentile of the score?

In [16]:
# One dict per DataFrame row, keyed by column name; the scoring functions below take these records.
results = df.to_dict(orient='records')

In [21]:
def compute_dot_product(record):
    """Return the dot product of the embeddings of a record's two answers.

    Args:
        record: mapping with the keys 'answer_orig' and 'answer_llm'.

    Returns:
        The dot product between the embedding of the LLM answer and the
        embedding of the original answer, both produced by the notebook-level
        `embedding_model`.
    """
    answer_orig, answer_llm = record['answer_orig'], record['answer_llm']

    llm_vector = embedding_model.encode(answer_llm)
    orig_vector = embedding_model.encode(answer_orig)
    return llm_vector.dot(orig_vector)

In [22]:
# Dot-product similarity for every record, in row order; tqdm shows progress.
evaluations = [compute_dot_product(record) for record in tqdm(results)]

100%|█████████████████████████████████████████████████████| 1830/1830 [10:16<00:00,  2.97it/s]


In [24]:
# Median of the dot-product scores.
print(np.median(evaluations))

28.897568


In [29]:
# Q2: 75th percentile of the dot-product scores.
print(np.percentile(evaluations, 75))

32.38979244232178


In [25]:
# Store the per-record dot-product scores as a new column (adds a column to `df` in place).
df["dot_product"] = evaluations

In [27]:
df["dot_product"].describe()

count    1830.000000
mean       28.015770
std         6.413297
min         3.511809
25%        24.631172
50%        28.897568
75%        32.389792
max        44.296776
Name: dot_product, dtype: float64

## Q3. Computing the cosine: What's the 75th percentile of the cosine scores?

In [30]:
def normalize(v):
    """Scale a vector to unit Euclidean (L2) length.

    Args:
        v: numeric array or array-like (anything ``np.asarray`` accepts).

    Returns:
        A new array equal to ``v`` divided by its L2 norm. If the norm is
        zero, an all-zero array of the same shape is returned instead of the
        NaNs that ``0 / 0`` would produce, so a later dot product gives 0.
    """
    v = np.asarray(v)
    norm = np.sqrt((v * v).sum())
    if norm == 0:
        # A zero vector has no direction; dividing would emit a RuntimeWarning
        # and fill the result with NaN. `v / 1.0` only fixes the float dtype.
        return np.zeros_like(v / 1.0)
    return v / norm

In [31]:
def compute_dot_product_normalized(record):
    """Return the dot product of the normalized embeddings of a record's answers.

    Each embedding is passed through `normalize` before the dot product is
    taken, which makes the result the cosine similarity of the two embeddings.

    Args:
        record: mapping with the keys 'answer_orig' and 'answer_llm'.

    Returns:
        The dot product of the normalized LLM-answer embedding with the
        normalized original-answer embedding.
    """
    answer_orig, answer_llm = record['answer_orig'], record['answer_llm']

    # Embed and normalize the LLM answer first, then the original answer.
    unit_llm, unit_orig = (
        normalize(embedding_model.encode(text))
        for text in (answer_llm, answer_orig)
    )
    return unit_llm.dot(unit_orig)

In [32]:
# Normalized (cosine) similarity for every record, in row order; tqdm shows progress.
normalized_evaluations = [
    compute_dot_product_normalized(record) for record in tqdm(results)
]

100%|█████████████████████████████████████████████████████| 1830/1830 [10:10<00:00,  3.00it/s]


In [33]:
# Q3: 75th percentile of the normalized (cosine) scores.
print(np.percentile(normalized_evaluations, 75))

0.8531119078397751


## Q4. Rouge: What's the F score for rouge-1?

In [34]:
from rouge import Rouge


In [35]:
# Single Rouge instance shared by all ROUGE cells below.
rouge_scorer = Rouge()

In [63]:
def compute_rouge(record):
    """Return the ROUGE-1 F-score of a record's LLM answer against its original answer.

    Args:
        record: mapping with the keys 'answer_orig' and 'answer_llm'.

    Returns:
        The "f" value of the "rouge-1" entry of the first score dict returned
        by the notebook-level `rouge_scorer`.
    """
    answer_orig = record['answer_orig']
    answer_llm = record['answer_llm']

    # Rouge.get_scores takes (hypothesis, reference): the generated answer is
    # the hypothesis and the original answer is the reference. The F-score is
    # symmetric in precision and recall, so the returned value should not
    # change; the precision/recall roles inside `scores` are now correct.
    scores = rouge_scorer.get_scores(answer_llm, answer_orig)[0]
    return scores["rouge-1"]["f"]

In [64]:
# ROUGE-1 F-score for every record, in row order; tqdm shows progress.
rouge_fscore_evaluations = [compute_rouge(record) for record in tqdm(results)]

100%|████████████████████████████████████████████████████| 1830/1830 [00:02<00:00, 689.30it/s]


In [65]:
rouge_fscore_evaluations

[0.09523809178130524,
 0.12499999641113292,
 0.41558441095631643,
 0.2162162117421476,
 0.14207649881095297,
 0.43137254522106894,
 0.41269840791131274,
 0.30434782321361065,
 0.5172413747919143,
 0.34374999517578125,
 0.45454544954545456,
 0.6060606010606061,
 0.753623183415249,
 0.7297297247881666,
 0.6849315018952901,
 0.4772727222727273,
 0.47999999515022235,
 0.3888888841358025,
 0.2857142807256236,
 0.37499999505,
 0.24719100710011369,
 0.2682926793664485,
 0.5242718399095109,
 0.18918918665814466,
 0.5048543641813555,
 0.6206896501724138,
 0.3333333290589569,
 0.4130434736011342,
 0.633663361446917,
 0.359550557257922,
 0.4489795870054144,
 0.5217391255198489,
 0.4090909041322315,
 0.25714285306122453,
 0.16666666172839517,
 0.19999999601250007,
 0.29999999601250005,
 0.42857142429705225,
 0.23255813514332083,
 0.7096774143756504,
 0.4848484798530762,
 0.3934426180166622,
 0.5245901590002687,
 0.3692307642414202,
 0.22222221852839513,
 0.11111110797839514,
 0.15789473383656513,


In [66]:
# The "rouge" column holds the ROUGE-1 F-scores computed by compute_rouge (adds a column to `df` in place).
df["rouge"] = rouge_fscore_evaluations
# Last expression of the cell: summary statistics of that column.
df["rouge"].describe()

count    1830.000000
mean        0.351695
std         0.158905
min         0.000000
25%         0.238887
50%         0.356300
75%         0.460133
max         0.950000
Name: rouge, dtype: float64

In [70]:
df[df.document == "5170565b"]

Unnamed: 0,answer_llm,answer_orig,document,question,course,dot_product,rouge
10,"Yes, all sessions are recorded, so if you miss...","Everything is recorded, so you won’t miss anyt...",5170565b,Are sessions recorded if I miss one?,machine-learning-zoomcamp,32.344711,0.454545
11,"Yes, you can ask your questions in advance if ...","Everything is recorded, so you won’t miss anyt...",5170565b,Can I ask questions in advance if I can't atte...,machine-learning-zoomcamp,31.441839,0.606061
12,"If you miss a session, don't worry! Everything...","Everything is recorded, so you won’t miss anyt...",5170565b,How will my questions be addressed if I miss a...,machine-learning-zoomcamp,36.380722,0.753623
13,"Yes, there is a way to catch up on a missed se...","Everything is recorded, so you won’t miss anyt...",5170565b,Is there a way to catch up on a missed session?,machine-learning-zoomcamp,33.340508,0.72973
14,"Yes, you can still interact with instructors a...","Everything is recorded, so you won’t miss anyt...",5170565b,Can I still interact with instructors after mi...,machine-learning-zoomcamp,30.606165,0.684932


## Q5. Average rouge score

In [76]:
# Row at position 10 as a Series (the record itself, not an index value).
idx10 = df.iloc[10]

In [78]:
idx10["answer_orig"]

'Everything is recorded, so you won’t miss anything. You will be able to ask your questions for office hours in advance and we will cover them during the live stream. Also, you can always ask questions in Slack.'

In [83]:
# Rouge.get_scores takes (hypothesis, reference): the LLM answer is the
# hypothesis and the original answer is the reference.
# NOTE(review): rouge-1/rouge-2 F-scores are symmetric, but rouge-l may not be,
# so re-run this cell to refresh the printed rouge-l and average values.
score_idx10 = rouge_scorer.get_scores(idx10["answer_llm"], idx10["answer_orig"])[0]

# F-scores in the order rouge-1, rouge-2, rouge-l, followed by their mean.
rouge_f_scores = [score_idx10[metric]["f"] for metric in ("rouge-1", "rouge-2", "rouge-l")]
for f_score in rouge_f_scores:
    print(f_score)
print(sum(rouge_f_scores) / len(rouge_f_scores))

0.45454544954545456
0.21621621121621637
0.42424241924242434
0.36500136000136507


## Q6. Average rouge-2 score for all data points

In [84]:
def compute_all_rouge(record):
    """Return the ROUGE-2 F-score of a record's LLM answer against its original answer.

    Despite the name, only the "rouge-2" F-score is returned; no other ROUGE
    variant is included.

    Args:
        record: mapping with the keys 'answer_orig' and 'answer_llm'.

    Returns:
        The "f" value of the "rouge-2" entry of the first score dict returned
        by the notebook-level `rouge_scorer`.
    """
    answer_orig = record['answer_orig']
    answer_llm = record['answer_llm']

    # Rouge.get_scores takes (hypothesis, reference): the generated answer is
    # the hypothesis and the original answer is the reference. The F-score is
    # symmetric in precision and recall, so the returned value should not
    # change; the precision/recall roles inside `scores` are now correct.
    scores = rouge_scorer.get_scores(answer_llm, answer_orig)[0]
    return scores["rouge-2"]["f"]

In [85]:
# ROUGE-2 F-score for every record, in row order; tqdm shows progress.
rouge_fscore_2_evaluations = [compute_all_rouge(record) for record in tqdm(results)]

100%|████████████████████████████████████████████████████| 1830/1830 [00:02<00:00, 730.81it/s]


In [88]:
# Store the per-record ROUGE-2 F-scores as a new column (adds a column to `df` in place).
df["rouge-2"] = rouge_fscore_2_evaluations
# Last expression of the cell: summary statistics; the "mean" row is the average ROUGE-2 F-score.
df["rouge-2"].describe()

count    1830.000000
mean        0.176717
std         0.135790
min         0.000000
25%         0.074074
50%         0.156993
75%         0.251150
max         0.952381
Name: rouge-2, dtype: float64

In [87]:
df

Unnamed: 0,answer_llm,answer_orig,document,question,course,dot_product,rouge,rouge-2
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp,17.515987,0.095238,0.095238
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp,13.418400,0.125000,0.125000
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp,25.313251,0.415584,0.415584
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp,12.147415,0.216216,0.216216
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp,18.747738,0.142076,0.142076
...,...,...,...,...,...,...,...,...
1825,Some suggested titles for listing the Machine ...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,What are some suggested titles for listing the...,machine-learning-zoomcamp,34.590019,0.336134,0.336134
1826,It is best advised that you do not list the Ma...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,Should I list the Machine Learning Zoomcamp ex...,machine-learning-zoomcamp,34.473343,0.453782,0.453782
1827,You can incorporate your Machine Learning Zoom...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,In which LinkedIn sections can I incorporate m...,machine-learning-zoomcamp,35.101955,0.442748,0.442748
1828,The advice on including a project link in a CV...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,Who gave advice on including a project link in...,machine-learning-zoomcamp,23.863712,0.191489,0.191489
