In [1]:
import pandas as pd
import numpy as np

## Q1. Getting the embeddings model

In [2]:
github_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv'

In [3]:
url = f'{github_url}?raw=1'
df = pd.read_csv(url)

In [5]:
df.head(3)

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp


In [7]:
model_name = 'multi-qa-mpnet-base-dot-v1'

In [8]:
from sentence_transformers import SentenceTransformer
embedding_model = SentenceTransformer(model_name)

You try to use a model that was created with version 3.0.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





In [9]:
answer_llm = df.iloc[0].answer_llm

In [13]:
v = embedding_model.encode(answer_llm)

In [14]:
v[0]

-0.42244655

## Q2. Computing the dot product

In [15]:
def compute_similarity(record):
    answer_orig = record['answer_orig']
    answer_llm = record['answer_llm']
    
    v_llm = embedding_model.encode(answer_llm)
    v_orig = embedding_model.encode(answer_orig)
    
    return v_llm.dot(v_orig)

In [17]:
evaluations = []

record_df = df.to_dict(orient='records')
for record in record_df:
    sim = compute_similarity(record)
    evaluations.append(sim)

In [19]:
print(f"75 percentile of the evaluations: {np.percentile(evaluations, 75)}")

75 percentile of the evaluations: 32.38980007171631


## Q3. Computing the cosine

In [20]:
def normalization(v):
    norm = np.sqrt((v * v).sum())
    v_norm = v / norm
    return v_norm

In [21]:
def compute_similarity_norm(record):
    answer_orig = record['answer_orig']
    answer_llm = record['answer_llm']
    
    v_llm = embedding_model.encode(answer_llm)
    v_llm_norm = normalization(v_llm)
    v_orig = embedding_model.encode(answer_orig)
    v_orig_norm = normalization(v_orig)
    return v_llm_norm.dot(v_orig_norm)

In [22]:
evaluations_norm = []
for record in record_df:
    sim = compute_similarity_norm(record)
    evaluations_norm.append(sim)


In [25]:
print(f"75 percentile of the evaluations: {np.percentile(evaluations_norm, 75):3f}")

75 percentile of the evaluations: 0.853112


## Q4. Rouge

In [31]:
# !pip install rouge
from rouge import Rouge
rouge_scorer = Rouge()

r = df[df['document']=='5170565b'].loc[10, :]
scores = rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])[0]

In [34]:
print('F score for rouge-1:', scores['rouge-1']['f'])

F score for rouge-1: 0.45454544954545456


## Q5. Average Rouge score

In [36]:
scores['rouge-1']['f'], scores['rouge-2']['f'], scores['rouge-l']['f']

(0.45454544954545456, 0.21621621121621637, 0.393939388939394)

In [39]:
print('Average F-score between rouge-1, rouge-2 and rouge-l',
      (scores['rouge-1']['f'] + scores['rouge-2']['f'] + scores['rouge-l']['f'])/3)

Average F-score between rouge-1, rouge-2 and rouge-l 0.35490034990035496


## Q6. Average Rouge 2

In [56]:
df

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp
...,...,...,...,...,...
1825,Some suggested titles for listing the Machine ...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,What are some suggested titles for listing the...,machine-learning-zoomcamp
1826,It is best advised that you do not list the Ma...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,Should I list the Machine Learning Zoomcamp ex...,machine-learning-zoomcamp
1827,You can incorporate your Machine Learning Zoom...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,In which LinkedIn sections can I incorporate m...,machine-learning-zoomcamp
1828,The advice on including a project link in a CV...,I’ve seen LinkedIn users list DataTalksClub as...,c6a22665,Who gave advice on including a project link in...,machine-learning-zoomcamp


In [78]:
df = df.assign(r2 = lambda r: rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])[0]['rouge-2']['f'])

In [79]:
df.head(3)

Unnamed: 0,answer_llm,answer_orig,document,question,course,r2
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp,0.028169
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp,0.028169
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp,0.028169


In [80]:
print(f"Mean r2 score is {df.loc[:, 'r2'].mean():3f}")

Mean r2 score is 0.028169
