In [1]:
import pandas as pd


In [2]:
# GitHub "blob" page of the gpt-4o-mini results CSV in the course repository.
github_url = "https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv"
# The "?raw=1" query string is appended so the CSV content itself is requested
# rather than the HTML page that wraps it.
url = github_url + '?raw=1'
df = pd.read_csv(url)

In [3]:
# Keep only the first 300 rows; every later cell works on this subset.
df = df.head(300)

**Q1**

In [4]:
from sentence_transformers import SentenceTransformer


# Name of the sentence-transformers model used for every embedding below.
model_name = 'multi-qa-mpnet-base-dot-v1'
# Instantiate the encoder once; the later cells reuse `embedding_model`.
embedding_model = SentenceTransformer(model_name_or_path=model_name)

You try to use a model that was created with version 3.0.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





In [7]:
# LLM-generated answer of the first record (positional row 0).
answer_llm = df['answer_llm'].iloc[0]

You can sign up for the course by visiting the course page at [http://mlzoomcamp.com/](http://mlzoomcamp.com/).


In [8]:
# Embed the first LLM answer and show the first component of the vector.
embedding = embedding_model.encode(answer_llm)
first_component = embedding[0]
print(first_component)

-0.42244655


**Q2**

In [11]:
import numpy as np
from tqdm import tqdm

# Dot-product similarity between the embedding of the LLM answer and the
# embedding of the original answer, one value per row of `df`.
#
# The accumulator is created here, inside the cell, so that the cell works on
# a fresh kernel (Restart & Run All) and does not pile up duplicate entries
# when it is executed more than once.
evaluations = []

print("Starting to compute dot products...")
for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing records"):

    answer_llm = row['answer_llm']
    answer_orig = row['answer_orig']

    # Each answer is encoded on its own; the two vectors are then compared.
    embedding1 = embedding_model.encode(answer_llm)
    embedding2 = embedding_model.encode(answer_orig)

    dot_product = np.dot(embedding1, embedding2)

    evaluations.append(dot_product)

# 75th percentile of the per-row dot products.
percentile_75 = np.percentile(evaluations, 75)
print(f"75th percentile of dot products: {percentile_75}")

Starting to compute dot products...


Processing records: 100%|███████████████████████████████████████████████████████████████████| 300/300 [02:22<00:00,  2.11it/s]

75th percentile of dot products: 31.67430877685547





**Q3**

In [13]:
def normalize_vector(v):
    """Scale ``v`` to unit Euclidean length.

    Parameters
    ----------
    v : array-like
        Vector to normalise. Anything ``np.asarray`` accepts (NumPy array,
        list, tuple) is allowed.

    Returns
    -------
    numpy.ndarray
        ``v`` divided by its L2 norm. A vector whose norm is zero is returned
        as all zeros instead of being divided by zero, which would otherwise
        produce NaNs and a runtime warning.
    """
    v = np.asarray(v)
    norm = np.sqrt((v * v).sum())
    # Dividing by 1.0 for a zero-norm vector leaves it as zeros (same
    # convention as scikit-learn's `normalize`) and keeps the output dtype
    # rules identical to the regular path.
    return v / (norm if norm > 0 else 1.0)

In [14]:
def cosine_similarity(v1, v2):
    """Return the cosine similarity of ``v1`` and ``v2``.

    Both vectors are passed through ``normalize_vector`` first, and the
    result is the dot product of the two normalised vectors.
    """
    unit_a, unit_b = normalize_vector(v1), normalize_vector(v2)
    return np.dot(unit_a, unit_b)

In [15]:
# Cosine similarity between the LLM answer and the original answer,
# one value per row of `df`.
cosine_similarities = []

print("Starting to compute cosine similarities...")
answer_pairs = zip(df['answer_llm'], df['answer_orig'])
for llm_text, orig_text in tqdm(answer_pairs, total=len(df), desc="Processing records"):
    # Encode each side separately, then compare the two embeddings.
    llm_vector = embedding_model.encode(llm_text)
    orig_vector = embedding_model.encode(orig_text)
    cosine_similarities.append(cosine_similarity(llm_vector, orig_vector))

# 75th percentile of the per-row cosine similarities.
percentile_75_cosine = np.percentile(cosine_similarities, 75)
print(f"75th percentile of cosine similarities: {percentile_75_cosine}")

Starting to compute cosine similarities...


Processing records: 100%|███████████████████████████████████████████████████████████████████| 300/300 [02:20<00:00,  2.13it/s]

75th percentile of cosine similarities: 0.8362348973751068





**Q4**

In [19]:
from rouge import Rouge

# One scorer instance is shared by this cell and the ROUGE cells below.
rouge_scorer = Rouge()

# Record at positional index 10.
r = df.iloc[10]

# get_scores returns a list; the first element holds the scores for this pair.
pair_scores = rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])
scores = pair_scores[0]
print(scores)

# F-score of ROUGE-1 for this record.
rouge_1_f = scores['rouge-1']['f']
print(rouge_1_f)

{'rouge-1': {'r': 0.45454545454545453, 'p': 0.45454545454545453, 'f': 0.45454544954545456}, 'rouge-2': {'r': 0.21621621621621623, 'p': 0.21621621621621623, 'f': 0.21621621121621637}, 'rouge-l': {'r': 0.3939393939393939, 'p': 0.3939393939393939, 'f': 0.393939388939394}}
0.45454544954545456


**Q5**

In [20]:
# Pull the F-score of each ROUGE variant out of `scores` from the previous cell.
rouge_1, rouge_2, rouge_l = (
    scores[metric]['f'] for metric in ('rouge-1', 'rouge-2', 'rouge-l')
)

# Plain arithmetic mean of the three F-scores.
rouge_avg = (rouge_1 + rouge_2 + rouge_l) / 3
rouge_avg

0.35490034990035496

**Q6**

In [26]:
# Per-record F-scores for each ROUGE variant, in row order.
rouge_1_scores, rouge_2_scores, rouge_l_scores = [], [], []

for llm_answer, orig_answer in zip(df['answer_llm'], df['answer_orig']):
    # First (and only) entry of the result list is the score dict for this pair.
    scores = rouge_scorer.get_scores(llm_answer, orig_answer)[0]

    rouge_1_scores.append(scores['rouge-1']['f'])
    rouge_2_scores.append(scores['rouge-2']['f'])
    rouge_l_scores.append(scores['rouge-l']['f'])

# Optional: the three lists can be combined into a DataFrame with columns
# 'rouge-1', 'rouge-2', 'rouge-l' and written to 'rouge_scores.csv'.

# Mean ROUGE-2 F-score over all records.
average_rouge_2 = np.mean(rouge_2_scores)
print(average_rouge_2)

0.20696501983423318
