# Homework 4

In [3]:
import numpy as np
import pandas as pd
from rouge import Rouge
from typing import Literal
from sentence_transformers import SentenceTransformer

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Results CSV for the monitoring module, fetched straight from GitHub.
github_url = "https://raw.githubusercontent.com/DataTalksClub/llm-zoomcamp/main/04-monitoring/data/results-gpt4o-mini.csv"
url = f'{github_url}?raw=1'

# Read the file and keep only the first 300 rows in one step.
df = pd.read_csv(url).iloc[:300]
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp


### Q1:

In [7]:
# Name of the pretrained sentence-transformers checkpoint used for every embedding below.
model_name = "multi-qa-mpnet-base-dot-v1"
embedding_model = SentenceTransformer(model_name_or_path=model_name)

In [8]:
# LLM-generated answer from the first row of the frame.
answer_llm = df["answer_llm"].iloc[0]
answer_llm

'You can sign up for the course by visiting the course page at [http://mlzoomcamp.com/](http://mlzoomcamp.com/).'

In [10]:
# Embed the first LLM answer; Q1 asks for the leading component of that vector.
embedding_vector = embedding_model.encode(sentences=answer_llm)
print(f"Q1: The first value of the resulting vector {embedding_vector[0]:.2f}")

Q1: The first value of the resulting vector -0.42


### Q2:

In [14]:
def compute_dot_product(
        model: SentenceTransformer,
        row: pd.Series) -> np.float32:
    """Score one row by the dot product of its two answer embeddings.

    Args:
        model: Encoder whose ``encode`` method turns a text into a vector.
        row: Record exposing ``answer_llm`` and ``answer_orig`` attributes.

    Returns:
        The dot product of the ``answer_llm`` embedding with the
        ``answer_orig`` embedding.
    """
    llm_vector = model.encode(row.answer_llm)
    orig_vector = model.encode(row.answer_orig)
    return llm_vector @ orig_vector

# One dot-product score per row (row-wise apply over the frame).
evaluations = df.apply(
    lambda record: compute_dot_product(model=embedding_model, row=record),
    axis="columns",
)

In [15]:
# 75th percentile of the per-row dot-product scores.
percentile_75 = np.percentile(evaluations, q=75)

print(f"Q2: The 75th percentile of the scores is {percentile_75:.2f}")

Q2: The 75th percentile of the scores is 31.67


### Q3:

In [16]:
def compute_cos_similarity(
        model: SentenceTransformer,
        row: pd.Series) -> np.float32:
    """Score one row by the cosine similarity of its two answer embeddings.

    Each embedding is divided by its own L2 norm first, and the dot product
    of the two unit vectors is returned.

    Args:
        model: Encoder whose ``encode`` method turns a text into a vector.
        row: Record exposing ``answer_llm`` and ``answer_orig`` attributes.

    Returns:
        The dot product of the two L2-normalised embeddings.
    """
    llm_vector = model.encode(row.answer_llm)
    orig_vector = model.encode(row.answer_orig)

    # Normalise before multiplying, so the result is a cosine rather than a raw dot product.
    llm_unit = llm_vector / np.linalg.norm(llm_vector, ord=2)
    orig_unit = orig_vector / np.linalg.norm(orig_vector, ord=2)
    return llm_unit @ orig_unit
    

# One cosine-similarity score per row (row-wise apply over the frame).
evaluations_cos = df.apply(
    lambda record: compute_cos_similarity(model=embedding_model, row=record),
    axis="columns",
)

In [17]:
# 75th percentile of the per-row cosine scores.
percentile_75_q3 = np.percentile(evaluations_cos, q=75.0)
print(f"Q3: The 75th percentile cosine scores is {percentile_75_q3:.2f}")

Q3: The 75th percentile cosine scores is 0.84


### Q4:

In [18]:
rouge_scorer = Rouge()

# Only the pair at position 10 is used below, so score that single pair
# instead of scoring every row and discarding the rest. Called with two
# strings, get_scores returns a one-element list, hence the [0].
row_10 = df.iloc[10]
scores = rouge_scorer.get_scores(row_10['answer_llm'], row_10['answer_orig'])[0]

In [19]:
# Pull the F1 entry ('f') out of the ROUGE-1 result for the pair scored above.
rouge_1 = scores['rouge-1']
rouge_1_f_score = rouge_1['f']

print(f"Q4: The F1 score for ROUGE-1 at index 10 is {rouge_1_f_score:.2f}")

Q4: The F1 score for ROUGE-1 at index 10 is 0.45


### Q5:

In [20]:
# Collect the F1 value ('f') of every metric present in `scores`.
f_scores = [scores[metric_name]['f'] for metric_name in scores]

# Plain arithmetic mean of those F1 values.
average_f_score = sum(f_scores) / len(f_scores)

In [21]:
# Report the mean of the per-metric F1 values taken from `scores`.
print(f"Q5: The average ROUGE score (F1) for index 10 is {average_f_score:.2f}")

Q5: The average ROUGE score (F1) for index 10 is 0.35


### Q6:

In [26]:
# Score every (LLM answer, original answer) pair. The per-row results are kept
# under their own name so the `scores` dict from Q4 (still read by the Q5 cell
# through `.values()`) is not overwritten with a list; otherwise re-running
# the Q5 cell after this one fails.
pairwise_scores = [
    rouge_scorer.get_scores(llm, orig)[0]
    for llm, orig in zip(df['answer_llm'], df['answer_orig'])
]

# One row per answer pair; each cell holds that metric's {'r', 'p', 'f'} dict.
scores_df = pd.DataFrame(pairwise_scores)

# Mean F1 of the ROUGE-2 column across all rows.
average_rouge_2 = scores_df['rouge-2'].apply(lambda x: x['f']).mean()

display(scores_df)

Unnamed: 0,rouge-1,rouge-2,rouge-l
0,"{'r': 0.061224489795918366, 'p': 0.21428571428...","{'r': 0.017543859649122806, 'p': 0.07142857142...","{'r': 0.061224489795918366, 'p': 0.21428571428..."
1,"{'r': 0.08163265306122448, 'p': 0.266666666666...","{'r': 0.03508771929824561, 'p': 0.133333333333...","{'r': 0.061224489795918366, 'p': 0.2, 'f': 0.0..."
2,"{'r': 0.32653061224489793, 'p': 0.571428571428...","{'r': 0.14035087719298245, 'p': 0.242424242424...","{'r': 0.30612244897959184, 'p': 0.535714285714..."
3,"{'r': 0.16326530612244897, 'p': 0.32, 'f': 0.2...","{'r': 0.03508771929824561, 'p': 0.071428571428...","{'r': 0.14285714285714285, 'p': 0.28, 'f': 0.1..."
4,"{'r': 0.2653061224489796, 'p': 0.0970149253731...","{'r': 0.07017543859649122, 'p': 0.022346368715...","{'r': 0.22448979591836735, 'p': 0.082089552238..."
...,...,...,...
295,"{'r': 0.6428571428571429, 'p': 0.6666666666666...","{'r': 0.559322033898305, 'p': 0.52380952380952...","{'r': 0.6071428571428571, 'p': 0.6296296296296..."
296,"{'r': 0.6428571428571429, 'p': 0.5454545454545...","{'r': 0.5423728813559322, 'p': 0.4, 'f': 0.460...","{'r': 0.6071428571428571, 'p': 0.5151515151515..."
297,"{'r': 0.6607142857142857, 'p': 0.6491228070175...","{'r': 0.5932203389830508, 'p': 0.5384615384615...","{'r': 0.6428571428571429, 'p': 0.6315789473684..."
298,"{'r': 0.2857142857142857, 'p': 0.3265306122448...","{'r': 0.13559322033898305, 'p': 0.129032258064...","{'r': 0.2857142857142857, 'p': 0.3265306122448..."


In [28]:
# `average_rouge_2` is the mean ROUGE-2 F1 over all rows, computed in the cell above.
print(f"Q6: Average rouge score for all the data points is {average_rouge_2:.2f}")

Q6: Average rouge score for all the data points is 0.21
