In [1]:
import requests 
import pandas as pd
import numpy as np

In [2]:
# GitHub "blob" page URL of the results CSV. Appending ?raw=1 makes the
# request return the CSV itself rather than the HTML page around it.
github_url ='https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv'
url = f'{github_url}?raw=1'
# pandas downloads and parses the file straight from the URL.
df = pd.read_csv(url)

In [3]:
# Keep only the first 300 rows. The explicit copy makes `df` an independent
# frame, so the columns added in later cells are written to it directly
# rather than to a slice of the loaded data (which can trigger pandas'
# SettingWithCopyWarning).
df = df.iloc[:300].copy()
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp


In [4]:
from sentence_transformers import SentenceTransformer
model_name = 'multi-qa-mpnet-base-dot-v1'
embedding_model = SentenceTransformer(model_name)

  from tqdm.autonotebook import tqdm, trange


## Q1. Getting the embeddings model

In [5]:
answer_llm = df.iloc[0].answer_llm
answer_llm

'You can sign up for the course by visiting the course page at [http://mlzoomcamp.com/](http://mlzoomcamp.com/).'

In [6]:
embedding_model.encode(answer_llm)

array([-4.22446579e-01, -2.24856094e-01, -3.24058473e-01, -2.84758508e-01,
        7.25637982e-03,  1.01186477e-01,  1.03716679e-01, -1.89983442e-01,
       -2.80597862e-02,  2.71588922e-01, -1.15337484e-01,  1.14666171e-01,
       -8.49585980e-02,  3.32365513e-01,  5.52722663e-02, -2.22195774e-01,
       -1.42540708e-01,  1.02519087e-01, -1.52333796e-01, -2.02912658e-01,
        1.98424123e-02,  8.38147700e-02, -5.68632305e-01,  2.32842825e-02,
       -1.67292669e-01, -2.39256859e-01, -8.05463567e-02,  2.57082582e-02,
       -8.15465227e-02, -7.39290714e-02, -2.61549950e-01,  1.92575473e-02,
        3.22909385e-01,  1.90357044e-01, -9.34726413e-05, -2.13165492e-01,
        2.88941171e-02, -1.79531835e-02, -5.92757724e-02,  1.99918330e-01,
       -4.75172400e-02,  1.71634063e-01, -2.45916881e-02, -9.38061625e-02,
       -3.57002705e-01,  1.33263960e-01,  1.94045797e-01, -1.18530668e-01,
        4.56915230e-01,  1.47728100e-01,  3.35945100e-01, -1.86959475e-01,
        2.45955020e-01, -

### What's the first value of the resulting vector?

In [17]:
embedding_model.encode(answer_llm)[0]

-0.42244658

In [7]:
answer_orig = df.iloc[0].answer_orig

In [8]:
embedding_model.encode(answer_orig)

array([-3.02140489e-02, -3.44438165e-01, -2.80762315e-01,  6.15037233e-02,
        1.76927131e-02,  5.21319546e-02,  1.84989437e-01, -5.07149585e-02,
        3.82892452e-02,  1.47848904e-01,  3.71525660e-02,  5.86897284e-02,
        3.51020508e-02,  3.09185445e-01,  3.27005714e-01,  3.46162766e-02,
       -1.22791436e-02,  1.83949620e-02, -1.30378753e-01,  4.87349182e-02,
        3.25932167e-02, -3.07790458e-01, -3.09783723e-02,  1.03366468e-02,
       -1.70875639e-01,  6.83628069e-03, -8.46391469e-02, -3.71599868e-02,
       -8.73600319e-02, -2.51988947e-01, -7.94990286e-02,  8.44478533e-02,
        4.63594273e-02,  1.19172730e-01, -9.33279007e-05, -2.66445100e-01,
       -1.49643242e-01,  1.63761787e-02, -8.01610649e-02,  2.73068309e-01,
       -8.50625057e-03,  1.55116975e-01, -2.76388675e-02,  4.56138626e-02,
       -3.55868816e-01,  4.71374718e-04, -2.13361401e-02, -3.46214212e-02,
       -2.68815737e-02,  1.81957066e-01,  3.38720202e-01, -2.63240337e-01,
        1.57496691e-01, -

In [9]:
def compute_similarity(record):
    """Return the dot product between the embeddings of a record's two answers.

    Parameters
    ----------
    record : mapping
        Must provide the keys ``'answer_orig'`` and ``'answer_llm'``; both
        values are passed to ``embedding_model.encode``.

    Returns
    -------
    The result of ``<llm embedding>.dot(<original embedding>)``. The vectors
    are used as returned by the model, without normalisation.
    """
    reference_text, generated_text = record['answer_orig'], record['answer_llm']

    generated_vec = embedding_model.encode(generated_text)
    reference_vec = embedding_model.encode(reference_text)

    return generated_vec.dot(reference_vec)

In [10]:
compute_similarity(df.iloc[0,:])

17.515991

In [11]:
from tqdm.auto import tqdm

from concurrent.futures import ThreadPoolExecutor

pool = ThreadPoolExecutor(max_workers=6)

def map_progress(pool, seq, f):
    """Apply ``f`` to every element of ``seq`` on ``pool``, showing a progress bar.

    Parameters
    ----------
    pool
        Executor whose ``submit(f, el)`` returns a future supporting
        ``add_done_callback`` and ``result``.
    seq
        Sized iterable of inputs; ``len(seq)`` sets the progress-bar total.
    f
        Callable invoked once per element.

    Returns
    -------
    list
        The results of ``f``, in the same order as the elements of ``seq``.
        If a call raised, the exception propagates from ``future.result()``.
    """
    with tqdm(total=len(seq)) as progress:

        def _tick(_finished):
            # Advance the bar by one for each future that completes.
            progress.update()

        pending = []
        for item in seq:
            task = pool.submit(f, item)
            task.add_done_callback(_tick)
            pending.append(task)

        # Wait on the futures in submission order so that the output order
        # matches the input order regardless of which task finishes first.
        collected = [task.result() for task in pending]

    return collected

In [12]:
ground_truth = df.to_dict(orient='records')

In [13]:
evaluatons_async = map_progress(pool, ground_truth, compute_similarity)

  0%|          | 0/300 [00:00<?, ?it/s]

In [14]:
# Sequential baseline: score every record one after another in this thread,
# with a progress bar over the records.
evaluations = [compute_similarity(record) for record in tqdm(ground_truth)]

  0%|          | 0/300 [00:00<?, ?it/s]

In [15]:
# The threaded and sequential runs should produce the same scores. Compare
# with a tight tolerance instead of exact float equality, so that a
# last-bit rounding difference between the two runs does not fail the check.
# A length mismatch still raises.
np.testing.assert_allclose(evaluatons_async, evaluations, rtol=1e-5)

## Q2. Computing the dot product

In [16]:
df['dot'] = evaluations
df['dot'].describe()

count    300.000000
mean      27.495996
std        6.384742
min        4.547925
25%       24.307845
50%       28.336873
75%       31.674312
max       39.476013
Name: dot, dtype: float64

In [31]:
# 75th percentile of the dot-product scores, looked up by its label in the
# describe() summary instead of by position.
round(df['dot'].describe()['75%'], 2)

31.67

## Q3. Computing the cosine

In [21]:
def compute_similarity_norm(record):
    """Return the dot product of the two answers' normalised embeddings.

    Each embedding is passed through ``normalise_vector`` before the dot
    product is taken, which yields the cosine similarity of the two answers.

    Parameters
    ----------
    record : mapping
        Must provide the keys ``'answer_orig'`` and ``'answer_llm'``; both
        values are passed to ``embedding_model.encode``.

    Returns
    -------
    The result of ``<normalised llm vector>.dot(<normalised original vector>)``.
    """
    reference_text, generated_text = record['answer_orig'], record['answer_llm']

    generated_unit = normalise_vector(embedding_model.encode(generated_text))
    reference_unit = normalise_vector(embedding_model.encode(reference_text))

    return generated_unit.dot(reference_unit)

In [22]:
def normalise_vector(v):
    """Scale ``v`` to unit Euclidean (L2) length.

    Parameters
    ----------
    v : array-like
        Vector to normalise; anything ``np.asarray`` accepts (NumPy array,
        list, tuple, ...).

    Returns
    -------
    numpy.ndarray
        ``v`` divided by its L2 norm. An all-zero vector has no direction, so
        it is returned as zeros rather than the NaNs (and RuntimeWarning) that
        dividing by a zero norm would produce.
    """
    v = np.asarray(v)
    norm = np.sqrt((v * v).sum())
    if norm == 0:
        # Nothing to scale: dividing by 1 leaves the zeros untouched while
        # still returning a floating-point array like the normal path does.
        norm = 1.0
    return v / norm

In [23]:
cos_similarity_norm_list = map_progress(pool, ground_truth, compute_similarity_norm)

  0%|          | 0/300 [00:00<?, ?it/s]

In [24]:
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course,dot
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp,17.515991
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp,13.4184
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp,25.313255
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp,12.147417
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp,18.747738


In [25]:
df['dot_norm'] = cos_similarity_norm_list

In [26]:
df['dot_norm'].describe()

count    300.000000
mean       0.728393
std        0.157755
min        0.125357
25%        0.651273
50%        0.763761
75%        0.836235
max        0.958796
Name: dot_norm, dtype: float64

In [29]:
# 75th percentile of the normalised scores, looked up by its label in the
# describe() summary instead of by position.
round(df['dot_norm'].describe()['75%'], 2)

0.84

## Q4. Rouge

In [35]:
df.head(10)

Unnamed: 0,answer_llm,answer_orig,document,question,course,dot,dot_norm
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp,17.515991,0.506754
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp,13.4184,0.388549
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp,25.313255,0.718599
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp,12.147417,0.337266
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp,18.747738,0.521792
5,"The course videos are pre-recorded, and you ca...","The course videos are pre-recorded, you can st...",39fda9f0,Are the course videos live or pre-recorded?,machine-learning-zoomcamp,33.970406,0.830532
6,You can start watching the course videos right...,"The course videos are pre-recorded, you can st...",39fda9f0,When can I start watching the course videos?,machine-learning-zoomcamp,30.251701,0.746283
7,"Yes, the live office hours sessions are recorded.","The course videos are pre-recorded, you can st...",39fda9f0,Are the live office hours sessions recorded?,machine-learning-zoomcamp,29.52158,0.694406
8,You can find the office hours sessions in the ...,"The course videos are pre-recorded, you can st...",39fda9f0,Where can I find the office hours sessions?,machine-learning-zoomcamp,35.272198,0.846886
9,You can access the pre-recorded course videos ...,"The course videos are pre-recorded, you can st...",39fda9f0,Where can I access the pre-recorded course vid...,machine-learning-zoomcamp,27.75177,0.655908


In [34]:
from rouge import Rouge
rouge_scorer = Rouge()

In [39]:
rouge_scorer.get_scores(ground_truth[10]['answer_llm'], ground_truth[10]['answer_orig'])[0]

{'rouge-1': {'r': 0.45454545454545453,
  'p': 0.45454545454545453,
  'f': 0.45454544954545456},
 'rouge-2': {'r': 0.21621621621621623,
  'p': 0.21621621621621623,
  'f': 0.21621621121621637},
 'rouge-l': {'r': 0.3939393939393939,
  'p': 0.3939393939393939,
  'f': 0.393939388939394}}

### What's the F score for rouge-1?

In [43]:
round(rouge_scorer.get_scores(ground_truth[10]['answer_llm'], ground_truth[10]['answer_orig'])[0]['rouge-1']['f'],2)

0.45

## Q5. Average rouge score

In [51]:
def calculate_average_f_score(rouge_scores):
    """Average the ``'f'`` values of the rouge-1, rouge-2 and rouge-l entries.

    Parameters
    ----------
    rouge_scores : mapping
        Must contain the keys ``'rouge-1'``, ``'rouge-2'`` and ``'rouge-l'``,
        each mapping to a dict with an ``'f'`` entry.

    Returns
    -------
    float
        Arithmetic mean of the three F values, rounded to 3 decimal places.
    """
    metrics = ('rouge-1', 'rouge-2', 'rouge-l')
    total = sum(rouge_scores[metric]['f'] for metric in metrics)
    return round(total / len(metrics), 3)

In [52]:
data = rouge_scorer.get_scores(ground_truth[10]['answer_llm'], ground_truth[10]['answer_orig'])[0]

In [53]:
calculate_average_f_score(data)

0.355

In [73]:
def calculate_rouge_scores(record):
    """Compute the ROUGE F scores for one record and their average.

    Parameters
    ----------
    record : mapping
        Must provide the keys ``'answer_orig'`` and ``'answer_llm'``. They are
        passed to ``rouge_scorer.get_scores`` with the LLM answer first and the
        original answer second.

    Returns
    -------
    dict
        Keys ``'rouge-1'``, ``'rouge-2'`` and ``'rouge-l'`` hold the respective
        ``'f'`` values from the first scorer result; ``'average'`` holds their
        mean rounded to 2 decimal places.
    """
    reference_text, generated_text = record['answer_orig'], record['answer_llm']
    scores = rouge_scorer.get_scores(generated_text, reference_text)[0]

    f_1, f_2, f_l = (
        scores[metric]['f'] for metric in ('rouge-1', 'rouge-2', 'rouge-l')
    )

    return {
        'rouge-1': f_1,
        'rouge-2': f_2,
        'rouge-l': f_l,
        'average': round((f_1 + f_2 + f_l) / 3, 2),
    }

In [75]:
# Score every record on the shared thread pool.
rouge_scores = map_progress(pool, ground_truth, calculate_rouge_scores)

# Split the per-record dicts into one list per metric, keeping record order.
rouge_1_list, rouge_2_list, rouge_l_list, average_list = (
    [entry[metric] for entry in rouge_scores]
    for metric in ('rouge-1', 'rouge-2', 'rouge-l', 'average')
)

  0%|          | 0/300 [00:00<?, ?it/s]

In [79]:
df['rouge-1'] = rouge_1_list
df['rouge-2'] = rouge_2_list
df['rouge-l'] = rouge_l_list
df['rouge-avg'] = average_list

In [81]:
df['rouge-2'].mean()

0.20696501983423318

# end