In [1]:
import pandas as pd
github_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv'
url = f'{github_url}?raw=1'
df = pd.read_csv(url)


In [2]:
df = df.iloc[:300]

In [3]:
pip install sentence-transformers


Note: you may need to restart the kernel to use updated packages.


In [3]:
from sentence_transformers import SentenceTransformer


  from tqdm.autonotebook import tqdm, trange


In [4]:
model_name = 'multi-qa-mpnet-base-dot-v1'
embedding_model = SentenceTransformer(model_name)


In [5]:
answer_llm = df.iloc[0].answer_llm
embeddings = embedding_model.encode(answer_llm)
first_value = embeddings[0]
print(first_value)

-0.42244655


In [6]:
import numpy as np
def dot_product(a, b):
    return np.dot(a, b)
scores = []
for i, row in df.iterrows():
    answer_llm = row['answer_llm']
    answer_rag = row['answer_orig']
    
    embedding_llm = embedding_model.encode(answer_llm)
    embedding_rag = embedding_model.encode(answer_rag)
    
    score = dot_product(embedding_llm, embedding_rag)
    scores.append(score)
percentile_75 = np.percentile(scores, 75)
percentile_75

np.float32(31.674309)

In [7]:
print(percentile_75)

31.674309


In [8]:
print(df.columns)  
print(df.head())   


Index(['answer_llm', 'answer_orig', 'document', 'question', 'course'], dtype='object')
                                          answer_llm  \
0  You can sign up for the course by visiting the...   
1  You can sign up using the link provided in the...   
2  Yes, there is an FAQ for the Machine Learning ...   
3  The context does not provide any specific info...   
4  To structure your questions and answers for th...   

                                         answer_orig  document  \
0  Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   
1  Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   
2  Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   
3  Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   
4  Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   

                                            question  \
0                Where can I sign up for the course?   
1                 Can you provide a link to sign up?   
2  Is there

In [9]:
import numpy as np

def normalize_vector(v):
    norm = np.sqrt(np.sum(v * v))
    if norm == 0:
        return v
    return v / norm

def dot_product(a, b):
    return np.dot(a, b)
cosine_scores = []

for i, row in df.iterrows():
    answer_llm = row['answer_llm']
    if 'answer_orig' not in row or pd.isna(row['answer_orig']):
        continue
    
    answer_rag = row['answer_orig']
    embedding_llm = embedding_model.encode(answer_llm)
    embedding_rag = embedding_model.encode(answer_rag)
    embedding_llm_norm = normalize_vector(embedding_llm)
    embedding_rag_norm = normalize_vector(embedding_rag)
    score = dot_product(embedding_llm_norm, embedding_rag_norm)
    cosine_scores.append(score)
percentile_75 = np.percentile(cosine_scores, 75)
print(f"The 75th percentile of the cosine similarity scores is: {percentile_75}")


The 75th percentile of the cosine similarity scores is: 0.8362349271774292


In [10]:
!pip install rouge



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [11]:
from rouge import Rouge
rouge_scorer = Rouge()
r = df.loc[10]
scores = rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])[0]


In [12]:
f1_rouge_1 = scores['rouge-1']['f']
print(f"The F1 score for ROUGE-1 is: {f1_rouge_1}")

The F1 score for ROUGE-1 is: 0.45454544954545456


In [13]:
f1_rouge_2 = scores['rouge-2']['f']
f1_rouge_l = scores['rouge-l']['f']
average_f1 = (f1_rouge_1 + f1_rouge_2 + f1_rouge_l) / 3
print(f"The average F1 score for ROUGE-1, ROUGE-2, and ROUGE-L is: {average_f1}")

The average F1 score for ROUGE-1, ROUGE-2, and ROUGE-L is: 0.35490034990035496


In [16]:
rouge_1_scores = []
rouge_2_scores = []
rouge_l_scores = []
rouge_1_scores.append(scores['rouge-1']['f'])
rouge_2_scores.append(scores['rouge-2']['f'])
rouge_l_scores.append(scores['rouge-l']['f'])

In [17]:
rouge_df = pd.DataFrame({
    'rouge-1': rouge_1_scores,
    'rouge-2': rouge_2_scores,
    'rouge-l': rouge_l_scores
})
average_rouge_2 = rouge_df['rouge-2'].mean()
print(f"The average ROUGE-2 score across all records is: {average_rouge_2}")

The average ROUGE-2 score across all records is: 0.21621621121621637
