In [5]:
# !pip install rouge-score

# Week 4

In [7]:
import json
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from rouge_score import rouge_scorer

# Load JSON data from file
with open('output_week4.json', 'r') as f:
    output_week4 = json.load(f)

# Extract predicted answers and original responses
predicted_answers = [item["predicted_answer"] for item in output_week4]
original_responses = [item["original_response"] for item in output_week4]

# Initialize TfidfVectorizer
vectorizer = TfidfVectorizer()

# Fit and transform the text data
tfidf_matrix = vectorizer.fit_transform(predicted_answers + original_responses)

# Calculate cosine similarity
cos_sim = cosine_similarity(tfidf_matrix[:len(predicted_answers)], tfidf_matrix[len(predicted_answers):])

# Initialize ROUGE scorer
scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

# Compute ROUGE scores
rouge_scores = []
for predicted, original in zip(predicted_answers, original_responses):
    scores = scorer.score(predicted, original)
    rouge_scores.append(scores['rougeL'].fmeasure)

# Create DataFrame
data = []
for i, item in enumerate(output_week4):
    data.append({
        "question": item["question"],
        "image_path": item["image_path"],
        "predicted_answer": item["predicted_answer"],
        "original_response": item["original_response"],
        "cosine_similarity": cos_sim[i][0],
        "rouge_score": rouge_scores[i]
    })

df = pd.DataFrame(data)

# Overall performance
overall_cosine_similarity = df['cosine_similarity'].mean()
overall_rouge_score = df['rouge_score'].mean()

# Display DataFrame
print("DataFrame:")
df


DataFrame:


Unnamed: 0,question,image_path,predicted_answer,original_response,cosine_similarity,rouge_score
0,What topic will be discussed in today's lecture?,Week_4/week_04_page_001.png,Today's lecture will discuss the Logistic Regr...,Today's lecture will focus on the logistic reg...,0.463994,0.666667
1,Can you provide more details about the logisti...,Week_4/week_04_page_001.png,Logistic regression is a classification algori...,Today's lecture will delve into the logistic r...,0.096111,0.117647
2,What other topics besides logistic regression ...,Week_4/week_04_page_001.png,The lecture will also cover 'Least Squares'.,"Besides logistic regression, the lecture may c...",0.278334,0.272727
3,Can you summarize what will be covered in toda...,Week_4/week_04_page_001.png,Today's lecture will cover the Logistic Regres...,Today's lecture will primarily focus on the lo...,0.448147,0.529412
4,Can you provide an overview of the logistic re...,Week_4/week_04_page_001.png,Logistic regression is a classification algori...,The logistic regression model is a statistical...,0.096111,0.454545
...,...,...,...,...,...,...
1056,What does it mean for the regression line to '...,Week_4/week_04_page_054.png,It means the line best represents the relation...,For the regression line to 'fit' the observed ...,0.064543,0.271186
1057,What are the implications of a high value for ...,Week_4/week_04_page_054.png,A high value indicates the model is not fittin...,A high value for the object function in the al...,0.039475,0.382353
1058,How does the algorithm handle outliers in the ...,Week_4/week_04_page_054.png,"The algorithm can be sensitive to outliers, wh...",The algorithm may be sensitive to outliers in ...,0.027117,0.388060
1059,What is the significance of the term 'sum of s...,Week_4/week_04_page_054.png,It measures the difference between observed an...,The term 'sum of squared errors' in the algori...,0.027238,0.260870


In [8]:
print("\nOverall Performance:")
print(f"Mean Cosine Similarity: {overall_cosine_similarity}")
print(f"Mean ROUGE Score: {overall_rouge_score}")



Overall Performance:
Mean Cosine Similarity: 0.04330439472549059
Mean ROUGE Score: 0.39135742352837255


# Week 3

In [9]:
import json
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from rouge_score import rouge_scorer

# Load JSON data from file
with open('output_week3.json', 'r') as f:
    output_week4 = json.load(f)

# Extract predicted answers and original responses
predicted_answers = [item["predicted_answer"] for item in output_week4]
original_responses = [item["original_response"] for item in output_week4]

# Initialize TfidfVectorizer
vectorizer = TfidfVectorizer()

# Fit and transform the text data
tfidf_matrix = vectorizer.fit_transform(predicted_answers + original_responses)

# Calculate cosine similarity
cos_sim = cosine_similarity(tfidf_matrix[:len(predicted_answers)], tfidf_matrix[len(predicted_answers):])

# Initialize ROUGE scorer
scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

# Compute ROUGE scores
rouge_scores = []
for predicted, original in zip(predicted_answers, original_responses):
    scores = scorer.score(predicted, original)
    rouge_scores.append(scores['rougeL'].fmeasure)

# Create DataFrame
data = []
for i, item in enumerate(output_week4):
    data.append({
        "question": item["question"],
        "image_path": item["image_path"],
        "predicted_answer": item["predicted_answer"],
        "original_response": item["original_response"],
        "cosine_similarity": cos_sim[i][0],
        "rouge_score": rouge_scores[i]
    })

df = pd.DataFrame(data)

# Overall performance
overall_cosine_similarity = df['cosine_similarity'].mean()
overall_rouge_score = df['rouge_score'].mean()

# Display DataFrame
print("DataFrame:")
df


DataFrame:


Unnamed: 0,question,image_path,predicted_answer,original_response,cosine_similarity,rouge_score
0,What is the definition of probability?,Week_3/week_03_page_002.png,Probability is the likelihood of an event occu...,Probability is defined as the relative frequen...,0.186224,0.272727
1,How is the probability of an event calculated ...,Week_3/week_03_page_002.png,"In an actual repeated experiment, the probabil...","In an actual repeated experiment, such as reca...",0.318012,0.531250
2,Can you provide an example of an unbiased proc...,Week_3/week_03_page_002.png,An unbiased process is rolling a fair six-side...,An example of an unbiased process where probab...,0.117623,0.417910
3,How is a normal distribution used in modeling ...,Week_3/week_03_page_002.png,A normal distribution is used to represent the...,A normal distribution is chosen to model proba...,0.167038,0.392157
4,What does the term 'relative frequency' mean i...,Week_3/week_03_page_002.png,Relative frequency refers to the ratio of the ...,Relative frequency refers to the proportion of...,0.336442,0.500000
...,...,...,...,...,...,...
1225,Is there such thing as a true story?,Week_3/week_03_page_116.png,"Yes, there are true stories, but they are ofte...",,0.000000,0.000000
1226,Is there a difference between true or false?,Week_3/week_03_page_116.png,"Yes, there's a difference. The slide mentions ...",,0.043174,0.000000
1227,Is there a question about how to change your c...,Week_3/week_03_page_116.png,"Yes, there's a question about how to change a ...",we'll have another homework 3 for this week. T...,0.000000,0.139241
1228,Is there a way to change your classification p...,Week_3/week_03_page_116.png,"Yes, you can change your classification proble...",we'll have another homework 3 for this week. T...,0.000000,0.104575


In [10]:
print("\nOverall Performance:")
print(f"Mean Cosine Similarity: {overall_cosine_similarity}")
print(f"Mean ROUGE Score: {overall_rouge_score}")



Overall Performance:
Mean Cosine Similarity: 0.05048834765157943
Mean ROUGE Score: 0.29974764557593814
