In [1]:
# Load generated recipes files
import pandas as pd

# Load the data
manual = pd.read_json('../data/generated_recipes_manual_review.json', dtype={'correct': 'boolean'})
rag = pd.read_json('../data/generated_recipes_rag_review.json', dtype={'correct': 'boolean'})
moderator = pd.read_json('../data/generated_recipes_moderator_review.json', dtype={'correct': 'boolean'})
combined = pd.read_json('../data/generated_recipes_combined_review.json', dtype={'correct': 'boolean'})
manual['correct'] = manual['correct'].fillna(False)

rag.rename(columns={'correct': 'correct_rag'}, inplace=True)
moderator.rename(columns={'correct': 'correct_moderator'}, inplace=True)
combined.rename(columns={'correct': 'correct_combined'}, inplace=True)
totalset = pd.merge(manual, rag[['medische_vraag','correct_rag']], on='medische_vraag', how='left')
totalset = pd.merge(totalset, moderator[['medische_vraag','correct_moderator']], on='medische_vraag', how='left')
totalset = pd.merge(totalset, combined[['medische_vraag','correct_combined']], on='medische_vraag', how='left')


In [2]:
# Count the number of correct answers
correct_counts = totalset[['correct', 'correct_rag', 'correct_moderator', 'correct_combined']].sum()
# Print the counts
print("Number of correct answers:")
print(correct_counts)

false_positives_rag = totalset[(totalset['correct'] == False) & (totalset['correct_rag'] == True)]['medische_vraag'].count()
false_positives_moderator = totalset[(totalset['correct'] == False) & (totalset['correct_moderator'] == True)]['medische_vraag'].count()
false_positives_combined = totalset[(totalset['correct'] == False) & (totalset['correct_combined'] == True)]['medische_vraag'].count()

print(f"Number of false positives rag: {false_positives_rag}")
print(f"Number of false positives moderator: {false_positives_moderator}")
print(f"Number of false positives combined: {false_positives_combined}")

false_negatives_rag = totalset[(totalset['correct'] == True) & (totalset['correct_rag'] == False)]['medische_vraag'].count()
false_negatives_moderator = totalset[(totalset['correct'] == True) & (totalset['correct_moderator'] == False)]['medische_vraag'].count()
false_negatives_combined = totalset[(totalset['correct'] == True) & (totalset['correct_combined'] == False)]['medische_vraag'].count()
print(f"Number of false negatives rag: {false_negatives_rag}")
print(f"Number of false negatives moderator: {false_negatives_moderator}")
print(f"Number of false negatives combined: {false_negatives_combined}")

precision_rag = correct_counts['correct_rag'] / (correct_counts['correct_rag'] + false_positives_rag) if (correct_counts['correct_rag'] + false_positives_rag) > 0 else 0
recall_rag = correct_counts['correct_rag'] / (correct_counts['correct_rag'] + false_negatives_rag) if (correct_counts['correct_rag'] + false_negatives_rag) > 0 else 0
precision_moderator = correct_counts['correct_moderator'] / (correct_counts['correct_moderator'] + false_positives_moderator) if (correct_counts['correct_moderator'] + false_positives_moderator) > 0 else 0
recall_moderator = correct_counts['correct_moderator'] / (correct_counts['correct_moderator'] + false_negatives_moderator) if (correct_counts['correct_moderator'] + false_negatives_moderator) > 0 else 0
precision_combined = correct_counts['correct_combined'] / (correct_counts['correct_combined'] + false_positives_combined) if (correct_counts['correct_combined'] + false_positives_combined) > 0 else 0
recall_combined = correct_counts['correct_combined'] / (correct_counts['correct_combined'] + false_negatives_combined) if (correct_counts['correct_combined'] + false_negatives_combined) > 0 else 0

print(f"Precision rag: {precision_rag:.2f} Recall rag: {recall_rag:.2f}")
print(f"Precision moderator: {precision_moderator:.2f} Recall moderator: {recall_moderator:.2f}")
print(f"Precision combined: {precision_combined:.2f} Recall combined: {recall_combined:.2f}")

Number of correct answers:
correct               19
correct_rag          144
correct_moderator    185
correct_combined     147
dtype: Int64
Number of false positives rag: 129
Number of false positives moderator: 166
Number of false positives combined: 132
Number of false negatives rag: 4
Number of false negatives moderator: 0
Number of false negatives combined: 4
Precision rag: 0.53 Recall rag: 0.97
Precision moderator: 0.53 Recall moderator: 1.00
Precision combined: 0.53 Recall combined: 0.97
