In [1]:
import pandas as pd

df_test_meme = pd.read_csv('test_meme.csv', names=['query', 'label','directory'])
df_test_template = pd.read_csv('test_template.csv', names=['query', 'label','directory'])
df_test_meme

Unnamed: 0,query,label,directory
0,student life memes,2.0,test_images/meme_submissions_1490490.png
1,final exam memes,2.0,test_images/sad-baby_92.png
2,data science memes,2.0,test_images/John_Daly_and_Tiger_Woods_1.png
3,machine learning memes,1.0,test_images/batman-and-superman_20.png
4,math major memes,2.0,test_images/big-book-small-book_21.png
...,...,...,...
99,weekend lab session memes,0.0,test_images/meme_submissions_1440233.png
100,graduate defense memes,1.0,test_images/high-expectations-asian-father_62.png
101,confusing syllabus memes,2.0,test_images/john-daly-and-tiger-woods_58.png
102,email signature memes,1.0,test_images/meme_submissions_1295965.png


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# --- 1. IR Metrics ---

def mean_reciprocal_rank(rs):
    """rs: list of lists of binary relevance (1 or 0) for each query"""
    return np.mean([1/(np.where(r)[0][0]+1) if np.any(r) else 0 for r in rs])

def precision_at_k(r, k):
    """r: binary relevance list, k: int"""
    r = np.asarray(r)[:k]
    return np.mean(r)

def recall_at_k(r, k, total_relevant):
    """r: binary relevance list, k: int, total_relevant: int"""
    r = np.asarray(r)[:k]
    return np.sum(r) / total_relevant if total_relevant else 0

def average_precision(r):
    """r: binary relevance list"""
    r = np.asarray(r)
    out = [precision_at_k(r, k+1) for k in range(len(r)) if r[k]]
    return np.mean(out) if out else 0

def mean_average_precision(rs):
    """rs: list of binary relevance lists"""
    return np.mean([average_precision(r) for r in rs])

In [None]:
def evaluate_model(model, queries, relevance_judgments, k=5):
    all_precisions, all_recalls, all_ap, all_rr = [], [], [], []
    for q in queries:
        # ranked_meme_ids = model.retrieve(q)  # Implement your retrieval
        ranked_meme_ids = []  # Placeholder
        rels = [relevance_judgments.get(q, {}).get(mid, 0) for mid in ranked_meme_ids]
        binary_rels = [1 if r == 2 else 0 for r in rels]  # Only 'relevant' counts as 1
        total_relevant = sum(1 for v in relevance_judgments.get(q, {}).values() if v == 2)
        all_precisions.append(precision_at_k(binary_rels, k))
        all_recalls.append(recall_at_k(binary_rels, k, total_relevant))
        all_ap.append(average_precision(binary_rels))
        all_rr.append(1/(np.where(np.array(binary_rels)==1)[0][0]+1) if 1 in binary_rels else 0)
    return {
        "Precision@K": np.mean(all_precisions),
        "Recall@K": np.mean(all_recalls),
        "mAP": np.mean(all_ap),
        "MRR": np.mean(all_rr)
    }

In [None]:
def plot_precision_recall_curve(precisions, recalls):
    plt.plot(recalls, precisions, marker='o')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.show()