In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the model's predictions. Later cells read the "predicted" and
# "actual genres" columns of this frame.
model_pred_df = pd.read_csv("prediction_model_03.csv")

In [None]:
# Show how many times each distinct value occurs in the "predicted" column.
model_pred_df["predicted"].value_counts()

In [None]:
# Load the genre table; its "genre" column supplies the set of genres to score.
genres_df = pd.read_csv("genres.csv")

In [None]:
# Distinct genres, most frequent first (value_counts drops missing values).
genre_list = genres_df["genre"].value_counts().index.tolist()

# Zero-initialised per-genre tallies, filled in by the counting cell:
#   genre_true_counts - rows whose actual genres include the genre
#   genre_tp_counts   - rows where the genre was predicted and is among the actual genres
#   genre_fp_counts   - rows where the genre was predicted but is not among the actual genres
# Built from genre_list rather than by walking genres_df row by row three
# times, so the keys are exactly the genres the metric cells iterate over.
genre_true_counts = dict.fromkeys(genre_list, 0)
genre_tp_counts = dict.fromkeys(genre_list, 0)
genre_fp_counts = dict.fromkeys(genre_list, 0)

In [None]:
# Display the genres in sorted order; sorted() returns a new list, so
# genre_list itself keeps its original order.
sorted(genre_list)

In [None]:
# Tally, per genre, how often it occurs among the actual genres and how often
# the single predicted genre of a row is a hit (TP) or a miss (FP).

# Zero the tallies in place first so that re-running this cell does not
# double-count; on the first run this is a no-op.
for counts in (genre_true_counts, genre_tp_counts, genre_fp_counts):
    for key in counts:
        counts[key] = 0

for idx, row in model_pred_df.iterrows():
    # NOTE(review): eval() executes whatever text is stored in the CSV cell.
    # If the column only ever holds literal lists/sets, ast.literal_eval would
    # be the safe drop-in replacement - confirm the data format before switching.
    this_genres = eval(row["actual genres"])

    # .get(..., 0) also tolerates genres that have no pre-initialised entry.
    for true_g in this_genres:
        genre_true_counts[true_g] = genre_true_counts.get(true_g, 0) + 1

    # Each row carries one predicted genre: a true positive if it is among the
    # row's actual genres, otherwise a false positive for the predicted genre.
    pred_g = row["predicted"]
    if pred_g in this_genres:
        genre_tp_counts[pred_g] = genre_tp_counts.get(pred_g, 0) + 1
    else:
        genre_fp_counts[pred_g] = genre_fp_counts.get(pred_g, 0) + 1

In [None]:
# Micro-averaged scores: pool TP / FP / FN over every genre in genre_list,
# then take the ratios once.
#   precision = tp / (tp + fp)
#   recall    = tp / (tp + fn)
tp = sum(genre_tp_counts[genre] for genre in genre_list)
fp = sum(genre_fp_counts[genre] for genre in genre_list)
# A false negative is an actual occurrence of a genre that was not a true positive.
fn = sum(genre_true_counts[genre] - genre_tp_counts[genre] for genre in genre_list)

# Guard every ratio: with no predictions, no actual labels, or no true
# positives a denominator is 0 and the bare division would raise
# ZeroDivisionError. An undefined score is reported as 0.0.
prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
f1 = 2 * (prec * recall) / (prec + recall) if (prec + recall) > 0 else 0.0

print("Micro-Precision:", prec)
print("Micro-Recall:", recall)
print("Micro-F1:", f1)

In [None]:
# Per-genre precision / recall / F1, followed by their unweighted (macro) means.
macro_prec_list, macro_recall_list, macro_f1_list = [], [], []

for genre in genre_list:
    hits = genre_tp_counts[genre]
    false_alarms = genre_fp_counts[genre]
    misses = genre_true_counts[genre] - hits

    print(genre)

    # Without a single true positive both ratios are reported as 0.0, which
    # also keeps the divisions away from a zero denominator.
    genre_prec = hits / (hits + false_alarms) if hits > 0 else 0.0
    genre_recall = hits / (hits + misses) if hits > 0 else 0.0

    # F1 is the harmonic mean of the two; 0.0 when both are zero.
    pr_sum = genre_prec + genre_recall
    genre_f1 = (2 * (genre_prec * genre_recall) / pr_sum) if pr_sum > 0 else 0.0

    macro_prec_list.append(genre_prec)
    macro_recall_list.append(genre_recall)
    macro_f1_list.append(genre_f1)

    print("\tPrecision:", genre_prec)
    print("\tRecall:", genre_recall)
    print("\tF1:", genre_f1)

print("-" * 20)
print("Macro-Precision:", np.mean(macro_prec_list))
print("Macro-Recall:", np.mean(macro_recall_list))
print("Macro-F1:", np.mean(macro_f1_list))

## SKLearn's Metrics

In [None]:
# Build two 0/1 indicator tables with one row per prediction and one column per
# genre in genre_list: one for the actual genres, one for the predicted genre.
true_rows, pred_rows = [], []

for _, record in model_pred_df.iterrows():
    # NOTE(review): eval() runs whatever text the CSV cell contains; consider
    # ast.literal_eval if the column only ever holds literal lists/sets.
    actual = eval(record["actual genres"])
    # Wrap the single predicted label in a set so both tables use a membership test.
    predicted = {record["predicted"]}

    true_rows.append({g: int(g in actual) for g in genre_list})
    pred_rows.append({g: int(g in predicted) for g in genre_list})

true_matrix = pd.DataFrame(true_rows)
pred_matrix = pd.DataFrame(pred_rows)

In [None]:
from sklearn.metrics import precision_recall_fscore_support

In [None]:
# Macro-averaged precision / recall / F1 from scikit-learn, computed on the
# indicator matrices. The fourth returned value is not used.
macro_scores = precision_recall_fscore_support(
    true_matrix,
    pred_matrix,
    average="macro",
)
macro_prec, macro_rec, macro_f1, _ = macro_scores

print("-" * 20)
print("Macro-Precision:", macro_prec)
print("Macro-Recall:", macro_rec)
print("Macro-F1:", macro_f1)

In [None]:
# Micro-averaged precision / recall / F1 from scikit-learn, computed on the
# indicator matrices. The fourth returned value is not used.
micro_scores = precision_recall_fscore_support(
    true_matrix,
    pred_matrix,
    average="micro",
)
micro_prec, micro_rec, micro_f1, _ = micro_scores

print("-" * 20)
print("Micro-Precision:", micro_prec)
print("Micro-Recall:", micro_rec)
print("Micro-F1:", micro_f1)

In [None]:
# Display the 0/1 indicator table of actual genres (one column per genre in genre_list).
true_matrix


In [None]:
# Display the 0/1 indicator table of predicted genres (one column per genre in genre_list).
pred_matrix