In [46]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import json
from scipy.stats import mode
import random

from sklearn import metrics

%matplotlib inline

In [47]:
def compute_metrics(y_labels, y_preds_proba, y_preds):
    """Collect ROC data and classification scores for one set of predictions.

    Args:
        y_labels: Ground-truth labels; label ``1`` is treated as the positive
            class when building the ROC curve.
        y_preds_proba: Scores/probabilities used to build the ROC curve.
        y_preds: Hard predictions used for accuracy, precision, recall and F1.

    Returns:
        dict with the keys ``"fpr"``, ``"tpr"``, ``"thresholds"`` (outputs of
        ``metrics.roc_curve``), ``"acc"``, ``"auc"`` (area under that ROC
        curve), ``"precision"``, ``"recall"`` and ``"f1"``.
    """
    roc_fpr, roc_tpr, roc_thresholds = metrics.roc_curve(
        y_labels, y_preds_proba, pos_label=1
    )

    # Built as one literal; key order and call order match the ROC-first,
    # scores-second sequence the results are reported in.
    return {
        "fpr": roc_fpr,
        "tpr": roc_tpr,
        "thresholds": roc_thresholds,
        "acc": metrics.accuracy_score(y_labels, y_preds),
        "auc": metrics.auc(roc_fpr, roc_tpr),
        "precision": metrics.precision_score(y_labels, y_preds),
        "recall": metrics.recall_score(y_labels, y_preds),
        "f1": metrics.f1_score(y_labels, y_preds),
    }

In [9]:
# Load the per-run results CSV for the test split. The path is relative, so it
# resolves against the notebook's current working directory.
csv_path = '../results/dvlog-baseline-model-size-ablation+hard-thr/temporal-evaluator:dvlog-baseline-model-size-ablation:pt-0.25-spw-5-nl-8-nh8-hd32-run-1:test.csv'
df = pd.read_csv(csv_path)
# Bare expression on the last line so the notebook renders the frame.
df

Unnamed: 0,name,run_id,f1,recall,precision,auc,accuracy,dataset,dataset_kind,model,seconds_per_window,presence_threshold,modalities,model_args.num_layers,model_args.self_attn_num_heads,model_args.self_attn_dim_head,prediction_kind
0,dvlog-baseline-model-size-ablation:pt-0.25-spw...,1,0.56701,0.533981,0.604396,0.603409,0.555556,d-vlog,test,baseline,5,0.25,"['audio_embeddings', 'face_embeddings']",8,8,32,last
1,dvlog-baseline-model-size-ablation:pt-0.25-spw...,1,0.745283,0.76699,0.724771,0.794197,0.714286,d-vlog,test,baseline,5,0.25,"['audio_embeddings', 'face_embeddings']",8,8,32,mean
2,dvlog-baseline-model-size-ablation:pt-0.25-spw...,1,0.740385,0.747573,0.733333,0.794197,0.714286,d-vlog,test,baseline,5,0.25,"['audio_embeddings', 'face_embeddings']",8,8,32,mode
3,dvlog-baseline-model-size-ablation:pt-0.25-spw...,1,0.772727,0.825243,0.726496,0.784376,0.73545,d-vlog,test,baseline,5,0.25,"['audio_embeddings', 'face_embeddings']",8,8,32,threshold


In [70]:
# Only probabilities strictly below `min_threshold` or strictly above
# `max_threshold` are kept when averaging a video's over-time predictions.
min_threshold = 0.3
max_threshold = 0.6

json_path = '../results/dvlog-baseline-model-size-ablation+hard-thr/temporal-evaluator:dvlog-baseline-model-size-ablation:pt-0.25-spw-5-nl-8-nh8-hd32-run-1:over-time:test.json'
with open(json_path, 'rt') as json_file:
    data = json.load(json_file)

# NOTE(review): the last two keys of the JSON are skipped; presumably they are
# not per-video entries -- confirm against the code that writes this file.
video_ids = list(data.keys())[:-2]

true_labels = []
y_preds_proba_over_time_threshold = []
for videoID in video_ids:
    preds = data[videoID]["preds"]
    true_labels.append(data[videoID]["true_label"])

    # Keep only the probabilities that fulfill the threshold condition.
    pred_threshold = [
        proba
        for proba in preds
        if proba < min_threshold or proba > max_threshold
    ]

    # When no probability lies outside the band, average all of them instead.
    probas_to_average = pred_threshold if pred_threshold else preds
    y_preds_proba_over_time_threshold.append(np.mean(probas_to_average))

# Convert the accumulated lists to arrays; hard predictions are the rounded
# per-video mean probabilities.
true_labels = np.array(true_labels)
y_preds_proba_over_time_threshold = np.array(y_preds_proba_over_time_threshold)
y_preds_over_time_np_threshold = np.round(y_preds_proba_over_time_threshold)

In [71]:
# Score the per-video aggregated predictions: the mean probabilities feed the
# ROC/AUC computation, their rounded values feed the label-based scores.
metrics_results = compute_metrics(
    y_labels=true_labels,
    y_preds_proba=y_preds_proba_over_time_threshold,
    y_preds=y_preds_over_time_np_threshold,
)

# One metric per line in the printed report.
print(f"f1: {metrics_results['f1']}\nrecall: {metrics_results['recall']}\nprecision: {metrics_results['precision']}\nauc: {metrics_results['auc']}\naccuracy: {metrics_results['acc']}")

f1: 0.7417840375586854
recall: 0.7669902912621359
precision: 0.7181818181818181
auc: 0.7921652743282908
accuracy: 0.708994708994709
