In [2]:
import numpy as np
import pandas as pd
import copy
from sklearn.metrics import confusion_matrix, roc_auc_score

# Accuracy

In [3]:
def accuracy_score(df, mode="overall", threshold=0.5, num_labels=8):
    """Fraction of thresholded predictions that equal the ground-truth label.

    For a label key ``k`` the score column is ``"<k>_pred"`` and the
    ground-truth column is ``"<k>"`` (the integer key ``k`` is used as a
    fallback when no string-named column exists).
    NOTE(review): assumes ground-truth columns hold 0/1 (or boolean) values
    and ``*_pred`` columns hold scores comparable to ``threshold`` -- confirm
    against the code that builds ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the ground-truth and ``*_pred`` columns. It is not
        modified.
    mode : str or int, default "overall"
        ``"overall"`` pools labels ``1..num_labels``; any other value is the
        key of the single label to evaluate.
    threshold : float, default 0.5
        A score ``>= threshold`` counts as a positive prediction.
    num_labels : int, default 8
        Number of labels pooled in ``"overall"`` mode.

    Returns
    -------
    float or bool
        The accuracy in ``[0, 1]``. On any error (missing column, empty
        frame, ...) the exception is printed and ``False`` is returned.
        Because ``0.0 == False`` in Python, callers should test the result
        with ``is False`` to detect failure.
    """
    label_keys = range(1, num_labels + 1) if mode == "overall" else [mode]

    try:
        num_correct = 0
        num_samples = 0

        for key in label_keys:
            pred_col = str(key) + "_pred"
            # Prefer the string column name; fall back to the raw key so
            # frames with integer column labels keep working.
            true_col = str(key) if str(key) in df.columns else key

            # Binarise into a new Series: the caller's frame stays untouched.
            predicted = df[pred_col] >= threshold

            num_correct += int((predicted == df[true_col]).sum())
            num_samples += len(df)

        return float(num_correct) / float(num_samples)
    except Exception as e:
        print(e)
        return False

In [4]:
def f1_score(df, mode="overall", label=1, threshold=0.5, num_labels=8):
    """F1 score of thresholded predictions for one label or pooled over all.

    For a label key ``k`` the score column is ``"<k>_pred"`` and the
    ground-truth column is ``"<k>"`` (the integer key ``k`` is used as a
    fallback when no string-named column exists). A ground-truth value equal
    to 1 is treated as positive, anything else as negative.
    NOTE(review): assumes ``*_pred`` columns hold scores comparable to
    ``threshold`` -- confirm against the code that builds ``df``.

    The four confusion-matrix cells are counted directly with boolean masks,
    so they stay well defined even when a column contains a single class.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the ground-truth and ``*_pred`` columns. It is not
        modified.
    mode : str or int, default "overall"
        ``"overall"`` sums the confusion-matrix cells over labels
        ``1..num_labels`` before computing F1; any other value is the key of
        the single label to evaluate.
    label : int, default 1
        ``1`` scores the positive class; any other value scores the negative
        class (true negatives play the role of true positives).
    threshold : float, default 0.5
        A score ``>= threshold`` counts as a positive prediction.
    num_labels : int, default 8
        Number of labels pooled in ``"overall"`` mode.

    Returns
    -------
    float or bool
        The F1 score. A ``1e-12`` term in each denominator avoids division by
        zero, so a class with no support yields ``0.0``. On any error the
        exception is printed and ``False`` is returned; because
        ``0.0 == False`` callers should test with ``is False``.
    """
    label_keys = range(1, num_labels + 1) if mode == "overall" else [mode]

    try:
        tn, fp, fn, tp = 0, 0, 0, 0

        for key in label_keys:
            pred_col = str(key) + "_pred"
            # Prefer the string column name; fall back to the raw key so
            # frames with integer column labels keep working.
            true_col = str(key) if str(key) in df.columns else key

            predicted = np.asarray(df[pred_col] >= threshold, dtype=bool)
            actual = np.asarray(df[true_col] == 1, dtype=bool)

            tp += int(np.sum(actual & predicted))
            fp += int(np.sum(~actual & predicted))
            fn += int(np.sum(actual & ~predicted))
            tn += int(np.sum(~actual & ~predicted))

        if label == 1:
            precision = float(tp) / float(tp + fp + 1e-12)
            recall = float(tp) / float(tp + fn + 1e-12)
        else:
            # Negative class: swap the roles of positives and negatives.
            precision = float(tn) / float(tn + fn + 1e-12)
            recall = float(tn) / float(tn + fp + 1e-12)

        return (2 * precision * recall) / (precision + recall + 1e-12)
    except Exception as e:
        print(e)
        return False

In [5]:
def roc_auc_score(df, mode="overall", num_labels=8):
    """ROC AUC of the raw prediction scores for one label or pooled over all.

    For a label key ``k`` the score column is ``"<k>_pred"`` and the
    ground-truth column is ``"<k>"`` (the integer key ``k`` is used as a
    fallback when no string-named column exists). The collected ground-truth
    values and scores are passed unchanged to
    ``sklearn.metrics.roc_auc_score``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the ground-truth and ``*_pred`` columns. It is not
        modified.
    mode : str or int, default "overall"
        ``"overall"`` concatenates labels ``1..num_labels`` into one pooled
        list of (truth, score) pairs; any other value is the key of the
        single label to evaluate.
    num_labels : int, default 8
        Number of labels pooled in ``"overall"`` mode.

    Returns
    -------
    float or bool
        The AUC reported by scikit-learn. On any error (missing column,
        only one class present, ...) the exception is printed and ``False``
        is returned; because ``0.0 == False`` callers should test with
        ``is False``.
    """
    # This function has the same name as the scikit-learn metric imported at
    # the top of the notebook and therefore shadows it. Import the library
    # implementation under an alias so the call below reaches scikit-learn
    # instead of re-entering this function.
    from sklearn.metrics import roc_auc_score as _sklearn_roc_auc_score

    label_keys = range(1, num_labels + 1) if mode == "overall" else [mode]

    try:
        true_label = []
        prob_label = []

        for key in label_keys:
            pred_col = str(key) + "_pred"
            # Prefer the string column name; fall back to the raw key so
            # frames with integer column labels keep working.
            true_col = str(key) if str(key) in df.columns else key

            true_label.extend(df[true_col].values)
            prob_label.extend(df[pred_col].values)

        return _sklearn_roc_auc_score(true_label, prob_label)
    except Exception as e:
        print(e)
        return False