In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef

In [2]:
# Platform and bot identifiers; together they select the
# preds/{platform}_{bot_name}.csv and results/{platform}_{bot_name}.csv files.
platform, bot_name = 'haptik', 'curekart_subset'

In [3]:
def get_inscope_accuracy(actual_node, pred_node, oos_label='NO_NODES_DETECTED'):
    """Return the fraction of in-scope samples that were predicted correctly.

    Samples whose actual label equals ``oos_label`` are skipped; the rest
    count as correct when the prediction equals the actual label.

    Args:
        actual_node: Iterable of ground-truth labels.
        pred_node: Iterable of predicted labels, paired positionally with
            ``actual_node`` (pairing stops at the shorter one, as ``zip`` does).
        oos_label: Label that marks an out-of-scope sample.

    Returns:
        ``correct / total`` over the in-scope samples, or ``float('nan')``
        when there is no in-scope sample (the ratio is undefined, and a plain
        division would raise ``ZeroDivisionError``).
    """
    in_scope = [
        (act, pred)
        for act, pred in zip(actual_node, pred_node)
        if act != oos_label
    ]
    if not in_scope:
        # Nothing to score: report "undefined" instead of dividing by zero.
        return float('nan')
    correct = sum(1 for act, pred in in_scope if act == pred)
    return correct / len(in_scope)

def get_oos_recall(actual_node, pred_node, oos_label='NO_NODES_DETECTED'):
    """Return the fraction of out-of-scope samples predicted as out-of-scope.

    Only samples whose actual label equals ``oos_label`` are considered; each
    counts as correct when the prediction equals the actual label.

    Args:
        actual_node: Iterable of ground-truth labels.
        pred_node: Iterable of predicted labels, paired positionally with
            ``actual_node`` (pairing stops at the shorter one, as ``zip`` does).
        oos_label: Label that marks an out-of-scope sample.

    Returns:
        ``correct / total`` over the out-of-scope samples, or ``float('nan')``
        when there is no out-of-scope sample (the ratio is undefined, and a
        plain division would raise ``ZeroDivisionError``).
    """
    out_of_scope = [
        (act, pred)
        for act, pred in zip(actual_node, pred_node)
        if act == oos_label
    ]
    if not out_of_scope:
        # Nothing to score: report "undefined" instead of dividing by zero.
        return float('nan')
    correct = sum(1 for act, pred in out_of_scope if act == pred)
    return correct / len(out_of_scope)

def get_metrics(filepath, thresh):
    """Score the thresholded predictions stored in a CSV file.

    The file is read with ``pd.read_csv`` and must provide the columns
    ``label``, ``predicted_node`` and ``predicted_node_score``. Every
    prediction whose score is strictly below ``thresh`` is replaced by
    ``'NO_NODES_DETECTED'`` before the metrics are computed.

    Args:
        filepath: Path handed to ``pd.read_csv``.
        thresh: Score cut-off applied to ``predicted_node_score``.

    Returns:
        Tuple ``(accuracy, overall_f1, inscope_accuracy, oos_recall, mcc)``.
    """
    df = pd.read_csv(filepath)
    # Vectorised thresholding instead of a Python-level row loop. `mask` only
    # replaces rows where `score < thresh` is True, so a missing (NaN) score
    # keeps its predicted node, exactly as a per-row `<` check would.
    below_thresh = df['predicted_node_score'] < thresh
    pred_node = df['predicted_node'].mask(below_thresh, 'NO_NODES_DETECTED').tolist()
    actual_node = list(df['label'])
    accuracy = accuracy_score(actual_node, pred_node)
    # Weighted F1 is restricted to the labels that occur in the ground truth.
    overall_f1 = f1_score(actual_node, pred_node, labels=list(set(actual_node)), average='weighted')
    inscope_accuracy = get_inscope_accuracy(actual_node, pred_node)
    oos_recall = get_oos_recall(actual_node, pred_node)
    mcc = matthews_corrcoef(actual_node, pred_node)
    return accuracy, overall_f1, inscope_accuracy, oos_recall, mcc

In [4]:
# Evaluate the predictions file at each confidence threshold.
thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
per_threshold = [
    get_metrics(f'preds/{platform}_{bot_name}.csv', thresh)
    for thresh in thresholds
]

# Transpose the per-threshold result tuples into one list per metric.
(accuracy_over_thresh,
 overall_f1_over_thresh,
 inscope_recall_over_thresh,
 oos_recall_over_thresh,
 mcc_over_thresh) = (list(column) for column in zip(*per_threshold))

# One row per threshold, one column per metric.
metric_columns = {
    'Threshold': thresholds,
    'Accuracy': accuracy_over_thresh,
    'Weighted F1': overall_f1_over_thresh,
    'Inscope Accuracy': inscope_recall_over_thresh,
    'OOS Recall': oos_recall_over_thresh,
    'MCC': mcc_over_thresh,
}
df_metrics = pd.DataFrame(metric_columns)
df_metrics.head(10)

Unnamed: 0,Threshold,Accuracy,Weighted F1,Inscope Accuracy,OOS Recall,MCC
0,0.1,0.412714,0.348371,0.798673,0.089054,0.396726
1,0.2,0.546922,0.552749,0.780973,0.350649,0.477374
2,0.3,0.61554,0.633123,0.74115,0.510204,0.505403
3,0.4,0.708375,0.720889,0.701327,0.714286,0.578202
4,0.5,0.739657,0.743461,0.670354,0.797774,0.603908
5,0.6,0.735621,0.731715,0.60177,0.847866,0.582559
6,0.7,0.744702,0.728362,0.539823,0.916512,0.589812
7,0.8,0.683148,0.626261,0.334071,0.975881,0.486639
8,0.9,0.600404,0.485569,0.132743,0.992579,0.30913


In [5]:
# Write the per-threshold metrics table to disk without the row index.
results_path = f'results/{platform}_{bot_name}.csv'
df_metrics.to_csv(results_path, index=False)

In [6]:
# Nodes that appear as predictions but never as a ground-truth label.
df_res = pd.read_csv(f'preds/{platform}_{bot_name}.csv')
predicted_labels = set(df_res['predicted_node'])
actual_labels = set(df_res['label'])
predicted_labels - actual_labels

{'IMMUNITY',
 'INTERNATIONAL_SHIPPING',
 'MODES_OF_PAYMENTS',
 'PORTAL_ISSUE',
 'REFER_EARN',
 'START_OVER',
 'WORK_FROM_HOME'}