In [161]:
import pandas as pd
import numpy as np
import sklearn

from sklearn.metrics import accuracy_score,f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc

from matplotlib import pyplot
import matplotlib.pyplot as plt 

In [164]:
def _collect_metrics(df, df_time, unlabeled):
    """Build the metric dictionary for a single resampling strategy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``'y_true'`` column plus, for every level ``p`` in
        ``unlabeled``, a ``'y_<p>'`` column (predicted labels) and a
        ``'y_<p>_prob'`` column (scores passed to ``roc_auc_score``).
    df_time : pandas.DataFrame
        Must provide a ``'time'`` column addressable with ``.loc[i, 'time']``
        where ``i`` is the position of the level inside ``unlabeled``.
    unlabeled : list of str
        Level suffixes used to build the column names and the metric keys.

    Returns
    -------
    dict
        Maps ``'f1_<p>'``, ``'accuracy_<p>'``, ``'auc_<p>'`` (percentages
        rounded to 3 decimals) and ``'time_<p>'`` (rounded to 4 decimals)
        to their values.
    """
    y_true = df['y_true']
    metrics = {}

    # Keys are inserted metric by metric (all f1, then accuracy, auc, time)
    # so that the row order of the exported table stays f1 -> accuracy ->
    # auc -> time, each group ordered by level.
    for un in unlabeled:
        metrics['f1_'+un] = round(f1_score(y_true, df['y_'+un], average='weighted')*100.0, 3)
    for un in unlabeled:
        metrics['accuracy_'+un] = round(accuracy_score(y_true, df['y_'+un])*100.0, 3)
    for un in unlabeled:
        metrics['auc_'+un] = round(roc_auc_score(y_true, df['y_'+un+'_prob'])*100.0, 3)
    for i, un in enumerate(unlabeled):
        metrics['time_'+un] = round(df_time.loc[i, 'time'], 4)

    return metrics


def Evaluation(dfr, dfu, dfo, dfr_time, dfu_time, dfo_time, filename, method):
    """Compute F1, accuracy, AUC and timing per resampling strategy and save them as CSV.

    For each of the three prediction frames (``dfr`` -> column
    ``'noresampling'``, ``dfu`` -> ``'undersampling'``, ``dfo`` ->
    ``'oversampling'``) the weighted F1 score, accuracy and ROC AUC are
    computed for every level in ``['0','10','20','50','90','95']`` and
    combined with the matching timing value. The resulting table (metrics as
    rows, strategies as columns) is written to
    ``"../pred/<method>/<filename>_metrics.csv"``.

    Parameters
    ----------
    dfr, dfu, dfo : pandas.DataFrame
        Prediction frames with columns ``'y_true'``, ``'y_<p>'`` and
        ``'y_<p>_prob'`` for every level ``p``.
        NOTE(review): the levels presumably are percentages of unlabelled
        training data -- confirm against the code producing these frames.
    dfr_time, dfu_time, dfo_time : pandas.DataFrame
        Timing frames with a ``'time'`` column; row label ``i`` must
        correspond to the i-th level.
    filename : str
        Stem of the output file name.
    method : str
        Sub-directory of ``../pred/`` that receives the file. The directory
        is not created here and must already exist.

    Returns
    -------
    None
    """
    unlabeled = ['0', '10', '20', '50', '90', '95']

    # Every strategy is built from its own frame. The previous implementation
    # seeded the oversampling column with the no-resampling F1 scores, so the
    # exported 'oversampling' F1 rows were wrong.
    f_data = {
        'noresampling': _collect_metrics(dfr, dfr_time, unlabeled),
        'undersampling': _collect_metrics(dfu, dfu_time, unlabeled),
        'oversampling': _collect_metrics(dfo, dfo_time, unlabeled),
    }
    f_df = pd.DataFrame(f_data)
    f_df.to_csv("../pred/"+method+"/"+filename+"_metrics.csv")

In [163]:
def ROCcurve(df, resampling, filename, method):
    """Plot one ROC curve per unlabelled level, save the figure and show it.

    For every level ``p`` in ``['0','10','20','50','90','95']`` the ROC curve
    of ``df['y_<p>_prob']`` against ``df['y_true']`` is drawn with a legend
    entry ``'<p>% (<auc>)'`` (AUC with two decimals), together with the
    ``TPR=FPR`` reference diagonal. The figure is saved to
    ``"../roc/<method>/<filename>_<resampling>.png"``, displayed, and then
    closed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``'y_true'`` and a ``'y_<p>_prob'`` score column for
        every level ``p``.
    resampling : str
        Suffix of the output file name.
    filename : str
        Stem of the output file name.
    method : str
        Sub-directory of ``../roc/`` that receives the image. The directory
        is not created here and must already exist.

    Returns
    -------
    None
    """
    unlabeled = ['0', '10', '20', '50', '90', '95']

    y_true = df['y_true']

    # Compute every AUC up front so that a missing column or an invalid
    # y_true raises before anything has been drawn on the current figure.
    aucs = {un: roc_auc_score(y_true, df['y_'+un+'_prob']) for un in unlabeled}

    # Chance-level reference line. A constant score yields exactly the
    # (0, 0) -> (1, 1) diagonal, so it is drawn directly.
    pyplot.plot([0.0, 1.0], [0.0, 1.0], linestyle='--', label="TPR=FPR")

    # One curve per level; level '0' is the fully supervised baseline.
    for un in unlabeled:
        fpr, tpr, _ = roc_curve(y_true, df['y_'+un+'_prob'])
        pyplot.plot(fpr, tpr, linestyle='--', label='{}% ({:.2f})'.format(un, aucs[un]))

    pyplot.xlabel('False Positive Rate')
    pyplot.ylabel('True Positive Rate')
    pyplot.legend()
    pyplot.savefig("../roc/"+method+"/"+filename+"_"+resampling+".png")
    pyplot.show()
    # Release the figure: with backends where show() does not discard it,
    # the next call would otherwise draw on top of these curves.
    pyplot.close()