In [None]:
%load_ext autoreload
%autoreload 2

In [16]:
import pandas as pd 
import os 
import numpy as np 

In [23]:
# Directories holding the processed evaluation outputs and the extracted features.
outputdir = "../output/evaluation/processed"
featuredir = "../output/evaluation/features"

# Projects that get their own row in the result tables below.
projects = [
    'Lang',
    'Math',
    'Time',
    'Closure',
    'Cli',
    'Compress',
    'Codec',
    'Collections',
    'Csv',
    'JacksonCore',
    'JacksonXml',
    'JxPath',
    'Jsoup',
]

# Seeds 0..9; each seed has its own sub-directory of prediction files.
seeds = np.arange(10)

# Default number of decimals used when rounding reported metrics.
decimal_points = 3

# Latent Mutant Prediction

In [6]:
def eval_pred(pred_df, decimal_points = 4, with_cnt:bool =False):
    """Compute accuracy-style metrics and MAP for one frame of predictions.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Must provide the columns ``label``, ``pred_label``, ``bid``,
        ``project``, ``status`` and ``prob_0``.
        (``prob_0`` is presumably the predicted probability of label 0 --
        TODO confirm against the code that writes the prediction files.)
    decimal_points : int
        Number of decimals every reported metric is rounded to.
    with_cnt : bool
        When True, the per-bug counters ``[cnt_none, cnt_all, cnt_studied]``
        are prepended to the text line and stored under ``out['cnt']``.

    Returns
    -------
    (line, out) : tuple[str, dict]
        ``line`` is a ``" & "``-separated summary string; ``out`` holds the
        same values under the keys ``acc``, ``acc_pclass``, ``bal_acc``,
        ``map``, ``n`` and (only if ``with_cnt``) ``cnt``.
        ``out['map']`` is the string ``"-"`` when no bug qualifies for MAP.
    """
    from sklearn.metrics import multilabel_confusion_matrix
    from sklearn.metrics import accuracy_score, balanced_accuracy_score

    y_true = pred_df.label.values
    y_pred = pred_df.pred_label.values
    out = dict()

    # overall accuracy
    acc = np.round(accuracy_score(y_true, y_pred), decimals=decimal_points)
    line = f"{acc}"
    out['acc'] = acc

    # one-vs-rest accuracy for each label, taken from its 2x2 confusion matrix
    labels = [0, 1, 2]
    cm = multilabel_confusion_matrix(y_true, y_pred, labels=labels)
    acc_pclass = []
    for cm_label in cm:
        tn, fp, fn, tp = cm_label.ravel()
        acc_label = (tp + tn) / (tn + fp + fn + tp)
        acc_pclass.append(np.round(acc_label, decimals=decimal_points))
    acc_pclass_l = "/".join([str(v) for v in acc_pclass])
    line += f" ({acc_pclass_l})"
    out['acc_pclass'] = acc_pclass

    # balanced accuracy
    bal_acc = balanced_accuracy_score(y_true, y_pred)
    bal_acc = np.round(bal_acc, decimals=decimal_points)
    line += f" & {bal_acc}"
    out['bal_acc'] = bal_acc

    # MAP: one average precision per bug, then the mean over bugs.
    # A bug is identified by (project, bid) -- the same key used for out['n']
    # below. Grouping on 'bid' alone would merge bugs of different projects
    # that happen to share an id whenever pred_df spans several projects.
    aps = []
    cnt_none, cnt_all, cnt_studied = 0, 0, 0
    for _, df in pred_df.groupby(['project', 'bid']):
        n_reveal = (df.status == 'reveal').sum()
        # AP is only computed when the group mixes 'reveal' and other rows
        if n_reveal == 0:
            cnt_none += 1
            continue
        if n_reveal == len(df):
            cnt_all += 1
            continue
        cnt_studied += 1

        # rank rows by prob_0, highest first (sorted() keeps ties in input order)
        ranked = sorted(
            zip(df.status.values, df['prob_0'].values),
            key=lambda v: v[1],
            reverse=True,
        )
        ps, n_hit = [], 0
        for rank, (status, _) in enumerate(ranked, start=1):
            if status == 'reveal':
                n_hit += 1
                ps.append(n_hit / rank)  # precision at this rank
        # compute AP
        aps.append(np.mean(ps))

    if len(aps) > 0:
        map_v = np.round(np.mean(aps), decimals=decimal_points)
    else:
        map_v = "-"
    line += f" & {map_v}"
    out['map'] = map_v

    if with_cnt:
        line = f"({cnt_none}/{cnt_all}/{cnt_studied}) & " + line
        out["cnt"] = [cnt_none, cnt_all, cnt_studied]

    # number of distinct (bid, project) pairs in the frame
    out['n'] = pred_df[['bid', 'project']].drop_duplicates().shape[0]
    return line, out

def printout_mdl_perf(outs, decimal_points:int = 3, with_cnt:bool =False):
    """Average per-run metric dicts and format them as one table fragment.

    Parameters
    ----------
    outs : dict
        Maps a run key (the callers in this notebook use the seed) to a dict
        with the keys ``acc``, ``acc_pclass`` (three values), ``bal_acc`` and
        ``map``; ``cnt`` is additionally required when ``with_cnt`` is True.
    decimal_points : int
        Decimals used for the averaged accuracy, balanced accuracy and MAP.
    with_cnt : bool
        When True, prefix the line with the ``cnt`` triple of one run.

    Returns
    -------
    str
        ``"<acc> (<c0>/<c1>/<c2>) & <bal_acc> & <map>"``, optionally prefixed
        with ``"(<none>/<all>/<studied>) & "``.

    Raises
    ------
    AssertionError
        If MAP is ``'-'`` for some runs but numeric for others.
    """
    runs = list(outs.values())

    avg_acc = np.round(np.mean([run['acc'] for run in runs]), decimals=decimal_points)

    # NOTE: per-class accuracies are always rounded to two decimals,
    # independently of decimal_points.
    acc_pclass_vs = np.array([run['acc_pclass'] for run in runs])
    avg_acc_pclass = np.round(np.mean(acc_pclass_vs, axis=0), decimals=2)

    avg_bal_acc = np.round(np.mean([run['bal_acc'] for run in runs]), decimals=decimal_points)

    # MAP is the placeholder string '-' when it could not be computed for a run
    map_vs = [run['map'] for run in runs]
    n_undefined = sum(1 for v in map_vs if isinstance(v, str) and v == '-')
    if n_undefined > 0:
        assert n_undefined == len(map_vs), \
            "MAP is '-' for some runs but numeric for others; cannot average"
        avg_map = '-'
    else:
        avg_map = np.round(np.mean(map_vs), decimals=decimal_points)

    line = f"{avg_acc} ({avg_acc_pclass[0]}/{avg_acc_pclass[1]}/{avg_acc_pclass[2]}) & {avg_bal_acc} & {avg_map}"
    if with_cnt:
        # Counters are read from a single run: the one keyed 0 when present
        # (previous behaviour), otherwise the first run, so that seed sets
        # without a 0 no longer raise KeyError.
        # NOTE(review): this presumes the counters are identical across runs -- confirm.
        ref_run = outs[0] if 0 in outs else runs[0]
        cnt_none, cnt_all, cnt_studied = ref_run['cnt']
        line = f"({cnt_none}/{cnt_all}/{cnt_studied}) & " + line
    return line

In [17]:
def get_pred_results(pred_dir, seeds, decimal_points, fpath, with_cnt = False):
    """Load the prediction file of every seed and evaluate it per project.

    Parameters
    ----------
    pred_dir : str
        Directory containing one sub-directory per seed (named ``str(seed)``).
    seeds : iterable
        Seeds to load.
    decimal_points : int
        Rounding precision forwarded to ``eval_pred``.
    fpath : str
        File name of the prediction CSV inside each seed directory.
    with_cnt : bool
        Forwarded to ``eval_pred``.

    Returns
    -------
    (pred_results, eval_pred_results) : tuple[dict, dict]
        ``pred_results[seed]`` is the loaded DataFrame;
        ``eval_pred_results[project][seed]`` is the metrics dict returned by
        ``eval_pred`` for each project in the module-level ``projects`` list,
        plus the key ``'Total'`` for the whole, unsplit frame.

    Raises
    ------
    KeyError
        If a project listed in ``projects`` has no rows in a prediction file
        (raised by ``get_group``).
    """
    pred_results = {
        seed: pd.read_csv(os.path.join(pred_dir, str(seed), fpath))
        for seed in seeds
    }

    # The caller's decimal_points is used as given; it used to be overwritten
    # with a hard-coded 3 at this point, which made the parameter a no-op.
    eval_pred_results = {project:{} for project in projects + ['Total']}
    for seed, pred_df in pred_results.items():
        pred_df_pproj = pred_df.groupby('project')
        for project in projects:
            pred_df_proj = pred_df_pproj.get_group(project)
            _, proj_output = eval_pred(
                pred_df_proj, decimal_points = decimal_points, with_cnt=with_cnt)
            eval_pred_results[project][seed] = proj_output
        # metrics over all projects together
        _, total_output = eval_pred(
            pred_df, decimal_points = decimal_points, with_cnt=with_cnt)
        eval_pred_results['Total'][seed] = total_output
    return pred_results, eval_pred_results

In [18]:
# Build the per-project comparison table: one row per project plus 'Total',
# printed as " & "-separated cells terminated by "\\".
import warnings
# Suppress every UserWarning from here on (the filter is process-wide).
warnings.filterwarnings("ignore", category=UserWarning)
# Prediction directories of the three compared configurations.
# NOTE(review): "rf" / "wo_mutop" / "rd" presumably stand for the full model,
# the model without mutation-operator features, and a random baseline -- confirm.
rf_pred_dir = f"../output/evaluation/pred/debt_time_thr365/all"
wo_mutop_rf_pred_dir = f"../output/evaluation/pred/debt_time_thr365/wo_mutop"
rd_pred_dir = f"../output/evaluation/pred/debt_time_thr365/rd/all"

# Per-seed evaluation of each configuration. decimal_points here is whatever
# the name is bound to when the cell runs; on a top-to-bottom run that is the
# value from the configuration cell, since it is only rebound further down.
pred_results, outs_pproj = get_pred_results(rf_pred_dir, seeds, decimal_points, 'rf_pred.csv')
_, wo_mut_op_outs_pproj = get_pred_results(wo_mutop_rf_pred_dir, seeds, decimal_points, 'rf_pred.csv')
_, rd_outs_pproj = get_pred_results(rd_pred_dir, seeds, decimal_points, 'rd_pred.csv')

# NOTE: this rebinds the notebook-wide decimal_points for all later cells;
# the calls below pass the literal 2 rather than this variable.
decimal_points = 2
for project in projects + ['Total']:
    rf_line = printout_mdl_perf(outs_pproj[project], decimal_points = 2)
    wo_mutop_rf_line = printout_mdl_perf(wo_mut_op_outs_pproj[project], decimal_points = 2)
    rd_line = printout_mdl_perf(rd_outs_pproj[project], decimal_points = 2)
    # 'n' is read from the entry keyed 0, so key 0 must be among the seeds
    n = outs_pproj[project][0]['n']
    print (project + " & " + str(n) + " & " + rf_line + " & " + wo_mutop_rf_line + " & " + rd_line + " \\\\")

Lang & 27 & 0.89 (0.94/0.91/0.92) & 0.65 & 0.37 & 0.84 (0.94/0.87/0.87) & 0.61 & 0.37 & 0.34 (0.65/0.42/0.6) & 0.33 & 0.33 \\
Math & 79 & 0.82 (0.9/0.86/0.89) & 0.67 & 0.35 & 0.82 (0.89/0.85/0.89) & 0.63 & 0.31 & 0.34 (0.64/0.48/0.56) & 0.34 & 0.29 \\
Time & 10 & 0.89 (0.95/0.89/0.94) & 0.38 & 0.92 & 0.89 (0.95/0.89/0.95) & 0.37 & 0.92 & 0.35 (0.68/0.39/0.64) & 0.39 & 0.25 \\
Closure & 100 & 0.83 (0.99/0.83/0.84) & 0.63 & 0.45 & 0.77 (0.98/0.77/0.79) & 0.52 & 0.31 & 0.34 (0.66/0.42/0.59) & 0.33 & 0.15 \\
Cli & 22 & 0.86 (0.94/0.87/0.92) & 0.64 & 0.42 & 0.85 (0.94/0.85/0.91) & 0.62 & 0.39 & 0.34 (0.66/0.41/0.61) & 0.35 & 0.45 \\
Compress & 43 & 0.79 (0.97/0.8/0.8) & 0.59 & 0.3 & 0.78 (0.97/0.79/0.79) & 0.58 & 0.23 & 0.34 (0.66/0.5/0.51) & 0.34 & 0.25 \\
Codec & 16 & 0.89 (0.97/0.91/0.9) & 0.73 & 0.83 & 0.9 (0.97/0.91/0.91) & 0.76 & 0.81 & 0.33 (0.65/0.48/0.53) & 0.34 & 0.18 \\
Collections & 2 & 0.98 (1.0/0.98/0.98) & 0.98 & - & 1.0 (1.0/1.0/1.0) & 1.0 & - & 0.38 (0.7/0.38/0.68) & 0.38 &