In [1]:
from my_util import *
from sklearn.metrics import confusion_matrix, roc_auc_score, matthews_corrcoef, precision_recall_fscore_support, balanced_accuracy_score
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
import pandas as pd
import time, pickle, warnings, operator, math, os
# import dill
warnings.filterwarnings('ignore')

import matplotlib

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Directories the evaluation reads its per-project input files from.
RF_data_path = './text_metric_data/'
ngram_data_path = './for_ngram_model/'

projects = ['openstack','qt']

# Token cut-offs 10, 20, ..., 200 combined with each aggregation method,
# plus one "all tokens" variant per aggregation method.
top_k_tokens = np.arange(10,201,10)
agg_methods = ['avg','median','sum']

# Names of the score columns, e.g. 'avg-top-10-tokens', ..., 'sum-all-tokens'.
score_cols = [agg+'-top-'+str(k)+'-tokens' for agg in agg_methods for k in top_k_tokens] + [agg+'-all-tokens' for agg in agg_methods]
line_score_df_col_name = ['commit_id', 'line_level_label'] + score_cols

# Create the output directory next to the notebook. The path must be the same
# relative path the results are later written to (not '/text_metric...' at the
# filesystem root); exist_ok makes re-running this cell harmless.
os.makedirs('./text_metric_line_eval_result/', exist_ok=True)

In [10]:
def create_tmp_df(all_commits,agg_methods):
    """Return an empty result table indexed by commit id.

    The table has one row per entry of ``all_commits`` (used as the
    ``commit_id`` index) and one still-unfilled column per name in
    ``agg_methods``.
    """
    column_names = ['commit_id'] + agg_methods
    return (
        pd.DataFrame(columns=column_names)
        .assign(commit_id=all_commits)
        .set_index('commit_id')
    )

def get_line_level_metrics(line_score,label):
    """Compute line-level ranking and classification metrics for one set of lines.

    The raw scores are min-max scaled; a line is predicted as buggy when its
    scaled score rounds to 1. Lines are also ranked by descending scaled score
    to derive the effort-aware metrics.

    Parameters
    ----------
    line_score : sequence of numbers
        One score per line.
    label : sequence
        Ground-truth label per line, aligned with ``line_score``; the value 1
        marks a buggy line.

    Returns
    -------
    tuple
        ``(IFA, top_20_percent_LOC_recall, effort_at_20_percent_LOC_recall,
        top_10_acc, precision, recall, f1, auc, mcc, bal_acc)`` where

        * IFA is the number of lines ranked above the first buggy line
          (the total number of lines when there is no buggy line),
        * top_20_percent_LOC_recall is the share of buggy lines found in the
          top ``int(0.2 * n_lines)`` ranked lines,
        * effort_at_20_percent_LOC_recall is the rank of the
          ``ceil(0.2 * n_buggy)``-th buggy line divided by the number of lines,
        * top_10_acc is the share of buggy lines among the (up to) ten
          top-ranked lines,
        * auc is NaN when ``label`` contains fewer than two distinct values.
    """
    scaler = MinMaxScaler()
    # MinMaxScaler needs a 2-D column; flatten right after so that later steps
    # work on plain 1-D scores instead of converting 1-element arrays with
    # float(), which NumPy deprecates.
    line_score = scaler.fit_transform(np.array(line_score).reshape(-1, 1)).ravel()
    pred = np.round(line_score)

    line_df = pd.DataFrame()
    line_df['scr'] = line_score
    line_df['label'] = label
    line_df = line_df.sort_values(by='scr',ascending=False)
    # 1-based rank of every line in the score-ordered list
    line_df['row'] = np.arange(1, len(line_df)+1)

    real_buggy_lines = line_df[line_df['label'] == 1]

    p, r, f1, _ = precision_recall_fscore_support(label, pred, average='binary')
    # ROC AUC is undefined when only one class is present; report NaN instead
    # of letting roc_auc_score abort before the no-buggy-line branch below.
    if len(np.unique(label)) < 2:
        auc = float('nan')
    else:
        auc = roc_auc_score(label, line_score)
    mcc = matthews_corrcoef(label, pred)
    bal_acc = balanced_accuracy_score(label, pred)

    top_10_acc = 0

    if len(real_buggy_lines) < 1:
        IFA = len(line_df)
        top_20_percent_LOC_recall = 0
        # NOTE(review): this branch yields a line count while the branch below
        # yields a fraction of lines -- confirm which one is intended.
        effort_at_20_percent_LOC_recall = math.ceil(0.2*len(line_df))

    else:
        IFA = real_buggy_lines.iloc[0]['row']-1

        # find top-10 accuracy (slicing already copes with fewer than 10 lines)
        top_labels = list(line_df['label'])[:10]
        top_10_acc = np.sum(top_labels)/len(top_labels)

        # find recall
        LOC_20_percent = line_df.head(int(0.2*len(line_df)))
        buggy_line_num = LOC_20_percent[LOC_20_percent['label'] == 1]
        top_20_percent_LOC_recall = float(len(buggy_line_num))/float(len(real_buggy_lines))

        # find effort @20% LOC recall
        buggy_20_percent = real_buggy_lines.head(math.ceil(0.2 * len(real_buggy_lines)))
        buggy_20_percent_row_num = buggy_20_percent.iloc[-1]['row']
        effort_at_20_percent_LOC_recall = int(buggy_20_percent_row_num) / float(len(line_df))

    return IFA, top_20_percent_LOC_recall, effort_at_20_percent_LOC_recall, top_10_acc, p, r, f1, auc, mcc, bal_acc

def eval_line_level(cur_proj):
    """Evaluate every line-scoring variant of one project and write one CSV per metric.

    Reads ``<RF_data_path><cur_proj>_line_level.csv`` and the tab-separated
    ``<ngram_data_path><cur_proj>_DE_SMOTE_ngram_score.txt``. For each commit,
    every column in ``score_cols`` plus the ``'N-Gram-Score'`` column (rows
    selected through ``'File-Name'``) is scored with
    ``get_line_level_metrics`` against the commit's ``line_level_label``
    values. Each metric is written to
    ``./text_metric_line_eval_result/<cur_proj>_<metric>.csv`` with one column
    per scoring variant; the files are written with ``index=False``, so the
    commit ids themselves are not stored.
    """
    result_dir = './text_metric_line_eval_result/'
    model_cols = score_cols + ['ngram']

    # (output file suffix, position of that metric in the tuple returned by
    # get_line_level_metrics), listed in the order the files are written.
    metric_layout = [
        ('IFA', 0),
        ('recall_20_percent_effort', 1),
        ('effort_20_percent_recall', 2),
        ('precision', 4),
        ('recall', 5),
        ('f1', 6),
        ('AUC', 7),
        ('top_10_acc', 3),
        ('MCC', 8),
        ('bal_ACC', 9),
    ]

    RF_result = pd.read_csv(RF_data_path+cur_proj+'_line_level.csv')
    ngram_result = pd.read_csv(ngram_data_path+cur_proj+'_DE_SMOTE_ngram_score.txt',sep='\t')
    RF_result = RF_result[line_score_df_col_name]

    all_commits = list(RF_result['commit_id'].unique())

    # One commit-indexed table per metric, columns = scoring variants.
    metric_tables = {
        suffix: create_tmp_df(all_commits, model_cols)
        for suffix, _ in metric_layout
    }

    for commit in all_commits:
        commit_rows = RF_result[RF_result['commit_id']==commit]
        line_label = list(commit_rows['line_level_label'])
        commit_ngram_rows = ngram_result[ngram_result['File-Name']==commit]

        # One metrics tuple per scoring variant, in model_cols order.
        per_model = [
            get_line_level_metrics(list(commit_rows[col]), line_label)
            for col in score_cols
        ]
        # The n-gram scores are compared against the same labels, i.e. they
        # are taken to be in the same line order -- TODO confirm.
        per_model.append(
            get_line_level_metrics(list(commit_ngram_rows['N-Gram-Score']), line_label)
        )

        for suffix, pos in metric_layout:
            metric_tables[suffix].loc[commit] = [metrics[pos] for metrics in per_model]

    for suffix, _ in metric_layout:
        metric_tables[suffix].to_csv(result_dir+cur_proj+'_'+suffix+'.csv',index=False)

    print('finish', cur_proj)

In [11]:
# Run the evaluation for the first two configured projects, in order.
for proj_idx in (0, 1):
    eval_line_level(projects[proj_idx])

finish qt
