In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from sklearn.metrics import r2_score, precision_recall_fscore_support, balanced_accuracy_score

import pandas as pd
import numpy as np

from pyexplainer.pyexplainer_pyexplainer import PyExplainer
import matplotlib.pyplot as plt

import os, pickle, time, re
from datetime import datetime

from my_util import *

In [50]:
# Input / output locations, all relative to the notebook's working directory.
data_path = './dataset/'
result_dir = './eval_result/'
dump_dataframe_dir = './dump_df/'
pyExp_dir = './pyExplainer_obj/'
other_object_dir = './other_object/'
generated_instance_dir = './generated_instance/'

# Cells below write files into these directories (to_csv / pickle.dump),
# so create all of them up front instead of only generated_instance_dir.
for output_dir in (result_dir, dump_dataframe_dir, generated_instance_dir):
    os.makedirs(output_dir, exist_ok=True)

# change param here, no need to loop :)
proj_name = 'openstack' # ['openstack','qt']
instance_generation_mode = 'randompertubation' # ['randompertubation','crossoverinterpolation']
explainer_type = 'rulefit' # ['rulefit', 'LRR', 'BRCG']

In [79]:
def eval_rule(rule, instance, pred):
    '''
        Compute coverage, confidence and lift of one rule over generated instances.

        Note: this function is for rulefit only

        rule: rule object from RuleFit (its transform() marks the rows that satisfy the rule)
        instance: generated instances (object exposing .values, e.g. a DataFrame)
        pred: prediction of the generated instances (truthy/1 = positive class);
              may be a list, a numpy array or a pandas Series

        return: (coverage, confidence, lift)
            coverage   = support(p)                      share of rows satisfying the rule
            confidence = support(p & q) / support(p)     0 when the rule covers no row
            lift       = support(p & q) / (support(p) * support(q))
                         0 when either support is 0
            All three are 0.0 when there are no instances at all.
    '''
    # rows where the rule fires (this is p)
    rule_result = np.asarray(rule.transform(instance.values))
    # A plain list compared with `== 1` yields a single False instead of an
    # element-wise mask, which would silently zero confidence and lift.
    pred = np.asarray(pred)

    all_rows = len(rule_result)
    if all_rows == 0:
        # nothing to evaluate; avoid 0/0 -> NaN
        return 0.0, 0.0, 0.0

    support_p = np.sum(rule_result)/all_rows # this is coverage
    support_q = np.sum(pred)/all_rows
    support_pq = np.sum((rule_result==1) & (pred==1))/all_rows

    confidence = support_pq/support_p if support_p > 0 else 0
    lift = support_pq/(support_p*support_q) if support_p > 0 and support_q > 0 else 0

    return support_p, confidence, lift

def get_the_best_rule_obj(rules, the_best_defective_rule_str):
    '''
        Look up the rule object whose string form equals a given rule string.

        input:
            rules: RuleFit.rule_ensemble.rules object (set of rules)
            the_best_defective_rule_str: string of defective rule object
        return:
            the first rule object whose str() matches the_best_defective_rule_str
            (leading/trailing whitespace is ignored on both sides),
            or None when no rule matches
    '''
    matching_rules = (
        candidate
        for candidate in rules
        if str(candidate).strip() == the_best_defective_rule_str.strip()
    )
    return next(matching_rules, None)

def is_in_top_k_global_features(top_k_global_features, the_best_defective_rule_str):
    '''
        Tell whether a rule mentions at least one of the top-k global features.

        top_k_global_features: collection of feature names
        the_best_defective_rule_str: rule string such as 'nf <= 1.5 & ent > 0.001'

        return: True if any identifier appearing in the rule is in
                top_k_global_features, otherwise False
    '''
    # Pull out identifiers only (a word that does not start with a digit).
    # Stripping every digit first would mangle names such as 'x1' and would
    # leave a stray 'e' token for thresholds written like '1e-05'.
    feature_tokens = re.findall(r'\b[^\W\d]\w*', the_best_defective_rule_str)

    return any(tok in top_k_global_features for tok in feature_tokens)

def get_top_k_global_features(global_model, indep_col, top_k_global_feature_num = 5):
    '''
        Rank the features of the global model by importance and keep the top k.

        global_model: fitted model exposing feature_importances_
        indep_col: feature names, in the same order as feature_importances_
        top_k_global_feature_num: how many feature names to return

        return: list of the top-k feature names, most important first
    '''
    # Use the indep_col argument; the previous code read a global named
    # `indep`, which only exists if an unrelated cell was run beforehand.
    global_feature_df = pd.DataFrame({
        'feature': list(indep_col),
        'importance': global_model.feature_importances_,
    })

    global_feature_df = global_feature_df.sort_values(by='importance',ascending=False)

    return list(global_feature_df['feature'])[:top_k_global_feature_num]

def getpy_obj_file_str_list(proj_name, explainer_type, instance_generation_mode):
    '''
        List the pickled PyExplainer result files for one experiment setting.

        Looks inside pyExp_dir for file names starting with
        '<proj_name>_<explainer_type>_<instance_generation_mode>_'.

        return: sorted list of matching file names (names only, without pyExp_dir)
    '''
    # Plain prefix test: the former pattern ended with '_*', which in a regex
    # means "zero or more underscores" (not a glob wildcard), and the name
    # parts were not escaped.
    prefix = proj_name+'_'+explainer_type+'_'+instance_generation_mode+'_'

    # sorted() keeps the order stable between calls (os.listdir order is arbitrary)
    return sorted(name for name in os.listdir(pyExp_dir) if name.startswith(prefix))

In [80]:
# print(pyExp.generate_instance_random_pertubation)
def find_quality_of_generated_neighbors(proj_name, instance_generation_mode, correctly_predicted_result,
                                       pyExp):
    '''
        Measure how similar the generated (synthetic) neighbours are to each explained instance.

        For every row of correctly_predicted_result the synthetic neighbours are
        loaded from generated_instance_dir (or generated with pyExp and cached
        there), then the mean / median / max of the cosine similarity and of the
        euclidean distance between the row and its neighbours are recorded.
        Results are written to
            <dump_dataframe_dir><proj_name>_<mode>_cosine_similarity.csv
            <dump_dataframe_dir><proj_name>_<mode>_euclidean_dist.csv

        proj_name: project name used in file names
        instance_generation_mode: 'randompertubation' or 'crossoverinterpolation'
        correctly_predicted_result: dataframe whose last three columns are not
            features and which has a 'defect' label column (same layout as used
            by eval_explainer)
        pyExp: PyExplainer object (only used when no cached neighbours exist)

        raises: ValueError for an unknown instance_generation_mode
    '''
    generator_by_mode = {
        'randompertubation': ('rand', 'generate_instance_random_pertubation'),
        'crossoverinterpolation': ('crossover', 'generate_instance_crossover_interpolation'),
    }
    if instance_generation_mode not in generator_by_mode:
        raise ValueError('unknown instance_generation_mode: {}'.format(instance_generation_mode))
    method, generator_name = generator_by_mode[instance_generation_mode]

    # Derive features / labels from the argument instead of relying on the
    # notebook-level globals `feature_df` and `test_label`.
    feature_df = correctly_predicted_result.loc[:, correctly_predicted_result.columns[:-3]]
    test_label = correctly_predicted_result.loc[:, 'defect']

    os.makedirs(generated_instance_dir, exist_ok=True)
    os.makedirs(dump_dataframe_dir, exist_ok=True)

    cos_sim_rows = []
    euclid_dist_rows = []

    for i in range(0,len(correctly_predicted_result)):
        serie_name = str(correctly_predicted_result.index[i])
        sample_instance = feature_df.iloc[[i]]
        y_explain = test_label.iloc[[i]]

        file_path = generated_instance_dir+proj_name+'_'+instance_generation_mode+'_'+serie_name+'.pkl'
        if os.path.exists(file_path):
            # NOTE: pickle can execute arbitrary code; only load files you created yourself
            with open(file_path,'rb') as cache_file:
                synthetic_instances = pickle.load(cache_file)
        else:
            generate_instances = getattr(pyExp, generator_name)
            synthetic_instances = generate_instances(sample_instance, y_explain)['synthetic_data']

            with open(file_path,'wb') as cache_file:
                pickle.dump(synthetic_instances, cache_file)

        sample_vector = sample_instance.values.reshape(1,-1)
        cos_sim = cosine_similarity(sample_vector, synthetic_instances.values)[0]
        euclid_dist = euclidean_distances(sample_vector, synthetic_instances.values)[0]

        cos_sim_rows.append((np.mean(cos_sim), np.median(cos_sim), np.max(cos_sim)))
        euclid_dist_rows.append((np.mean(euclid_dist), np.median(euclid_dist), np.max(euclid_dist)))

    stat_columns = [method+'_mean', method+'_med', method+'_max']
    cos_sim_df = pd.DataFrame(cos_sim_rows, columns=stat_columns)
    euc_dist_df = pd.DataFrame(euclid_dist_rows, columns=stat_columns)

    cos_sim_df.to_csv(dump_dataframe_dir+proj_name+'_'+instance_generation_mode+'_cosine_similarity.csv',index=False)
    euc_dist_df.to_csv(dump_dataframe_dir+proj_name+'_'+instance_generation_mode+'_euclidean_dist.csv',index=False)

    print('finished evaluating quality of generated neighbors of {} project, {} method'.format(proj_name, instance_generation_mode))

In [81]:
def find_prob_of__global_and_local_model(proj_name, instance_generation_mode, 
                                         global_model, py_obj_file_str, explainer_type='rulefit'):
    '''
        Compare the positive-class probability given by the global model and by
        each local model for the instance that the local model explains (RQ2).

        proj_name, instance_generation_mode: used to name the output file
        global_model: model exposing predict_proba
        py_obj_file_str: file names (inside pyExp_dir) of pickled PyExplainer
            results; when None the list is looked up with getpy_obj_file_str_list
        explainer_type: only used for that lookup

        Writes <dump_dataframe_dir><proj_name>_<mode>_global_prob_vs_local_prob.csv
        with columns 'global', 'local' and a constant 'label' column of 1.
    '''
    # Honour the list handed in by the caller (it used to be overwritten unconditionally).
    if py_obj_file_str is None:
        py_obj_file_str = getpy_obj_file_str_list(proj_name, explainer_type, instance_generation_mode)

    global_model_prob_list = []
    local_model_prob_list = []

    for obj_name in py_obj_file_str:
        # NOTE: pickle can execute arbitrary code; only load files you created yourself
        with open(pyExp_dir+obj_name,'rb') as pyExp_file:
            pyExp_obj = pickle.load(pyExp_file)

        local_model = pyExp_obj['local_model']
        x_explain = pyExp_obj['X_explain']

        # for RQ 2
        # Index the scalar explicitly: float() on a 1-element array is
        # deprecated in recent NumPy. Assumes x_explain holds a single row.
        global_model_prob = float(global_model.predict_proba(x_explain)[0, 1])
        local_model_prob = float(local_model.predict_proba(x_explain.values)[0, 1])

        global_model_prob_list.append(global_model_prob)
        local_model_prob_list.append(local_model_prob)

    global_prob_vs_local_prob = pd.DataFrame()
    global_prob_vs_local_prob['global'] = global_model_prob_list
    global_prob_vs_local_prob['local'] = local_model_prob_list
    global_prob_vs_local_prob['label'] = [1]*len(global_prob_vs_local_prob)

    global_prob_vs_local_prob.to_csv(dump_dataframe_dir+proj_name+'_'+instance_generation_mode+'_global_prob_vs_local_prob.csv',index=False)

    print('finished evaluating global model vs local model of {} project, {} method'.format(proj_name, instance_generation_mode))

In [82]:

def eval_rule_from_local_model(proj_name, instance_generation_mode, py_obj_file_str, 
                               top_k_global_features,  explainer_type='rulefit'):
    '''
        Evaluate the most important positive rule of every local model (RQ3)
        and count how many of those rules mention a top-k global feature.

        proj_name, instance_generation_mode: used to name the output file
        py_obj_file_str: file names (inside pyExp_dir) of pickled PyExplainer
            results; when None the list is looked up with getpy_obj_file_str_list
        top_k_global_features: feature names regarded as globally important
        explainer_type: only used for that lookup

        Writes <dump_dataframe_dir><proj_name>_<mode>_rule_eval_result.csv with
        one row (coverage, confidence, lift) per file. A file whose local model
        has no positive rule, or whose rule object cannot be found, gets a NaN
        row so the row order still follows py_obj_file_str.
    '''
    # Honour the list handed in by the caller (it used to be overwritten unconditionally).
    if py_obj_file_str is None:
        py_obj_file_str = getpy_obj_file_str_list(proj_name, explainer_type, instance_generation_mode)

    cov_list, conf_list, lift_list = [], [], []
    count_is_appear_in_global_features = 0
    top_k = 5

    for obj_name in py_obj_file_str:
        # NOTE: pickle can execute arbitrary code; only load files you created yourself
        with open(pyExp_dir+obj_name,'rb') as pyExp_file:
            pyExp_obj = pickle.load(pyExp_file)

        local_model = pyExp_obj['local_model']
        synt_inst = pyExp_obj['synthetic_data']
        synt_label = pyExp_obj['synthetic_predictions']

        # for RQ3 (find coverage, confidence, lift)
        # but find the best defective rule for RQ5, too. :)
        rules = local_model.rule_ensemble.rules

        rule_df = local_model.get_rules()
        top_k_positive_rules = rule_df[(rule_df.coef > 0) & (rule_df.type=='rule')].sort_values("importance", ascending=False).head(top_k)

        def_rule = None
        if len(top_k_positive_rules) > 0:
            the_best_defective_rule_str = list(top_k_positive_rules['rule'])[0]
            def_rule = get_the_best_rule_obj(rules,the_best_defective_rule_str)

        if def_rule is None:
            # no usable rule for this instance: record it instead of crashing mid-run
            print('no positive rule could be evaluated for', obj_name)
            cov_list.append(np.nan)
            conf_list.append(np.nan)
            lift_list.append(np.nan)
            continue

        coverage, confidence, lift = eval_rule(def_rule, synt_inst, synt_label)
        cov_list.append(coverage)
        conf_list.append(confidence)
        lift_list.append(lift)

        if is_in_top_k_global_features(top_k_global_features, the_best_defective_rule_str):
            count_is_appear_in_global_features = count_is_appear_in_global_features+1

    rule_eval_df = pd.DataFrame()
    rule_eval_df['coverage'] = cov_list
    rule_eval_df['confidence'] = conf_list
    rule_eval_df['lift'] = lift_list

    rule_eval_df.to_csv(dump_dataframe_dir+proj_name+'_'+instance_generation_mode+'_rule_eval_result.csv',index=False)

    print('total rules of which features overlap global features is',count_is_appear_in_global_features)

    print('finish rule evaluation of {} project, {} method'.format(proj_name, instance_generation_mode))

In [83]:
def eval_explainer(proj_name, instance_generation_mode, explainer_type='rulefit', top_k_global_feature_num=5):
    '''
        Load everything needed for one experiment setting and run the rule evaluation.

        proj_name: 'openstack' or 'qt'
        instance_generation_mode: 'randompertubation' or 'crossoverinterpolation'
        explainer_type: 'rulefit'
        top_k_global_feature_num: number of global features regarded as important

        Reads <proj_name>_global_model.pkl from the working directory and
        <dump_dataframe_dir><proj_name>_correctly_predict_as_defective.csv.
    '''
    # prepare required data and variable
    x_train, y_train = prepare_data(proj_name, mode = 'train')

    # NOTE: pickle can execute arbitrary code; only load files you created yourself
    with open(proj_name+'_global_model.pkl','rb') as model_file:
        global_model = pickle.load(model_file)

    # In this CSV:
    #   prediction result is in 'pred' column
    #   prediction prob of buggy commit is in 'defective_prob' column
    #   label is in 'defect' column
    correctly_predicted_result = pd.read_csv(dump_dataframe_dir+proj_name+'_correctly_predict_as_defective.csv')
    correctly_predicted_result = correctly_predicted_result.set_index('Unnamed: 0')

    dep = 'defect'
    indep = correctly_predicted_result.columns[:-3]  # everything except the last three columns
    class_label = ['clean', 'defect']

    # only needed by the (currently disabled) neighbour-quality stage below
    pyExp = PyExplainer(x_train, y_train, indep, dep, class_label, blackbox_model = global_model, categorical_features = ['self'])

    top_k_global_features = get_top_k_global_features(global_model, indep, top_k_global_feature_num = top_k_global_feature_num)

    py_obj_file_str_list = getpy_obj_file_str_list(proj_name, explainer_type, instance_generation_mode)

    # Disabled stages -- uncomment to run them as well.
    # find cosine similarity and euclidean distance of test instance and generated instance
#     find_quality_of_generated_neighbors(proj_name, instance_generation_mode, correctly_predicted_result,
#                                         pyExp)

#     find_prob_of__global_and_local_model(proj_name, instance_generation_mode, 
#                                          global_model, py_obj_file_str_list)

    # forward explainer_type so a non-default value is not silently replaced by 'rulefit'
    eval_rule_from_local_model(proj_name, instance_generation_mode, py_obj_file_str_list, top_k_global_features,
                               explainer_type=explainer_type)

In [84]:
# Run the rule evaluation for the openstack project with crossover-interpolation neighbours.
eval_explainer('openstack', 'crossoverinterpolation', explainer_type='rulefit', top_k_global_feature_num=5)

total rules of which features overlap global features is 213
finish rule evaluation of openstack project, crossoverinterpolation method


## global model explainer evaluation step

1. load global model
2. load prediction result
3. create pyExplainer object (for later use)
4. load objects obtained from pyExplainer
5. perform evaluation of each research question

### prepare required data and variable

In [18]:
x_train, y_train = prepare_data(proj_name, mode = 'train')

# Close the model file after loading (the handle used to be left open).
# NOTE: pickle can execute arbitrary code; only load files you created yourself
with open(proj_name+'_global_model.pkl','rb') as model_file:
    global_model = pickle.load(model_file)

# In the CSV loaded below:
#   prediction result is in 'pred' column
#   prediction prob of buggy commit is in 'defective_prob' column
#   label is in 'defect' column
correctly_predicted_result = pd.read_csv(dump_dataframe_dir+proj_name+'_correctly_predict_as_defective.csv')
correctly_predicted_result = correctly_predicted_result.set_index('Unnamed: 0')

dep = 'defect'
indep = correctly_predicted_result.columns[:-3]  # everything except the last three columns
class_label = ['clean', 'defect']

feature_df = correctly_predicted_result.loc[:, indep]
test_label = correctly_predicted_result.loc[:, dep]

pyExp = PyExplainer(x_train,
            y_train,
            indep,
            dep,
            class_label,
            blackbox_model = global_model,
            categorical_features = ['self'])

In [20]:
# Rank the global model's features by importance and keep the names of the top k.
top_k_global_feature_num = 5

global_feature_df = pd.DataFrame({
    'feature': indep,
    'importance': global_model.feature_importances_,
}).sort_values(by='importance', ascending=False)

top_k_global_features = global_feature_df['feature'].tolist()[:top_k_global_feature_num]

display(global_feature_df)
print(top_k_global_features)

Unnamed: 0,feature,importance
0,la,0.194749
3,ent,0.155741
4,nrev,0.138389
9,app,0.104206
5,rtime,0.071951
10,rrexp,0.055987
12,rsawr,0.054935
8,age,0.054821
11,asawr,0.054157
7,ndev,0.050504


['la', 'ent', 'nrev', 'app', 'rtime']


In [12]:
# Preview the first rows: feature columns followed by 'pred', 'defective_prob' and 'defect'.
display(correctly_predicted_result.head())

Unnamed: 0_level_0,la,nd,ns,ent,nrev,rtime,self,ndev,age,app,rrexp,asawr,rsawr,pred,defective_prob,defect
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
9703,190,7,1,0.83947,25.0,1311692.0,0,164.0,71983.0,2.0,1931.0,0.12234,0.361879,True,0.74,True
23348,121,2,1,0.758523,2.0,30882.0,0,11.0,430579.0,2.0,812.0,0.057416,0.873206,True,0.7,True
25135,226,3,1,0.705487,7.0,2001345.0,0,68.0,189694.333333,3.0,1476.0,0.007438,0.277138,True,0.54,True
15814,121,5,1,0.863629,7.0,1646302.0,0,4.0,685614.0,5.0,1364.0,0.197917,0.911458,True,0.65,True
1650,186,9,1,0.757325,10.0,1709877.0,0,161.0,204972.230769,2.0,538.0,0.122838,0.093364,True,0.6,True


In [13]:
# print(pyExp.generate_instance_crossover_interpolation)

### Find quality of generated neighbors

In [None]:
# boxplot can be found in plot_result.ipynb
# find_quality_of_generated_neighbors('randompertubation')
# find_quality_of_generated_neighbors('crossoverinterpolation')

finished randompertubation


### evaluate performance of model 
- faithfulness
- quality of explanation
- number of explanation

Note: start from RuleFit first

In [None]:
# get pyExplainer object filename

all_pyExp_obj_name = os.listdir(pyExp_dir)
# Match the literal prefix '<proj>_<explainer>_<mode>_'. The former pattern
# ended with '_*', which in a regex means "zero or more underscores" rather
# than a glob wildcard, and the name parts were not escaped.
r = re.compile(re.escape(proj_name+'_'+explainer_type+'_'+instance_generation_mode+'_'))
py_obj_file_str = list(filter(r.match, all_pyExp_obj_name))
print('total file:', len(py_obj_file_str))
# print(all_pyExp_obj_name[:10])

In [None]:
'''
    for each test commit
        get prediction of test commit from local model and global model
        get rules from local model
        get label of generated instances
        get number of rules that lead to defective commit (coef > 0)
        for each rule
            find which generated instances that the rule applies
            find confidence, coverage, lift
'''

In [203]:
# print(len(py_obj_file_str)) # 75

In [31]:
# note: this code is for rulefit only, need more condition for AIX360 explainers

# for RQ2: prediction from global model vs prediction from local model
global_model_prob_list = []
local_model_prob_list = []

for obj_name in py_obj_file_str:
    # NOTE: pickle can execute arbitrary code; only load files you created yourself
    with open(pyExp_dir+obj_name,'rb') as pyExp_file:
        pyExp_obj = pickle.load(pyExp_file)

    local_model = pyExp_obj['local_model']
    x_explain = pyExp_obj['X_explain']

    # for RQ 2
    # Index the scalar explicitly: float() on a 1-element array is deprecated
    # in recent NumPy. Assumes x_explain holds a single row.
    global_model_prob = float(global_model.predict_proba(x_explain)[0, 1])
    local_model_prob = float(local_model.predict_proba(x_explain.values)[0, 1])

    global_model_prob_list.append(global_model_prob)
    local_model_prob_list.append(local_model_prob)
    

In [33]:
# Collect the RQ2 probabilities into one table and save it.
global_prob_vs_local_prob = pd.DataFrame()
global_prob_vs_local_prob['global'] = global_model_prob_list
global_prob_vs_local_prob['local'] = local_model_prob_list
# constant label column; the name used here was misspelled
# (`global_pred_vs_local_prob`) and raised NameError on a fresh kernel
global_prob_vs_local_prob['label'] = [1]*len(global_prob_vs_local_prob)

display(global_prob_vs_local_prob)

global_prob_vs_local_prob.to_csv(dump_dataframe_dir+proj_name+'_'+instance_generation_mode+'_global_prob_vs_local_prob.csv',index=False)

Unnamed: 0,global,local,label
0,0.56,0.007189,1
1,0.69,0.011819,1
2,0.7,0.038561,1
3,0.54,0.001035,1
4,0.58,0.024598,1
5,0.57,0.001407,1
6,0.56,0.024775,1
7,0.56,0.03066,1
8,0.68,0.000719,1
9,0.74,0.000558,1


In [219]:
# NOTE(review): the value shown depends on which project/model was loaded when this cell ran.
print(top_k_global_features) # just for ref

['la', 'ent', 'nrev', 'app', 'hcmt']


In [43]:
# RQ3: coverage / confidence / lift of the most important positive rule of each local model,
# plus a count of how many of those rules mention a top-k global feature.
cov_list, conf_list, lift_list = [], [], []

count_is_appear_in_global_features = 0

top_k = 5

for obj_name in py_obj_file_str:
    # NOTE: pickle can execute arbitrary code; only load files you created yourself
    with open(pyExp_dir+obj_name,'rb') as pyExp_file:
        pyExp_obj = pickle.load(pyExp_file)

    local_model = pyExp_obj['local_model']
    synt_inst = pyExp_obj['synthetic_data']
    synt_label = pyExp_obj['synthetic_predictions']
    x_explain = pyExp_obj['X_explain']

    # for RQ3 (find coverage, confidence, lift)
    # but find the best defective rule for RQ5, too. :)
    rules = local_model.rule_ensemble.rules

    rule_df = local_model.get_rules()
    top_k_positive_rules = rule_df[(rule_df.coef > 0) & (rule_df.type=='rule')].sort_values("importance", ascending=False).head(top_k)

    the_best_defective_rule_str = list(top_k_positive_rules['rule'])[0]
    def_rule = get_the_best_rule_obj(rules,the_best_defective_rule_str)

    coverage, confidence, lift = eval_rule(def_rule, synt_inst, synt_label)
    cov_list.append(coverage)
    conf_list.append(confidence)
    lift_list.append(lift)

    # the helper takes the feature list as its first argument (it was missing here -> TypeError)
    is_appear_in_global_features = is_in_top_k_global_features(top_k_global_features, the_best_defective_rule_str)

    if is_appear_in_global_features:
        count_is_appear_in_global_features = count_is_appear_in_global_features+1
#     print('-'*100)
#     break

In [49]:
# One row per explained instance: metrics of its best positive rule.
rule_eval_df = pd.DataFrame({
    'coverage': cov_list,
    'confidence': conf_list,
    'lift': lift_list,
})

rule_eval_csv_path = dump_dataframe_dir+proj_name+'_'+instance_generation_mode+'_rule_eval_result.csv'
rule_eval_df.to_csv(rule_eval_csv_path, index=False)

display(rule_eval_df.head())

Unnamed: 0,coverage,confidence,lift
0,0.028,1.0,29.411765
1,0.358,0.011173,2.793296
2,0.032,1.0,31.25
3,0.216,0.00463,4.62963
4,0.013,1.0,76.923077


In [46]:
# Share of evaluated rules that mention at least one top-k global feature
# (raises ZeroDivisionError when py_obj_file_str is empty).
print(count_is_appear_in_global_features/len(py_obj_file_str))

0.5454545454545454


In [208]:
# # split this loop from RQ2 (in case top-k global features change)

# cov_list, conf_list, lift_list = [], [], []

# for obj_name in py_obj_file_str:
#     pyExp_obj = pickle.load(open(pyExp_dir+obj_name,'rb')) 
# #     print(pyExp_obj.keys())
    
#     local_model = pyExp_obj['local_model']
#     synt_inst = pyExp_obj['synthetic_data']
#     synt_label = pyExp_obj['synthetic_predictions']
#     x_explain = pyExp_obj['X_explain']

    
#     # for RQ3 (find coverage, confidence, lift)
#     # but find the best defective rule for RQ5, too. :)
#     rules = local_model.rule_ensemble.rules
    
#     rule_df = local_model.get_rules()
#     top_k = 10
#     top_k_positive_rules = rule_df[rule_df.coef > 0].sort_values("importance", ascending=False).head(top_k)
    
#     the_best_defective_rule_str = list(top_k_positive_rules['rule'])[0]
#     def_rule = get_the_best_rule_obj(rules,the_best_defective_rule_str)

#     coverage, confidence, lift = eval_rule(rule, synt_inst, synt_label)
#     cov_list.append(coverage)
#     conf_list.append(confidence)
#     lift_list.append(lift)

    
# #     print('rule str is',the_best_defective_rule_str)
# #     print('found the rule', str(def_rule))
    
# #     if def_rule is not None:
# #         def_rule_found.append(def_rule)
# #     print(the_best_defective_rule)
    
# #     print(len(top_k_positive_rules))
# #     display(top_k_positive_rules)
# #     print(global_model_prob, local_model_prob)

# #     rule_df = local_model.get_rules()
    
# #     top_k = 3
# #     top_k_positive_rules = rule_df[rule_df.coef > 0].sort_values("importance", ascending=False).head(top_k)
# #     top_k_negative_rules = rule_df[rule_df.coef < 0].sort_values("importance", ascending=False).head(top_k)
        
# #     print('top-3 positive rules')
# #     display(top_k_positive_rules)
    
# #     print('top-3 negative rules')
# #     display(top_k_negative_rules)
# #     important_rule_df = rule_df[(rule_df['importance']>0) & 
# #                                 (rule_df['type']=='rule') &
# #                                 (rule_df['coef']>0)]
    
# #     display(important_rule_df)
# #     important_rule_list = list(important_rule_df['rule'])
# #     rule_dict = {r:'rule_'+str(idx+1) for idx, r in enumerate(important_rule_list)}
# #     rules = local_model.rule_ensemble.rules
    
# #     for j in range(0,len(rules)):
# #         # just focus on important rules
# #         rule = rules[j]

# #         if str(rule) not in important_rule_list:
# #             continue

# #         coverage, confidence, lift = eval_rule(rule, synt_inst, synt_label)
# #         cov.append(coverage)
# #         conf.append(confidence)
# #         lift_list.append(lift)
        
# #         print(coverage, confidence, lift)
            
# #         break
# #     display(important_rule_df)
#     break

rule str is rexp <= 0.37656036019325256 & nuc <= 2.812765955924988 & nd <= 0.7401773929595947
rule str is nf <= 1.5076102018356323 & ent > 0.0010973589378409088
rule str is rsexp <= 2.233006477355957 & asawr > -1.4663293957710266 & hcmt <= 5.109342336654663
rule str is rrexp > -1.157329261302948 & nf > -0.6784309148788452 & app > -0.6585927903652191
rule str is aexp <= 0.9041111171245575 & rtime <= 0.803420215845108 & nrev > 5.3014702796936035
rule str is aexp <= 0.36203552782535553 & aexp > -1.0507871508598328 & hcmt <= 1.0604354739189148
rule str is nrev > 3.9014196395874023 & rsexp <= 1.9689120650291443
rule str is ld > -1.084667146205902 & asawr > -1.3764382004737854 & rsawr <= 0.8949232995510101
rule str is rrexp > -0.7415623068809509 & nf > -0.8043252229690552 & app > -0.6723003685474396
rule str is ld <= 1.6012953519821167 & hcmt > -0.493151992559433
rule str is app <= 0.8605083525180817 & rexp > -0.4595395028591156
rule str is ndev > -0.3146034926176071 & asexp <= 1.46031069755

### prediction from global model vs prediction from local model

In [157]:
proj_name = 'qt' # ['openstack','qt']
instance_generation_mode = 'randompertubation' # ['randompertubation','crossoverinterpolation']
explainer_type = 'rulefit' # ['rulefit', 'LRR', 'BRCG']

all_pyExp_obj_name = os.listdir(pyExp_dir)
# Match the literal prefix '<proj>_<explainer>_<mode>_'. The former pattern
# ended with '_*', which in a regex means "zero or more underscores" rather
# than a glob wildcard, and the name parts were not escaped.
r = re.compile(re.escape(proj_name+'_'+explainer_type+'_'+instance_generation_mode+'_'))
py_obj_file_str = list(filter(r.match, all_pyExp_obj_name))
print('total file:', len(py_obj_file_str))


# How well each local model reproduces the labels stored with its synthetic instances.
r2_list = []
f1_list = []
precision_list = []
recall_list = []
bal_acc_list = []

for obj_name in py_obj_file_str:

    # NOTE: pickle can execute arbitrary code; only load files you created yourself
    with open(pyExp_dir+obj_name,'rb') as pyExp_file:
        pyExp_obj = pickle.load(pyExp_file)

    local_model = pyExp_obj['local_model']
    synt_inst = pyExp_obj['synthetic_data']

    # normalise both label vectors to 0/1 integers before scoring
    synt_label = np.array([1 if val else 0 for val in pyExp_obj['synthetic_predictions']])
    pred = np.array([1 if val else 0 for val in local_model.predict(synt_inst.values)])

    r2 = r2_score(synt_label, pred)
    precision, recall, f1, _ = precision_recall_fscore_support(synt_label, pred, average='binary')
    bal_acc = balanced_accuracy_score(synt_label, pred)

    r2_list.append(r2)
    f1_list.append(f1)
    precision_list.append(precision)
    recall_list.append(recall)
    bal_acc_list.append(bal_acc)

eval_df = pd.DataFrame()
eval_df['precision'] = precision_list
eval_df['recall'] = recall_list
eval_df['f1'] = f1_list
eval_df['balanced_acc'] = bal_acc_list
eval_df['R2'] = r2_list

total file: 75


In [158]:
# Save the per-instance local-model metrics; result_dir must already exist.
eval_df.to_csv(result_dir+proj_name+'_'+instance_generation_mode+'_'+explainer_type+'_explainer_eval.csv',index=False)

In [134]:
# pickle.dump(r2_list, open(other_object_dir+proj_name+'_'+instance_generation_mode+'_'+explainer_type+'_r2_list.pkl','wb'))