### In this notebook, we investigate local explanations for predictions in four different categories: false positive, false negative, true positive, and true negative.

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import eli5
FEATURES = ['query_num_of_columns', 'query_num_of_rows', 'query_row_column_ratio', 'query_max_skewness', 
            'query_max_kurtosis', 'query_max_unique', 'candidate_num_rows', 'candidate_row_column_ratio', 
            'candidate_max_skewness', 'candidate_max_kurtosis', 'candidate_max_unique', 'query_target_max_pearson', 
            'query_target_max_spearman', 'query_target_max_covariance', 'query_target_max_mutual_info', 
            'candidate_target_max_pearson', 'candidate_target_max_spearman', 'candidate_target_max_covariance', 
            'candidate_target_max_mutual_info', 'max_pearson_difference', 'containment_fraction']

In [3]:
dataset_2 = pd.read_csv('training-simplified-data-generation.csv')
dataset_2['class'] = [1 if row['gain_in_r2_score'] > 0 else -1 for index, row in dataset_2.iterrows()]
college = pd.read_csv('college-debt-records-features-single-column-w-class')
college['class'] = [1 if row['gain_in_r2_score'] > 0 else -1 for index, row in college.iterrows()]

In [5]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(dataset_2[FEATURES], dataset_2['class'])
preds = rf.predict(college[FEATURES])
college['pred'] = preds
false_positive = college.loc[(college['class'] == -1) & (college['pred'] == 1)]
false_negative = college.loc[(college['class'] == 1) & (college['pred'] == -1)]
true_positive = college.loc[(college['class'] == 1) & (college['pred'] == 1)]
true_negative = college.loc[(college['class'] == -1) & (college['pred'] == -1)]

In [6]:
false_positive.shape

(627, 40)

In [7]:
false_negative.shape

(5, 40)

In [8]:
true_positive.shape

(125, 40)

In [9]:
true_negative.shape

(346, 40)

In [35]:
sample_fp = false_positive.sample(2)
sample_tp = true_positive.sample(2)
sample_fn = false_negative.sample(2)
sample_tn = true_negative.sample(2)

In [36]:
sample_fp.shape

(2, 40)

In [37]:
sample_fp

Unnamed: 0,query,target,candidate,query_num_of_columns,query_num_of_rows,query_row_column_ratio,query_max_mean,query_max_outlier_percentage,query_max_skewness,query_max_kurtosis,...,decrease_in_mse,decrease_in_medae,gain_in_r2_score,r2_score_before,r2_score_after,class,p(gain),p(loss),eval,pred
1044,/Users/fchirigati/projects/dataset-ranking/use...,DEBT_EARNINGS_RATIO,/Users/fchirigati/projects/dataset-ranking/use...,12.0,4990.0,415.833333,3141.540889,0.009218,5.838487,50.966273,...,0.005377,-0.002182,-0.008213,0.395656,0.392406,-1,0.7,0.3,fp,1
182,/Users/fchirigati/projects/dataset-ranking/use...,DEBT_EARNINGS_RATIO,/Users/fchirigati/projects/dataset-ranking/use...,12.0,4990.0,415.833333,3141.540889,0.009218,5.838487,50.966273,...,0.000992,-0.000496,-0.001515,0.395656,0.395057,-1,0.86,0.14,fp,1


In [20]:
no_bias = lambda feature_name, feature_value: feature_name != '<BIAS>'
eli5.show_prediction(rf, sample_fp.iloc[0][FEATURES], 
                     feature_names=FEATURES, 
                     show_feature_values=True, feature_filter=no_bias)

Contribution?,Feature,Value
0.089,max_pearson_difference,0.46
0.026,query_max_unique,3512.0
0.014,candidate_max_unique,99.0
0.013,query_target_max_mutual_info,0.668
0.007,candidate_max_kurtosis,1.31
0.007,candidate_target_max_mutual_info,1.551
0.004,query_target_max_spearman,0.457
0.003,containment_fraction,0.0
-0.001,query_num_of_rows,4990.0
-0.004,query_row_column_ratio,415.833


In [22]:
eli5.show_prediction(rf, sample_fp.iloc[1][FEATURES], 
                     feature_names=FEATURES, 
                     show_feature_values=True, feature_filter=no_bias)

Contribution?,Feature,Value
0.094,max_pearson_difference,0.268
0.043,query_row_column_ratio,415.833
0.026,candidate_target_max_spearman,0.189
0.019,query_num_of_rows,4990.0
0.018,query_max_unique,3512.0
0.017,query_target_max_covariance,23744.186
0.012,candidate_target_max_pearson,0.191
0.01,candidate_max_kurtosis,1.326
0.01,candidate_max_skewness,1.117
0.01,candidate_num_rows,7703.0


#### It looks like max_pearson_difference was the most responsible feature in the misclassification of these two samples (false positives). Now let's see the explanations for the true positives.

In [23]:
eli5.show_prediction(rf, sample_tp.iloc[0][FEATURES], 
                     feature_names=FEATURES, 
                     show_feature_values=True, feature_filter=no_bias)

Contribution?,Feature,Value
0.117,max_pearson_difference,0.237
0.044,candidate_target_max_pearson,0.222
0.03,query_row_column_ratio,415.833
0.026,query_max_unique,3512.0
0.025,candidate_max_skewness,0.003
0.021,candidate_target_max_covariance,1.311
0.02,candidate_target_max_spearman,0.18
0.009,query_num_of_rows,4990.0
0.009,candidate_row_column_ratio,7703.0
0.005,candidate_max_unique,4458.0


In [24]:
eli5.show_prediction(rf, sample_tp.iloc[1][FEATURES], 
                     feature_names=FEATURES, 
                     show_feature_values=True, feature_filter=no_bias)

Contribution?,Feature,Value
0.104,max_pearson_difference,0.454
0.04,query_max_unique,3512.0
0.027,query_target_max_mutual_info,0.668
0.019,query_row_column_ratio,415.833
0.017,candidate_max_kurtosis,0.68
0.011,query_max_skewness,5.838
0.01,query_num_of_rows,4990.0
0.005,candidate_num_rows,7703.0
0.005,containment_fraction,0.924
0.004,candidate_target_max_spearman,0.065


In [41]:
eli5.show_prediction(rf, sample_fn.iloc[0][FEATURES], 
                     feature_names=FEATURES, 
                     show_feature_values=True, feature_filter=no_bias)

Contribution?,Feature,Value
0.046,candidate_target_max_covariance,0.001
0.036,candidate_target_max_spearman,0.027
0.036,candidate_max_unique,4.0
0.035,candidate_target_max_pearson,0.004
0.029,query_num_of_columns,12.0
0.029,query_target_max_spearman,0.457
0.024,candidate_max_kurtosis,22.352
0.02,query_row_column_ratio,415.833
0.015,query_max_kurtosis,50.966
0.015,query_num_of_rows,4990.0


In [39]:
eli5.show_prediction(rf, sample_fn.iloc[1][FEATURES], 
                     feature_names=FEATURES, 
                     show_feature_values=True, feature_filter=no_bias)

Contribution?,Feature,Value
0.034,candidate_max_unique,3.0
0.033,candidate_target_max_covariance,0.0
0.028,candidate_target_max_pearson,0.001
0.028,query_num_of_columns,12.0
0.028,candidate_target_max_spearman,0.025
0.018,query_target_max_spearman,0.457
0.016,query_target_max_covariance,23744.186
0.016,query_num_of_rows,4990.0
0.013,containment_fraction,0.002
0.013,query_max_kurtosis,50.966


In [42]:
eli5.show_prediction(rf, sample_tn.iloc[0][FEATURES], 
                     feature_names=FEATURES, 
                     show_feature_values=True, feature_filter=no_bias)

Contribution?,Feature,Value
0.091,candidate_target_max_spearman,0.0
0.022,query_num_of_columns,12.0
0.02,query_target_max_spearman,0.457
0.02,candidate_row_column_ratio,1242.0
0.018,candidate_target_max_covariance,0.0
0.017,candidate_target_max_pearson,0.0
0.015,candidate_max_skewness,11.208
0.013,candidate_max_kurtosis,148.533
0.01,query_max_kurtosis,50.966
0.006,candidate_num_rows,1242.0


In [43]:
eli5.show_prediction(rf, sample_tn.iloc[1][FEATURES], 
                     feature_names=FEATURES, 
                     show_feature_values=True, feature_filter=no_bias)

Contribution?,Feature,Value
0.091,candidate_target_max_spearman,0.0
0.031,candidate_target_max_covariance,0.0
0.028,candidate_max_skewness,13.035
0.02,query_target_max_spearman,0.457
0.019,query_num_of_columns,12.0
0.018,candidate_target_max_pearson,0.0
0.016,candidate_row_column_ratio,4288.0
0.014,query_target_max_pearson,0.46
0.013,query_target_max_mutual_info,0.668
0.008,query_max_kurtosis,50.966


In [46]:
eli5.show_weights(rf, feature_names=FEATURES, target_names=[0,1])

Weight,Feature
0.0791  ± 0.0347,candidate_target_max_spearman
0.0724  ± 0.0311,max_pearson_difference
0.0724  ± 0.0365,candidate_target_max_pearson
0.0627  ± 0.0204,candidate_target_max_covariance
0.0621  ± 0.0164,query_row_column_ratio
0.0571  ± 0.0121,candidate_max_skewness
0.0555  ± 0.0131,candidate_max_kurtosis
0.0530  ± 0.0146,query_max_kurtosis
0.0526  ± 0.0124,query_num_of_columns
0.0492  ± 0.0130,query_target_max_covariance


In [48]:
synth_test = pd.read_csv('test-simplified-data-generation.csv')
synth_test['class'] = [1 if row['gain_in_r2_score'] > 0 else -1 for index, row in synth_test.iterrows()]
preds = rf.predict(synth_test[FEATURES])
synth_test['pred'] = preds
false_positive = synth_test.loc[(synth_test['class'] == -1) & (synth_test['pred'] == 1)]
false_negative = synth_test.loc[(synth_test['class'] == 1) & (synth_test['pred'] == -1)]
true_positive = synth_test.loc[(synth_test['class'] == 1) & (synth_test['pred'] == 1)]
true_negative = synth_test.loc[(synth_test['class'] == -1) & (synth_test['pred'] == -1)]