# Model fidelity
## Ranking data: knn_ranking_v2
Compare the recommendation results of the black-box models (FM and the tabular classifiers) with those of the explainable EBM / DPEBM models.

In [1]:
import numpy as np
import pandas as pd

In [2]:
import warnings
# Silence every warning for the rest of the session; note that this also
# hides deprecation notices emitted by pandas/numpy in the cells below.
warnings.filterwarnings('ignore')

In [3]:
# Run this cell first to reproduce the results: every relative path below is
# resolved from the project root (the parent of this notebook's folder).
import os
cwd = os.getcwd()
print("Working directory:", cwd)
# Go up one directory only while the data folders are not visible from the
# current directory. This keeps the cell idempotent: re-running it no longer
# climbs further up the tree and breaks the relative paths.
if not os.path.isdir("output_topN_tabular"):
    os.chdir("..")

Working directory: /home/jovyan/1_UT THESIS/CB12_MAIN/model_fidelity


# Load black-box rec result

In [4]:
# Top-N recommendation tables of the tabular black-box classifiers,
# loaded in one pass (one CSV per model).
rec20_logreg, rec20_dt, rec20_xgbt, rec20_nb, rec20_ada, rec20_lda = (
    pd.read_csv(csv_path)
    for csv_path in (
        './output_topN_tabular/rec_result_logreg_ranking_v2.csv',
        './output_topN_tabular/rec_result_dt_ranking_v2.csv',
        './output_topN_tabular/rec_result_xgbt_ranking_v2.csv',
        './output_topN_tabular/rec_result_nb_ranking_v2.csv',
        './output_topN_tabular/rec_result_ada_ranking_v2.csv',
        './output_topN_tabular/rec_result_lda_ranking_v2.csv',
    )
)

In [5]:
# QDA recommendation table (kept in its own cell, separate from the other classifiers).
rec20_qda = pd.read_csv(
    filepath_or_buffer='./output_topN_tabular/rec_result_qda_ranking_v2.csv'
)

In [6]:
# Evaluation data of the TF-IDF KNN ranking; used below to collect user ids.
evaluation_knn_tfidf = pd.read_csv(
    filepath_or_buffer='./nb_recsys_tabular/evaluation_knn_tfidf_v2.csv'
)

# Load EBM rec result

In [7]:
# EBM recommendation tables, one per feature set (location / extended / side_info).
rec20_ebm_location, rec20_ebm_extended, rec20_ebm_side_info = (
    pd.read_csv(csv_path)
    for csv_path in (
        './output_topN_ebm/rec_result_location_v2.csv',
        './output_topN_ebm/rec_result_extended_v2.csv',
        './output_topN_ebm/rec_result_side_info_v2.csv',
    )
)

In [8]:
# Ranking data of the LDA KNN setup; used below to collect user ids.
evaluation_knn_lda = pd.read_csv(
    filepath_or_buffer='./nb_recsys_ebm/ranking_data_knn_lda_v2.csv'
)

# Load FM rec result

In [9]:
# NOTE(review): both names are loaded from the same file (rec20_fm_v2.csv).
# rec20_fm_match is the frame compared against the *_location models below;
# confirm that no separate "match" export was meant to be read here.
rec20_fm = pd.read_csv('./output_topN_myfm/rec20_fm_v2.csv')
rec20_fm_match = pd.read_csv('./output_topN_myfm/rec20_fm_v2.csv')

In [10]:
# FM recommendation tables for the side_info and extended feature sets.
rec20_fm_side_info, rec20_fm_extended = (
    pd.read_csv(csv_path)
    for csv_path in (
        './output_topN_myfm/rec20_fm_side_info_v2.csv',
        './output_topN_myfm/rec20_fm_extended_v2.csv',
    )
)

# Load DPEBM rec result

In [11]:
# DPEBM recommendation tables, one per feature set (location / extended / side_info).
rec20_dpebm_location, rec20_dpebm_extended, rec20_dpebm_side_info = (
    pd.read_csv(csv_path)
    for csv_path in (
        './output_topN_ebm/rec_result_dpebm_location_v2.csv',
        './output_topN_ebm/rec_result_dpebm_extended_v2.csv',
        './output_topN_ebm/rec_result_dpebm_side_info_v2.csv',
    )
)

# Function for model fidelity

In [12]:
# Distinct user ids of the TF-IDF evaluation data as plain ints, in ascending
# order. Read straight from the column: no groupby is needed, and no
# one-element arrays have to be cast to int.
user_ids_tfidf = sorted(
    evaluation_knn_tfidf.UserID.dropna().astype('int').unique().tolist()
)

In [13]:
# Distinct user ids of the LDA ranking data as plain ints, in ascending
# order. Read straight from the column: no groupby is needed, and no
# one-element arrays have to be cast to int.
user_ids_lda = sorted(
    evaluation_knn_lda.UserID.dropna().astype('int').unique().tolist()
)

In [14]:
# Difference between the number of user ids in the two lists.
len(user_ids_tfidf) - len(user_ids_lda)

25

In [15]:
# Keep only the users that appear in both id lists.
final_users = set(user_ids_tfidf) & set(user_ids_lda)

In [16]:
# Number of users shared by both lists.
len(final_users)

3691

In [17]:
# Turn the set into a list so it can be indexed and sliced below.
# The resulting order is the set's iteration order, i.e. not sorted.
final_users = list(final_users)

In [18]:
# Pick the first user as a worked example for the cells below.
u_id = final_users[0]
u_id

876546

In [19]:
# JobIDs recommended to the example user by logreg (reference) and by
# ebm_extended (explainable model).
source_rec = rec20_logreg.loc[rec20_logreg.UserID == u_id, 'JobID'].values
xai_rec = rec20_ebm_extended.loc[rec20_ebm_extended.UserID == u_id, 'JobID'].values

In [20]:
# Show the reference model's JobIDs for the example user.
source_rec

array([ 543031,   67239,    6164,  517422,  419313,  767935,  278301,
        168362,  699278, 1048645,  253056,  791786,  965826,  357736,
        908477,   76585,  161066,  634722,  934005,   70068])

In [21]:
# Show the explainable model's JobIDs for the example user.
xai_rec

array([ 705799,   11246,  176089,  878808,  554533,  792168,  274070,
        970076,   16341,  133447,  382523,   49926,  162485,  320457,
        338269, 1060090,  205210,  562713,  683191,  531223])

In [22]:
# JobIDs that both models recommend to the example user.
fidelity = set(source_rec).intersection(xai_rec)

In [23]:
# Fraction of the reference recommendations that also appear in the
# explainable model's list.
fidelity_rate = len(fidelity)/len(source_rec)

In [24]:
# Show the fidelity rate of the example user.
fidelity_rate

0.0

In [25]:
def get_fidelity_rate(source_rec, xai_rec, u_id):
    """Share of one user's reference recommendations reproduced by the explainable model.

    Parameters
    ----------
    source_rec : pandas.DataFrame
        Recommendations of the reference (black-box) model; must provide the
        columns ``UserID`` and ``JobID``.
    xai_rec : pandas.DataFrame
        Recommendations of the explainable model, with the same two columns.
    u_id
        Value of ``UserID`` selecting the user whose lists are compared.

    Returns
    -------
    float
        ``len(source & xai) / len(source)`` computed over the distinct JobIDs
        of the user, a value between 0 and 1. Returns 0.0 when the reference
        model has no recommendation for the user (nothing to reproduce),
        instead of failing with a division by zero.
    """
    source_jobs = set(source_rec.loc[source_rec.UserID == u_id, 'JobID'])
    xai_jobs = set(xai_rec.loc[xai_rec.UserID == u_id, 'JobID'])
    if not source_jobs:
        return 0.0
    # Both sides are sets, so a JobID listed twice cannot deflate the rate.
    return len(source_jobs & xai_jobs) / len(source_jobs)

In [26]:
# Smoke test of get_fidelity_rate on the first five users:
# logreg (reference) against ebm_extended.
fidelity_all = []
for u_id in final_users[:5]:
    print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(rec20_logreg, rec20_ebm_extended, u_id)
    print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all += [fidelity_rate]

Evaluation users:  876546
Fidelity rate for this user: 0.0
Evaluation users:  1359874
Fidelity rate for this user: 0.0
Evaluation users:  1302532
Fidelity rate for this user: 0.0
Evaluation users:  819206
Fidelity rate for this user: 0.0
Evaluation users:  57352
Fidelity rate for this user: 0.0


In [27]:
# Smoke test of get_fidelity_rate on the first five users:
# fm_extended (reference) against ebm_extended.
fidelity_all = []
for u_id in final_users[:5]:
    print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(rec20_fm_extended, rec20_ebm_extended, u_id)
    print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all += [fidelity_rate]

Evaluation users:  876546
Fidelity rate for this user: 0.35
Evaluation users:  1359874
Fidelity rate for this user: 0.2
Evaluation users:  1302532
Fidelity rate for this user: 0.35
Evaluation users:  819206
Fidelity rate for this user: 0.7
Evaluation users:  57352
Fidelity rate for this user: 0.35


In [28]:
# Accumulators for the summary table: one entry per model comparison
# (reference model name, explainable model name, mean fidelity).
source_recs, xai_recs, avg_fidelity = [], [], []

# EBM models vs FM models

## ebm_extended vs. fm_extended

In [29]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_extended, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 2.38 s, sys: 0 ns, total: 2.38 s
Wall time: 2.38 s


In [30]:
# Mean of the per-user fidelity rates currently held in fidelity_all.
fidelity_extended = np.asarray(fidelity_all).mean()
print(fidelity_extended)

0.46327553508534275


In [31]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'fm_extended', 'ebm_extended'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_extended]

## ebm_side_info vs. fm_side_info

In [32]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_side_info, 
                                      xai_rec = rec20_ebm_side_info, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 2.32 s, sys: 0 ns, total: 2.32 s
Wall time: 2.32 s


In [33]:
# Mean of the per-user fidelity rates currently held in fidelity_all.
fidelity_side_info = np.asarray(fidelity_all).mean()
print(fidelity_side_info)

0.31150094825250607


In [34]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'fm_side_info', 'ebm_side_info'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_side_info]

## ebm_location vs. fm_location (fm_match)

In [35]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_match, 
                                      xai_rec = rec20_ebm_location, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 3.16 s, sys: 0 ns, total: 3.16 s
Wall time: 3.15 s


In [36]:
# Mean of the per-user fidelity rates currently held in fidelity_all.
fidelity_location = np.asarray(fidelity_all).mean()
print(fidelity_location)

0.21784069357897587


In [37]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'fm_match', 'ebm_location'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_location]

# DPEBM models vs. FM models

## dpebm_extended vs. fm_extended

In [38]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_extended, 
                                      xai_rec = rec20_dpebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 3.06 s, sys: 0 ns, total: 3.06 s
Wall time: 3.05 s


In [39]:
# Mean of the per-user fidelity rates currently held in fidelity_all.
fidelity_dpebm_extended = np.asarray(fidelity_all).mean()
print(fidelity_dpebm_extended)

0.37900298022216206


In [40]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'fm_extended', 'dpebm_extended'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_dpebm_extended]

## dpebm_side_info vs. fm_side_info

In [41]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_side_info, 
                                      xai_rec = rec20_dpebm_side_info, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 2.65 s, sys: 0 ns, total: 2.65 s
Wall time: 2.65 s


In [42]:
# Mean of the per-user fidelity rates currently held in fidelity_all.
fidelity_dpebm_side_info = np.asarray(fidelity_all).mean()
print(fidelity_dpebm_side_info)

0.21216472500677322


In [43]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'fm_side_info', 'dpebm_side_info'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_dpebm_side_info]

## dpebm_location vs. fm_location (fm_match)

In [44]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_match, 
                                      xai_rec = rec20_dpebm_location, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 2.74 s, sys: 2.71 ms, total: 2.74 s
Wall time: 2.73 s


In [45]:
# Mean of the per-user fidelity rates currently held in fidelity_all.
fidelity_dpebm_location = np.asarray(fidelity_all).mean()
print(fidelity_dpebm_location)

0.21782714711460308


In [46]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'fm_match', 'dpebm_location'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_dpebm_location]

# ebm_extended vs. black-box models

## ebm_extended vs. xgbt

In [47]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_xgbt, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

# Average fidelity
fidelity_xgbt = np.mean(fidelity_all)
print(fidelity_xgbt)

0.011528041181251692
CPU times: user 2.42 s, sys: 1.86 ms, total: 2.42 s
Wall time: 2.41 s


In [48]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'xgbt', 'ebm_extended'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_xgbt]

## ebm_extended vs. ada

In [49]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_ada, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

# Average fidelity
fidelity_ada = np.mean(fidelity_all)
print(fidelity_ada)

0.011284204822541319
CPU times: user 2.34 s, sys: 0 ns, total: 2.34 s
Wall time: 2.33 s


In [50]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'ada', 'ebm_extended'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_ada]

## ebm_extended vs. lda

In [51]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_lda, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)
    
# Average fidelity
fidelity_lda = np.mean(fidelity_all)
print(fidelity_lda)

0.011379030073150907
CPU times: user 2.33 s, sys: 0 ns, total: 2.33 s
Wall time: 2.32 s


In [52]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'lda', 'ebm_extended'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_lda]

## ebm_extended vs. qda

In [53]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_qda, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)
    
# Average fidelity
fidelity_qda = np.mean(fidelity_all)
print(fidelity_qda)

0.01009211595773503
CPU times: user 3.17 s, sys: 4.16 ms, total: 3.17 s
Wall time: 3.16 s


In [54]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'qda', 'ebm_extended'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_qda]

# dpebm_extended vs. black-box models

In [55]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_xgbt, 
                                      xai_rec = rec20_dpebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

    # Average fidelity
fidelity_xgbt_dpebm = np.mean(fidelity_all)
print(fidelity_xgbt_dpebm)

0.012083446220536443
CPU times: user 2.4 s, sys: 13.4 ms, total: 2.41 s
Wall time: 2.4 s


In [56]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'xgbt', 'dpebm_extended'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_xgbt_dpebm]

In [57]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_ada, 
                                      xai_rec = rec20_dpebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

    # Average fidelity
fidelity_ada_dpebm = np.mean(fidelity_all)
print(fidelity_ada_dpebm)

0.010796532105120563
CPU times: user 2.35 s, sys: 1.94 ms, total: 2.35 s
Wall time: 2.35 s


In [58]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'ada', 'dpebm_extended'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_ada_dpebm]

In [59]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_lda, 
                                      xai_rec = rec20_dpebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

    # Average fidelity
fidelity_lda_dpebm = np.mean(fidelity_all)
print(fidelity_lda_dpebm)

0.010755892712002169
CPU times: user 2.3 s, sys: 0 ns, total: 2.3 s
Wall time: 2.3 s


In [60]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'lda', 'dpebm_extended'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_lda_dpebm]

In [61]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_qda, 
                                      xai_rec = rec20_dpebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

    # Average fidelity
fidelity_qda_dpebm = np.mean(fidelity_all)
print(fidelity_qda_dpebm)

0.009658629097805474
CPU times: user 2.32 s, sys: 5.89 ms, total: 2.33 s
Wall time: 2.32 s


In [62]:
# Add this comparison as the next row of the summary lists.
# Re-running the cell appends the same row again.
source_rec, xai_rec = 'qda', 'dpebm_extended'
source_recs += [source_rec]
xai_recs += [xai_rec]
avg_fidelity += [fidelity_qda_dpebm]

# Summary

In [63]:
# Sanity check: the three summary lists must have the same length
# (one entry per comparison) before they are combined into a table.
len(source_recs), len(xai_recs), len(avg_fidelity)

(14, 14, 14)

In [64]:
# Combine the three summary lists into one table, one row per comparison.
fidelity_df = pd.DataFrame(
    {
        "source_rec": source_recs,
        "xai_rec": xai_recs,
        "avg_fidelity": avg_fidelity,
    }
)

In [65]:
# Display the summary table.
fidelity_df

Unnamed: 0,source_rec,xai_rec,avg_fidelity
0,fm_extended,ebm_extended,0.463276
1,fm_side_info,ebm_side_info,0.311501
2,fm_match,ebm_location,0.217841
3,fm_extended,dpebm_extended,0.379003
4,fm_side_info,dpebm_side_info,0.212165
5,fm_match,dpebm_location,0.217827
6,xgbt,ebm_extended,0.011528
7,ada,ebm_extended,0.011284
8,lda,ebm_extended,0.011379
9,qda,ebm_extended,0.010092


In [66]:
# Persist the summary table (with header row, without the index column).
fidelity_df.to_csv(
    path_or_buf='./model_fidelity/fidelity_ranking_knn.csv',
    index=False,
    header=True,
)