# Model fidelity
## Ranking data: random ranking data
Compare the recommendation results of the explainable models (EBM/DPEBM) with those of the FM models and the black-box models.

In [1]:
import numpy as np
import pandas as pd

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Run this cell first so the relative paths used below resolve from the project root.
# Gets the current working directory
import os
cwd = os.getcwd()
print("Working directory:", cwd)
# Go up one directory from the notebook folder. The guard makes the cell safe to
# re-run: once we have left "model_fidelity" a second run no longer climbs further.
if os.path.basename(cwd) == "model_fidelity":
    os.chdir("..")

Working directory: /home/jovyan/1_UT THESIS/CB12_MAIN/model_fidelity


# Load black-box rec result

In [4]:
rec20_logreg = pd.read_csv('./output_topN_tfidf_vector/rec20_logreg_tfidf_random_ranking.csv')
rec20_xgbt = pd.read_csv('./output_topN_tfidf_vector/rec20_xgbt_tfidf_random_ranking.csv')
rec20_ada = pd.read_csv('./output_topN_tfidf_vector/rec20_ada_tfidf_random_ranking.csv')
rec20_lda = pd.read_csv('./output_topN_tfidf_vector/rec20_lda_tfidf_random_ranking.csv')
rec20_qda = pd.read_csv('./output_topN_tfidf_vector/rec20_qda_tfidf_random_ranking.csv')

In [5]:
len(rec20_logreg), len(rec20_xgbt), len(rec20_ada), len(rec20_lda), len(rec20_qda)

(74320, 74320, 74320, 74320, 74320)

In [6]:
# evaluation_knn_tfidf = pd.read_csv('./nb_recsys_tabular/evaluation_knn_tfidf_v2.csv')

# Load EBM rec result

In [7]:
rec20_ebm_location = pd.read_csv('./output_topN_ebm/rec_result_location_v3.csv')
rec20_ebm_extended = pd.read_csv('./output_topN_ebm/rec_result_ebm_extended_v3.csv')
rec20_ebm_side_info = pd.read_csv('./output_topN_ebm/rec_result_side_info_v3.csv')

In [8]:
len(rec20_ebm_location), len(rec20_ebm_extended), len(rec20_ebm_side_info)

(74320, 74320, 74320)

# Load FM rec result

In [9]:
# FM recommendation lists, one CSV per feature configuration.
rec20_fm = pd.read_csv('./output_topN_myfm/rec20_fm_v3.csv')
# NOTE(review): this reads the same file as rec20_fm above, so rec20_fm_match is a
# second copy of rec20_fm. If a separate "match" result file exists, confirm the path.
rec20_fm_match = pd.read_csv('./output_topN_myfm/rec20_fm_v3.csv')
rec20_fm_side_info = pd.read_csv('./output_topN_myfm/rec20_fm_side_info_v3.csv')
rec20_fm_extended = pd.read_csv('./output_topN_myfm/rec20_fm_extended_v3.csv')

In [10]:
len(rec20_fm), len(rec20_fm_match), len(rec20_fm_side_info), len(rec20_fm_extended)

(74320, 74320, 73820, 73820)

# Load DPEBM rec result

In [11]:
rec20_dpebm_location = pd.read_csv('./output_topN_ebm/rec_result_dpebm_location_v3.csv')
rec20_dpebm_side_info = pd.read_csv('./output_topN_ebm/rec_result_dpebm_side_info_v3.csv')

In [12]:
rec20_dpebm_extended = pd.read_csv('./output_topN_ebm/rec_result_dpebm_extended_v3.csv')
# len(rec20_dpebm_location), len(rec20_dpebm_side_info), len(rec20_dpebm_extended)

In [13]:
len(rec20_dpebm_location), len(rec20_dpebm_side_info)

(74320, 74320)

# Function for model fidelity

In [14]:
# Load ranking random data
ranking_data_random = pd.read_csv('./nb_recsys_myfm/ranking_data_random.csv')

In [15]:
len(rec20_fm_side_info.UserID.unique())

3691

In [16]:
fm_extended_users = rec20_fm_extended.UserID.unique()

In [17]:
test_users = ranking_data_random.UserID.unique()

In [18]:
final_users = set(fm_extended_users) & set(test_users)

In [19]:
# final_users = ranking_data_random.UserID.unique()

In [20]:
len(final_users)

3691

In [21]:
final_users = list(final_users)

In [22]:
u_id = final_users[0]
u_id

876546

In [23]:
source_rec = rec20_logreg[rec20_logreg.UserID==u_id].JobID.values
xai_rec = rec20_ebm_extended[rec20_ebm_extended.UserID==u_id].JobID.values

In [24]:
source_rec

array([584459, 821090, 451671, 562084,  15909, 993166, 662374, 915274,
       348399, 948182, 342091, 536522, 953308,   6676, 582285, 795140,
       197354, 693077, 758425, 164399])

In [25]:
xai_rec

array([584459, 777681, 465805, 502462, 789667, 877454, 662374, 276830,
       826936, 150700, 342091, 443744, 764131, 164399, 849895, 466519,
        54279, 451671, 299134, 296122])

In [26]:
fidelity = set(source_rec) & set(xai_rec)

In [27]:
fidelity_rate = len(fidelity)/len(source_rec)

In [28]:
fidelity_rate

0.25

In [29]:
def get_fidelity_rate(source_rec, xai_rec, u_id):
    """Share of one user's source-model recommendations also made by the explainable model.

    Parameters
    ----------
    source_rec : pandas.DataFrame
        Recommendations of the reference model; needs ``UserID`` and ``JobID`` columns.
    xai_rec : pandas.DataFrame
        Recommendations of the explainable model; same two columns.
    u_id
        Value compared against ``UserID`` in both frames.

    Returns
    -------
    float
        ``|source & xai| / |source|`` computed on the user's distinct JobIDs.
        ``nan`` when ``source_rec`` holds no rows for ``u_id`` (the ratio is
        undefined there; previously this raised ZeroDivisionError).
    """
    source_jobs = set(source_rec.loc[source_rec.UserID == u_id, "JobID"])
    if not source_jobs:
        return float("nan")
    xai_jobs = set(xai_rec.loc[xai_rec.UserID == u_id, "JobID"])
    # Numerator and denominator are both counted on distinct JobIDs, so a
    # repeated JobID in the source list cannot deflate the rate.
    return len(source_jobs & xai_jobs) / len(source_jobs)

In [30]:
# Smoke test: logreg (source) against ebm_extended on the first five users
fidelity_all = []
for u_id in final_users[:5]:
    print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(rec20_logreg, rec20_ebm_extended, u_id)
    print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

Evaluation users:  876546
Fidelity rate for this user: 0.25
Evaluation users:  1359874
Fidelity rate for this user: 0.35
Evaluation users:  1302532
Fidelity rate for this user: 0.1
Evaluation users:  819206
Fidelity rate for this user: 0.25
Evaluation users:  57352
Fidelity rate for this user: 0.2


In [31]:
# Smoke test: fm_extended (source) against ebm_extended on the first five users
fidelity_all = []
for u_id in final_users[:5]:
    print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(rec20_fm_extended, rec20_ebm_extended, u_id)
    print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

Evaluation users:  876546
Fidelity rate for this user: 0.55
Evaluation users:  1359874
Fidelity rate for this user: 0.35
Evaluation users:  1302532
Fidelity rate for this user: 0.5
Evaluation users:  819206
Fidelity rate for this user: 0.7
Evaluation users:  57352
Fidelity rate for this user: 0.7


In [32]:
# Parallel accumulators for the summary table: one entry per compared model pair.
source_recs, xai_recs, avg_fidelity = [], [], []

# EBM models vs FM models

## ebm_extended vs. fm_extended

In [33]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_extended, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 3.89 s, sys: 0 ns, total: 3.89 s
Wall time: 3.88 s


In [34]:
# # Test function
# fidelity_all = []
# for u_id in final_users[:5]:
#     print('Evaluation users: ', u_id)
#     fidelity_rate = get_fidelity_rate(source_rec = rec20_fm_extended, 
#                                       xai_rec = rec20_ebm_extended, 
#                                       u_id = u_id)
#     print('Fidelity rate for this user:', fidelity_rate)
#     fidelity_all.append(fidelity_rate)

In [35]:
# def get_fidelity_rate_detail(source_rec, xai_rec, u_id):
#     source_rec = source_rec[source_rec.UserID==u_id].JobID.values
#     xai_rec = xai_rec[xai_rec.UserID==u_id].JobID.values
#     print('source_rec:', source_rec)
#     print('xai_rec:', xai_rec)
#     fidelity = set(source_rec) & set(xai_rec)
#     fidelity_rate = len(fidelity)/len(source_rec)
#     return fidelity_rate

In [36]:
# get_fidelity_rate(source_rec = rec20_fm_extended, 
#                   xai_rec = rec20_ebm_extended, 
#                   u_id = 13)

In [37]:
# Average fidelity
fidelity_extended = np.mean(fidelity_all)
print(fidelity_extended)

0.5439311839609862


In [38]:
source_rec =  'fm_extended'
xai_rec = 'ebm_extended'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_extended)

## ebm_side_info vs. fm_side_info

In [39]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_side_info, 
                                      xai_rec = rec20_ebm_side_info, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 3.05 s, sys: 0 ns, total: 3.05 s
Wall time: 3.04 s


In [40]:
# Average fidelity
fidelity_side_info = np.mean(fidelity_all)
print(fidelity_side_info)

0.5426713627743159


In [41]:
source_rec =  'fm_side_info'
xai_rec = 'ebm_side_info'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_side_info)

## ebm_location vs. fm_location (fm_match)

In [42]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_match, 
                                      xai_rec = rec20_ebm_location, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 2.05 s, sys: 0 ns, total: 2.05 s
Wall time: 2.05 s


In [43]:
# Average fidelity
fidelity_location = np.mean(fidelity_all)
print(fidelity_location)

0.39758872934164186


In [44]:
source_rec =  'fm_match'
xai_rec = 'ebm_location'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_location)

### Check: the same comparison computed on all test users

In [45]:
%%time
fidelity_all = []
for u_id in test_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_match, 
                                      xai_rec = rec20_ebm_location, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 2.05 s, sys: 0 ns, total: 2.05 s
Wall time: 2.05 s


In [46]:
# Average fidelity over all test users. Stored under its own name so that
# fidelity_location (the value appended to the summary lists) is not overwritten.
fidelity_location_test_users = np.mean(fidelity_all)
print(fidelity_location_test_users)

0.3975914962325081


# DPEBM models vs. FM models

## dpebm_extended vs. fm_extended

In [47]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_extended, 
                                      xai_rec = rec20_dpebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 2.04 s, sys: 0 ns, total: 2.04 s
Wall time: 2.04 s


In [48]:
# Average fidelity
fidelity_dpebm_extended = np.mean(fidelity_all)
print(fidelity_dpebm_extended)

0.24497426171769168


In [49]:
source_rec =  'fm_extended'
xai_rec = 'dpebm_extended'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_dpebm_extended)

## dpebm_side_info vs. fm_side_info

In [50]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_side_info, 
                                      xai_rec = rec20_dpebm_side_info, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 2.04 s, sys: 0 ns, total: 2.04 s
Wall time: 2.04 s


In [51]:
# Average fidelity
fidelity_dpebm_side_info = np.mean(fidelity_all)
print(fidelity_dpebm_side_info)

0.23091303169872662


In [52]:
source_rec =  'fm_side_info'
xai_rec = 'dpebm_side_info'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_dpebm_side_info)

## dpebm_location vs. fm_location (fm_match)

In [53]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_fm_match, 
                                      xai_rec = rec20_dpebm_location, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

CPU times: user 2.04 s, sys: 0 ns, total: 2.04 s
Wall time: 2.04 s


In [54]:
# Average fidelity
fidelity_dpebm_location = np.mean(fidelity_all)
print(fidelity_dpebm_location)

0.39758872934164186


In [55]:
source_rec =  'fm_match'
xai_rec = 'dpebm_location'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_dpebm_location)

# ebm_extended vs. black-box models

## ebm_extended vs. xgbt

In [56]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_xgbt, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

# Average fidelity
fidelity_xgbt = np.mean(fidelity_all)
print(fidelity_xgbt)

0.24123543755079924
CPU times: user 2.06 s, sys: 0 ns, total: 2.06 s
Wall time: 2.06 s


In [57]:
source_rec =  'xgbt'
xai_rec = 'ebm_extended'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_xgbt)

In [58]:
%%time
# Check on whole list of test users
fidelity_all = []
for u_id in test_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_xgbt, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

# Average fidelity
fidelity_xgbt = np.mean(fidelity_all)
print(fidelity_xgbt)

0.24132131324004305
CPU times: user 2.06 s, sys: 0 ns, total: 2.06 s
Wall time: 2.06 s


## ebm_extended vs. ada

In [59]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_ada, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

# Average fidelity
fidelity_ada = np.mean(fidelity_all)
print(fidelity_ada)

0.22963966404768352
CPU times: user 2.04 s, sys: 3.42 ms, total: 2.04 s
Wall time: 2.04 s


In [60]:
source_rec =  'ada'
xai_rec = 'ebm_extended'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_ada)

In [61]:
%%time
# Check on whole list of test users
fidelity_all = []
for u_id in test_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_ada, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

# Average fidelity
fidelity_ada = np.mean(fidelity_all)
print(fidelity_ada)

0.22957481162540366
CPU times: user 2.06 s, sys: 0 ns, total: 2.06 s
Wall time: 2.06 s


## ebm_extended vs. lda

In [62]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_lda, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)
    
# Average fidelity
fidelity_lda = np.mean(fidelity_all)
print(fidelity_lda)

0.22939582768897318
CPU times: user 2.04 s, sys: 3.18 ms, total: 2.05 s
Wall time: 2.05 s


In [63]:
source_rec =  'lda'
xai_rec = 'ebm_extended'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_lda)

## ebm_extended vs. qda

In [64]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_qda, 
                                      xai_rec = rec20_ebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)
    
# Average fidelity
fidelity_qda = np.mean(fidelity_all)
print(fidelity_qda)

0.22851530750474128
CPU times: user 2.05 s, sys: 0 ns, total: 2.05 s
Wall time: 2.04 s


In [65]:
source_rec =  'qda'
xai_rec = 'ebm_extended'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_qda)

# dpebm_extended vs. black-box models

In [66]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_xgbt, 
                                      xai_rec = rec20_dpebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

    # Average fidelity
fidelity_xgbt_dpebm = np.mean(fidelity_all)
print(fidelity_xgbt_dpebm)

1.0
CPU times: user 2.03 s, sys: 9.76 ms, total: 2.04 s
Wall time: 2.04 s


In [67]:
source_rec =  'xgbt'
xai_rec = 'dpebm_extended'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_xgbt_dpebm)

In [68]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_ada, 
                                      xai_rec = rec20_dpebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

    # Average fidelity
fidelity_ada_dpebm = np.mean(fidelity_all)
print(fidelity_ada_dpebm)

0.2665266865348144
CPU times: user 2.04 s, sys: 10.7 ms, total: 2.05 s
Wall time: 2.05 s


In [69]:
source_rec =  'ada'
xai_rec = 'dpebm_extended'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_ada_dpebm)

In [70]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_lda, 
                                      xai_rec = rec20_dpebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

    # Average fidelity
fidelity_lda_dpebm = np.mean(fidelity_all)
print(fidelity_lda_dpebm)

0.25353562720130046
CPU times: user 2.03 s, sys: 0 ns, total: 2.03 s
Wall time: 2.03 s


In [71]:
source_rec =  'lda'
xai_rec = 'dpebm_extended'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_lda_dpebm)

In [72]:
%%time
fidelity_all = []
for u_id in final_users:
    # print('Evaluation users: ', u_id)
    fidelity_rate = get_fidelity_rate(source_rec =  rec20_qda, 
                                      xai_rec = rec20_dpebm_extended, 
                                      u_id = u_id)
    # print('Fidelity rate for this user:', fidelity_rate)
    fidelity_all.append(fidelity_rate)

    # Average fidelity
fidelity_qda_dpebm = np.mean(fidelity_all)
print(fidelity_qda_dpebm)

0.2657138986724465
CPU times: user 2.04 s, sys: 84 µs, total: 2.04 s
Wall time: 2.04 s


In [73]:
source_rec =  'qda'
xai_rec = 'dpebm_extended'
source_recs.append(source_rec)
xai_recs.append(xai_rec)
avg_fidelity.append(fidelity_qda_dpebm)

# Summary

In [74]:
len(source_recs), len(xai_recs), len(avg_fidelity)

(14, 14, 14)

In [75]:
# Build the summary table in one step from the three parallel lists instead of
# assigning columns through attribute access on an empty frame. The constructor
# raises ValueError if the lists ever differ in length.
fidelity_df = pd.DataFrame({
    "source_rec": source_recs,
    "xai_rec": xai_recs,
    "avg_fidelity": avg_fidelity,
})

In [76]:
fidelity_df

Unnamed: 0,source_rec,xai_rec,avg_fidelity
0,fm_extended,ebm_extended,0.543931
1,fm_side_info,ebm_side_info,0.542671
2,fm_match,ebm_location,0.397589
3,fm_extended,dpebm_extended,0.244974
4,fm_side_info,dpebm_side_info,0.230913
5,fm_match,dpebm_location,0.397589
6,xgbt,ebm_extended,0.241235
7,ada,ebm_extended,0.22964
8,lda,ebm_extended,0.229396
9,qda,ebm_extended,0.228515


In [77]:
fidelity_df.to_csv('./model_fidelity/fidelity_ranking_random.csv', header=True, index=False)