In [1]:
import os
import subprocess
from multiprocessing import Pool

import lime
import lime.lime_tabular  # explicit: `import lime` alone does not load the submodule
import numpy as np
import scipy.stats as stats

# Switch to the parent directory before importing the project-local modules
# (presumably where the `utils` package and the data/model folders live).
os.chdir(os.path.expanduser('..'))
from utils.rerank import write_average, rerank_ndcg,write_tau,write_ratio
from utils.readdata import get_microsoft_data, rewrite
from utils.separate_set import separate_set
from utils.explainer_tools import rand_row, evaluate, get_rankedduculist, get_set_cover

In [2]:
def get_set_cover(shap_values, top_n=10):
    """
    Pick the features with the largest total attribution.

    The attribution values are summed per feature (over all rows) and the
    features are ordered by that sum, largest first.

    :param shap_values: attribution values, either a single vector of length
        n_features or a 2-D array-like of shape (n_rows, n_features)
    :param top_n: maximum number of feature indices to return (default 10,
        the value that used to be hard-coded)
    :return: 1-D integer array with at most ``top_n`` feature indices, ordered
        from the largest summed attribution to the smallest
    """
    # atleast_2d lets a bare 1-D vector through; previously that case failed
    # because the argsort result collapsed to a scalar before being sliced.
    values = np.atleast_2d(np.asarray(shap_values))
    per_feature_total = values.sum(axis=0)
    # Negate so that argsort (ascending) yields a descending ranking.
    ranked_features = (-per_feature_total).argsort()
    return ranked_features[:top_n]

In [3]:
def score(X):
    """
    Score a feature matrix with the RankLib model for the query currently being processed.

    Steps:
      1. ``rewrite`` dumps ``X`` into a temporary RankLib input file (``restore_lime_<qid>.txt``).
      2. RankLib is run as a subprocess and writes an indri-format score file
         (``scorefile_lime_<qid>.txt``).
      3. The scores are read back in file order.
      4. The qid token of every line of the input file is reset to the id of the current
         query, so that a later NDCG evaluation of that file sees the original query id.

    Both temporary files are left on disk when this function returns.

    Relies on the module-level names ``temp_path``, ``model``, ``tmp_test_y_query`` and
    ``tmp_test_Query``.

    :param X: input feature matrix
    :return: 1-D numpy array holding one score per line of the score file
    :raises subprocess.CalledProcessError: if the RankLib process exits with a non-zero status
    """
    # The query id is the part after the last ':' in the first query label.
    query_id = tmp_test_y_query[0].split(':')[-1].split()[0]
    scorefile_path = temp_path + 'scorefile_lime_{}.txt'.format(query_id)
    restore_path = temp_path + 'restore_lime_{}.txt'.format(query_id)

    rewrite(X, tmp_test_y_query, tmp_test_Query, restore_path)
    args = ['java', '-jar', 'RankLib-2.12.jar', '-rank', restore_path, '-load', model,
            '-indri', scorefile_path]
    subprocess.check_output(args, stderr=subprocess.STDOUT)

    # The score is the second-to-last whitespace-separated field of each line.
    with open(scorefile_path, 'r') as f:
        scores = [float(line.split()[-2]) for line in f]

    # Reset the qid to the original one, otherwise the NDCG computed from this file is wrong.
    # Read through a context manager so the handle is closed before the file is rewritten.
    with open(restore_path, 'r') as f:
        restore_lines = f.readlines()
    qid_token = 'qid:{}'.format(query_id)
    with open(restore_path, 'w') as f:
        for line in restore_lines:
            tokens = line.split()
            tokens[1] = qid_token
            # Keep the historical layout: tokens separated by one space, plus a trailing space.
            f.write(' '.join(tokens) + ' \n')

    # Return a flat array. The former `A.reshape(-1, 1)` discarded its result, so this has
    # always been the effective return shape; callers reshape when they need a column.
    return np.array(scores)


def loop_query(query_index):
    """
    Run the LIME feature-replacement experiment for one query.

    The documents of the query are scored, the top-scored document is explained with LIME,
    and then, for 5 and for 10 features, the highest-weighted features are overwritten with
    ``expected_value`` for every document of the query. The documents are scored again and
    the change is logged (NDCG delta/ratio and Kendall tau between the two rank lists).

    Relies on module-level names set up by the driver cell: ``test_data``, ``test_y_query``,
    ``test_Query``, ``q_d_len``, ``temp_path``, ``NDCGdata_path``, ``model``, ``modelname``,
    ``dataname``, ``k``, ``threshold``, ``expected_value`` and ``explainer``.

    :param query_index: the index of the query inside ``test_data``
    :return: None; one line per (feature count, threshold) is appended to the NDCG log file
        and to the rank-list log file under ``NDCGdata_path``
    """
    # get data for this query
    # These are published as module globals because `score` only receives the feature
    # matrix (LIME calls it with a single argument) and reads the query labels from them.
    global tmp_test_data
    global tmp_test_y_query
    global tmp_test_Query
    tmp_test_data =test_data[query_index]
    tmp_test_y_query = test_y_query[query_index]
    tmp_test_Query = test_Query[query_index]
    # query id = text after the last ':' in the first query label
    query_id = tmp_test_y_query[0].split(':')[-1].split()[0]

    # calculate the scores for the q-d pairs (as a column vector)
    scores = score(tmp_test_data).reshape(-1,1)
    # same temp file names that `score` builds internally
    restore_path = temp_path + 'restore_lime_{}.txt'.format(query_id)
    scorefile_path = temp_path + 'scorefile_lime_{}.txt'.format(query_id)

    # reranking the test_data according to the scores and get the list of ranking:
    # append the score as an extra column, sort rows by descending score, drop the column again
    test_data_score = np.append(tmp_test_data,scores,axis=1)
    ranked_test_data = (test_data_score[(-test_data_score[:,-1]).argsort()])[:,:-1]
    rankedduculist1 = get_rankedduculist(scores, query_index,q_d_len)
    # evaluated on the file `score` just wrote; presumably the NDCG of the unmodified query
    NDCG_before =evaluate(model,restore_path)

    
    # Explain only the top-scored document. `score` is the prediction function, so LIME
    # calls it on its perturbed samples and the temp files are overwritten in the process.
    # NOTE(review): `k` is only used in the output file names below; the explanation itself
    # does not depend on it. Confirm this is intended.
    # NOTE(review): num_features=136 presumably matches the MSLR-WEB10K feature count.
    exp = explainer.explain_instance(ranked_test_data[0], score, num_features=136, num_samples=5000, distance_metric='euclidean', model_regressor=None)
    # sort the pairs in local_exp[1] by their first element and keep the second (the weight),
    # giving a 1 x n_features row
    query1_lime_values = np.array([[x[1] for x in sorted(exp.local_exp[1])]])
    
    def feature_k_loop(feature_number,threshold_flag):
        """
        Replace the top ``feature_number`` features, rescore, and append the results to the logs.

        :param feature_number: how many of the highest-weighted features to overwrite
        :param threshold_flag: only used as a label in the output file names
        """
        # indices of the highest-weighted features, best first
        top_k_idx  = get_set_cover(query1_lime_values)
        NDCG_file_name = NDCGdata_path + '{}_lime{}_{}features_threshold{}'.format(dataname,k,feature_number, threshold_flag) + modelname + '.txt'
        ranklist_file = NDCGdata_path + '{}_ranklist_lime{}_{}features_threshold{}'.format(dataname,k,feature_number, threshold_flag) + modelname + '.txt'
        # work on a copy so the original feature matrix stays intact for the next call
        features_to_change = tmp_test_data.copy()
        # never ask for more features than were returned (the file names above keep the requested number)
        if len(top_k_idx)<= feature_number:
            feature_number = len(top_k_idx)
        # overwrite the selected columns with the corresponding entries of expected_value
        features_to_change[:,top_k_idx[0:feature_number]] = expected_value[top_k_idx[0:feature_number]]
        # get scores of the changed features (this rewrites both temp files)
        scores2 = score(features_to_change).reshape(-1,1)
        NDCG_after = evaluate(model,restore_path)
        delta_NDCG = abs(float(NDCG_before) - float(NDCG_after))
        # avoid dividing by zero when the original value is 0
        if float(NDCG_before)  == 0:
            ratio_NDCG = 0
        else:
            ratio_NDCG = delta_NDCG/float(NDCG_before) 
            
        rankedduculist2 = get_rankedduculist(scores2, query_index,q_d_len)
        # rank correlation between the list before and after the replacement
        tau, p_value = stats.kendalltau(rankedduculist1, rankedduculist2)
        # clean up the temp files; the next call to score() creates them again
        os.remove(scorefile_path)
        os.remove(restore_path)
        # append mode: one line per query is added to each log file
        with open(NDCG_file_name,'a') as NDCG_FILE:
            NDCG_line = tmp_test_y_query[0].split(':')[-1]+'  ' + \
                        'changed feature:'+ str(top_k_idx[0:feature_number])+'  '+'kendalltau='+str(round(tau,4))+ '  '+'ratioNDCG:'+ str(round(ratio_NDCG,4))+'  '+'delta_NDCG ='+'  '+str(delta_NDCG)+"\n"
            NDCG_FILE.write(NDCG_line)
        with open(ranklist_file, 'a') as ranklist:
            ranklist_line = tmp_test_y_query[0].split(':')[-1] + '  ' + 'ranklist before:' + str(
                rankedduculist1) + '  ' + 'ranklist after:' + '  ' + str(rankedduculist2) + "\n"
            ranklist.write(ranklist_line)
                   
    # run the experiment with the top 5 and the top 10 features for every threshold label
    for threshold_flag in threshold:
        feature_k_loop(5,threshold_flag)        
        feature_k_loop(10,threshold_flag)
        
        

In [4]:
if __name__ == '__main__':
    # Driver: for every model / fold / k, run loop_query over all test queries and then
    # aggregate the per-query log files into the three summary files under resultfile/.
    #
    # The names assigned here (model, k, threshold, modelname, dataname, NDCGdata_path,
    # temp_path, expected_value, test_data, test_y_query, test_Query, q_d_len, explainer)
    # are read as module globals by loop_query and score, so they must keep these names.

    # ---- parameters to be set ----
    model_path = 'model/'
    model_set = ['LambdaMART_model.txt']

    # ---- output locations ----
    NDCGdata_path = 'logs/'      # per-query log files
    temp_path = 'temp_file/'     # scratch files exchanged with RankLib
    result_path = 'resultfile/'  # aggregated summaries
    # Every writer opens files in these directories directly, so make sure they exist.
    for out_dir in (NDCGdata_path, temp_path, result_path):
        os.makedirs(out_dir, exist_ok=True)

    for MODEL in model_set:
        model = model_path + MODEL
        k_set = [1, 5]   # k: label that ends up in the log file names (read by loop_query)
        threshold = [0]  # threshold labels, likewise only part of the log file names

        for f in range(1, 2):
            # the path of data (alternative: 'MSLR-WEB10K/Fold{}/'.format(f))
            datapath = 'MQ2008/Fold{}/'.format(f)
            train_path = datapath + 'train.txt'
            test_path = datapath + 'test.txt'
            # e.g. 'model/LambdaMART_model.txt' -> 'LambdaMART'
            modelname = model.split("_")[0].split("/")[-1]
            # e.g. 'MQ2008/Fold1/' -> 'MQ2008_1'
            dataname = datapath.split('/')[0] + '_' + datapath.split('/')[1].split('Fold')[1]

            # get train data and test data
            X_train, y_query_train, Query_train = get_microsoft_data(train_path)
            X_train = np.array(X_train)
            X_test, y_query_test, Query_test = get_microsoft_data(test_path)
            X_test = np.array(X_test)
            # per-feature mean of the training data; used as the replacement value
            expected_value = np.mean(X_train, axis=0)

            # separate the test set
            test_data, test_y_query, test_Query, q_d_len = separate_set(y_query_test, X_test, Query_test)

            # create an explainer
            explainer = lime.lime_tabular.LimeTabularExplainer(
                X_train, mode='regression', verbose=True, discretize_continuous=False)

            resultfile_NDCG = result_path + '{}_{}_lime_NDCG.txt'.format(dataname, modelname)
            resultfile_tau = result_path + '{}_{}_lime_tau.txt'.format(dataname, modelname)
            resultfile_ratio = result_path + '{}_{}_lime_ratio.txt'.format(dataname, modelname)

            for k in k_set:
                # loop_query returns None and reports through the log files, so the result
                # of map() is not printed.
                # NOTE(review): the worker relies on the module globals above being visible
                # in the pool process (true with the 'fork' start method) - confirm on
                # platforms that use 'spawn'.
                with Pool(1) as p:
                    p.map(loop_query, range(len(test_data)))

                for threshold_flag in threshold:
                    for feature_number in (5, 10):
                        # Must match the file names built inside loop_query.
                        NDCG_file_name = NDCGdata_path + '{}_lime{}_{}features_threshold{}'.format(dataname, k, feature_number, threshold_flag) + modelname + '.txt'
                        ranklist_file = NDCGdata_path + '{}_ranklist_lime{}_{}features_threshold{}'.format(dataname, k, feature_number, threshold_flag) + modelname + '.txt'
                        rerank_ndcg(NDCG_file_name)
                        rerank_ndcg(ranklist_file)
                        tau = write_tau(NDCG_file_name)
                        NDCG = write_average(NDCG_file_name)
                        ratio = write_ratio(NDCG_file_name)

                        # Append one line per configuration to each summary file.
                        # NOTE: log and summary files are opened in append mode, so output
                        # from earlier runs accumulates unless the files are cleared first.
                        for summary_file, value in ((resultfile_NDCG, NDCG),
                                                    (resultfile_tau, tau),
                                                    (resultfile_ratio, ratio)):
                            with open(summary_file, 'a') as summary:
                                summary.write(str(value) + "\n")

Intercept -2.4602650994421436
Prediction_local [0.18424462]
Right: 4.11688
Intercept -2.390166032212472
Prediction_local [-0.70293638]
Right: 2.58565
Intercept -2.445683882848595
Prediction_local [1.73389484]
Right: 3.46662
Intercept -2.419385845880704
Prediction_local [0.12134825]
Right: 1.35982
Intercept -2.4087269722146227
Prediction_local [0.07309109]
Right: 0.95334
Intercept -2.6414307896422846
Prediction_local [1.24850166]
Right: 2.96047
Intercept -2.463441589319419
Prediction_local [-0.97770752]
Right: 1.22565
Intercept -2.4292767179282517
Prediction_local [-1.74451555]
Right: 2.4173
Intercept -2.437698461004489
Prediction_local [-1.14774558]
Right: 2.61937
Intercept -2.471411438570187
Prediction_local [-1.37186905]
Right: -0.09235
Intercept -2.4509026795450057
Prediction_local [-1.50962568]
Right: 1.04305
Intercept -2.424169357119638
Prediction_local [-1.75233829]
Right: 2.36744
Intercept -2.4166585603467725
Prediction_local [-1.80167134]
Right: 4.0802
Intercept -2.475012756343

Prediction_local [-0.71317375]
Right: 0.52102
Intercept -2.4414890908796942
Prediction_local [-2.20187674]
Right: -1.37889
Intercept -2.4234970633740005
Prediction_local [-0.44948853]
Right: 1.32729
Intercept -2.4179150246357413
Prediction_local [-2.80223455]
Right: 0.77417
Intercept -2.4188867714002744
Prediction_local [0.01229777]
Right: 1.27993
Intercept -2.4475469844208773
Prediction_local [-1.15041238]
Right: 1.99307
Intercept -2.411071814986614
Prediction_local [-1.78192715]
Right: -0.15457
Intercept -2.464494601332395
Prediction_local [-1.75328942]
Right: 1.88934
Intercept -2.4405796142983003
Prediction_local [-1.25823035]
Right: -0.78743
Intercept -2.4308563834255392
Prediction_local [-2.37741101]
Right: 2.61903
Intercept -2.4790030931069387
Prediction_local [-1.99660554]
Right: -0.94572
Intercept -2.443290756937962
Prediction_local [-1.89148154]
Right: -0.02188
Intercept -2.4620643303953464
Prediction_local [0.95957334]
Right: 2.45634
Intercept -2.488857186713147
Prediction_lo