In [None]:
#Filter data to drop users and items with low number of interactions#
import random
import json
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from collections import defaultdict
from collections import Counter
import pickle as pkl
import pickle

# Preprocessing

In [None]:
def write_data(path, data):
    """Serialize ``data`` to JSON and write it to ``path``.

    Args:
        path: Destination file path. It is opened in text write mode, so an
            existing file is truncated.
        data: Any object that ``json.dumps`` can serialize.

    Raises:
        TypeError: If ``data`` is not JSON serializable (raised by ``json.dumps``).
        OSError: If ``path`` cannot be opened for writing.
    """
    # The context manager closes the handle even when serialization or the
    # write itself raises, which the manual open()/close() pair did not do.
    with open(path, 'w') as f:
        f.write(json.dumps(data))

def _count_interactions(interaction):
    """Return (per-user, per-item) interaction counts for (user_id, item_id) pairs."""
    user_counts = Counter()
    item_counts = Counter()
    for (user_id, item_id) in interaction:
        user_counts[user_id] += 1
        item_counts[item_id] += 1
    return user_counts, item_counts


def _sparsity_percent(n_interaction, n_user, n_item):
    """Return the percentage of empty user x item cells, or NaN if a dimension is empty."""
    if n_user == 0 or n_item == 0:
        return float('nan')
    return 100 - n_interaction * 100.0 / n_user / n_item


def dataset_filtering(interaction, core):
    """Prune an interaction list until every user and item has >= ``core`` records.

    Users and items with fewer than ``core`` interactions are removed together,
    the counts are recomputed, and the process repeats until no such user or
    item remains (or until nothing is left).

    Args:
        interaction: List of ``(user_id, item_id)`` pairs.
        core: Minimum number of interactions a user or an item must keep.

    Returns:
        The filtered list of ``(user_id, item_id)`` tuples. When no pruning is
        needed the input list object itself is returned. An empty list is
        returned when pruning removes every interaction.

    Side effects:
        Prints dataset statistics before and after filtering, plus the number
        of remaining interactions after each pruning round.
    """
    user_id_dic, item_id_dic = _count_interactions(interaction)
    print ('#Original dataset')
    print('  User:', len(user_id_dic), 'Item:', len(item_id_dic))
    print('  User:', len(user_id_dic), 'Item:', len(item_id_dic), 'Interaction:', len(interaction), 'Sparsity:',
          _sparsity_percent(len(interaction), len(user_id_dic), len(item_id_dic)), '%')
    print ('Fitering(core = ', core, '...', end = '')
    # Stop as soon as either side is empty: there is nothing left to prune and
    # taking min() of an empty collection would raise.
    while user_id_dic and item_id_dic and (
            min(user_id_dic.values()) < core or min(item_id_dic.values()) < core):
        # find out all users and items with less than core records
        user_LessThanCore = {user_id for user_id, count in user_id_dic.items() if count < core}
        item_LessThanCore = {item_id for item_id, count in item_id_dic.items() if count < core}
        # rebuild the interaction list without the under-represented users/items
        interaction = [
            (user_id, item_id)
            for (user_id, item_id) in interaction
            if not (user_id in user_LessThanCore or item_id in item_LessThanCore)
        ]
        # removing records can push other users/items below the threshold, so recount
        user_id_dic, item_id_dic = _count_interactions(interaction)
        print (len(interaction), end = ' ')
    print()
    print ('#Filtered dataset')
    print ('  User:', len(user_id_dic), 'Item:', len(item_id_dic), 'Interaction:', len(interaction), 'Sparsity:',
           _sparsity_percent(len(interaction), len(user_id_dic), len(item_id_dic)), '%')
    return interaction



# Read the raw interaction file and keep only the `core`-core of the data.
core = 80        # minimum number of interactions per user and per item
cold_thre = 15   # not used in this cell



path_read_inter = '../../../MovieLense/ml-100k/u.data'
# Context manager so the handle is released even if read() fails.
with open(path_read_inter, "r") as f_inter:
    data_inter = f_inter.read()
Interaction = []

# Each non-empty line is tab-separated; the first two fields are kept as
# (user id, item id) strings.
data_inter = data_inter.split('\n')
for interactions in data_inter:
    interactions = interactions.split('\t')
    if len(interactions) > 1:
        Interaction.append((interactions[0], interactions[1]))

Interaction = dataset_filtering(Interaction, core)


# Item id (string) of every interaction that survived filtering, duplicates included.
remained_items = [item_id for (_, item_id) in Interaction]


In [None]:
# Identify the least-rated movies among the interactions kept after filtering.

# NOTE(review): `dataset` is not used by the rest of this cell; with pandas'
# default header handling the first row of u.data is consumed as column names.
dataset = pd.read_csv('../../../MovieLense/ml-100k/u.data', sep='\t')
dataset.head()

movieDic = {}

# Popularity is measured after pruning cold users/items: one entry per surviving interaction.
movie_list = [pair[1] for pair in Interaction]

res = Counter(movie_list)

# most_common() is ordered from most to least frequent, so the tail holds the
# 70 least-rated movies.  (Original note on this line: "1340 used to be 1405".)
least_rated = res.most_common()[-70:]
least_rated_movies = [str(movie_id) for movie_id, _n_ratings in least_rated]
# The same list object is reused under the name the ranking code expects.
FemaleID_str = least_rated_movies

In [None]:
# Build the lookup structures and the user / item feature tables.

# Integer user and item ids of every filtered interaction, plus their sorted unique values.
user_list = [int(pair[0]) for pair in Interaction]
item_list = [int(pair[1]) for pair in Interaction]
user_set = sorted(set(user_list))
item_set = sorted(set(item_list))


# D maps user id -> list of item ids that user interacted with.
D = defaultdict(list)
for raw_user, raw_item in Interaction:
    D[int(raw_user)].append(int(raw_item))

# User features; zipcode is dropped because it is neither scaled nor one-hot encoded later.
data_user = pd.read_csv(
    '../../../MovieLense/ml-100k/u.user',
    sep='|',
    names=["user_id", "age", "gender", "job", "zipcode"],
).drop(columns=['zipcode'])
print ("data_user", data_user)

# Item features: id, release date and the genre indicator columns.
item_columns = [
    "movie_id", "movie_title", "release_date", "video release_date", "IMDb URL",
    "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy", "Crime",
    "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery",
    "Romance", "Sci-Fi", "Thriller", "War", "Western",
]
data_item = pd.read_csv('../../../MovieLense/ml-100k/u.item', sep='|', names=item_columns, encoding='latin-1')
data_item = data_item.drop(columns=['movie_title', 'video release_date', 'IMDb URL'])
# Keep only the last four characters of the release date string.
data_item['release_date'] = data_item['release_date'].str[-4:]


# Flag the least-rated movies: 1 if the movie id is in FemaleID_str, else 0.
data_item['unpopular'] = data_item['movie_id'].map(lambda movie_id: int(str(movie_id) in FemaleID_str))
print(data_item)

In [None]:
# Build one feature row and one binary label for every sampled (user, item) pair.
#
# NOTE(review): no random seed is set in the visible code, so the sampled
# users/items differ between runs.
# NOTE(review): the item ids sampled for each user are not stored anywhere;
# later code that needs the id behind a given label position has to recover it
# some other way -- confirm that it does so correctly.

DATA=[]
feature_list=[]
label_list=[]
# random.sample() requires a sequence (dict views raise TypeError on
# Python >= 3.11), so materialise the keys first; list() keeps the order.
for user in sorted(random.sample(list(D.keys()), 150)):  # choose 150 users randomly
    # The user's feature row does not depend on the item, so look it up once.
    df_user = data_user.loc[data_user['user_id'] == user].reset_index(drop=True)
    interacted = set(D[user])  # O(1) membership test for the labels below
    label_array=[]
    for item in sorted(random.sample(item_set, 150)):  # choose 150 items randomly
        df_item= data_item.loc[data_item['movie_id'] == item]
        df_item= df_item.reset_index(drop=True)
        # Side-by-side concat: user columns followed by item columns.
        feature_list.append(pd.concat([df_user,df_item], axis=1))

        # Label is 1 when the user interacted with the item, else 0.
        label_array.append(1 if item in interacted else 0)
    label_list.append(np.array(label_array))
    

In [None]:
# Stack the per-pair frames into a single DataFrame (one header, one row per pair).
feature_list = pd.concat(feature_list)
feature_list= feature_list.drop(columns=['user_id', 'movie_id'])

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to `sparse_output`
# and 1.4 removed the old name; try the new spelling first and fall back for
# older installations.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    one_hot_encoder = OneHotEncoder(sparse=False)

# Standardise the numeric columns, one-hot encode the categorical ones and
# pass the remaining columns through unchanged.
preprocess = make_column_transformer((StandardScaler(),['age','release_date']),
                                     (one_hot_encoder, ['gender', 'job', 'unpopular']), remainder='passthrough')

feature_list = preprocess.fit_transform(feature_list)
# One equally sized chunk per sampled user (label_list holds one array per user).
feature_list= np.split(feature_list, len(label_list))


In [None]:
# Split every user's rows 80/20 into train and test (split by interaction, not by user).

data_X = []
data_Y = []
test_X = []
test_Y = []
docID_test = []

for u, user_labels in enumerate(label_list):
    n_rows = len(user_labels)
    all_index = list(range(n_rows))
    # Random 80% of the positions go to train; the remainder goes to test.
    train_index = set(random.sample(all_index, int(0.8* n_rows)))
    test_index = list(set(all_index) - train_index)
    train_index = list(train_index)
    user_features = feature_list[u]
    data_X.append(user_features[train_index])
    data_Y.append(user_labels[train_index])
    test_X.append(user_features[test_index])
    test_Y.append(user_labels[test_index])
    # NOTE(review): this takes ids from the global item_set at the test
    # positions.  That matches the rows above only if each user's rows were
    # built over item_set in this same order -- confirm against the cell that
    # builds feature_list / label_list.
    docID_test.append(np.take(item_set, test_index).tolist())








# Drop test users (queries) that have no relevant item in one of the two groups,
# then persist the train/test data.
F_rel=0
M_rel=0
unwanted_u=[]

for u in range(len(docID_test)):
    F_rel=0   # summed labels of this user's test items that are in least_rated_movies
    M_rel=0   # summed labels of the remaining test items
    # Iterate over this user's own documents instead of assuming every user
    # has as many as user 0.
    for doc in range(len(docID_test[u])):
        if str(docID_test[u][doc]) in least_rated_movies:
            F_rel= F_rel+ test_Y[u][doc]
        else:
            M_rel= M_rel+ test_Y[u][doc]
    if F_rel==0 or M_rel==0:
        print ("F_rel",F_rel)
        print ("M_rel",M_rel)
        unwanted_u.append(u)

# Set lookup keeps the filtering pass linear in the number of users.
unwanted_u_set = set(unwanted_u)
kept_u = [i for i in range(len(test_X)) if i not in unwanted_u_set]
test_X_ = [test_X[i] for i in kept_u]
test_Y_ = [test_Y[i] for i in kept_u]
docID_test_ = [docID_test[i] for i in kept_u]
test_X= test_X_
test_Y= test_Y_
docID_test= docID_test_


# Context managers make sure every file is flushed and closed before a later
# cell reads it back.
with open("MovieLense_docID_test_25.txt", "wb") as f:
    pickle.dump(docID_test, f)

with open("MovieLense_train_rank_25.pkl", "wb") as f:
    pkl.dump((data_X, data_Y), f)
with open("MovieLense_test_rank_25.pkl", "wb") as f:
    pkl.dump((test_X, test_Y), f)

In [None]:
# Read back the pickled test document ids and the train / test sets.
# SECURITY: pickle can execute arbitrary code while loading; only load files
# produced by this notebook or another trusted source.

with open("MovieLense_docID_test_25.txt", "rb") as f:
    docID_test = pickle.load(f)

with open('MovieLense_train_rank_25.pkl', 'rb') as f:
    data_train = pkl.load(f)


with open('MovieLense_test_rank_25.pkl', 'rb') as f:
    data_test = pickle.load(f)

        

In [None]:
# Fit a linear model on the training pairs and predict relevance for every test query.

from YahooDataReader import YahooDataReader
from sklearn.linear_model import LinearRegression
from sklearn import linear_model


dr = YahooDataReader(None)
with open("MovieLense_train_rank_25.pkl", "rb") as f:
    dr.data = pkl.load(f)
vdr = YahooDataReader(None)
with open("MovieLense_test_rank_25.pkl", "rb") as f:
    vdr.data = pkl.load(f)
# `normalize` was removed from LinearRegression in scikit-learn 1.2; its
# default was False, so omitting it keeps the same model on every version.
model = linear_model.LinearRegression(fit_intercept=False)
feats, rel = dr.data
# Flatten the per-query lists into one (n_rows, n_features) matrix and one label vector.
feats = np.array([item for sublist in feats for item in sublist])
rel = np.array([item for sublist in rel for item in sublist])
model.fit(feats, rel)
feats, rel = vdr.data
se_sum = 0   # running sum of squared prediction errors over all test rows
length = 0   # number of test rows seen
predicted_rels = []   # one array of predictions per test query
for i, query in enumerate(feats):
    rel_pred = model.predict(query)
    predicted_rels.append(rel_pred)
    se_sum += np.sum((rel_pred - rel[i])**2)
    length += len(rel[i])

# Fair Ranking

In [None]:

# Per-query group statistics: group sizes and summed true / predicted relevance
# for the "female" group (ids in FemaleID_str) and the "male" group (all others).
docID_test_str=[[] for _ in range(len(docID_test))] #type=str
size_female_test = []
size_male_test=[]
sum_rel_female=[]
sum_rel_male= []
sum_rel_totall=[]
sum_rel_female_predicted=[]
sum_rel_male_predicted= []
sum_rel_totall_predicted=[]


for qid in range(len(docID_test)):
    F_size= 0
    M_size= 0
    F_rel=0
    M_rel=0
    F_rel_predicted=0
    M_rel_predicted=0
    for doc in range(len(docID_test[qid])):
        doc_str = str(docID_test[qid][doc])
        docID_test_str[qid].append(doc_str)
        # The test labels and the predictions of a query are stored in the same
        # order as its document ids, so they are addressed by position.
        # Indexing them by the movie id itself (the layout used by the ML1M
        # variant of this notebook) runs past the end of these per-query arrays.
        true_rel = data_test[1][qid][doc]          # used only for evaluation
        predicted_rel = predicted_rels[qid][doc]   # used by the ranking algorithms
        if doc_str in FemaleID_str:
            F_size=F_size+1
            F_rel= F_rel+ true_rel
            F_rel_predicted= F_rel_predicted+ predicted_rel
        else:
            M_size= M_size+1
            M_rel= M_rel+ true_rel
            M_rel_predicted= M_rel_predicted+ predicted_rel

    size_female_test.append(F_size)
    size_male_test.append(M_size)
    # Read as a global by the *_DP divergence helpers; holds the size of the
    # last processed query.
    totall_size= len (docID_test_str[qid])

    sum_rel_female.append(F_rel)
    sum_rel_male.append(M_rel)
    sum_rel_totall.append(sum_rel_female[qid]+ sum_rel_male[qid])

    sum_rel_female_predicted.append(F_rel_predicted)
    sum_rel_male_predicted.append(M_rel_predicted)
    sum_rel_totall_predicted.append(sum_rel_female_predicted[qid]+ sum_rel_male_predicted[qid])

# Print "f" for every query whose mean predicted relevance is higher for the
# female group.  The element-wise means are computed once rather than on every
# loop iteration.
mean_predicted_female = np.divide(sum_rel_female_predicted, size_female_test)
mean_predicted_male = np.divide(sum_rel_male_predicted, size_male_test)
for i in range(len(mean_predicted_female)):
    if mean_predicted_female[i] > mean_predicted_male[i]:
        print ("f")

In [None]:
def KL_Divergence(p1,p2,q1,q2):
    """Jensen-Shannon divergence between the two-point distributions (p1, p2) and (q1, q2).

    Despite the name this is the symmetric Jensen-Shannon form, computed with
    the natural logarithm:

        J = 1/2 * sum_i [ p_i * log(2 p_i / (p_i + q_i)) + q_i * log(2 q_i / (p_i + q_i)) ]

    A term whose leading probability is 0 contributes 0 (the limit of
    x * log(x) as x -> 0), so zero-mass entries on either side no longer yield
    NaN or a division by zero.

    Args:
        p1, p2: Probabilities of the first distribution.
        q1, q2: Probabilities of the second distribution.

    Returns:
        The divergence as a float; 0 when both distributions are equal.
    """
    def _term(a, b):
        # a * log(2a / (a + b)), with the a == 0 case defined as its limit 0.
        if a == 0:
            return 0.0
        return a * np.log((2 * a) / (a + b))

    J = (_term(p1, q1) + _term(p2, q2) + _term(q1, p1) + _term(q2, p2)) / 2
    return J

In [None]:
def fairness(size_docID,
             size_female,
             size_male,
             current_male_CTR,
             sofar_male_CTR,
             current_female_CTR,
             sofar_female_CTR,
             ):
    """Return 1 minus the divergence between the groups' CTR shares and their target shares.

    Args:
        size_docID: Denominator of the target shares.
        size_female: Numerator of the female group's target share.
        size_male: Numerator of the male group's target share.
        current_male_CTR: Male CTR contributed by the candidate item.
        sofar_male_CTR: Male CTR accumulated before the candidate item.
        current_female_CTR: Female CTR contributed by the candidate item.
        sofar_female_CTR: Female CTR accumulated before the candidate item.

    Returns:
        ``1 - KL_Divergence(male share, female share, male target, female target)``.
    """
    male_ctr = sofar_male_CTR + current_male_CTR
    female_ctr = sofar_female_CTR + current_female_CTR
    total_ctr = male_ctr + female_ctr

    # Observed split of the accumulated CTR between the two groups ...
    observed_shares = (male_ctr / total_ctr, female_ctr / total_ctr)
    # ... compared with the split the groups are entitled to.
    target_shares = (size_male / size_docID, size_female / size_docID)

    return 1 - KL_Divergence(*observed_shares, *target_shares)

In [None]:
def separate_and_sort(
    docID,
    rel_scores
    ):
    """Split document ids by group and order each group by descending relevance.

    Arg list:
    docID: list of document ids (here, all docIDs of one qid)
    rel_scores: mapping from document id to relevance score

    Returns:
    (male_sorted_by_rel, female_sorted_by_rel) -- ids found in the global
    FemaleID_str form the female list, every other id the male list.
    """
    def _relevance(doc):
        return rel_scores[doc]

    female_ids = [doc for doc in docID if doc in FemaleID_str]
    male_ids = [doc for doc in docID if doc not in FemaleID_str]

    # sorted() is stable, so equally scored ids keep their input order.
    female_sorted_by_rel = sorted(female_ids, key=_relevance, reverse=True)
    male_sorted_by_rel = sorted(male_ids, key=_relevance, reverse=True)

    return male_sorted_by_rel, female_sorted_by_rel

In [None]:

def interpolation_optimized(
    qid,
    docID,
    rel_scores,
    Z,
    top_k=30,
):
    """Greedily build a ranking that interpolates between nDCG and group fairness.

    At every rank, each remaining document is scored with
    ``(1 - Z) * nDCG + Z * fairness`` and the best-scoring one is appended.

    Arg list:
    qid: index of the query; used to read the global sum_rel_*_predicted lists
    docID: list of docIDs for this qid in the test data
    rel_scores: dict() that maps doc_ID -> rel_score
    Z: interpolation weight (0 = relevance only, 1 = fairness only)
    top_k: maximum length of the ranking (default 30, the previously
        hard-coded value); capped at the number of documents

    Returns:
    list of document ids in ranked order.
    """
    all_sorted_by_rel= sorted(docID, key = lambda x : rel_scores[x], reverse=True)
    # Never try to fill more ranks than there are documents.
    n_ranks = min(top_k, len(all_sorted_by_rel))
    female_ids = set(FemaleID_str)  # O(1) group lookup inside the loops
    S=[]
    sofar_female_CTR=0
    sofar_male_CTR=0
    sofar_DCG=0
    IDCG=0
    epsilon_1= 0.65

    availablity = all_sorted_by_rel[:]  # copy: items are removed as they get ranked
    while len (S)< n_ranks:
        rank = len(S)
        discount = math.log2(rank + 2)
        # Ideal DCG up to this rank: the rank-th most relevant document placed here.
        IDCG= IDCG+ (float(2**float(rel_scores[all_sorted_by_rel[rank]])-1) / discount)
        # Rank-dependent click probabilities for relevant / non-relevant items.
        epsilon_plus= 1- ((rank+2)/100)
        epsilon_minus= epsilon_1* (1/min( rank+1, 10) )
        # Start from -inf so that an item is still chosen when every
        # interpolated score is zero or negative.
        max_intpol_score = float('-inf')
        max_item_data = None
        fallback_item_data = None
        for item in availablity:
            DCG = sofar_DCG+ (float(2**float(rel_scores[item])-1) / discount)
            nDCG =  float (DCG)/ IDCG if IDCG != 0 else 0.0
            item_CTR = float(  (rel_scores[item]* epsilon_plus) + ( (1-rel_scores[item]) * epsilon_minus)  ) * 1/discount
            if item in female_ids:
                current_male_CTR = 0
                current_female_CTR = item_CTR
            else:
                current_male_CTR = item_CTR
                current_female_CTR = 0
            fair_metric = fairness(sum_rel_totall_predicted[qid], sum_rel_female_predicted[qid], sum_rel_male_predicted[qid], current_male_CTR, sofar_male_CTR, current_female_CTR, sofar_female_CTR)
            intpol_score = (1-Z) * nDCG + Z * fair_metric
            candidate = (item, current_female_CTR, current_male_CTR, DCG)
            if fallback_item_data is None:
                # availablity is kept in relevance order, so the first
                # candidate is the most relevant remaining document.
                fallback_item_data = candidate
            if intpol_score > max_intpol_score:
                max_intpol_score = intpol_score
                max_item_data = candidate

        # If no score was comparable (e.g. all NaN), fall back to the most
        # relevant remaining document instead of failing on None.
        if max_item_data is None:
            max_item_data = fallback_item_data

        S.append(max_item_data[0])
        availablity.remove(max_item_data[0])
        sofar_female_CTR += max_item_data[1]
        sofar_male_CTR += max_item_data[2]
        sofar_DCG = max_item_data[3]

    return S

In [None]:

def get_nDCG(docID, rel_scores, item_list):
    """
    Gives the nDCG of a ranking, using the gains stored in the global D_real.

    input:
    docID: ids of all candidate documents of the query
    rel_scores: relevance scores used to build the ideal ordering.  When it has
        exactly one entry per element of docID it is read by position
        (rel_scores[k] belongs to docID[k]); otherwise it is indexed by the
        numeric document id, as in the ML1M variant of this notebook.
    item_list: the ranking to evaluate (list of document ids)

    output:
    (nDCG, sorted_docID) where sorted_docID is docID in ideal order.  nDCG is
    0.0 when the ideal DCG is 0.
    """
    if len(rel_scores) == len(docID):
        # Position-aligned scores: indexing them by movie id would run past
        # the end of the array.
        rel_by_doc = {doc: rel_scores[pos] for pos, doc in enumerate(docID)}
        sorted_docID = sorted(docID, key=lambda x: rel_by_doc[x], reverse=True)
    else:
        sorted_docID = sorted(docID, key=lambda x: rel_scores[int(x)], reverse=True)

    Denom= float(0)   # DCG of the ideal ordering, truncated to len(item_list)
    Nom= float(0)     # DCG of item_list
    for i in range (len(item_list)):
        discount = math.log2(i+2)
        # D_real holds the true relevance; swap in D1 to treat predictions as ground truth.
        ideal_gain = 2**(float (D_real[str(sorted_docID[i])]))
        Denom= Denom+(   (ideal_gain-1)  / discount    )
        actual_gain = 2**(float (D_real[str(item_list[i])]))
        Nom=Nom + (  (actual_gain-1)  / discount    )
    if Denom == 0:
        # No relevant document within the cut-off: define nDCG as 0 rather
        # than dividing by zero.
        return (0.0, sorted_docID)
    nDCG= (float(Nom)/float(Denom))
    return (nDCG, sorted_docID  )

In [None]:
def get_divergence (qid, docID, rel_scores):
    """Per-rank divergence between the groups' CTR shares and their relevance shares.

    Args:
        qid: Query index into the global sum_rel_male / sum_rel_female / sum_rel_totall lists.
        docID: Ranked list of document ids.
        rel_scores: Mapping from document id to relevance score.

    Returns:
        Three lists with one value per rank:
        (divergence, male CTR share / male relevance share,
         female CTR share / female relevance share).
    """
    female_ctr = 0
    male_ctr = 0
    divergence_per_rank = []
    male_ratio_per_rank = []
    female_ratio_per_rank = []
    epsilon_1 = 0.65
    for rank, doc in enumerate(docID):
        # Rank-dependent click probabilities for relevant / non-relevant items.
        epsilon_plus = 1 - ((rank + 2) / 100)
        epsilon_minus = epsilon_1 * (1 / min(rank + 1, 10))
        in_female_group = doc in FemaleID_str
        click = float((rel_scores[doc] * epsilon_plus) + ((1 - rel_scores[doc]) * epsilon_minus)) * 1 / math.log2(rank + 2)
        if in_female_group:
            female_ctr += click
        else:
            male_ctr += click

        total_ctr = female_ctr + male_ctr
        male_share = male_ctr / total_ctr
        female_share = female_ctr / total_ctr
        male_target = sum_rel_male[qid] / sum_rel_totall[qid]
        female_target = sum_rel_female[qid] / sum_rel_totall[qid]
        divergence_per_rank.append(KL_Divergence(male_share, female_share, male_target, female_target))
        male_ratio_per_rank.append(float(male_share / male_target))
        female_ratio_per_rank.append(float(female_share / female_target))
    return (divergence_per_rank, male_ratio_per_rank, female_ratio_per_rank)

In [None]:
def get_divergence_trust_DP (qid, docID, rel_scores):
    """Per-rank divergence between the groups' CTR shares and their size shares.

    Args:
        qid: Query index into the global size_male_test / size_female_test lists.
        docID: Ranked list of document ids.
        rel_scores: Mapping from document id to relevance score.

    Returns:
        List with one divergence value per rank.  The size shares are divided
        by the global ``totall_size``.
    """
    female_ctr = 0
    male_ctr = 0
    divergence_per_rank = []
    epsilon_1 = 0.65
    for rank, doc in enumerate(docID):
        # Rank-dependent click probabilities for relevant / non-relevant items.
        epsilon_plus = 1 - ((rank + 2) / 100)
        epsilon_minus = epsilon_1 * (1 / min(rank + 1, 10))
        in_female_group = doc in FemaleID_str
        click = float((rel_scores[doc] * epsilon_plus) + ((1 - rel_scores[doc]) * epsilon_minus)) * 1 / math.log2(rank + 2)
        if in_female_group:
            female_ctr += click
        else:
            male_ctr += click

        total_ctr = female_ctr + male_ctr
        divergence_per_rank.append(
            KL_Divergence(
                male_ctr / total_ctr,
                female_ctr / total_ctr,
                size_male_test[qid] / totall_size,
                size_female_test[qid] / totall_size,
            )
        )
    return (divergence_per_rank)

In [None]:
def get_divergence_pos_DT (qid, docID, rel_scores):
    """Per-rank divergence between the groups' exposure shares and their relevance shares.

    Exposure at (0-based) rank i is 1 / log2(i + 2).

    Args:
        qid: Query index into the global sum_rel_male / sum_rel_female / sum_rel_totall lists.
        docID: Ranked list of document ids.
        rel_scores: Not used by this function.

    Returns:
        List with one divergence value per rank.
    """
    female_exposure = 0
    male_exposure = 0
    divergence_per_rank = []
    for rank, doc in enumerate(docID):
        position_exposure = 1 / math.log2(rank + 2)
        if doc in FemaleID_str:
            female_exposure += position_exposure
        else:
            male_exposure += position_exposure

        total_exposure = female_exposure + male_exposure
        divergence_per_rank.append(
            KL_Divergence(
                male_exposure / total_exposure,
                female_exposure / total_exposure,
                sum_rel_male[qid] / sum_rel_totall[qid],
                sum_rel_female[qid] / sum_rel_totall[qid],
            )
        )
    return (divergence_per_rank)

In [None]:
def get_divergence_pos_DP (qid, docID, rel_scores):
    """Per-rank divergence between the groups' exposure shares and their size shares.

    Exposure at (0-based) rank i is 1 / log2(i + 2).

    Args:
        qid: Query index into the global size_male_test / size_female_test lists.
        docID: Ranked list of document ids.
        rel_scores: Not used by this function.

    Returns:
        List with one divergence value per rank.  The size shares are divided
        by the global ``totall_size``.
    """
    female_exposure = 0
    male_exposure = 0
    divergence_per_rank = []
    for rank, doc in enumerate(docID):
        position_exposure = 1 / math.log2(rank + 2)
        if doc in FemaleID_str:
            female_exposure += position_exposure
        else:
            male_exposure += position_exposure

        total_exposure = female_exposure + male_exposure
        divergence_per_rank.append(
            KL_Divergence(
                male_exposure / total_exposure,
                female_exposure / total_exposure,
                size_male_test[qid] / totall_size,
                size_female_test[qid] / totall_size,
            )
        )
    return (divergence_per_rank)

In [None]:
# Joachims' fairness definition (difference form), based on disparate treatment.

def get_joachims_diff (qid, docID):  # returns one value per rank index; accepts either item_list or sorted_item_list
    """Per-rank, sign-corrected exposure difference between the two groups.

    Args:
        qid: Query index into the global sum_rel_male, sum_rel_female,
            size_male_test and size_female_test lists.
        docID: Ranked list of document ids.

    Returns:
        List with one non-negative value per rank: ``max(0, sign * diff(...))``.

    NOTE(review): ``diff`` is not defined in the visible part of this notebook;
    confirm where it comes from and what it computes from its four arguments
    (male exposure per item, female exposure per item, male mean relevance,
    female mean relevance).
    """
    
        
    sofar_female_expo=0
    sofar_male_expo=0
    sofar_indx_expo=0  # total exposure over all ranks so far; accumulated but never read
    Diff_movie_list=[]
    for i in range(len(docID)):
        # Exposure of (0-based) rank i is 1 / log2(i + 2), credited to the item's group.
        if docID[i] in FemaleID_str:
            sofar_female_expo= sofar_female_expo+(1/math.log2(i+2)) 
        else:
            sofar_male_expo= sofar_male_expo + (1/math.log2(i+2))   
        sofar_indx_expo= sofar_indx_expo+ (1/math.log2(i+2))
        # sign is +1 when the male group has the higher mean relevance, else -1.
        # It depends only on qid, so it is the same on every iteration.
        if sum_rel_male[qid]/size_male_test[qid] > sum_rel_female[qid]/size_female_test[qid]:
            sign= 1
        else:
            sign=-1
        # Negative signed differences are clipped to 0.
        Diff_movie_list.append(max(0, sign*diff(sofar_male_expo/ size_male_test[qid], sofar_female_expo/ size_female_test[qid] , sum_rel_male[qid]/size_male_test[qid], sum_rel_female[qid]/size_female_test[qid])))

    return (Diff_movie_list)

In [None]:
def get_CPFair_diff (qid, docID):
    """Per-rank absolute difference between the number of male and female items so far.

    Args:
        qid: Not used by this function.
        docID: Ranked list of document ids; ids found in the global
            FemaleID_str count as female, all others as male.

    Returns:
        List with one value per rank: ``abs(male count - female count)`` over
        the ranking up to and including that rank.
    """
    female_count = 0
    male_count = 0
    count_gap_per_rank = []
    for doc in docID:
        if doc in FemaleID_str:
            female_count += 1
        else:
            male_count += 1
        count_gap_per_rank.append(abs(male_count - female_count))

    return (count_gap_per_rank)

In [None]:
def get_FairMetric(DocID, DivOrDiff_item_list):    #do not need it if we have new trust and CTR
    """Position-discounted average of per-rank divergence/difference values.

    Args:
        DocID: Ranked list; only its length is used.
        DivOrDiff_item_list: One value per rank, at least ``len(DocID)`` long.

    Returns:
        sum_i w_i * value_i / sum_i w_i with w_i = 1 / log2(i + 2).
    """
    weighted_sum = float(0)
    weight_total = float(0)
    for rank in range(len(DocID)):
        weight = 1 / (math.log2(rank + 2))
        weight_total = weight_total + weight
        weighted_sum = weighted_sum + weight * DivOrDiff_item_list[rank]
    return (float(weighted_sum) / float(weight_total))

In [None]:

import math

# For every interpolation weight z: rank each test query, evaluate the ranking
# at every cut-off, average over queries and print the values at K = 5, 10, 30.
interplotion_value=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.91, 0.92, 0.94, 0.96, 0.98, 0.99, 1]
for z in interplotion_value:
    D2={} # a dictionary: key= user id, value= S/item_list

    nDCG_all_q=[]
    D_item_list_all_q=[]
    male_Div_all_q=[]
    female_Div_all_q=[]

    D_item_list_trust_DP_all_q=[]
    D_item_list_pos_DT_all_q=[]
    D_item_list_pos_DP_all_q=[]
    joachims_diff_item_list_all_q=[]
    CPFair_diff_item_list_all_q=[]

    for qid in range(len(docID_test)):
        nDCG=[]
        nDiv=[]
        D1={} # a dictionary: key= movieID, value= predicted rel_scores
        D_real= {} # a dictionary: key= movieID, value= true rel_scores (read as a global by get_nDCG)
        for i in range (len(docID_test_str[qid])):
            # Predictions and labels of a query are stored in the same order as
            # its document ids, so they are addressed by position.  Indexing
            # them by the movie id (ML1M layout) runs past the end of these
            # per-query arrays.
            D1[docID_test_str[qid][i]]= predicted_rels[qid][i]
            D_real[docID_test_str[qid][i]]= data_test[1][qid][i]

        item_list  = interpolation_optimized (qid,docID_test_str[qid], D1, z)

        # nDCG has to be recomputed for every prefix because its ideal DCG
        # depends on the cut-off.
        for i in range(len(item_list)):
            nDCG.append(get_nDCG(docID_test[qid], data_test[1][qid], item_list[0:i+1])[0]) #predicted_rels[qid] if predicted rel is assumed to be ground truth, data_test[1][qid] otherwise

        # The remaining metrics already return one value per rank, so a single
        # call on the full ranking gives the same lists as the last prefix did.
        # D_real: true relevance (use D1 to assume predicted = true relevance).
        D_item_list, male_Div, female_Div = get_divergence(qid, item_list, D_real)

        D_item_list_trust_DP= get_divergence_trust_DP (qid, item_list, D_real)
        D_item_list_pos_DT= get_divergence_pos_DT (qid, item_list, D_real)
        D_item_list_pos_DP= get_divergence_pos_DP (qid, item_list, D_real)
        joachims_diff_item_list= get_joachims_diff (qid, item_list)
        CPFair_diff_item_list= get_CPFair_diff (qid, item_list)

        nDCG_all_q.append (nDCG)
        D_item_list_all_q.append(D_item_list)
        male_Div_all_q.append(male_Div)
        female_Div_all_q.append(female_Div)

        D_item_list_trust_DP_all_q.append(D_item_list_trust_DP)
        D_item_list_pos_DT_all_q.append(D_item_list_pos_DT)
        D_item_list_pos_DP_all_q.append(D_item_list_pos_DP)
        joachims_diff_item_list_all_q.append(joachims_diff_item_list)
        CPFair_diff_item_list_all_q.append(CPFair_diff_item_list)

    # Average every metric over the queries, separately for each rank.
    avg_nDCG_at_indx_all_q= np.mean(nDCG_all_q, axis=0)
    avg_D_item_list_at_indx_all_q= np.mean(D_item_list_all_q, axis=0)
    avg_male_Div_at_indx_all_q= np.mean(male_Div_all_q, axis=0)
    avg_female_Div_at_indx_all_q= np.mean(female_Div_all_q, axis=0)

    avg_D_item_list_trust_DP_at_indx_all_q= np.mean(D_item_list_trust_DP_all_q, axis=0)
    avg_D_item_list_pos_DT_at_indx_all_q= np.mean(D_item_list_pos_DT_all_q, axis=0)
    avg_D_item_list_pos_DP_at_indx_all_q= np.mean(D_item_list_pos_DP_all_q, axis=0)
    avg_joachims_diff_item_list_at_indx_all_q= np.mean(joachims_diff_item_list_all_q, axis=0)
    avg_CPFair_diff_item_list_at_indx_all_q= np.mean(CPFair_diff_item_list_all_q, axis=0)

    # Columns of the report, in print order.
    report_columns = (
        avg_nDCG_at_indx_all_q,
        avg_D_item_list_at_indx_all_q,
        avg_male_Div_at_indx_all_q,
        avg_female_Div_at_indx_all_q,
        avg_D_item_list_trust_DP_at_indx_all_q,
        avg_D_item_list_pos_DT_at_indx_all_q,
        avg_D_item_list_pos_DP_at_indx_all_q,
        avg_joachims_diff_item_list_at_indx_all_q,
        avg_CPFair_diff_item_list_at_indx_all_q,
    )
    # One tab-separated row per cut-off K; rank K lives at index K - 1.
    for k in (5, 10, 30):
        print ("K=" + str(k))
        for column in report_columns:
            print (column[k - 1], end='\t')
        print ("********************")
        print ("********************")