In [None]:
import numpy as np
import pandas as pd

import heapq  # for retrieval topK
import math

import torch
import torch.nn as nn
import torch.optim as optim

from numpy.random import choice

In [None]:
class neuralCollabFilter(nn.Module):
    """Feed-forward scorer for (user, item) index pairs.

    A user embedding and an item embedding are concatenated, passed through
    four ``Linear`` + ``ReLU`` stages and a final ``Linear`` + ``Sigmoid``.

    Args:
        num_users: number of rows in the user embedding table.
        num_likes: number of rows in the item embedding table.
        embed_size: width of each embedding vector.
        num_hidden: indexable with the four hidden-layer widths.
        output_size: width of the final linear layer.
    """

    def __init__(self, num_users, num_likes, embed_size, num_hidden, output_size):
        super().__init__()
        # Sub-modules are created in the same order as before so that seeded
        # initialisation and the state_dict layout stay the same.
        self.user_emb = nn.Embedding(num_users, embed_size)
        self.like_emb = nn.Embedding(num_likes, embed_size)

        h1, h2, h3, h4 = num_hidden[0], num_hidden[1], num_hidden[2], num_hidden[3]
        self.fc1, self.relu1 = nn.Linear(embed_size * 2, h1), nn.ReLU()
        self.fc2, self.relu2 = nn.Linear(h1, h2), nn.ReLU()
        self.fc3, self.relu3 = nn.Linear(h2, h3), nn.ReLU()
        self.fc4, self.relu4 = nn.Linear(h3, h4), nn.ReLU()

        self.outLayer = nn.Linear(h4, output_size)
        self.out_act = nn.Sigmoid()

    def forward(self, u, v):
        """Return sigmoid scores for user indices ``u`` paired with item indices ``v``."""
        hidden = torch.cat([self.user_emb(u), self.like_emb(v)], dim=1)
        stages = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
            (self.fc4, self.relu4),
        )
        for linear, activation in stages:
            hidden = activation(linear(hidden))
        return self.out_act(self.outLayer(hidden))

In [None]:
def get_instances_with_random_neg_samples(train, num_items, num_negatives, device):
    """Build a training batch of positives plus randomly sampled negatives.

    For every row of ``train`` one positive instance (label 1) is emitted,
    followed by ``num_negatives`` instances (label 0) that pair the same user
    with an item id drawn uniformly from ``range(num_items)`` and not present
    among that user's ``like_id`` values in ``train``.

    Args:
        train: DataFrame with ``user_id`` and ``like_id`` columns. Rows are read
            by position, so the index does not need to be 0..n-1.
        num_items: exclusive upper bound of the item ids that may be sampled.
        num_negatives: negatives generated per positive row.
        device: torch device (or device string) the returned tensors are moved to.

    Returns:
        ``(user_input, item_input, labels)``: two int64 tensors and one float32
        tensor, each of length ``len(train) * (1 + num_negatives)``.

    Raises:
        ValueError: if negatives are requested for a user whose positives
            already cover every id in ``range(num_items)``.
    """
    users = train['user_id'].to_numpy()
    items = train['like_id'].to_numpy()
    num_pos = len(train)
    total = num_pos * (1 + num_negatives)

    user_input = np.zeros(total, dtype=np.int64)
    item_input = np.zeros(total, dtype=np.int64)
    labels = np.zeros(total, dtype=np.float32)

    # One pass to collect each user's positives; set membership replaces the
    # per-row boolean mask + list scan of the whole frame.
    liked_by_user = {}
    for user, item in zip(users, items):
        liked_by_user.setdefault(user, set()).add(item)

    if num_negatives > 0:
        for user, liked in liked_by_user.items():
            covered = sum(1 for it in liked if 0 <= it < num_items)
            if covered >= num_items:
                raise ValueError(
                    "user %r has interacted with every one of the %d items; "
                    "no negative sample exists" % (user, num_items))

    # Candidates are pre-drawn in bulk (10x the number needed, leaving room
    # for rejected draws). The pool is refilled if it is ever used up instead
    # of indexing past its end.
    pool_size = 10 * num_pos * num_negatives
    neg_samples = choice(num_items, size=(pool_size,))
    cursor = 0

    out = 0
    for user, item in zip(users, items):
        # positive instance
        user_input[out] = user
        item_input[out] = item
        labels[out] = 1
        out += 1

        # negative instances (labels are already 0)
        liked = liked_by_user[user]
        for _ in range(num_negatives):
            while True:
                if cursor >= len(neg_samples):
                    neg_samples = choice(num_items, size=(max(pool_size, 1),))
                    cursor = 0
                candidate = neg_samples[cursor]
                cursor += 1
                if candidate not in liked:
                    break
            user_input[out] = user
            item_input[out] = candidate
            out += 1

    return (torch.from_numpy(user_input).to(device),
            torch.from_numpy(item_input).to(device),
            torch.from_numpy(labels).to(device))


def get_test_instances_with_random_samples(data, random_samples, num_items, device):
    """Build one evaluation list: the held-out item followed by random other items.

    Args:
        data: indexable where ``data[0]`` is the user id and ``data[1]`` the
            held-out (positive) item id.
        random_samples: how many random items to append after the positive.
        num_items: exclusive upper bound passed to ``np.random.randint``.
        device: torch device (or device string) for the returned tensors.

    Returns:
        ``(user_input, item_input)`` LongTensors of length ``random_samples + 1``;
        position 0 holds the positive pair.
    """
    test_user, positive_item = data[0], data[1]
    size = random_samples + 1

    user_input = np.zeros((size))
    item_input = np.zeros((size))
    user_input[0] = test_user
    item_input[0] = positive_item

    for slot in range(1, size):
        # redraw until the candidate differs from the held-out item
        candidate = np.random.randint(num_items)
        while candidate == positive_item:
            candidate = np.random.randint(num_items)
        user_input[slot] = test_user
        item_input[slot] = candidate

    users_t = torch.LongTensor(user_input).to(device)
    items_t = torch.LongTensor(item_input).to(device)
    return users_t, items_t


def getHitRatio(ranklist, gtItem):
    """Return 1 when ``gtItem`` equals some entry of ``ranklist``, otherwise 0."""
    found = any(candidate == gtItem for candidate in ranklist)
    return 1 if found else 0


def getNDCG(ranklist, gtItem):
    """Return ``log(2) / log(position + 2)`` for the first match of ``gtItem``.

    ``position`` is the 0-based index in ``ranklist``; 0 is returned when the
    item is not in the list.
    """
    for position, candidate in enumerate(ranklist):
        if candidate == gtItem:
            return math.log(2) / math.log(position + 2)
    return 0


# The function below ensures that we seed all random generators with the same value to get reproducible results
def set_random_seed(state=1):
    """Seed NumPy's global RNG, torch's RNG and torch's CUDA RNG with ``state``."""
    np.random.seed(state)
    torch.manual_seed(state)
    torch.cuda.manual_seed(state)


# Seed the random generators once, before any model or sample is created.
RANDOM_STATE = 1
set_random_seed(RANDOM_STATE)

# Use the GPU when torch reports one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
def train_epochs(model, df_train, epochs, lr, batch_size, num_negatives, unsqueeze=False, num_items=None):
    """Train ``model`` with binary cross-entropy on shuffled mini-batches.

    Each epoch draws a fresh permutation of the rows of ``df_train``; every
    mini-batch of positives is expanded with random negatives by
    ``get_instances_with_random_neg_samples`` before the forward pass.

    Args:
        model: module called as ``model(user_ids, item_ids)``.
        df_train: DataFrame with ``user_id`` and ``like_id`` columns.
        epochs: number of passes over ``df_train``.
        lr: Adam learning rate.
        batch_size: number of positive rows per mini-batch (the last batch may
            be smaller).
        num_negatives: negatives sampled per positive row.
        unsqueeze: kept for backward compatibility. The targets are always
            reshaped to the shape of the model output, which is what the
            previous unconditional ``unsqueeze(1)`` achieved.
        num_items: exclusive upper bound for sampled negative item ids. When
            ``None`` the notebook-level ``num_uniqueLikes`` is used, as before.
    """
    if num_items is None:
        num_items = num_uniqueLikes  # notebook-level global, read at call time

    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6)
    model.train()

    num_rows = df_train.shape[0]
    num_batches = math.ceil(num_rows / batch_size)

    for epoch in range(epochs):
        permutation = torch.randperm(num_rows)
        for batch_idx, start in enumerate(range(0, num_rows, batch_size)):
            # Slice the permutation by the batch offset (not the epoch number)
            # and select ROWS by position; df[...] would look up column labels.
            indices = permutation[start:start + batch_size].numpy()
            batch = df_train.iloc[indices].reset_index(drop=True)

            train_user_input, train_item_input, train_ratings = get_instances_with_random_neg_samples(
                batch,
                num_items,
                num_negatives,
                device
            )

            optimizer.zero_grad()
            y_hat = model(train_user_input, train_item_input)
            # BCELoss needs targets shaped like the predictions.
            train_ratings = train_ratings.view_as(y_hat)
            loss = criterion(y_hat, train_ratings)

            loss.backward()
            optimizer.step()
            if epoch % 5 == 0:
                print('epoch: ', epoch, 'batch: ', batch_idx, 'out of: ', num_batches,
                      'average loss: ', loss.item())


# %% model evaluation: hit rate and NDCG
def evaluate_model(model, df_val, top_K, random_samples, num_items):
    """Compute mean hit ratio and NDCG over the rows of ``df_val``.

    For each row, the held-out item is scored together with ``random_samples``
    random other items and the items are ranked by score.

    Args:
        model: module called as ``model(user_ids, item_ids)``.
        df_val: indexable of rows where ``row[0]`` is the user id and
            ``row[1]`` the held-out item id.
        top_K: number of cutoffs; column ``k`` uses the first ``k`` ranked items.
        random_samples: random items scored next to each held-out item.
        num_items: exclusive upper bound for the random item ids.

    Returns:
        ``(avg_HR, avg_NDCG)``: two arrays of length ``top_K``, averaged over rows.

    NOTE(review): cutoffs run over ``range(top_K)``, so column 0 uses an empty
    list (always 0) and a list of length ``top_K`` is never evaluated. This
    looks like an off-by-one but is left unchanged, because shifting the
    columns would silently change what existing consumers read.
    """
    model.eval()
    avg_HR = np.zeros((len(df_val), top_K))
    avg_NDCG = np.zeros((len(df_val), top_K))

    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for i in range(len(df_val)):
            test_user_input, test_item_input = get_test_instances_with_random_samples(
                df_val[i], random_samples, num_items, device)
            y_hat = model(test_user_input, test_item_input)
            y_hat = y_hat.cpu().numpy().reshape((-1,))
            test_item_input = test_item_input.cpu().numpy().reshape((-1,))

            map_item_score = {}
            for j in range(len(y_hat)):
                map_item_score[test_item_input[j]] = y_hat[j]
            gtItem = test_item_input[0]

            # Rank once; a stable descending sort sliced to k gives the same
            # list as heapq.nlargest(k, ...) did for every k.
            ranked = sorted(map_item_score, key=map_item_score.get, reverse=True)
            for k in range(top_K):
                ranklist = ranked[:k]
                avg_HR[i, k] = getHitRatio(ranklist, gtItem)
                avg_NDCG[i, k] = getNDCG(ranklist, gtItem)

    avg_HR = np.mean(avg_HR, axis=0)
    avg_NDCG = np.mean(avg_NDCG, axis=0)
    return avg_HR, avg_NDCG

In [None]:
# %% load data
# Interaction tables with `user_id` and `like_id` columns (both are read below).
train_data = pd.read_csv("train-test/train_userPages.csv")
test_data = pd.read_csv("train-test/test_userPages.csv")

# %% set hyperparameters
# Embedding width and the widths of the four hidden layers of neuralCollabFilter.
emb_size = 128
hidden_layers = np.array([emb_size, 64, 32, 16])
output_size = 1
# Optimisation settings handed to train_epochs.
num_epochs = 25
learning_rate = 0.001
batch_size = 2048
num_negatives = 5

# Evaluation settings handed to evaluate_model.
random_samples = 100
top_K = 10

# Sizes of the embedding tables.
# NOTE(review): counting distinct ids presumably assumes the ids are the
# contiguous range 0..N-1 — confirm, otherwise an id can exceed the table size.
num_uniqueUsers = len(train_data.user_id.unique())
num_uniqueLikes = len(train_data.like_id.unique())

In [None]:
# Single-batch smoke test: build the model and run one optimisation step on the
# first `batch_size` rows of the training data.
# The model is placed with `.to(device)` only; an extra unconditional `.cuda()`
# would raise on machines without a GPU even though `device` already falls
# back to the CPU.
preTrained_NCF = neuralCollabFilter(num_uniqueUsers, num_uniqueLikes, emb_size, hidden_layers, output_size).to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(preTrained_NCF.parameters(), lr=learning_rate, weight_decay=1e-6)
preTrained_NCF.train()
data_batch = (train_data[0:(0 + batch_size)]).reset_index(drop=True)
train_user_input, train_item_input, train_ratings = get_instances_with_random_neg_samples(
    data_batch,
    num_uniqueLikes,
    num_negatives,
    device
)
# BCELoss needs targets shaped like the (N, 1) model output.
train_ratings = train_ratings.unsqueeze(1)
y_hat = preTrained_NCF(train_user_input, train_item_input)
loss = criterion(y_hat, train_ratings)
optimizer.zero_grad()
loss.backward()
optimizer.step()

In [None]:
# Display the like_id values recorded for user_id 4, sorted ascending.
train_data[train_data.user_id==4].like_id.sort_values(ascending=True)

In [None]:
# Re-create the model from scratch and run the full pre-training.
# `.to(device)` already places the model; the former unconditional `.cuda()`
# call failed on CPU-only machines.
preTrained_NCF = neuralCollabFilter(num_uniqueUsers, num_uniqueLikes, emb_size, hidden_layers, output_size).to(device)
train_epochs(preTrained_NCF, train_data, num_epochs, learning_rate, batch_size, num_negatives, unsqueeze=True)

In [None]:
# torch.save(preTrained_NCF.state_dict(), "trained-models/preTrained_NCF")
# NOTE(review): the save above is disabled, yet later cells call
# torch.load("trained-models/preTrained_NCF"). Presumably that file is provided
# separately — confirm, otherwise a fresh run has nothing to load.

# %% evaluate the model

# Mean hit ratio and NDCG of the pre-trained model on the test rows
# (`.values` hands evaluate_model plain rows of [user_id, like_id]).
avg_HR_preTrain, avg_NDCG_preTrain = evaluate_model(preTrained_NCF, test_data.values, top_K, random_samples,
                                                    num_uniqueLikes)

# np.savetxt('results/avg_HR_preTrain.txt', avg_HR_preTrain)
# np.savetxt('results/avg_NDCG_preTrain.txt', avg_NDCG_preTrain)

# sys.stdout.close()


In [None]:
def compute_gender_direction(data, S, user_vectors):
    """Average the user vectors of each of the two gender groups.

    Row ``i`` of ``data`` supplies the user id and row ``i`` of ``S`` that
    user's ``gender`` value; value 0 goes to group 0, anything else to group 1.

    Args:
        data: DataFrame with a ``user_id`` column.
        S: DataFrame with a ``gender`` column, aligned row by row with ``data``.
        user_vectors: 2-D array indexed by user id.

    Returns:
        Array of shape ``(2, user_vectors.shape[1])`` with the mean vector of
        group 0 in row 0 and of group 1 in row 1.
    """
    group_sums = np.zeros((2, user_vectors.shape[1]))
    group_sizes = np.zeros((2, 1))

    for row in range(len(data)):
        user = data['user_id'][row]
        # S = 0 indicates male and S = 1 indicates female
        group = 0 if S['gender'][row] == 0 else 1
        group_sums[group] += user_vectors[user]
        group_sizes[group] += 1.0

    # average gender embedding
    return group_sums / group_sizes

def compute_bias_direction(gender_vectors):
    """Return the unit vector pointing from ``gender_vectors[0]`` to ``gender_vectors[1]``.

    The result has shape ``(1, d)``.
    """
    origin = gender_vectors[0].reshape((1, -1))
    target = gender_vectors[1].reshape((1, -1))
    direction = target - origin
    length = np.linalg.norm(direction, axis=1, keepdims=1)
    return direction / length

def linear_projection(data,user_vectors,vBias):
    """Remove the component along ``vBias`` from the vectors of the users in ``data``.

    Applies ``u - <u, v_b> v_b`` for each row's ``user_id``. ``user_vectors`` is
    modified in place and the same array is returned.
    """
    for row in range(len(data)):
        user = data['user_id'][row]
        vec = user_vectors[user]
        # scalar projection of this user's vector on the bias direction
        coeff = np.inner(vec.reshape(1,-1),vBias)[0][0]
        user_vectors[user] = vec - coeff*vBias
    return user_vectors

In [None]:
# User ids and career ("concentration") item ids; each CSV is read into a
# single column with the name given here.
train_users= pd.read_csv("train-test/train_usersID.csv",names=['user_id'])
test_users = pd.read_csv("train-test/test_usersID.csv",names=['user_id'])

train_careers= pd.read_csv("train-test/train_concentrationsID.csv",names=['like_id'])
test_careers = pd.read_csv("train-test/test_concentrationsID.csv",names=['like_id'])

# Protected attributes; the `gender` column is read from these later on.
train_protected_attributes= pd.read_csv("train-test/train_protectedAttributes.csv")
test_protected_attributes = pd.read_csv("train-test/test_protectedAttributes.csv")

unique_careers= pd.read_csv("train-test/unique_careers.csv")
# Page-like interactions, used below to size the pre-trained embedding tables.
train_userPages = pd.read_csv("train-test/train_userPages.csv")

# NOTE: this rebinds `train_data` / `test_data`, which earlier held the
# user-page tables, to (user_id, like_id) career pairs built column-wise.
train_data = (pd.concat([train_users['user_id'],train_careers['like_id']],axis=1)).reset_index(drop=True)
test_data = (pd.concat([test_users['user_id'],test_careers['like_id']],axis=1)).reset_index(drop=True)

In [None]:
# Model shape; it has to match the checkpoint loaded in the next cell.
emb_size = 128
hidden_layers = np.array([emb_size, 64, 32, 16])
output_size = 1
# NOTE: the assignments below overwrite the values set for pre-training
# (num_epochs, batch_size and top_K differ from the earlier cell).
num_epochs = 10
learning_rate = 0.001
batch_size = 256
num_negatives = 5

random_samples = 100
top_K = 25

# to load pre-train model correctly
num_uniqueUsers = len(train_userPages.user_id.unique())
num_uniqueLikes = len(train_userPages.like_id.unique())

# to fine tune career recommendation
num_uniqueCareers = len(train_data.like_id.unique())

In [None]:
# Rebuild the architecture and load the pre-trained weights.
# `map_location=device` lets a checkpoint written on a GPU machine be loaded
# when only the CPU is available (and vice versa).
debiased_NCF = neuralCollabFilter(num_uniqueUsers, num_uniqueLikes, emb_size, hidden_layers,output_size).to(device)
debiased_NCF.load_state_dict(torch.load("trained-models/preTrained_NCF", map_location=device))
debiased_NCF.to(device)
# Copy the user embedding table out as a float64 NumPy array and store it as text.
users_embed = debiased_NCF.user_emb.weight.data.cpu().detach().numpy()
users_embed = users_embed.astype('float')
np.savetxt('results/users_embed.txt',users_embed)

In [None]:
# Mean user embedding per gender group, computed on the training users only.
gender_embed = compute_gender_direction(train_data, train_protected_attributes, users_embed)
# np.savetxt('results/gender_embed.txt',gender_embed)

# Unit vector from the group-0 mean to the group-1 mean.
vBias = compute_bias_direction(gender_embed)
# np.savetxt('results/vBias.txt',vBias)

# incorporate all users: debias train & test both
all_data = (pd.concat([train_data,test_data],axis=0)).reset_index(drop=True)
# linear_projection writes into `users_embed` and returns that same array, so
# `debias_users_embed` and `users_embed` refer to one object after this line.
debias_users_embed = linear_projection(all_data,users_embed,vBias) # debias train and test users in one pass
#debias_users_embed = linear_projection(test_data,debias_users_embed,vBias) # then debias test users
# NOTE(review): the save below is disabled, yet a later cell reads
# 'results/debias_users_embed.txt' with np.loadtxt — presumably the file is
# provided separately; confirm.
# np.savetxt('results/debias_users_embed.txt',debias_users_embed)


In [None]:
'''CAREER RECOMMEND'''
def criterionHinge(epsilonClass, epsilonBase):
    """Hinge penalty ``max(0, epsilonClass - epsilonBase)``, returned as a tensor."""
    excess = epsilonClass - epsilonBase
    floor = torch.tensor(0.0).to(device)
    return torch.max(floor, excess)

def computeEDF(protectedAttributes,predictions,numClasses,item_input,device):
    """Average differential-fairness epsilon of ``predictions`` across items.

    Predictions are summed per (item, protected group) cell, smoothed with a
    Dirichlet prior, and passed to ``differentialFairnessMultiClass``.

    Args:
        protectedAttributes: array-like with one group label per prediction.
            Labels may be any values; they are mapped onto ``0..G-1`` in
            sorted order, so they need not start at 0 or be contiguous.
        predictions: tensor with one score per sample, shape ``(N,)`` or ``(N, 1)``.
        numClasses: number of items (rows of the count tables).
        item_input: item id per sample, each in ``range(numClasses)``.
        device: torch device (or device string) used for the tables.

    Returns:
        0-dim tensor with the mean epsilon over items; gradients flow back to
        ``predictions``.
    """
    # Distinct group labels (e.g. gender: male = 0; female = 1) and, for every
    # sample, the column index of its label.
    S, group_index = np.unique(np.asarray(protectedAttributes).reshape(-1), return_inverse=True)

    scores = predictions.reshape(-1).to(device=device, dtype=torch.float)
    n = len(scores)
    groups = torch.as_tensor(group_index[:n], dtype=torch.long, device=device)
    items = torch.as_tensor(item_input, dtype=torch.long, device=device).reshape(-1)[:n]

    # each entry corresponds to an (item, group) intersection
    countsClassOne = torch.zeros((numClasses,len(S)),dtype=torch.float).to(device)
    countsTotal = torch.zeros((numClasses,len(S)),dtype=torch.float).to(device)

    concentrationParameter = 1.0
    dirichletAlpha = concentrationParameter/numClasses

    # Accumulate all samples in one scatter-add instead of a Python loop of
    # single-element in-place updates.
    countsTotal.index_put_((items, groups), torch.ones_like(scores), accumulate=True)
    countsClassOne.index_put_((items, groups), scores, accumulate=True)

    probabilitiesForDFSmoothed = (countsClassOne + dirichletAlpha) /(countsTotal + concentrationParameter)
    avg_epsilon = differentialFairnessMultiClass(probabilitiesForDFSmoothed,numClasses,device)
    return avg_epsilon
def differentialFairnessMultiClass(probabilitiesOfPositive,numClasses,device):
    """Mean over rows of the largest absolute log-ratio between two columns.

    Args:
        probabilitiesOfPositive: 2-D tensor; row ``c`` holds the positive
            probability of every protected group for class ``c``.
        numClasses: unused; kept for interface compatibility.
        device: torch device (or device string) for the result.

    Returns:
        0-dim float tensor: the average per-row epsilon.
    """
    if probabilitiesOfPositive.shape[1] == 0:
        # no groups: every per-class epsilon is 0
        epsilonPerClass = torch.zeros(len(probabilitiesOfPositive),dtype=torch.float).to(device)
    else:
        # max over pairs (i, j) of |log p_i - log p_j| equals max(log p) - min(log p)
        logProbs = torch.log(probabilitiesOfPositive)
        epsilonPerClass = (logProbs.amax(dim=1) - logProbs.amin(dim=1)).to(device=device, dtype=torch.float)
    avg_epsilon = torch.mean(epsilonPerClass)
    return avg_epsilon

def computeAbsoluteUnfairness(protectedAttributes,predictions,numClasses,item_input,device):
    """Absolute unfairness between the first two protected groups.

    For every (item, group) cell the smoothed mean score is compared with that
    group's overall mean score; the result is the mean over items of the
    absolute difference between the deviations of group column 0 and column 1.

    Args:
        protectedAttributes: array-like with one group label per prediction.
            Labels are mapped onto ``0..G-1`` in sorted order, so they need
            not start at 0. At least two distinct labels are required.
        predictions: tensor with one score per sample, shape ``(N,)`` or ``(N, 1)``.
        numClasses: number of items (rows of the tables).
        item_input: item id per sample, each in ``range(numClasses)``.
        device: torch device (or device string) used for the tables.

    Returns:
        0-dim tensor ``U_abs``.
    """
    # Distinct group labels (e.g. gender: male = 0; female = 1) and, for every
    # sample, the column index of its label.
    S, group_index = np.unique(np.asarray(protectedAttributes).reshape(-1), return_inverse=True)

    scores = predictions.reshape(-1).to(device=device, dtype=torch.float)
    n = len(scores)
    groups = torch.as_tensor(group_index[:n], dtype=torch.long, device=device)
    items = torch.as_tensor(item_input, dtype=torch.long, device=device).reshape(-1)[:n]

    # each entry corresponds to an (item, group) intersection
    scorePerGroupPerItem = torch.zeros((numClasses,len(S)),dtype=torch.float).to(device)
    scorePerGroup = torch.zeros(len(S),dtype=torch.float).to(device)
    countPerItem = torch.zeros((numClasses,len(S)),dtype=torch.float).to(device)

    concentrationParameter = 1.0
    dirichletAlpha = concentrationParameter/numClasses

    # One scatter-add per table instead of a per-sample Python loop.
    scorePerGroupPerItem.index_put_((items, groups), scores, accumulate=True)
    countPerItem.index_put_((items, groups), torch.ones_like(scores), accumulate=True)
    scorePerGroup.index_put_((groups,), scores, accumulate=True)

    avgScorePerGroupPerItem = (scorePerGroupPerItem + dirichletAlpha) /(countPerItem + concentrationParameter)
    # overall mean score of each group (total score / number of samples)
    avg_score = scorePerGroup/torch.sum(countPerItem,dim=0)
    difference = torch.abs(avgScorePerGroupPerItem - avg_score)
    U_abs = torch.mean(torch.abs(difference[:,0]-difference[:,1]))
    return U_abs

In [None]:
def fair_fine_tune_model(model,df_train, epochs, lr,batch_size,num_negatives,num_items,protectedAttributes,lamda,epsilonBase,unsqueeze=False):
    """Fine-tune ``model`` with BCE loss plus a weighted fairness hinge penalty.

    Every step combines the BCE loss on one mini-batch (positives plus random
    negatives) with ``lamda * max(0, epsilon - epsilonBase)``, where ``epsilon``
    comes from ``computeEDF`` on the model's scores for all rows of ``df_train``.
    Only full batches are processed; trailing rows that do not fill a batch
    are skipped. Rows are visited in their stored order.

    Args:
        model: module called as ``model(user_ids, item_ids)``.
        df_train: DataFrame with ``user_id`` and ``like_id`` columns.
        epochs: number of passes over ``df_train``.
        lr: Adam learning rate.
        batch_size: positive rows per mini-batch.
        num_negatives: negatives sampled per positive row.
        num_items: number of items (sampling bound and fairness table height).
        protectedAttributes: group label per row of ``df_train``.
        lamda: weight of the fairness penalty.
        epsilonBase: tensor threshold below which no penalty is applied.
        unsqueeze: when true, targets get a trailing dimension of size 1.
    """
    bce = nn.BCELoss()
    adam = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6)
    model.train()

    # the whole training set, scored at every step for the fairness term
    every_user = torch.LongTensor(df_train['user_id'].values).to(device)
    every_item = torch.LongTensor(df_train['like_id'].values).to(device)

    for epoch in range(epochs):
        full_batches = np.int64(np.floor(len(df_train)/batch_size))
        for step, start in enumerate(range(0, full_batches*batch_size, batch_size)):
            chunk = (df_train[start:(start+batch_size)]).reset_index(drop=True)
            users, items, targets = get_instances_with_random_neg_samples(chunk, num_items, num_negatives,device)
            if unsqueeze:
                targets = targets.unsqueeze(1)
            recommendation_loss = bce(model(users, items), targets)

            all_scores = model(every_user, every_item)
            fairness_gap = computeEDF(protectedAttributes,all_scores,num_items,every_item,device)
            fairness_loss = criterionHinge(fairness_gap, epsilonBase)

            total_loss = recommendation_loss + lamda*fairness_loss

            adam.zero_grad()
            total_loss.backward()
            adam.step()
            print('epoch: ', epoch, 'batch: ', step, 'out of: ',full_batches, 'average loss: ',total_loss.item())

In [None]:
def evaluate_fine_tune(model,df_val,top_K,random_samples, num_items):
    """Compute mean hit ratio and NDCG over the rows of ``df_val``.

    For each row, the held-out item is scored together with ``random_samples``
    random other items and the items are ranked by score.

    Args:
        model: module called as ``model(user_ids, item_ids)``.
        df_val: indexable of rows where ``row[0]`` is the user id and
            ``row[1]`` the held-out item id.
        top_K: number of cutoffs; column ``k`` uses the first ``k`` ranked items.
        random_samples: random items scored next to each held-out item.
        num_items: exclusive upper bound for the random item ids.

    Returns:
        ``(avg_HR, avg_NDCG)``: two arrays of length ``top_K``, averaged over rows.

    NOTE(review): cutoffs run over ``range(top_K)``, so column 0 uses an empty
    list (always 0) and a list of length ``top_K`` is never evaluated. This
    looks like an off-by-one but is left unchanged, because shifting the
    columns would silently change what existing consumers read.
    """
    model.eval()
    avg_HR = np.zeros((len(df_val),top_K))
    avg_NDCG = np.zeros((len(df_val),top_K))

    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for i in range(len(df_val)):
            test_user_input, test_item_input = get_test_instances_with_random_samples(df_val[i], random_samples,num_items,device)
            y_hat = model(test_user_input, test_item_input)
            y_hat = y_hat.cpu().numpy().reshape((-1,))
            test_item_input = test_item_input.cpu().numpy().reshape((-1,))

            map_item_score = {}
            for j in range(len(y_hat)):
                map_item_score[test_item_input[j]] = y_hat[j]
            gtItem = test_item_input[0]

            # Rank once; a stable descending sort sliced to k gives the same
            # list as heapq.nlargest(k, ...) did for every k.
            ranked = sorted(map_item_score, key=map_item_score.get, reverse=True)
            for k in range(top_K):
                ranklist = ranked[:k]
                avg_HR[i,k] = getHitRatio(ranklist, gtItem)
                avg_NDCG[i,k] = getNDCG(ranklist, gtItem)

    avg_HR = np.mean(avg_HR, axis = 0)
    avg_NDCG = np.mean(avg_NDCG, axis = 0)
    return avg_HR, avg_NDCG

In [None]:
def fairness_measures(model,df_val,num_items,protectedAttributes):
    """Print differential fairness and absolute unfairness of ``model`` on ``df_val``.

    Args:
        model: module called as ``model(user_ids, item_ids)``.
        df_val: DataFrame with ``user_id`` and ``like_id`` columns.
        num_items: number of items (height of the fairness tables).
        protectedAttributes: group label per row of ``df_val``.

    Returns:
        None; both measures are printed with three decimals.
    """
    model.eval()
    user_input = torch.LongTensor(df_val['user_id'].values).to(device)
    item_input = torch.LongTensor(df_val['like_id'].values).to(device)

    # Pure evaluation: no autograd graph is needed for the scores or the
    # fairness tables built from them.
    with torch.no_grad():
        y_hat = model(user_input, item_input)
        avg_epsilon = computeEDF(protectedAttributes,y_hat,num_items,item_input,device)
        U_abs = computeAbsoluteUnfairness(protectedAttributes,y_hat,num_items,item_input,device)

    avg_epsilon = avg_epsilon.cpu().item()
    print(f"average differential fairness: {avg_epsilon: .3f}")

    U_abs = U_abs.cpu().item()
    print(f"absolute unfairness: {U_abs: .3f}")

In [None]:
# NOTE: this cell repeats the data loading done in an earlier cell of the
# notebook, so the fine-tuning section has its inputs loaded right before use.
# User ids and career ("concentration") item ids; each CSV is read into a
# single column with the name given here.
train_users= pd.read_csv("train-test/train_usersID.csv",names=['user_id'])
test_users = pd.read_csv("train-test/test_usersID.csv",names=['user_id'])

train_careers= pd.read_csv("train-test/train_concentrationsID.csv",names=['like_id'])
test_careers = pd.read_csv("train-test/test_concentrationsID.csv",names=['like_id'])

# Protected attributes; the `gender` column is read from these in the next cell.
train_protected_attributes= pd.read_csv("train-test/train_protectedAttributes.csv")
test_protected_attributes = pd.read_csv("train-test/test_protectedAttributes.csv")

# Disabled one-off snippet that wrote 'train-test/unique_careers.csv':
# =============================================================================
# train_labels= pd.read_csv("train-test/train_labels.csv",names=['labels'])
# test_labels = pd.read_csv("train-test/test_labels.csv",names=['labels'])
#
# unique_concentrations = (pd.concat([train_careers['like_id'],train_labels['labels']],axis=1)).reset_index(drop=True)
# unique_concentrations = unique_concentrations.drop_duplicates(subset='like_id', keep='first')
#
# unique_careers = unique_concentrations.sort_values(by=['like_id']).reset_index(drop=True)
# unique_careers.to_csv('train-test/unique_careers.csv',index=False)
# =============================================================================
unique_careers= pd.read_csv("train-test/unique_careers.csv")
# Page-like interactions, used below to size the pre-trained embedding tables.
train_userPages = pd.read_csv("train-test/train_userPages.csv")

# (user_id, like_id) career pairs, built by placing the two columns side by side.
train_data = (pd.concat([train_users['user_id'],train_careers['like_id']],axis=1)).reset_index(drop=True)
test_data = (pd.concat([test_users['user_id'],test_careers['like_id']],axis=1)).reset_index(drop=True)

In [None]:
# Model shape; it has to match the checkpoint loaded in the next cell.
emb_size = 128
hidden_layers = np.array([emb_size, 64, 32, 16])
output_size = 1
# Fine-tuning settings handed to fair_fine_tune_model.
num_epochs = 10
learning_rate = 0.001
batch_size = 256
num_negatives = 5

# Evaluation settings handed to evaluate_fine_tune.
random_samples = 15
top_K = 10

# to load pre-train model correctly
num_uniqueUsers = len(train_userPages.user_id.unique())
num_uniqueLikes = len(train_userPages.like_id.unique())

# to fine tune career recommendation
num_uniqueCareers = len(train_data.like_id.unique())

# Group label per row of the train / test data.
train_gender = train_protected_attributes['gender'].values
test_gender = test_protected_attributes['gender'].values

# NOTE(review): `fairness_thres` is passed to fair_fine_tune_model in the
# `lamda` position, i.e. it is used as the weight of the fairness penalty,
# while `epsilonBase` is the threshold of the hinge — the name looks
# misleading; confirm the intent.
fairness_thres = torch.tensor(0.1).to(device)
epsilonBase = torch.tensor(0.0).to(device)

In [None]:
# Rebuild the pre-trained architecture and load its weights.
DF_NCF = neuralCollabFilter(num_uniqueUsers, num_uniqueLikes, emb_size, hidden_layers,output_size).to(device)

# `map_location=device` lets a checkpoint written on a GPU machine be loaded
# when only the CPU is available (and vice versa).
DF_NCF.load_state_dict(torch.load("trained-models/preTrained_NCF", map_location=device))

DF_NCF.to(device)

In [None]:
# replace page items with career items: a new, randomly initialised item
# embedding table with one row per distinct career id
DF_NCF.like_emb = nn.Embedding(num_uniqueCareers,emb_size).to(device)
# freeze user embedding
DF_NCF.user_emb.weight.requires_grad=False
# load debiased user embeddings
# NOTE(review): this file is read from disk; the np.savetxt call that would
# write it is commented out in the debiasing cell — confirm the file exists.
debias_users_embed = np.loadtxt('results/debias_users_embed.txt')
# replace user embedding of the model with debiased embeddings (cast to float32)
DF_NCF.user_emb.weight.data = torch.from_numpy(debias_users_embed.astype(np.float32)).to(device)

# Fine-tune on the career data; `fairness_thres` is passed in the `lamda`
# (penalty weight) position of fair_fine_tune_model.
fair_fine_tune_model(DF_NCF,train_data, num_epochs, learning_rate,batch_size,num_negatives,num_uniqueCareers,train_gender,fairness_thres,epsilonBase, unsqueeze=True)

# Persist the fine-tuned weights.
torch.save(DF_NCF.state_dict(), "trained-models/DF_NCF")

In [None]:
import sys
# From here on everything printed goes to NFCF_output.txt instead of the notebook.
# NOTE(review): the file handle is never closed and sys.stdout is never
# restored in the visible code, so later print output (including the one from
# the fairness_measures cell) also lands in this file and may stay buffered
# until the kernel exits — confirm this is intended.
sys.stdout=open("NFCF_output.txt","w")

# Mean hit ratio / NDCG of the fine-tuned model on the test rows.
avg_HR_DF_NCF, avg_NDCG_DF_NCF = evaluate_fine_tune(DF_NCF,test_data.values,top_K,random_samples, num_uniqueCareers)

np.savetxt('results/avg_HR_NFCF.txt',avg_HR_DF_NCF)
np.savetxt('results/avg_NDCG_NFCF.txt',avg_NDCG_DF_NCF)

In [None]:
# Print the differential-fairness and absolute-unfairness measures of the
# fine-tuned model on the test data (output follows the current sys.stdout).
fairness_measures(DF_NCF,test_data,num_uniqueCareers,test_gender)


