In [2]:
import pandas as pd
import sys
import time
import copy
import math
import heapq
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim import Optimizer
from sklearn.model_selection import KFold
from torchvision import datasets, transforms
from math import sqrt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score, explained_variance_score,mean_squared_error
print (torch.cuda.is_available())
print (torch.version.cuda)
print (torch.cuda.get_device_name(torch.cuda.current_device()))

True
9.0.176
GeForce RTX 2080 Ti


In [3]:
#Evaluation of metrics
def getHitRatio(ranklist, gtItem):
    for item in ranklist:
        if item == gtItem:
            return 1
    return 0

def getNDCG(ranklist, gtItem):
    for i in range(len(ranklist)):
        item = ranklist[i]
        if item == gtItem:
            return math.log(2) / math.log(i+2)
    return 0
#dataset
class DataSet_1M(object):
    
    def __init__(self, negNum=2):
        self.trainList, self.shape = self._getTrainData()
        self.trainDict = self._getTrainDict()
        self.trainMat = self._getTrainMatrix()
        self.trainset = self._getInstances(negNum)#sample negative samples
        self.testset = self._getTest()
        
    def _getTrainData(self):
        data = []
        filePath = '/data/fjsdata/BNMF/ml-1m.train.rating'
        u = 0
        i = 0
        maxr = 0.0
        with open(filePath, 'r') as f:
            for line in f:
                if line:
                    lines = line[:-1].split("\t")
                    user = int(lines[0])
                    movie = int(lines[1])
                    score = float(lines[2])
                    data.append((user, movie, score))
                    if user > u:u = user
                    if movie > i:i = movie
                    if score > maxr:maxr = score
        self.maxRate = maxr
        print("Loading Success!\n"
                  "Data Info:\n"
                  "\tUser Num: {}\n"
                  "\tItem Num: {}\n"
                  "\tData Size: {}\n"
                  "\tSparsity: {}".format(u+1, i+1, len(data), len(data)/((u+1)*(i+1))))
        return data, [u+1, i+1]

    def _getTrainDict(self):
        dataDict = {}
        for i in self.trainList:
            dataDict[(i[0], i[1])] = i[2]
        return dataDict

    def _getTrainMatrix(self):
        train_matrix = np.zeros([self.shape[0], self.shape[1]], dtype=np.float32)
        for i in self.trainList:
            user = i[0]
            movie = i[1]
            rating = i[2]
            train_matrix[user][movie] = rating
        return np.array(train_matrix)

    def _getInstances(self, negNum):
        trainset = []
        for i in self.trainList:
            trainset.append([i[0],i[1],i[2]])
            for t in range(negNum):
                j = np.random.randint(self.shape[1])
                while (i[0], j) in self.trainDict:
                    j = np.random.randint(self.shape[1])
                trainset.append([i[0],j,0.0])
        print ('The length of Trainset: %d'%(len(trainset)))
        return trainset

    def _getTest(self):
        #loading data
        testset = []
        filePath = '/data/fjsdata/BNMF/ml-1m.test.negative'
        with open(filePath, 'r') as fd:
            line = fd.readline()
            while line != None and line != '':
                arr = line.split('\t')
                u = eval(arr[0])[0]
                testset.append([u, eval(arr[0])[1], 1.0])#first is one postive item
                for i in arr[1:]:
                    testset.append([u, int(i), 0.0]) #99 negative items
                line = fd.readline()
        print ('The length of Testset: %d'%(len(testset)))
        return testset

In [None]:
class BBP_Model(nn.Module):
    def __init__(self, input_dim_u, input_dim_i, factors_dim_k, num_units=[512]):
        super(BBP_Model, self).__init__()
        
        self.input_dim_u = input_dim_u #user vector
        self.input_dim_i = input_dim_i #item vector
        self.factors_dim_k = factors_dim_k #latent factors vector
        
        # network with three hidden and k output layer
        self.layer1_u = nn.Linear(input_dim_u, num_units[0])
        self.layer2_u = nn.Linear(num_units[0], factors_dim_k)
        
        self.layer1_i = nn.Linear(input_dim_i, num_units[0])
        self.layer2_i = nn.Linear(num_units[0], factors_dim_k)
        
        # activation to be used between hidden layers
        self.activation = nn.ReLU(inplace = True)
    
    def forward(self, x_u, x_i):
    
        x_u = x_u.view(-1, self.input_dim_u)
        x_i = x_i.view(-1, self.input_dim_i)
        #layer1
        x_u = self.layer1_u(x_u)
        x_u = self.activation(x_u)
        x_i = self.layer1_i(x_i)
        x_i = self.activation(x_i)
        #layer2
        x_u = self.layer2_u(x_u)
        x_u = self.activation(x_u)
        x_i = self.layer2_i(x_i)
        x_i = self.activation(x_i)
        
        output = torch.sum(torch.mul(x_u,x_i),1)#pxq
        
        return output
    
class BBP_Model_Wrapper:
    def __init__(self, network, learn_rate=1e-2):
        
        self.learn_rate = learn_rate
        self.network = network
        self.network.cuda()
        
        self.optimizer = torch.optim.Adam(self.network.parameters(), lr = self.learn_rate)
        self.loss_func = nn.MSELoss()
    
    def fit(self, x_u, x_i, y):
        x_u = torch.from_numpy(np.array(x_u)).type(torch.FloatTensor).cuda()
        x_i = torch.from_numpy(np.array(x_i)).type(torch.FloatTensor).cuda()
        y = torch.from_numpy(np.array(y)).type(torch.FloatTensor).cuda()
        # reset gradient and total loss
        self.optimizer.zero_grad()
        output = self.network(x_u, x_i)
        fit_loss = self.loss_func(output, y)
        
        fit_loss.backward()
        self.optimizer.step()

        return fit_loss
    
#training model
num_epochs = 1
batchSize = 10000
ds_1m = DataSet_1M()
trainset = ds_1m.trainset
trainMat = ds_1m.trainMat
testset = ds_1m.testset
shape = ds_1m.shape
for k in [5,10,15,20]:  
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    net = BBP_Model_Wrapper(network=BBP_Model(input_dim_u=shape[1], input_dim_i = shape[0],factors_dim_k=k))
    best_net, best_loss = None, float('inf')
    for epoch in range(num_epochs): #iteration
        shuffled_idx = np.random.permutation(np.arange(len(trainset)))
        trainset = np.array(trainset)[shuffled_idx].tolist()
        num_batches = len(trainset) // batchSize + 1 
        total_loss = []
        for i in range(num_batches):#batch
            min_idx = i * batchSize
            max_idx = np.min([len(trainset), (i+1)*batchSize])
            train_batch = trainset[min_idx: max_idx]
            x_u, x_i, y = [], [], []
            for uu,ii,rr in train_batch:
                x_u.append(trainMat[int(uu),:])
                x_i.append(trainMat[:,int(ii)])
                y.append(float(rr))
            _loss = net.fit(np.array(x_u), np.array(x_i), np.array(y))
            sys.stdout.write('\r {} / {} : loss = {}'.format(i, num_batches, float('%0.6f'%_loss.item())))
            sys.stdout.flush()
            total_loss.append(_loss.item())
        print("Epoch: %5d total_loss = %.6f" % (epoch + 1, np.mean(total_loss)))
        if np.mean(total_loss) < best_loss:
            best_loss = np.mean(total_loss)
            best_net = copy.deepcopy(net.network)
    #torch.save(best_net, "/data/tmpexec/BDMF_torch")
    #best_net = torch.load("/data/tmpexec/BDMF_torch").eval()
    #best_net = torch.load("/data/tmpexec/BDMF_torch").to('cuda:0')
    hits = []
    ndcgs = []
    for c in range(0,shape[0]):#6040
        scorelist = []
        gtItem = -1
        x_u, x_i, y_i = [], [], []
        for uu,ii,rr in testset[c*100:(c+1)*100]:#604000
            if rr == 1.0: 
                gtItem = ii
            x_u.append(np.array(trainMat[int(uu),:]))
            x_i.append(np.array(trainMat[:,int(ii)]))
            y_i.append(ii)
        x_u = torch.from_numpy(np.array(x_u)).type(torch.FloatTensor).cuda()
        x_i = torch.from_numpy(np.array(x_i)).type(torch.FloatTensor).cuda()
        output = best_net(x_u, x_i)
        output = output.cpu().data.numpy().tolist()
        for j in range(len(y_i)):
            scorelist.append([y_i[j],output[j]])
        map_item_score = {}
        for item, rate in scorelist: #turn dict
            map_item_score[item] = rate
        ranklist = heapq.nlargest(10, map_item_score, key=map_item_score.get)#topn=10
        hr = getHitRatio(ranklist, gtItem)
        hits.append(hr)
        ndcg = getNDCG(ranklist, gtItem)
        ndcgs.append(ndcg)
    hitratio,ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    print ("HR@%d=%.6f, NDCG@%d=%.6f" % (k, hitratio, k, ndcg))

Loading Success!
Data Info:
	User Num: 6040
	Item Num: 3706
	Data Size: 994169
	Sparsity: 0.04441379291858915
The length of Trainset: 2982507
The length of Testset: 604000
 298 / 299 : loss = 4.773833.0Epoch:     1 total_loss = 6141704.305227
HR@5=1.000000, NDCG@5=1.000000
 104 / 299 : loss = 4.681472.0

In [None]:
def log_gaussian_loss(output, target, sigma, no_dim, sum_reduce=True):
    exponent = -0.5*(target - output)**2/sigma**2
    log_coeff = -no_dim*torch.log(sigma) - 0.5*no_dim*np.log(2*np.pi)
    
    if sum_reduce:
        return -(log_coeff + exponent).sum()
    else:
        return -(log_coeff + exponent)
    
class gaussian:
    def __init__(self, mu, sigma):
        self.mu = mu
        self.sigma = sigma
        
    def loglik(self, weights):
        exponent = -0.5*(weights - self.mu)**2/self.sigma**2
        log_coeff = -0.5*(np.log(2*np.pi) + 2*np.log(self.sigma))
        
        return (exponent + log_coeff).sum()
    
class BayesLinear_Normalq(nn.Module):
    def __init__(self, input_dim, output_dim, prior):
        super(BayesLinear_Normalq, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.prior = prior
        
        self.weight_mus = nn.Parameter(torch.Tensor(self.input_dim, self.output_dim).uniform_(-0.01, 0.01))
        self.weight_rhos = nn.Parameter(torch.Tensor(self.input_dim, self.output_dim).uniform_(-3, -3))
        
    def forward(self, x):
        # sample gaussian noise for each weight
        weight_epsilons = Variable(self.weight_mus.data.new(self.weight_mus.size()).normal_())      
        # calculate the weight stds from the rho parameters
        weight_stds = torch.log(1 + torch.exp(self.weight_rhos))
        # calculate samples from the posterior from the sampled noise and mus/stds
        weight_sample = self.weight_mus + weight_epsilons*weight_stds
            
        torch.cuda.synchronize()
        output = torch.mm(x, weight_sample)
            
        # computing the KL loss term
        #reference: https://github.com/jojonki/AutoEncoders/blob/master/kl_divergence_between_two_gaussians.pdf
        prior_cov, varpost_cov = self.prior.sigma**2, weight_stds**2
        KL_loss = 0.5*(torch.log(prior_cov/varpost_cov)).sum() - 0.5*weight_stds.numel()
        KL_loss = KL_loss + 0.5*(varpost_cov/prior_cov).sum()
        KL_loss = KL_loss + 0.5*((self.weight_mus - self.prior.mu)**2/prior_cov).sum()
            
        return output, KL_loss
    
class BBP_Model(nn.Module):
    def __init__(self, input_dim_u, input_dim_i, factors_dim_k, num_units=[512]):
        super(BBP_Model, self).__init__()
        
        self.input_dim_u = input_dim_u #user vector
        self.input_dim_i = input_dim_i #item vector
        self.factors_dim_k = factors_dim_k #latent factors vector
        
        # network with three hidden and k output layer
        self.layer1_u = BayesLinear_Normalq(input_dim_u, num_units[0], gaussian(0, 3))
        self.layer2_u = BayesLinear_Normalq(num_units[0], factors_dim_k, gaussian(0, 3))
        
        self.layer1_i = BayesLinear_Normalq(input_dim_i, num_units[0], gaussian(0, 3))
        self.layer2_i = BayesLinear_Normalq(num_units[0], factors_dim_k, gaussian(0, 3))
        
        # activation to be used between hidden layers
        self.activation = nn.ReLU(inplace = True)
        # noise
        self.log_noise = nn.Parameter(torch.cuda.FloatTensor([3]))
    
    def forward(self, x_u, x_i):
        
        KL_loss_total = 0
        x_u = x_u.view(-1, self.input_dim_u)
        x_i = x_i.view(-1, self.input_dim_i)
        #layer1
        x_u, KL_loss_u = self.layer1_u(x_u)
        x_u = self.activation(x_u)
        x_i, KL_loss_i = self.layer1_i(x_i)
        x_i = self.activation(x_i)
        KL_loss_total = KL_loss_total + KL_loss_u + KL_loss_i
        #layer2
        x_u, KL_loss_u = self.layer2_u(x_u)
        x_u = self.activation(x_u)
        x_i, KL_loss_i = self.layer2_i(x_i)
        x_i = self.activation(x_i)
        KL_loss_total = KL_loss_total + KL_loss_u + KL_loss_i
        #pxq
        output = torch.sum(torch.mul(x_u,x_i),1)
        
        return output, KL_loss_total
    
class BBP_Model_Wrapper:
    def __init__(self, network, learn_rate=1e-2):
        
        self.learn_rate = learn_rate
        self.network = network
        self.network.cuda()
        
        self.optimizer = torch.optim.Adam(self.network.parameters(), lr = self.learn_rate)
        self.loss_func = log_gaussian_loss#nn.MSELoss() 
    
    def fit(self, x_u, x_i, y, no_samples):
        len_sql = y.shape[0]
        x_u = torch.from_numpy(np.array(x_u)).type(torch.FloatTensor).cuda()
        x_i = torch.from_numpy(np.array(x_i)).type(torch.FloatTensor).cuda()
        y = torch.from_numpy(np.array(y)).type(torch.FloatTensor).cuda()
        # reset gradient and total loss
        self.optimizer.zero_grad()
        fit_loss_total = 0
        KL_loss_total = 0
        for i in range(no_samples):
            output, KL_loss = self.network(x_u, x_i)
            KL_loss_total = KL_loss_total + KL_loss
            # calculate fit loss based on mean and standard deviation of output
            fit_loss = self.loss_func(output, y, self.network.log_noise.exp(), 1) 
            fit_loss_total = fit_loss_total + fit_loss
        
        total_loss = (fit_loss_total + KL_loss_total)/(no_samples*len_sql)
        total_loss.backward()
        self.optimizer.step()

        return total_loss
    
#training model
num_epochs = 10
batchSize = 10000
#ds_1m = DataSet_1M()
#trainset = ds_1m.trainset
#trainMat = ds_1m.trainMat
#testset = ds_1m.testset
#shape = ds_1m.shape
for k in [5,10,15,20]:  
    torch.cuda.empty_cache()
    net = BBP_Model_Wrapper(network=BBP_Model(input_dim_u=shape[1], input_dim_i = shape[0],factors_dim_k=k))
    best_net, best_loss = None, float('inf')
    for epoch in range(num_epochs): #iteration
        shuffled_idx = np.random.permutation(np.arange(len(trainset)))
        trainset = np.array(trainset)[shuffled_idx].tolist()
        num_batches = len(trainset) // batchSize + 1 
        total_loss = []
        for i in range(num_batches):#batch
            min_idx = i * batchSize
            max_idx = np.min([len(trainset), (i+1)*batchSize])
            train_batch = trainset[min_idx: max_idx]
            x_u, x_i, y = [], [], []
            for uu,ii,rr in train_batch:
                x_u.append(trainMat[int(uu),:])
                x_i.append(trainMat[:,int(ii)])
                y.append(float(rr))
            _loss = net.fit(np.array(x_u), np.array(x_i), np.array(y), no_samples=10)
            sys.stdout.write('\r {} / {} : loss = {}'.format(i, num_batches, float('%0.6f'%_loss.item())))
            sys.stdout.flush()
            total_loss.append(_loss.item())
        print("Epoch: %5d total_loss = %.6f" % (epoch + 1, np.mean(total_loss)))
        if np.mean(total_loss) < best_loss:
            best_loss = np.mean(total_loss)
            best_net = copy.deepcopy(net.network)
    #torch.save(best_net, "/data/tmpexec/BDMF_torch")
    #best_net = torch.load("/data/tmpexec/BDMF_torch").eval()
    #best_net = torch.load("/data/tmpexec/BDMF_torch").to('cuda:0')
    #performance   
    hits = []
    ndcgs = []
    for c in range(0,shape[0]):#6040
        scorelist = []
        gtItem = -1
        x_u, x_i, y_i = [], [], []
        for uu,ii,rr in testset[c*100:(c+1)*100]:#604000
            if rr == 1.0: 
                gtItem = ii #real hit item
            x_u.append(np.array(trainMat[int(uu),:]))
            x_i.append(np.array(trainMat[:,int(ii)]))
            y_i.append(ii)
        x_u = torch.from_numpy(np.array(x_u)).type(torch.FloatTensor).cuda()
        x_i = torch.from_numpy(np.array(x_i)).type(torch.FloatTensor).cuda()
        output,KL_loss = best_net(x_u, x_i)
        output = output.cpu().data.numpy().tolist()
        for j in range(len(y_i)):
            scorelist.append([y_i[j],output[j]])
        map_item_score = {}
        for item, rate in scorelist: #turn dict
            map_item_score[item] = rate
        ranklist = heapq.nlargest(10, map_item_score, key=map_item_score.get)##topn=10
        hr = getHitRatio(ranklist, gtItem)
        hits.append(hr)
        ndcg = getNDCG(ranklist, gtItem)
        ndcgs.append(ndcg)
    hitratio,ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    print ("HR@%d=%.6f, NDCG@%d=%.6f" % (k, hitratio, k, ndcg))

 298 / 299 : loss = 2411.391602Epoch:     1 total_loss = 1152.599930
 298 / 299 : loss = 827.132141Epoch:     2 total_loss = 361.045931
 298 / 299 : loss = 489.840424Epoch:     3 total_loss = 159.178854
 298 / 299 : loss = 392.186005Epoch:     4 total_loss = 111.871830
 298 / 299 : loss = 337.146179Epoch:     5 total_loss = 94.093116
 298 / 299 : loss = 295.023895Epoch:     6 total_loss = 81.599802
 298 / 299 : loss = 260.816528Epoch:     7 total_loss = 72.812910
 298 / 299 : loss = 231.683945Epoch:     8 total_loss = 64.020904
 298 / 299 : loss = 206.612305Epoch:     9 total_loss = 57.127743
 298 / 299 : loss = 184.69249Epoch:    10 total_loss = 51.144272
