In [1]:
import os
import time
import argparse
import csv

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.backends.cudnn as cudnn

import numpy as np 
import pandas as pd 
import scipy.sparse as sp
import random 
from collections import defaultdict

#Load Methods
from util.metrics import metrics
from data.load_data import load_all
from util.bpr import sbpr,bpr

In [2]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

cpu


## Prepare Dataset

In [3]:
# load_all() hands back seven values. Later cells iterate train_data as (user, item)
# pairs, size the embeddings with user_num / item_num, and pass top_comm, users and
# items to the bpr / sbpr samplers.
# NOTE(review): the exact structure of each value is defined in data.load_data — confirm there.
(
    train_data,
    test_data,
    user_num,
    item_num,
    top_comm,
    users,
    items,
) = load_all()

In [4]:
# Index the training interactions in both directions:
#   user_items[u] -> items paired with user u in train_data
#   item_users[i] -> users paired with item i in train_data
user_items, item_users = defaultdict(list), defaultdict(list)
for user, item in train_data:
    item_users[item].append(user)
    user_items[user].append(item)

# Compare how many distinct users/items occur in train_data with the full id lists.
print(*map(len, (user_items, users, item_users, items)))

1969 1969 13260 13260


In [5]:
# Shared state handed to bpr() here and to sbpr() in later cells.
# NOTE(review): presumably a per-user cache filled in by bpr() — confirm in util.bpr.
bpr_dict = defaultdict(set)

# Smoke-test the sampler on a single user; the return value is the cell's output.
bpr(users[1], bpr_dict, items, user_items)

3462

In [6]:
# Containers passed to every sbpr() call (also used by BPRDataset.ng_sample).
# NOTE(review): presumably per-user caches maintained by sbpr() — confirm in util.bpr.
sbpr_negative_dict = defaultdict(list)
sbpr_friend_dict = defaultdict(list)
user_saved_items = defaultdict(set)

# Smoke-test the sampler on a single user; it returns a pair of values (j, k).
sbpr(
    users[1],
    user_saved_items,
    sbpr_friend_dict,
    sbpr_negative_dict,
    bpr_dict,
    top_comm,
    user_items,
    items,
)

(9080, 4458)

In [7]:
## DataLoader
class BPRDataset(data.Dataset):
    """Dataset of ``(user, item_i, item_j, item_k)`` rows built with ``sbpr``.

    ``ng_sample()`` must be called before the dataset is indexed. It expands every
    ``(user, item)`` pair of ``features`` into ``num_ng`` rows, each completed with
    the ``(j, k)`` pair returned by one ``sbpr`` call.

    NOTE(review): ``ng_sample`` reads the notebook-level names ``sbpr``,
    ``user_saved_items``, ``sbpr_friend_dict``, ``sbpr_negative_dict``, ``bpr_dict``,
    ``top_comm``, ``user_items`` and ``items``; they must exist before it is called.

    Args:
        features: sized iterable of entries whose first two elements are
            ``(user, item)``.
        num_item: number of items; stored on the instance but not used by this class.
        num_ng: number of sampled rows generated per entry of ``features``.
        is_training: when truthy ``__getitem__`` returns
            ``(user, item_i, item_j, item_k)``; otherwise ``(user, item_i, item_k)``.
    """

    def __init__(self, features, num_item, num_ng=0, is_training=None):
        super(BPRDataset, self).__init__()
        self.features = features
        self.num_item = num_item
        self.num_ng = num_ng
        self.is_training = is_training
        # Filled by ng_sample(); None marks "not sampled yet" so that indexing too
        # early fails with a clear message instead of an AttributeError.
        self.features_fill = None

    def ng_sample(self):
        """(Re)build ``features_fill`` with ``num_ng`` sampled rows per feature pair."""
        sampled = []
        for entry in self.features:
            u, i = entry[0], entry[1]
            for _ in range(self.num_ng):
                j, k = sbpr(u, user_saved_items, sbpr_friend_dict, sbpr_negative_dict,
                            bpr_dict, top_comm, user_items, items)
                sampled.append([u, i, j, k])
        # Assign only once sampling finished, so a failing sbpr() call never leaves
        # a partially filled table behind.
        self.features_fill = sampled

    def __len__(self):
        # Deliberately independent of features_fill: the DataLoader (and its sampler)
        # is constructed before ng_sample() is first called and needs the final size.
        return self.num_ng * len(self.features)

    def __getitem__(self, idx):
        """Return the sampled row ``idx`` as a tuple (see ``is_training``).

        Raises:
            RuntimeError: if ``ng_sample()`` has not been called yet.
        """
        if self.features_fill is None:
            raise RuntimeError(
                "BPRDataset.ng_sample() must be called before the dataset is indexed")
        user, item_i, item_j, item_k = self.features_fill[idx][:4]
        if self.is_training:
            return user, item_i, item_j, item_k
        # Evaluation rows intentionally drop item_j and keep (user, item_i, item_k).
        return user, item_i, item_k

In [8]:

# Build the datasets: 10 sampled rows per training pair, 100 per test pair.
train_dataset = BPRDataset(train_data, item_num, num_ng=10, is_training=True)
test_dataset = BPRDataset(test_data, item_num, num_ng=100, is_training=False)

# Training batches are shuffled and loaded by 4 worker processes; test batches keep
# their order and are loaded in the main process.
train_loader = data.DataLoader(
    train_dataset, batch_size=64, shuffle=True, num_workers=4
)
test_loader = data.DataLoader(
    test_dataset, batch_size=64, shuffle=False, num_workers=0
)

In [9]:
# Populate both datasets so they can be indexed, then inspect a single test batch.
train_loader.dataset.ng_sample()
test_loader.dataset.ng_sample()

# In evaluation mode BPRDataset yields (user, item_i, item_k), so the third tensor
# of a test batch is item_k (not item_j).
for user, item_i, item_k in test_loader:
    print(type(user), type(item_i), type(item_k))
    print(user.type(), item_i.type(), item_k.type())
    break  # one batch is enough for this sanity check

<class 'torch.Tensor'> <class 'torch.Tensor'> <class 'torch.Tensor'>
torch.LongTensor torch.LongTensor torch.LongTensor


## Model

In [10]:
# model
class BPR(nn.Module):
    """Matrix-factorisation scorer: score(user, item) = <user_embedding, item_embedding>.

    Args:
        user_num: number of rows of the user embedding table.
        item_num: number of rows of the item embedding table.
        factor_num: embedding dimension (number of latent factors).
    """

    def __init__(self, user_num, item_num, factor_num):
        super(BPR, self).__init__()
        self.embed_user = nn.Embedding(user_num, factor_num)
        self.embed_item = nn.Embedding(item_num, factor_num)

        # Small-variance normal initialisation for both embedding tables.
        nn.init.normal_(self.embed_user.weight, std=0.01)
        nn.init.normal_(self.embed_item.weight, std=0.01)

    def forward(self, user, item_i, item_j, item_k):
        """Score three items per user.

        Args:
            user, item_i, item_j, item_k: integer index tensors; the item tensors
                must be broadcastable against ``user``.

        Returns:
            Tuple ``(prediction_i, prediction_j, prediction_k)`` holding the dot
            product of each user embedding with the matching item embedding.
        """
        # Use the device the parameters actually live on instead of a notebook
        # global, and move *all* index tensors (including `user`) there.
        target = self.embed_user.weight.device
        user = user.to(target)
        item_i = item_i.to(target)
        item_j = item_j.to(target)
        item_k = item_k.to(target)

        user_vec = self.embed_user(user)

        def score(item):
            # Dot product over the latent-factor axis.
            return (user_vec * self.embed_item(item)).sum(dim=-1)

        return score(item_i), score(item_j), score(item_k)


In [11]:
# Instantiate the scorer with 8 latent factors per user/item.
model = BPR(user_num, item_num, factor_num=8)

# Move the parameters to the selected device; the returned module is the cell output.
model.to(device)

BPR(
  (embed_user): Embedding(2020, 8)
  (embed_item): Embedding(13303, 8)
)

In [12]:
# Adam over every model parameter (both embedding tables), learning rate 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [13]:
########################### TRAINING #####################################
for epoch in range(5):
    model.train()
    start_time = time.time()
    # Draw a fresh set of sbpr samples for every training pair at the start of each epoch.
    train_loader.dataset.ng_sample()

    for user, item_i, item_j, item_k in train_loader:

        user = user.to(device)
        item_i = item_i.to(device)
        item_j = item_j.to(device)
        item_k = item_k.to(device)

        model.zero_grad()
        prediction_i, prediction_j, prediction_k = model(user, item_i, item_j, item_k)
        # Ranking loss: -sum log sigmoid(x_i - x_j) - sum log sigmoid(x_j - x_k).
        # logsigmoid is used instead of .sigmoid().log() because the latter
        # underflows to log(0) = -inf for strongly negative score gaps, which turns
        # the loss into inf and the gradients into NaN.
        loss = -(
            nn.functional.logsigmoid(prediction_i - prediction_j).sum()
            + nn.functional.logsigmoid(prediction_j - prediction_k).sum()
        )
        loss.backward()
        optimizer.step()

    model.eval()
    # The test rows are re-sampled before every evaluation.
    test_loader.dataset.ng_sample()
    # No gradients are needed for evaluation; skipping graph construction saves
    # memory and time. Assumes metrics() only runs forward passes — confirm in util.metrics.
    with torch.no_grad():
        accuracy = metrics(model, test_loader, device)

    elapsed_time = time.time() - start_time
    print("The time elapse of epoch {:03d}".format(epoch) + " is: " +
            time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
    print("accuracy : {}".format(accuracy))


The time elapse of epoch 000 is: 00: 00: 45
accuracy : 0.720147720867901
The time elapse of epoch 001 is: 00: 00: 44
accuracy : 0.7394011183287513
The time elapse of epoch 002 is: 00: 00: 43
accuracy : 0.7499389871177287
The time elapse of epoch 003 is: 00: 00: 43
accuracy : 0.7585503567257864
The time elapse of epoch 004 is: 00: 00: 44
accuracy : 0.7664050525509087
