In [1]:
import pandas as pd
import os
import numpy as np
import time

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Data Processing

In [2]:
# Sizes of the user and item id spaces. They dimension the GMF embedding
# tables, and item_size is also the exclusive upper bound for the item ids
# drawn during negative sampling.
# NOTE(review): hard-coded — presumably matches the processed ml-1m data; confirm.
user_size = 6040
item_size = 3760

In [3]:
# Directory holding the preprocessed dataset files (train.csv is read from here).
DATA_DIR = './processed_data/ml-1m/'

In [4]:
# Load the training interactions, keeping only the 'user' and 'item' columns;
# header = 0 tells pandas that the first row of the file holds the column names.
train_df = pd.read_csv(os.path.join(DATA_DIR,'train.csv'), usecols = ['user','item'], header = 0)

In [5]:
# Set of every candidate item id, 0 .. item_size - 1.
# NOTE(review): not referenced by any other cell visible here — possibly dead code.
movieId_set = set(np.arange(item_size))

# Map each user to the list of items that user appears with in train_df.
# bpr_getTrain uses it to reject already-seen items when sampling negatives.
u_dict = train_df.groupby('user')['item'].apply(list).to_dict()

In [6]:
class BPRDataset(Dataset):
    """Map-style dataset over index-aligned (user, positive item, negative item) triples."""

    def __init__(self, users, pos_items, neg_items):
        """Keep references to the three parallel sequences.

        Entry ``k`` of each sequence together forms the k-th training triple.
        """
        self.users, self.pos_items, self.neg_items = users, pos_items, neg_items

    def __len__(self):
        """Return the number of triples, taken from the length of ``users``."""
        return len(self.users)

    def __getitem__(self, idx):
        """Return the ``idx``-th triple as a dict with keys
        ``'user'``, ``'pos_item'`` and ``'neg_item'``."""
        return {
            'user': self.users[idx],
            'pos_item': self.pos_items[idx],
            'neg_item': self.neg_items[idx],
        }

In [18]:
def bpr_getTrain(N, train_batch_size):
    """Build a shuffled DataLoader of BPR training triples with fresh negatives.

    Every (user, item) row of the notebook-level ``train_df`` is repeated ``N``
    times, and each copy is paired with an item id drawn uniformly from
    ``[0, item_size)`` that does not occur in that user's entry of ``u_dict``
    (rejection sampling).

    Reads the notebook globals ``train_df``, ``u_dict`` and ``item_size``.

    Args:
        N: number of negative samples drawn per observed (user, item) row.
        train_batch_size: batch size handed to the DataLoader.

    Returns:
        A ``DataLoader`` over a ``BPRDataset`` whose batches are dicts with the
        keys ``'user'``, ``'pos_item'`` and ``'neg_item'``.

    Raises:
        ValueError: if negatives are requested for a user whose positives cover
            every id in ``[0, item_size)``; rejection sampling could never
            terminate for such a user.
        KeyError: if a user in ``train_df`` has no entry in ``u_dict``.
    """
    train_u = []
    train_pos_i = []
    train_neg_i = []

    u_list = train_df['user'].values
    i_list = train_df['item'].values

    # A user's positive set is identical for all of that user's rows, so it is
    # built once per user instead of once per interaction.
    positive_sets = {}

    for u, i in zip(u_list, i_list):

        train_u.extend([u] * N)
        train_pos_i.extend([i] * N)

        PositiveSet = positive_sets.get(u)
        if PositiveSet is None:
            PositiveSet = set(u_dict[u])
            # The full membership scan only runs when the set is large enough
            # to possibly contain every candidate id.
            if (N > 0 and len(PositiveSet) >= item_size
                    and all(c in PositiveSet for c in range(item_size))):
                raise ValueError(
                    'user {} has interacted with every item in [0, {}); '
                    'no negative item can be sampled'.format(u, item_size)
                )
            positive_sets[u] = PositiveSet

        for _ in range(N):  # sample negative items
            neg_i = np.random.randint(0, item_size)
            while neg_i in PositiveSet:
                neg_i = np.random.randint(0, item_size)
            train_neg_i.append(neg_i)

    train_dataset = BPRDataset(train_u, train_pos_i, train_neg_i)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size = train_batch_size,
                                  shuffle = True,
                                  num_workers = 4,
                                  pin_memory = True,
                                 )

    return train_dataloader

# Model

In [20]:
class GMF(nn.Module):
    """Score (user, item) pairs as the dot product of their learned embeddings.

    Args:
        user_size: number of rows in the user embedding table.
        item_size: number of rows in the item embedding table.
        embed_size: dimension of every embedding vector.
    """

    def __init__(self, user_size, item_size, embed_size):
        super().__init__()

        self.user_size = user_size
        self.item_size = item_size
        self.embed_size = embed_size

        # Both tables are Xavier-uniform initialised (user table first, so the
        # RNG consumption order is unchanged).
        self.embedding_user = nn.Embedding(user_size, embed_size)
        nn.init.xavier_uniform_(self.embedding_user.weight)

        self.embedding_item = nn.Embedding(item_size, embed_size)
        nn.init.xavier_uniform_(self.embedding_item.weight)

    def forward(self, user, item):
        """Return one score per (user, item) pair.

        Args:
            user: integer tensor of user indices.
            item: integer tensor of item indices, aligned with ``user``.

        Returns:
            1-D tensor with one dot-product score per pair.
        """
        user_embedding = self.embedding_user(user)
        item_embedding = self.embedding_item(item)

        # Reshape with the instance's own embed_size. Reading a notebook-level
        # global here would fail when the class is used before that global is
        # defined, and would silently mis-reshape if the global later changed.
        user_vec = user_embedding.view([-1, self.embed_size])
        item_vec = item_embedding.view([-1, self.embed_size])

        dot = torch.sum(torch.mul(user_vec, item_vec), dim = 1).view(-1)

        return dot

# Hyperparameters

In [21]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


# Number of (user, pos_item, neg_item) triples per training batch.
train_batch_size = 128

# Dimension of the user and item embedding vectors.
embed_size = 16

# Step size passed to the Adam optimizer.
learning_rate = 0.001

# Number of passes of the outer training loop.
epochs = 20

# Training 

In [22]:
# Build the model and move its parameters to the selected device.
model = GMF(user_size, item_size, embed_size).to(device)

# The training loop feeds this loss x = score(pos) - score(neg) with an
# all-ones target, for which BCE-with-logits reduces to -log(sigmoid(x)),
# i.e. the BPR pairwise ranking loss (averaged over the batch by default).
loss_function = nn.BCEWithLogitsLoss()

# Adam over every model parameter (both embedding tables).
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [23]:
# BPR training: one pass over freshly sampled triples per epoch.
for epoch in range(epochs):

    # Rebuild the loader every epoch so that each epoch draws new negatives
    # (4 negative samples per observed (user, item) row).
    train_dataloader = bpr_getTrain(4, train_batch_size)
    
    print(epoch)
    for idx, batch_data in enumerate(train_dataloader):
        # Embedding lookups need int64 indices on the model's device.
        user = batch_data['user'].long().to(device)
        item_i = batch_data['pos_item'].long().to(device)
        item_j = batch_data['neg_item'].long().to(device)
        # All-ones target: every pair should rank the positive item higher.
        label = torch.ones_like(item_i).float().to(device)
        
        # Clear gradients accumulated by the previous step.
        model.zero_grad()
        
        dot_i = model(user, item_i)#  [batch_size] score of the positive item
        dot_j= model(user, item_j)#  [batch_size] score of the negative item
        
        # Pairwise margin; the loss pushes it towards large positive values.
        x_ij = dot_i - dot_j
        
        bpr_loss = loss_function(x_ij, label)
             
        bpr_loss.backward()
        
        optimizer.step()

0
1
2


Exception in thread Thread-6:
Traceback (most recent call last):
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/site-packages/torch/utils/data/_utils/pin_memory.py", line 25, in _pin_memory_loop
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/multiprocessing/queues.py", line 113, in get
    return _ForkingPickler.loads(res)
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/site-packages/torch/multiprocessing/reductions.py", line 282, in rebuild_storage_fd
    fd = df.detach()
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/multiprocessing/resource_sharer.py", line 57, in detach
    with _resource_sharer.get_connection(se

KeyboardInterrupt: 