In [1]:
import pandas as pd
import os
import numpy as np
import time

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Data Process

In [2]:
user_size = 6040
item_size = 3760

In [3]:
DATA_DIR = './processed_data/ml-1m/'

In [4]:
train_df = pd.read_csv(os.path.join(DATA_DIR,'train.csv'), usecols = ['user','item'], header = 0)

In [5]:
movieId_set = set(np.arange(item_size))

u_dict = train_df.groupby('user')['item'].apply(list).to_dict()

In [6]:
class TrainDataset(Dataset):
    def __init__(self, users, items):
    
        self.users = users
        self.items = items

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        user = self.users[idx]
        item = self.items[idx]
        label = self.items[idx]
        
       
        sample = {'user': user, 'item': item, 'label':label}

        return sample

In [7]:
train_users = train_df['user'].values.tolist()
train_items = train_df['item'].values.tolist()

train_dataset = TrainDataset(train_users, train_items)

# Model

In [8]:
class Softmax(nn.Module):
    
    def __init__(self, user_size, item_size, embed_size):
        
        super().__init__()
        
        self.user_size = user_size
        self.item_size = item_size
        self.embed_size = embed_size
        
        self.embedding_user = nn.Embedding(user_size, embed_size)
        nn.init.xavier_uniform_(self.embedding_user.weight)
        
        self.embedding_item = nn.Embedding(item_size, embed_size)
        nn.init.xavier_uniform_(self.embedding_item.weight)
        

    def forward(self, user):
        
        user_embedding = self.embedding_user(user)

        item_embedding = self.embedding_item.weight

        user_vec = user_embedding.view([-1,1,embed_size])
        item_vec = item_embedding.view([-1,item_size,embed_size])
        
        dot = torch.sum(torch.mul(user_vec, item_vec), dim = 2)
        
        return dot

# Hyperparameters

In [9]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


train_batch_size = 128

embed_size = 16

learning_rate = 0.001

epochs = 20

# Training 

In [10]:
model = Softmax(user_size, item_size, embed_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [11]:
for epoch in range(epochs):

    train_dataloader = DataLoader(train_dataset,
                              batch_size = train_batch_size, 
                              shuffle = True,
                              num_workers = 4,
                              pin_memory = True
                                 )

    for idx, batch_data in enumerate(train_dataloader):
        
        user = batch_data['user'].long().to(device)
        label = batch_data['label'].long().to(device)

        model.zero_grad()
        
        prediction = model(user)
        
        loss = criterion(prediction, label)
        
        loss.backward()
        
        
        optimizer.step()

Exception in thread Thread-6:
Traceback (most recent call last):
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/site-packages/torch/utils/data/_utils/pin_memory.py", line 25, in _pin_memory_loop
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/multiprocessing/queues.py", line 113, in get
    return _ForkingPickler.loads(res)
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/site-packages/torch/multiprocessing/reductions.py", line 282, in rebuild_storage_fd
    fd = df.detach()
  File "/users/kent/dli12/anaconda3/envs/sage91/lib/python3.7/multiprocessing/resource_sharer.py", line 57, in detach
    with _resource_sharer.get_connection(se

KeyboardInterrupt: 

In [12]:
epoch

2