In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd

import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the raw interaction table. Later cells index this frame by its
# 'user_id', 'item_id' and 'rating' columns.
data = pd.read_csv('data.csv')
# Trailing expression so the notebook renders the first rows as a preview.
data.head()

In [None]:
class UserLocationDateset(Dataset):
    """Map-style dataset yielding ``(user_index, item_index, rating)`` samples.

    Raw ``user_id`` / ``item_id`` values are mapped to contiguous integer
    indices (in order of first appearance) so they can address embedding rows.

    Attributes:
        data: The frame passed to the constructor (kept for reference).
        user_ids / item_ids: Unique raw ids, in order of first appearance.
        user2idx / item2idx: Raw id -> contiguous index lookup tables.
        n_users / n_items: Number of distinct users / items.

    Note:
        The per-row indices and ratings are encoded once at construction
        time; mutating ``data`` afterwards is not reflected in the samples.
    """

    def __init__(self, data):
        """Build the id lookup tables and pre-encode every row of ``data``.

        Args:
            data: pandas DataFrame with ``user_id``, ``item_id`` and
                ``rating`` columns.
        """
        self.data = data
        self.user_ids = data['user_id'].unique()
        self.user2idx = {o: i for i, o in enumerate(self.user_ids)}
        self.item_ids = data['item_id'].unique()
        self.item2idx = {o: i for i, o in enumerate(self.item_ids)}
        self.n_users = len(self.user_ids)
        self.n_items = len(self.item_ids)

        # Encode the whole table once instead of materialising a row Series
        # (three times) on every __getitem__ call. Working column-wise also
        # avoids the float upcast that row access applies to integer ids when
        # the frame mixes int and float columns.
        self._user_idx = data['user_id'].map(self.user2idx).to_numpy(dtype=np.int64)
        self._item_idx = data['item_id'].map(self.item2idx).to_numpy(dtype=np.int64)
        # float32 so the default DataLoader collation yields targets whose
        # dtype matches the float32 output of an nn.Embedding-based model.
        self._ratings = data['rating'].to_numpy(dtype=np.float32)

    def __len__(self):
        """Return the number of interaction rows."""
        return len(self._ratings)

    def __getitem__(self, idx):
        """Return ``(user_index, item_index, rating)`` for integer row ``idx``.

        The indices are plain Python ints; the rating is a ``numpy.float32``.
        """
        return int(self._user_idx[idx]), int(self._item_idx[idx]), self._ratings[idx]

# Wrap the loaded frame so a DataLoader can draw
# (user index, item index, rating) samples from it.
dset = UserLocationDateset(data)

In [None]:
class CollabFilter(nn.Module):
    """Dot-product matrix-factorisation model.

    Each user and each item owns a learned vector of ``n_factors`` values;
    the score of a (user, item) pair is the dot product of the two vectors.
    """

    def __init__(self, n_users, n_items, n_factors=50):
        """Create one embedding table for users and one for items."""
        super().__init__()
        self.user_factors = nn.Embedding(num_embeddings=n_users, embedding_dim=n_factors)
        self.item_factors = nn.Embedding(num_embeddings=n_items, embedding_dim=n_factors)

    def forward(self, x):
        """Score every row of ``x``.

        Args:
            x: 2-D integer tensor; column 0 holds user indices and column 1
                holds item indices.

        Returns:
            1-D tensor with one dot-product score per row of ``x``.
        """
        user_idx, item_idx = x[:, 0], x[:, 1]
        user_vecs = self.user_factors(user_idx)
        item_vecs = self.item_factors(item_idx)
        # Element-wise product reduced over the factor axis == row-wise dot product.
        return user_vecs.mul(item_vecs).sum(dim=1)

def get_model(data, n_factors):
    """Build a ``CollabFilter`` sized to the users and items found in ``data``.

    Args:
        data: pandas DataFrame of interactions. The ``user_id`` / ``item_id``
            columns are used when present (the names the rest of this
            notebook reads); otherwise the legacy ``userId`` / ``movie_idx``
            columns are used.
        n_factors: Number of latent factors per embedding vector.

    Returns:
        A freshly initialised ``CollabFilter``.

    Raises:
        KeyError: If neither the preferred nor the legacy column exists.
    """
    def _n_unique(preferred, legacy):
        # Count distinct ids in whichever of the two column names exists.
        column = preferred if preferred in data.columns else legacy
        # dropna=False keeps the count equal to len(Series.unique()), which
        # is how the dataset class sizes its index mappings.
        return data[column].nunique(dropna=False)

    n_users = _n_unique('user_id', 'userId')
    n_items = _n_unique('item_id', 'movie_idx')
    return CollabFilter(n_users, n_items, n_factors)

# Instantiate the model with 50 latent factors per embedding vector.
net = get_model(data, 50)

In [None]:
# Training configuration: mini-batch SGD with a step-wise learning-rate decay.
bs = 64
train_dl = DataLoader(dset, batch_size=bs, shuffle=True)
opt = torch.optim.SGD(net.parameters(), lr=1e-1, weight_decay=1e-5)
# Multiply the learning rate by 0.1 every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=3, gamma=0.1)
n_epochs = 10

for epoch in range(n_epochs):
    net.train()
    running_loss, n_seen = 0.0, 0
    # The dataset yields three fields per sample, so each collated batch is
    # (user indices, item indices, ratings) rather than an (x, y) pair.
    for users, items, ratings in train_dl:
        # The model reads users from column 0 and items from column 1.
        xb = torch.stack([users, items], dim=1).long()
        # Match the float32 model output; collated ratings may be float64.
        yb = ratings.float()
        # Clear gradients first so nothing left over from an earlier cell
        # execution leaks into this step.
        opt.zero_grad()
        loss = F.mse_loss(net(xb), yb)
        loss.backward()
        opt.step()
        running_loss += loss.item() * len(yb)
        n_seen += len(yb)
    lr_scheduler.step()
    # Report the sample-weighted mean over the epoch, not just the last batch.
    print(f'epoch {epoch + 1}/{n_epochs}: train mse = {running_loss / max(n_seen, 1):.4f}')

def get_predictions(model, data):
    """Score (user, item) pairs with ``model`` and return a NumPy array.

    Args:
        model: Callable module taking a 2-D long tensor whose column 0 holds
            user indices and column 1 holds item indices. It is switched to
            eval mode and left in that mode.
        data: 3-element iterable ``(users, items, ratings)``. ``users`` and
            ``items`` are equal-length sequences, arrays or tensors of
            integer indices; ``ratings`` is accepted for symmetry with the
            dataset's sample layout but is not used.

    Returns:
        ``numpy.ndarray`` holding the model output for each pair.
    """
    model.eval()
    users, items, _ratings = data
    # as_tensor accepts lists, NumPy arrays and tensors of any integer dtype,
    # whereas the legacy LongTensor constructor rejects tensors of another type.
    users = torch.as_tensor(users, dtype=torch.long)
    items = torch.as_tensor(items, dtype=torch.long)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        predictions = model(torch.stack([users, items], dim=1))
    # .cpu() is a no-op for CPU tensors and is required before .numpy() otherwise.
    return predictions.detach().cpu().numpy()

def get_top_k_items(predictions, k=10):
    """Return the indices of the ``k`` highest scores, best first.

    Args:
        predictions: 1-D array-like of scores.
        k: Number of indices to return. Values larger than the number of
            scores are clamped to it; ``k <= 0`` yields an empty result.

    Returns:
        1-D integer ``numpy.ndarray`` of indices into ``predictions``,
        ordered by descending score.
    """
    predictions = np.asarray(predictions)
    # argpartition raises when k exceeds the input length, so clamp first.
    k = min(k, predictions.size)
    if k <= 0:
        # Guard explicitly: the slice [-0:] would select every element.
        return np.empty(0, dtype=np.intp)
    # Partial selection picks the k largest in O(n); only those k are sorted.
    top_k = np.argpartition(predictions, -k)[-k:]
    return top_k[np.argsort(predictions[top_k])][::-1]