In [None]:
import torch
import torch.nn as nn
import pandas as pd
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

In [None]:
# Read only the three columns used below from the ratings file,
# then preview the first ten rows.
rating_columns = ['userId', 'movieId', 'rating']
df_ratings = pd.read_csv('data/ratings_small.csv', usecols=rating_columns)
df_ratings.head(10)

In [None]:
# Re-index BOTH id columns to contiguous 0-based codes.
# The embedding tables are sized with nunique(), and nn.Embedding(n, ...) only
# accepts indices in [0, n - 1]. Any raw id that is >= the number of distinct
# ids would therefore fail the lookup with an out-of-range index error.
# pd.factorize assigns codes in order of first appearance, so re-running this
# cell on already-factorized columns leaves them unchanged.
df_ratings['userId'] = pd.factorize(df_ratings.userId)[0]
df_ratings['movieId'] = pd.factorize(df_ratings.movieId)[0]
df_ratings.head(10)

Count the unique users and unique movies; these counts set the sizes of the two embedding tables.

In [None]:
# Number of distinct ids in each column; used as the row counts of the
# user and movie embedding tables.
n_users = df_ratings['userId'].nunique()
n_movies = df_ratings['movieId'].nunique()

Split the ratings into a training set and a held-out validation set.

In [None]:
# Hold out 20% of the ratings for validation. A fixed random_state makes the
# shuffle (and therefore every downstream number) identical on each
# Restart & Run All.
train_df, valid_df = train_test_split(df_ratings, test_size=0.2, random_state=42)

# Size bookkeeping, derived from the actual split so it cannot disagree with it.
total_size = len(df_ratings)
train_size = len(train_df)
test_size = len(valid_df)

In [None]:
class CF_Net(nn.Module):
    """Collaborative-filtering model that scores a (user, movie) pair.

    Each user and each movie owns a learned vector of length ``n_factors``;
    the predicted rating is the dot product of the two vectors.

    Args:
        n_users: number of rows in the user embedding table.
        n_movies: number of rows in the movie embedding table.
        n_factors: length of every embedding vector (default 100).
    """

    def __init__(self, n_users, n_movies, n_factors=100) -> None:
        super().__init__()
        # sparse=True makes the embedding gradients sparse tensors.
        self.user_emb = nn.Embedding(num_embeddings=n_users, embedding_dim=n_factors, sparse=True)
        self.movie_emb = nn.Embedding(num_embeddings=n_movies, embedding_dim=n_factors, sparse=True)

        # Overwrite the default initialisation with values drawn from U(0, 0.5).
        # Both tables are created before either is re-initialised, so the
        # random-number stream is consumed in a fixed order.
        for table in (self.user_emb, self.movie_emb):
            nn.init.uniform_(table.weight, 0, 0.5)

    def forward(self, u, m):
        """Return one score per row: the sum over dim 1 of user_vec * movie_vec.

        Args:
            u: integer tensor of user indices.
            m: integer tensor of movie indices, aligned element-wise with ``u``.
        """
        user_vecs = self.user_emb(u)
        movie_vecs = self.movie_emb(m)
        return torch.sum(user_vecs * movie_vecs, dim=1)



In [None]:
# Seed PyTorch before building the model: the embedding tables are filled with
# random values, so without a fixed seed every run starts from different weights.
torch.manual_seed(42)
model = CF_Net(n_users, n_movies)

In [None]:
def training(model, epochs, lr=0.01, data=None):
    """Fit ``model`` with full-batch gradient descent on a ratings table.

    Every epoch is a single optimizer step computed on all rows at once.
    The mean-squared-error training loss is printed after each step.

    Args:
        model: module called as ``model(users, movies)`` that returns one
            predicted rating per row.
        epochs: number of optimizer steps to take.
        lr: learning rate passed to ``torch.optim.SGD``.
        data: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.
            When omitted, the notebook-level ``train_df`` is used.

    Returns:
        None.
    """
    if data is None:
        data = train_df

    # The inputs do not change between epochs, so build the tensors once
    # instead of converting the DataFrame columns on every iteration.
    users = torch.tensor(data["userId"].to_numpy(), dtype=torch.long)
    movies = torch.tensor(data["movieId"].to_numpy(), dtype=torch.long)
    ratings = torch.tensor(data["rating"].to_numpy(), dtype=torch.float32)

    loss_func = F.mse_loss
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    model.train()
    for _ in range(epochs):
        # Clear gradients left over from the previous step before accumulating new ones.
        optimizer.zero_grad()
        predictions = model(users, movies)
        loss = loss_func(predictions, ratings)
        loss.backward()
        optimizer.step()
        print(loss.item())
