In [1]:
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile
import pandas as pd
from sklearn.model_selection import train_test_split

import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torchrecsys.datasets import InteractionsDataset, SequenceDataset
from torchrecsys.models import BaseModel
from torchrecsys.task import Ranking
from torchrecsys.layers import BruteForceLayer
import torchrecsys as trs

# Data

In [2]:
# data_path = "https://files.grouplens.org/datasets/movielens/ml-25m.zip"
data_path = "https://files.grouplens.org/datasets/movielens/ml-1m.zip"

In [3]:
with urlopen(data_path) as zipresp:
    with ZipFile(BytesIO(zipresp.read())) as zfile:
        zfile.extractall('data/')

In [4]:
ratings = pd.read_csv("data/ml-1m/ratings.dat", sep="::", names=["userId","movieId","rating","timestamp"])
n_users = ratings.userId.max()+1
n_movies = ratings.movieId.max()+1

  return func(*args, **kwargs)


In [5]:
ds =  ratings[["userId","movieId"]].values
train_ds, val_ds = train_test_split(ds, test_size=0.33, random_state=42)

window_length = 5
train_ds = InteractionsDataset(train_ds)
val_ds = InteractionsDataset(val_ds)

train_dataloader = torch.utils.data.DataLoader(train_ds, batch_size=4096, shuffle=True, num_workers=6)
val_dataloader = torch.utils.data.DataLoader(val_ds, batch_size=256, shuffle=False, num_workers=2)

In [6]:
candidates = ratings.movieId.unique()

# Model

In [7]:
class userModel(torch.nn.Module):
    def __init__(self, n_users, embedding_size=128):
        super().__init__()
        self.user_embeddings = torch.nn.Embedding(n_users, embedding_size)
        
    def forward(self, x):
        user = self.user_embeddings(x)
        
        return user
    
class movieModel(torch.nn.Module):
    def __init__(self, n_movies, embedding_size=128):
        super().__init__()
        self.movie_embeddings = torch.nn.Embedding(n_movies, embedding_size)
        
    def forward(self, x):
        movie = self.movie_embeddings(x)
        
        return movie

class retrievalModel(trs.BaseModel):
    def __init__(self, user_model, movie_model):
        super().__init__()
        self.user_model = user_model
        self.movie_model = movie_model
        self.task = Ranking(metrics -)
        self.optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)

    def forward(self, x):
        # Query, the session data
        user, movie = x
        
        query = self.user_model(user)
        candidate = self.movie_model(movie)

        # The task computes the loss and the metrics.
        return query, candidate
    


model = retrievalModel(user_model = userModel(n_users), movie_model=movieModel(n_movies))

SyntaxError: invalid syntax (<ipython-input-7-52d2b4f157f6>, line 26)

In [None]:
# training
trainer = pl.Trainer(gpus=1, precision=32, callbacks=[EarlyStopping(monitor="val_loss", patience=3)])
trainer.fit(model, train_dataloader, val_dataloader)