In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader

from torchrecsys import Trainer
from torchrecsys.datasets import InteractionsDataset, SequenceDataset
from torchrecsys.external_datasets import Movielens_1M
from torchrecsys.layers import BruteForceLayer
from torchrecsys.models import BaseModel, NeuralCF
from torchrecsys.task import Ranking
from torchrecsys.utils import feature_catalog

In [2]:
# Instantiate the MovieLens-1M helper and unpack the three tables it returns.
# NOTE(review): whether load() downloads or reads a local copy is not visible
# here -- check torchrecsys.external_datasets before running offline.
data = Movielens_1M()
ratings, users, movies = data.load()

In [3]:
# Peek at the raw users table before any encoding is applied
# (pandas truncates the display to the first and last rows).
users

Unnamed: 0,user_id,gender,age,occupation,zip
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,02460
4,5,M,25,20,55455
...,...,...,...,...,...
6035,6036,F,25,15,32603
6036,6037,F,45,1,76006
6037,6038,F,56,1,14706
6038,6039,F,45,0,01060


In [4]:
def encode_categoricals(frame, columns):
    """Integer-encode the given columns of ``frame`` in place.

    Each listed column is replaced by the integer codes returned by
    ``pd.factorize`` and then stored with the pandas ``category`` dtype.
    Columns that are not listed are left untouched.

    Args:
        frame: DataFrame to modify in place.
        columns: Names of the columns to encode.
    """
    for column in columns:
        # pd.factorize returns (codes, uniques); only the codes are kept.
        frame[column] = pd.factorize(frame[column])[0]
        frame[column] = frame[column].astype("category")


# Preprocess users: categorical columns -> integer codes with category dtype.
encode_categoricals(users, ["gender", "occupation", "zip"])

# Preprocess movies the same way.
encode_categoricals(movies, ["title", "genres"])

In [5]:
# Combine the interactions with the user and item tables into one dataset.
# NOTE(review): item_id="movie_id" presumably names the item-id column in
# `ratings`/`movies` -- confirm against InteractionsDataset.
dataset = InteractionsDataset(ratings, users, movies, item_id="movie_id")
# A batch of 5 keeps the batch inspected further down readable.
# NOTE(review): this same loader is later handed to trainer.fit; such a small
# batch means a very large number of steps per epoch -- consider a separate,
# larger-batch loader for training.
train = DataLoader(dataset, batch_size=5)

In [6]:
# Grab the first batch from the loader so later cells can inspect its layout.
# next(iter(...)) raises StopIteration on an empty loader instead of silently
# leaving `u` undefined.
u = next(iter(train))

In [7]:
# Show the schema the dataset exposes; the model is constructed from this
# object further down.
dataset.data_schema

{'interactions': [6040, 3952],
 'context': [<torchrecsys.datasets.datasets.feature at 0x7f6580dd64f0>],
 'user_features': [<torchrecsys.datasets.datasets.feature at 0x7f6580dd6910>,
  <torchrecsys.datasets.datasets.feature at 0x7f6580dd6a00>,
  <torchrecsys.datasets.datasets.feature at 0x7f6580dd6c40>,
  <torchrecsys.datasets.datasets.feature at 0x7f6580dd6d30>,
  <torchrecsys.datasets.datasets.feature at 0x7f6580dd6eb0>],
 'item_features': [<torchrecsys.datasets.datasets.feature at 0x7f6580dd6580>,
  <torchrecsys.datasets.datasets.feature at 0x7f6580dd6670>,
  <torchrecsys.datasets.datasets.feature at 0x7f6580dd67f0>],
 'objetive': 'notbinary?'}

In [8]:
# Fetch a single sample by index to see what one training example looks like.
# NOTE(review): the four returned arrays presumably line up with the schema
# keys (interaction row, context, user features, item features) -- confirm.
dataset[0]

(array([        1,      1193,         5, 978300760]),
 array([978300760]),
 array([1, 0, 1, 0, 0]),
 array([1193, 1176,   12]))

In [9]:

# Build the NeuralCF model from the dataset's schema.
model = NeuralCF(dataset.data_schema)

In [10]:
# Inspect the batch captured from the loader earlier: a list of four tensors,
# each with a leading dimension equal to the batch size (5).
u

[tensor([[        1,      1193,         5, 978300760],
         [        1,       661,         3, 978302109],
         [        1,       914,         3, 978301968],
         [        1,      3408,         4, 978300275],
         [        1,      2355,         5, 978824291]]),
 tensor([[978300760],
         [978302109],
         [978301968],
         [978300275],
         [978824291]]),
 tensor([[1, 0, 1, 0, 0],
         [1, 0, 1, 0, 0],
         [1, 0, 1, 0, 0],
         [1, 0, 1, 0, 0],
         [1, 0, 1, 0, 0]]),
 tensor([[1193, 1176,   12],
         [ 661,  655,   83],
         [ 914,  902,  138],
         [3408, 3339,   12],
         [2355, 2286,    0]])]

In [11]:
# First column of the first tensor in the batch.
# NOTE(review): presumably the user ids -- confirm against InteractionsDataset.
u[0][:, 0]

tensor([1, 1, 1, 1, 1])

In [12]:
# Check which loss function the model was configured with.
model.criterion

MSELoss()

In [13]:
# Number of items the model was sized for; the value shown (3952) equals the
# second entry of data_schema['interactions'].
model.n_items

3952

In [15]:
# Fit the model with a default-configured Trainer; only a training loader is
# passed, so no validation loop runs.
# NOTE(review): the saved output reports "GPU available: True, used: False"
# and ends with a KeyboardInterrupt, i.e. this run did not use the GPU and was
# stopped before completing -- configure the accelerator and re-run before
# relying on the trained weights.
trainer = Trainer()
trainer.fit(model, train)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn(
  rank_zero_warn("You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.")

  | Name               | Type      | Params
-------------------------------------------------
0 | user_embedding     | Embedding | 386 K 
1 | item_embedding     | Embedding | 252 K 
2 | linear             | Linear    | 33.3 K
3 | final_linear       | Linear    | 513   
4 | feature1_embedding | Embedding | 31.1 K
5 | feature2_embedding | Embedding | 2.4 K 
6 | criterion          | MSELoss   | 0     
-------------------------------------------------
706 K     Trainable params
0         Non-trainable params
706 K     Total params
2.828     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
# Scratch check of how squeeze reshapes a (5, 1) tensor.
x = torch.zeros(5, 1)
# Pass `dim` explicitly: a bare squeeze() removes every size-1 axis, so it
# would also drop the leading axis whenever that axis has size 1
# (e.g. a batch holding a single row).
torch.squeeze(x, dim=1).shape