In [1]:
from torchrecsys.datasets import InteractionsDataset, SequenceDataset
from torchrecsys.models import BaseModel, NeuralCF
from torchrecsys.task import Ranking
from torchrecsys.layers import BruteForceLayer
from torchrecsys.utils import feature_catalog
import pandas as pd

from torch.utils.data import DataLoader

In [2]:
# Load the MovieLens interactions and movie metadata, renaming the id columns
# to the `user` / `item` names used throughout the rest of this notebook.
ratings = (
    pd.read_csv("ml-100k/ratings.csv")
    .rename(columns={"userId": "user", "movieId": "item"})
)
movies = (
    pd.read_csv("ml-100k/movies.csv")
    .rename(columns={"movieId": "item"})
)
# One row per distinct user id that appears in the interactions.
users = ratings[["user"]].drop_duplicates()

# Show the raw metadata before the text columns are replaced by integer codes.
print(movies)

# `pd.factorize` returns `(codes, uniques)`. Only the codes are stored back in
# the frame; `uniques` ends up holding the distinct `genres` labels, since the
# labels for `title` are not kept.
movies["title"] = pd.factorize(movies["title"])[0]
movies["genres"], uniques = pd.factorize(movies["genres"])

# Mark both integer-coded columns as categorical in a single cast.
movies = movies.astype({"title": "category", "genres": "category"})

        item                                      title  \
0          1                           Toy Story (1995)   
1          2                             Jumanji (1995)   
2          3                    Grumpier Old Men (1995)   
3          4                   Waiting to Exhale (1995)   
4          5         Father of the Bride Part II (1995)   
...      ...                                        ...   
9737  193581  Black Butler: Book of the Atlantic (2017)   
9738  193583               No Game No Life: Zero (2017)   
9739  193585                               Flint (2017)   
9740  193587        Bungo Stray Dogs: Dead Apple (2018)   
9741  193609        Andrew Dice Clay: Dice Rules (1991)   

                                           genres  
0     Adventure|Animation|Children|Comedy|Fantasy  
1                      Adventure|Children|Fantasy  
2                                  Comedy|Romance  
3                            Comedy|Drama|Romance  
4                              

In [3]:
# Wrap the ratings, movies and users frames in an InteractionsDataset and
# serve it in mini-batches of five samples.
dataset = InteractionsDataset(ratings, movies, users)
dataloader = DataLoader(dataset=dataset, batch_size=5)

In [4]:
# Grab a single batch to experiment with. `next(iter(...))` raises
# StopIteration if the loader yields nothing, instead of silently leaving `u`
# undefined (or stale from an earlier run) as a `for ...: break` loop would.
u = next(iter(dataloader))

In [5]:
# Inspect the schema entry for the "interactions" features. The recorded output
# shows one [dtype, int] pair per feature; the integers are presumably the
# feature cardinalities (users, items) -- TODO confirm against torchrecsys.
dataset.data_schema["interactions"]

[['int64', 610], ['int64', 9724]]

In [6]:
# Look at the first sample. The recorded output is a 4-tuple of arrays; which
# element is which (interactions / context / item features / user features) is
# not visible here -- verify against InteractionsDataset.__getitem__.
dataset[0]

(array([1, 1]), array([], dtype=float64), array([1, 0, 0]), array([1]))

In [7]:
# Instantiate the NeuralCF model from the dataset's schema.
model = NeuralCF(dataset.data_schema)

In [8]:
# Forward pass on the batch `u` fetched from the dataloader. The recorded
# output is a (5, 1) tensor whose grad_fn is a sigmoid, i.e. one value in
# (0, 1) per sample in the batch.
model(u)

tensor([[0.4265],
        [0.3975],
        [0.5154],
        [0.5286],
        [0.4221]], grad_fn=<SigmoidBackward0>)

In [9]:
# Fetch one batch again and display it. `next(iter(...))` raises StopIteration
# on a loader that yields nothing, rather than leaving `u` bound to whatever an
# earlier cell assigned, which is what a `for ...: break` loop would do.
u = next(iter(dataloader))

# The recorded output shows the batch as a list of four tensors, each with five
# rows (one per sample).
u

[tensor([[ 1,  1],
         [ 1,  3],
         [ 1,  6],
         [ 1, 47],
         [ 1, 50]]),
 tensor([], size=(5, 0), dtype=torch.float64),
 tensor([[1, 0, 0],
         [1, 0, 0],
         [1, 0, 0],
         [1, 0, 0],
         [1, 0, 0]]),
 tensor([[ 1],
         [ 3],
         [ 6],
         [47],
         [50]])]

In [10]:
# Display the interactions frame (pandas truncates the middle rows). The
# recorded output shows `user`, `item`, `rating` and `timestamp` columns.
ratings

Unnamed: 0,user,item,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352
