We now define a deep-learning model as a PyTorch module. The user and item embeddings are concatenated and fed into a single fully-connected hidden layer (100 units by default) with a ReLU activation; a final linear layer then produces one output, which is passed through a sigmoid scaled to the range given by `y_range`.

In [1]:
from fastai.collab import *
from fastai.tabular.all import *

class CollabNN(Module):
    """Collaborative-filtering model: two embeddings feeding a small MLP.

    The user and item embedding vectors are concatenated and passed through
    Linear -> ReLU -> Linear (one output); that output is then handed to
    `sigmoid_range` together with the bounds stored in `y_range`.

    Args:
        user_sz: Sequence unpacked into `Embedding(...)` for the user table;
            element `[1]` is used as the user embedding width.
        item_sz: Same as `user_sz`, for the item table.
        y_range: Bounds forwarded to `sigmoid_range` in `forward`.
        n_act: Number of units in the hidden linear layer.
    """

    # NOTE(review): there is no explicit `super().__init__()` call; presumably
    # the `Module` base class takes care of it -- confirm before switching the
    # base class to plain `nn.Module`.
    def __init__(self, user_sz, item_sz, y_range=(0, 5.5), n_act=100):
        self.user_factors = Embedding(*user_sz)
        self.item_factors = Embedding(*item_sz)

        # The hidden layer sees both embedding vectors side by side.
        concat_width = user_sz[1] + item_sz[1]
        hidden = nn.Linear(concat_width, n_act)
        head = nn.Linear(n_act, 1)
        self.layers = nn.Sequential(hidden, nn.ReLU(), head)

        self.y_range = y_range

    def forward(self, x):
        """Return the model output for a batch of (user, item) index pairs.

        Column 0 of `x` indexes the user embedding and column 1 the item
        embedding.
        """
        user_vecs = self.user_factors(x[:, 0])
        item_vecs = self.item_factors(x[:, 1])
        # Join along the feature axis so each row is [user_vec | item_vec].
        joined = torch.cat((user_vecs, item_vecs), dim=1)
        raw = self.layers(joined)
        return sigmoid_range(raw, *self.y_range)

We now prepare the data as in previous notebooks.

In [2]:
# Dataset preparation: fetch the ML_100k archive, then join ratings with titles.
path = untar_data(URLs.ML_100k)

# u.item is pipe-separated and latin-1 encoded; only its first two columns
# (movie id and title) are read.
movies = pd.read_csv(
    path/'u.item', sep='|', encoding='latin-1', header=None,
    usecols=(0, 1), names=('movie_id', 'title'),
)

# u.data is tab-separated with no header row. The merge is called without
# `on=`, so it joins on the column name the two frames share ('movie_id').
ratings = pd.read_csv(
    path/'u.data', sep='\t', header=None,
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
).merge(movies)

# Use the movie title (rather than the numeric id) as the item column.
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)

We then instantiate the model with the embedding sizes estimated by `get_emb_sz`.

In [3]:
# Get embedding sizes derived from `dls`. The result is unpacked positionally
# below, so its first two entries become `user_sz` and `item_sz`.
# NOTE(review): presumably exactly one size entry each for user and item, in
# that order -- confirm; extra entries would land in `y_range` / `n_act`.
embs = get_emb_sz(dls)
model = CollabNN(*embs)

Finally, we train the model.

In [4]:
# Bundle the data loaders and model into a Learner with `MSELossFlat` as the
# loss, then run 5 epochs of one-cycle training (max LR 5e-3, weight decay 0.01).
learn = Learner(dls=dls, model=model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.01)

epoch,train_loss,valid_loss,time
0,0.907115,0.969053,00:06
1,0.865935,0.932627,00:06
2,0.802093,0.902687,00:06
3,0.797853,0.884469,00:06
4,0.772362,0.882913,00:06
