In [2]:
from fastai.collab import *
from fastai.tabular.all import *
import numpy as np, pandas as pd 

In [7]:
path = untar_data(URLs.ML_100k)

In [15]:
ratings= pd.read_csv(path/'u.data',delimiter='\t',header = None,
                 names= ['user','movie','rating','timestamp'])

In [16]:
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [12]:
movies = pd.read_csv(path/'u.item',  delimiter='|', encoding='latin-1',
                     usecols=(0,1), names=('movie','title'), header=None)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [17]:
ratings = ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [18]:
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()

Unnamed: 0,user,title,rating
0,200,Star Trek: First Contact (1996),5
1,297,2001: A Space Odyssey (1968),4
2,670,Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963),3
3,280,Unforgiven (1992),4
4,844,"Walk in the Clouds, A (1995)",4
5,308,Under Siege (1992),3
6,886,I.Q. (1994),4
7,10,Heavenly Creatures (1994),4
8,621,Jimmy Hollywood (1994),4
9,30,Picture Perfect (1997),1


In [22]:
x,y = dls.one_batch()

In [23]:
x.shape,y.shape

(torch.Size([64, 2]), torch.Size([64, 1]))

In [24]:
class DotProductBias(Module):
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        self.user_factors = Embedding(n_users, n_factors)
        self.user_bias = Embedding(n_users, 1)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.movie_bias = Embedding(n_movies, 1)
        self.y_range = y_range
        
    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        res = (users * movies).sum(dim=1, keepdim=True)
        res += self.user_bias(x[:,0]) + self.movie_bias(x[:,1])
        return sigmoid_range(res, *self.y_range)

In [25]:
n_users = len(ratings.user)
n_movies = len(ratings.movie)
n_movies,n_users

(100000, 100000)

In [27]:
model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd = 0.1)

epoch,train_loss,valid_loss,time
0,0.951707,0.929127,04:27
1,0.849441,0.860672,04:28
2,0.737588,0.816098,04:29
3,0.589388,0.803592,04:28
4,0.474362,0.804625,04:28
