In [5]:
from fastai.collab import *
from fastai.tabular.all import *
import fastai

In [6]:
# `path` is used in the cells below as the directory holding 'u.data' and 'u.item'.
# NOTE(review): presumably untar_data downloads/extracts the archive behind URLs.ML_100k on first use — confirm.
path = untar_data(URLs.ML_100k)

In [7]:
# Parse u.data as tab-separated text with no header row; column names are supplied explicitly.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    header=None,
    names=['user','movie','rating','timestamp'],
)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [8]:
# Parse u.item as pipe-separated latin-1 text with no header row, keeping only
# the first two columns and naming them 'movie' and 'title'.
movies = pd.read_csv(
    path/'u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=(0,1),
    names=('movie','title'),
)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [9]:
# Attach titles to the ratings. No join key is given, so pandas joins on the
# column names the two frames share.
ratings = pd.merge(ratings, movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [10]:
# Build the collaborative-filtering DataLoaders from the merged frame,
# using the 'title' column as the item and a batch size of 64.
dls = CollabDataLoaders.from_df(
    ratings,
    item_name='title',
    bs=64,
)
dls.show_batch()

Unnamed: 0,user,title,rating
0,433,"City of Lost Children, The (1995)",5
1,846,Blink (1994),4
2,291,Fierce Creatures (1997),4
3,474,"Lost World: Jurassic Park, The (1997)",4
4,758,Sneakers (1992),3
5,54,Contact (1997),4
6,44,"Frighteners, The (1996)",4
7,655,Blink (1994),3
8,756,Swiss Family Robinson (1960),3
9,7,Dolores Claiborne (1994),5


In [11]:
# Embedding sizes derived from the DataLoaders. The recorded output below is
# [(944, 74), (1665, 102)]; the two pairs are later unpacked into CollabNN as
# its user and item sizes.
embs = get_emb_sz(dls)
embs

[(944, 74), (1665, 102)]

In [15]:
class CollabNN(Module):
    """Collaborative-filtering network: user and item embeddings are
    concatenated and passed through a one-hidden-layer MLP.

    Args:
        user_sz: sequence unpacked into `Embedding(...)` for users; its
            element 1 is used as the user embedding width.
        item_sz: same as `user_sz`, for items.
        y_range: bounds forwarded to `sigmoid_range` on the network output.
        n_act: number of units in the hidden linear layer.
    """

    def __init__(self, user_sz, item_sz, y_range=(0,5.5), n_act=100):
        # NOTE(review): there is no explicit super().__init__() call here —
        # presumably fastai's `Module` takes care of it; confirm.
        self.user_factors = Embedding(*user_sz)
        self.item_factors = Embedding(*item_sz)

        # The MLP input is the two embedding vectors laid side by side.
        concat_width = user_sz[1] + item_sz[1]
        hidden = nn.Linear(concat_width, n_act)
        head = nn.Linear(n_act, 1)
        self.layers = nn.Sequential(hidden, nn.ReLU(), head)

        self.y_range = y_range

    def forward(self, x):
        """Return the range-limited prediction for each row of `x`.

        Column 0 of `x` indexes the user embedding and column 1 indexes the
        item embedding.
        """
        user_idx, item_idx = x[:,0], x[:,1]
        user_vec = self.user_factors(user_idx)
        item_vec = self.item_factors(item_idx)
        raw = self.layers(torch.cat((user_vec, item_vec), dim=1))
        return sigmoid_range(raw, *self.y_range)

In [16]:
# `embs` is unpacked positionally: its first entry becomes user_sz and its
# second item_sz; y_range and n_act keep their defaults.
model = CollabNN(*embs)

In [17]:
# Train the hand-written CollabNN with MSELossFlat as the loss:
# 5 epochs of one-cycle training, peak learning rate 5e-3, weight decay 0.01.
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.01)

epoch     train_loss  valid_loss  time    
0         0.913534    0.948290    00:10                                                                          
1         0.859773    0.909502    00:11                                                                          
2         0.848647    0.886255    00:10                                                                          
3         0.819966    0.866426    00:11                                                                          
4         0.735326    0.870974    00:11                                                                          


In [19]:
## Building a comparable NN with fastai's collab_learner (use_nn=True).
## Note: this is not an exact copy of CollabNN above. With layers=[100,50] the
## printed model below has two hidden layers (100 and 50 units), each followed
## by BatchNorm, whereas CollabNN has a single n_act-unit hidden layer.

learn = collab_learner(dls, use_nn = True, y_range=(0,5.5), layers=[100,50])
learn.fit_one_cycle(5,5e-3, wd =0.01)

epoch     train_loss  valid_loss  time    
0         0.972964    0.974760    00:13                                                                          
1         0.880503    0.923998    00:12                                                                          
2         0.882893    0.876683    00:12                                                                          
3         0.788322    0.858963    00:12                                                                          
4         0.763199    0.863304    00:11                                                                          


In [21]:
# Display the module tree of the model that collab_learner constructed.
learn.model

EmbeddingNN(
  (embeds): ModuleList(
    (0): Embedding(944, 74)
    (1): Embedding(1665, 102)
  )
  (emb_drop): Dropout(p=0.0, inplace=False)
  (bn_cont): BatchNorm1d(0, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): LinBnDrop(
      (0): Linear(in_features=176, out_features=100, bias=False)
      (1): ReLU(inplace=True)
      (2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): LinBnDrop(
      (0): Linear(in_features=100, out_features=50, bias=False)
      (1): ReLU(inplace=True)
      (2): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): LinBnDrop(
      (0): Linear(in_features=50, out_features=1, bias=True)
    )
    (3): fastai.layers.SigmoidRange(low=0, high=5.5)
  )
)