<a href="https://colab.research.google.com/github/bipinKrishnan/fastai_course/blob/master/fastai_collab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install fastai --upgrade

In [114]:
import fastai.collab as c
import fastai.tabular.all as t
import torch

import pandas as pd

In [5]:
path = c.untar_data(c.URLs.ML_100k)
path.ls()

(#23) [Path('/root/.fastai/data/ml-100k/ub.base'),Path('/root/.fastai/data/ml-100k/u5.base'),Path('/root/.fastai/data/ml-100k/u5.test'),Path('/root/.fastai/data/ml-100k/u.info'),Path('/root/.fastai/data/ml-100k/u2.base'),Path('/root/.fastai/data/ml-100k/u3.base'),Path('/root/.fastai/data/ml-100k/ua.test'),Path('/root/.fastai/data/ml-100k/u4.test'),Path('/root/.fastai/data/ml-100k/mku.sh'),Path('/root/.fastai/data/ml-100k/u.genre')...]

In [15]:
df = pd.read_csv(path/'u.data', delimiter='\t', names=['user', 'movie', 'rating', 'timestamp'])
df.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [17]:
df.info(), df.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype
---  ------     --------------   -----
 0   user       100000 non-null  int64
 1   movie      100000 non-null  int64
 2   rating     100000 non-null  int64
 3   timestamp  100000 non-null  int64
dtypes: int64(4)
memory usage: 3.1 MB


(None, (100000, 4))

In [28]:
movies = pd.read_csv(path/'u.item', delimiter='|', names=['movie', 'title'], encoding='latin-1', usecols=(0, 1))
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [29]:
df = df.merge(movies)
df.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [30]:
dls = c.CollabDataLoaders.from_df(
    df,
    user_name='user',
    item_name='title',
    rating_name='rating',
    bs=32
)

dls.show_batch()

Unnamed: 0,user,title,rating
0,768,Trainspotting (1996),2
1,31,Hoodlum (1997),4
2,711,Princess Caraboo (1994),3
3,456,Indiana Jones and the Last Crusade (1989),3
4,889,Back to the Future (1985),4
5,99,Moll Flanders (1996),3
6,894,Paradise Road (1997),3
7,682,Grumpier Old Men (1995),3
8,136,"Postino, Il (1994)",5
9,291,Four Rooms (1995),3


In [32]:
dls.classes

{'title': (#1665) ['#na#',"'Til There Was You (1997)",'1-900 (1994)','101 Dalmatians (1996)','12 Angry Men (1957)','187 (1997)','2 Days in the Valley (1996)','20,000 Leagues Under the Sea (1954)','2001: A Space Odyssey (1968)','3 Ninjas: High Noon At Mega Mountain (1998)'...],
 'user': (#944) ['#na#',1,2,3,4,5,6,7,8,9...]}

In [35]:
n_users = len(dls.classes['user'])
n_mov = len(dls.classes['title'])
n_factors = 5

user_factors = torch.randn(n_users, n_factors)
mov_factors = torch.randn(n_mov, n_factors)

In [36]:
user_factors.shape, mov_factors.shape

(torch.Size([944, 5]), torch.Size([1665, 5]))

In [39]:
user_factors.t()@t.one_hot(3, n_users).float() 

tensor([ 0.5634,  0.9709, -1.9626, -1.1323, -1.1149])

In [40]:
user_factors[3]

tensor([ 0.5634,  0.9709, -1.9626, -1.1323, -1.1149])

#Base line model

In [41]:
class DotProduct(torch.nn.Module):
  def __init__(self, n_users, n_movies, n_factors):
    super().__init__()
    self.user_factors = c.Embedding(n_users, n_factors)
    self.movie_factors = c.Embedding(n_movies, n_factors)

  def forward(self, x):
    users = self.user_factors(x[:, 0])
    movies = self.movie_factors(x[:, 1])

    return (users*movies).sum(dim=1)

In [43]:
dls.one_batch()[0].shape, dls.one_batch()[1].shape

(torch.Size([32, 2]), torch.Size([32, 1]))

In [46]:
model = DotProduct(n_users, n_mov, 50)
learn = c.Learner(dls, model, c.MSELossFlat())

In [47]:
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,1.337588,1.2967,00:15
1,1.166774,1.152099,00:15
2,0.98063,1.0084,00:15
3,0.813815,0.883063,00:15
4,0.771524,0.860081,00:16


# Adding sigmoid range

In [52]:
class DotProduct(torch.nn.Module):
  def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
    super().__init__()
    self.user_factors = c.Embedding(n_users, n_factors)
    self.movie_factors = c.Embedding(n_movies, n_factors)
    self.y_range = y_range

  def forward(self, x):
    users = self.user_factors(x[:, 0])
    movies = self.movie_factors(x[:, 1])

    return c.sigmoid_range((users*movies).sum(dim=1), *self.y_range)

In [53]:
model = DotProduct(n_users, n_mov, 50)
learn = c.Learner(dls, model, c.MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.97155,0.99831,00:16
1,0.912782,0.915975,00:15
2,0.711988,0.88065,00:15
3,0.483929,0.893834,00:15
4,0.302825,0.899002,00:15


#Adding bias parameter

In [56]:
class DotProduct(torch.nn.Module):
  def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
    super().__init__()
    self.user_factors = c.Embedding(n_users, n_factors)
    self.user_bias = c.Embedding(n_users, 1)

    self.movie_factors = c.Embedding(n_movies, n_factors)
    self.movie_bias = c.Embedding(n_movies, 1)

    self.y_range = y_range

  def forward(self, x):
    users = self.user_factors(x[:, 0])
    movies = self.movie_factors(x[:, 1])

    out = (users*movies).sum(dim=1, keepdim=True)
    out += self.user_bias(x[:, 0]) + self.movie_bias(x[:, 1])

    return c.sigmoid_range(out, *self.y_range)

In [57]:
model = DotProduct(n_users, n_mov, 50)
learn = c.Learner(dls, model, c.MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.948573,0.921277,00:16
1,0.84692,0.869136,00:16
2,0.599078,0.884865,00:16
3,0.392103,0.912695,00:16
4,0.246958,0.920051,00:16


In [58]:
model = DotProduct(n_users, n_mov, 50)
learn = c.Learner(dls, model, c.MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.95487,0.952425,00:17
1,0.888971,0.890508,00:16
2,0.819089,0.842323,00:16
3,0.678796,0.809745,00:16
4,0.601053,0.80786,00:16


## Embedding layer from scratch

In [59]:
def create_params(size): return torch.nn.Parameter(torch.zeros(*size).normal_(0, 0.01))

In [60]:
create_params((3, 4))

Parameter containing:
tensor([[ 0.0077, -0.0057,  0.0016,  0.0074],
        [ 0.0218,  0.0032, -0.0133, -0.0081],
        [-0.0032,  0.0034,  0.0105, -0.0097]], requires_grad=True)

In [65]:
class DotProduct(torch.nn.Module):
  def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
    super().__init__()
    self.user_factors = create_params((n_users, n_factors))
    self.user_bias = create_params((n_users, 1))

    self.movie_factors = create_params((n_movies, n_factors))
    self.movie_bias = create_params((n_movies, 1))

    self.y_range = y_range

  def forward(self, x):
    users = self.user_factors[x[:, 0]]
    movies = self.movie_factors[x[:, 1]]

    out = (users*movies).sum(dim=1, keepdim=True)
    out += self.user_bias[x[:, 0]] + self.movie_bias[x[:, 1]]

    return c.sigmoid_range(out, *self.y_range)

In [63]:
for p in DotProduct(3, 4, 5).parameters():
  print(p.shape)

torch.Size([3, 5])
torch.Size([3, 1])
torch.Size([4, 5])
torch.Size([4, 1])


In [66]:
model = DotProduct(n_users, n_mov, 50)
learn = c.Learner(dls, model, c.MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.947946,0.953556,00:18
1,0.910811,0.886745,00:18
2,0.809086,0.838994,00:18
3,0.693406,0.81069,00:18
4,0.567613,0.807482,00:18


In [95]:
b = learn.model.movie_bias.squeeze()
idx = b.argsort()[:5]

[dls.classes['title'][i] for i in idx]

['Children of the Corn: The Gathering (1996)',
 'Mortal Kombat: Annihilation (1997)',
 'Robocop 3 (1993)',
 'Cable Guy, The (1996)',
 'Barb Wire (1996)']

In [96]:
b = learn.model.movie_bias.squeeze()
idx = b.argsort(descending=True)[:5]

[dls.classes['title'][i] for i in idx]

['Titanic (1997)',
 "Schindler's List (1993)",
 'Shawshank Redemption, The (1994)',
 'Star Wars (1977)',
 'Silence of the Lambs, The (1991)']

# Training using fastai collab_learner

In [252]:
learn = c.collab_learner(dls, n_factors=50, y_range=(0, 5.5))

In [118]:
learn.model, learn.loss_func

(EmbeddingDotBias(
   (u_weight): Embedding(944, 50)
   (i_weight): Embedding(1665, 50)
   (u_bias): Embedding(944, 1)
   (i_bias): Embedding(1665, 1)
 ), FlattenedLoss of MSELoss())

In [119]:
learn.fit_one_cycle(5, 5e-5, wd=0.1)

epoch,train_loss,valid_loss,time
0,1.827498,1.841038,00:16
1,1.801524,1.76594,00:16
2,1.667629,1.707044,00:16
3,1.651244,1.676496,00:16
4,1.657034,1.671289,00:16


In [129]:
dls.classes['title'].o2i['2 Days in the Valley (1996)']

6

In [142]:
(torch.nn.CosineSimilarity(dim=1)(learn.model.i_weight.weight[6][None], learn.model.i_weight.weight)).argsort(descending=True)[1]

tensor(686, device='cuda:0')

In [143]:
dls.classes['title'][686]

'Highlander (1986)'

## Using deep learning

In [161]:
class CollabNN(c.Module):
  def __init__(self, user_sz, item_sz, n_act=100, y_range=(0, 5.5)):
    super().__init__()
    self.user_factors = c.Embedding(*user_sz)
    self.movie_factors = c.Embedding(*item_sz)
    self.layers = torch.nn.Sequential(
        torch.nn.Linear(user_sz[1]+item_sz[1], n_act),
        torch.nn.ReLU(),
        torch.nn.Linear(n_act, 1)
    )

    self.y_range = y_range

  def forward(self, x):
    embs = self.user_factors(x[:, 0]), self.movie_factors(x[:, 1])
    x = self.layers(torch.cat(embs, dim=1))

    return c.sigmoid_range(x, *self.y_range)

In [179]:
sz = c.get_emb_sz(dls)
sz

[(944, 74), (1665, 102)]

In [163]:
model = CollabNN(*sz)

In [165]:
learn = c.Learner(dls, model, c.MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.01)

epoch,train_loss,valid_loss,time
0,0.926589,0.961286,00:18
1,0.882654,0.89912,00:18
2,0.863753,0.866934,00:18
3,0.853089,0.856041,00:18
4,0.760599,0.85861,00:18


In [194]:
learn = c.collab_learner(dls, use_nn=True, layers=[10, 50], y_range=(0, 5.5))
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,1.015441,1.018047,00:20
1,0.956647,0.917708,00:20
2,0.945269,0.898382,00:20
3,0.876454,0.859673,00:20
4,0.806192,0.85662,00:20


In [195]:
learn.model

EmbeddingNN(
  (embeds): ModuleList(
    (0): Embedding(944, 74)
    (1): Embedding(1665, 102)
  )
  (emb_drop): Dropout(p=0.0, inplace=False)
  (bn_cont): BatchNorm1d(0, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): LinBnDrop(
      (0): BatchNorm1d(176, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=176, out_features=10, bias=False)
      (2): ReLU(inplace=True)
    )
    (1): LinBnDrop(
      (0): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=10, out_features=50, bias=False)
      (2): ReLU(inplace=True)
    )
    (2): LinBnDrop(
      (0): Linear(in_features=50, out_features=1, bias=True)
    )
    (3): SigmoidRange(low=0, high=5.5)
  )
)

In [196]:
CollabNN((944, 74), (1665, 102))

CollabNN(
  (user_factors): Embedding(944, 74)
  (movie_factors): Embedding(1665, 102)
  (layers): Sequential(
    (0): Linear(in_features=176, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=1, bias=True)
  )
)

In [None]:
t.delegates

In [245]:
@t.delegates(t.TabularModel)
class EmbeddingNN(t.TabularModel):
  def __init__(self, emb_sz, layers, **kwargs):
    super().__init__(emb_sz, layers=layers, n_cont=0, out_sz=1, **kwargs)

### kwargs && args

kwargs

In [230]:
def s(a, **kwargs): 
  return kwargs

In [231]:
s(2, s='dhdj', j=3)

{'j': 3, 's': 'dhdj'}

args

In [248]:
def a(b, *size): return b, size

In [251]:
a(2, 3, 'klk', 5)

(2, (3, 'klk', 5))

In [253]:
learn.show_results()

Unnamed: 0,user,title,rating,rating_pred
0,662,1296,5,2.758691
1,289,745,3,2.754418
2,588,861,4,2.764377
3,872,1211,4,2.735981
4,668,714,3,2.744478
5,513,379,5,2.747952
6,919,294,3,2.713059
7,838,132,3,2.776652
8,6,1078,4,2.777724
