In [1]:
from fastai.collab import *
from fastai.tabular.all import *

In [2]:
# Fetch and unpack the ML_100k archive (fastai caches it locally); `path` is the
# extracted folder whose contents are listed in the next cell.
path = untar_data(URLs.ML_100k)

In [3]:
path.ls()

(#23) [Path('/home/anasys/.fastai/data/ml-100k/u4.test'),Path('/home/anasys/.fastai/data/ml-100k/u5.base'),Path('/home/anasys/.fastai/data/ml-100k/u3.base'),Path('/home/anasys/.fastai/data/ml-100k/u.info'),Path('/home/anasys/.fastai/data/ml-100k/u.data'),Path('/home/anasys/.fastai/data/ml-100k/ub.test'),Path('/home/anasys/.fastai/data/ml-100k/u.genre'),Path('/home/anasys/.fastai/data/ml-100k/allbut.pl'),Path('/home/anasys/.fastai/data/ml-100k/u1.base'),Path('/home/anasys/.fastai/data/ml-100k/u3.test')...]

In [4]:
# u.data is tab-separated and has no header row, so the four column names
# are supplied explicitly.
ratings = pd.read_csv(path / 'u.data',
                      delimiter='\t',
                      header=None,
                      names=['user', 'movie', 'rating', 'timestamp'])

In [5]:
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [8]:
# Hand-written, illustrative factor vectors for two films.
# Positions are, in order: science-fiction, action, old movies.
last_skywalker = np.array([0.98, 0.9, -0.9])
casablanca = np.array([-0.99, -0.3, 0.8])

In [9]:
# A hand-written user vector with the same three positions as the movie vectors,
# so the two can be multiplied element-wise.
user1 = np.array([0.9, 0.8, -0.6])

In [10]:
# Element-wise product summed over the factors, i.e. the dot product of the two vectors.
(user1 * last_skywalker).sum()

2.1420000000000003

In [11]:
(user1 * casablanca).sum()

-1.611

In [12]:
# u.item is pipe-delimited, latin-1 encoded and has no header row. Only its
# first two columns are loaded and they are named `movie` and `title`.
movies = pd.read_csv(path / 'u.item',
                     delimiter='|',
                     encoding='latin-1',
                     usecols=(0, 1),
                     names=('movie', 'title'),
                     header=None)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [13]:
# No `on=` is given, so pandas joins on the columns both frames share
# (`movie` on a fresh run), attaching a `title` to every rating row.
# NOTE(review): this rebinds `ratings`; the un-merged frame is no longer reachable.
ratings = ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [14]:
# Use `title` as the item column so batches (and `dls.classes`) carry readable
# movie names; each batch holds 64 rows.
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()

Unnamed: 0,user,title,rating
0,592,"Fast, Cheap & Out of Control (1997)",5
1,773,Last Man Standing (1996),1
2,796,"Sting, The (1973)",4
3,553,It Happened One Night (1934),5
4,758,Bananas (1971),4
5,269,Until the End of the World (Bis ans Ende der Welt) (1991),3
6,321,"Remains of the Day, The (1993)",4
7,590,Smilla's Sense of Snow (1997),3
8,635,Spawn (1997),1
9,753,"Bridge on the River Kwai, The (1957)",5


In [15]:
# Size of the `user` vocabulary held by the DataLoaders; used below as the
# number of rows in the user factor/embedding tables.
n_users = len(dls.classes['user'])
n_users

944

In [16]:
# Size of the `title` vocabulary held by the DataLoaders; used below as the
# number of rows in the movie factor/embedding tables.
n_movies = len(dls.classes['title'])
n_movies

1665

In [17]:
n_factors = 5

In [18]:
# Randomly initialised latent factors: one row of `n_factors` values per user class.
user_factors = torch.randn(n_users, n_factors)
user_factors.shape

torch.Size([944, 5])

In [34]:
# Randomly initialised latent factors: one row of `n_factors` values per movie class.
movie_factors = torch.randn(n_movies, n_factors)
movie_factors.shape

torch.Size([1665, 5])

In [44]:
# Multiplying the transposed factor matrix by a one-hot vector picks out a single
# row: the result matches indexing `user_factors[3]` directly (shown two cells down).
one_hot_3 = one_hot(3, n_users).float()
user_factors.t() @ one_hot_3

tensor([ 0.4899, -0.2355, -0.0463,  0.9462,  0.5654])

In [46]:
user_factors.shape, one_hot_3.shape

(torch.Size([944, 5]), torch.Size([944]))

In [41]:
user_factors[3]

tensor([ 0.4899, -0.2355, -0.0463,  0.9462,  0.5654])

In [48]:
class Example:
    """Tiny class illustrating how `__init__` stores state that methods read later."""

    def __init__(self, a):
        # Kept on the instance so `say` can address it.
        self.a = a

    def say(self, x):
        """Return a greeting addressed to `self.a`, followed by the phrase `x`."""
        greeting = 'Hello {}, {}.'.format(self.a, x)
        return greeting

In [49]:
ex = Example('Sylvan')
ex.say('nice to meet you')

'Hello Sylvan, nice to meet you.'

In [81]:
class DotProduct(Module):
    """Dot-product collaborative-filtering model.

    Every user and every movie gets an embedding of length `n_factors`;
    the prediction for a (user, movie) pair is the dot product of the two.
    """

    def __init__(self, n_users, n_movies, n_factors):
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)

    def forward(self, x):
        """Return one score per row of `x`; column 0 is the user id, column 1 the movie id."""
        user_ids, movie_ids = x[:, 0], x[:, 1]
        user_vecs = self.user_factors(user_ids)
        movie_vecs = self.movie_factors(movie_ids)
        # Row-wise dot product of the two embedding batches.
        return torch.sum(user_vecs * movie_vecs, dim=1)

In [82]:
x, y = dls.one_batch()
x.shape, y.shape

(torch.Size([64, 2]), torch.Size([64, 1]))

In [83]:
x[:, 0].shape

torch.Size([64])

In [84]:
x[:, 1].shape

torch.Size([64])

In [85]:
# Try the embedding lookup by hand on one batch: user ids go in, one
# `n_factors`-long vector per id comes out.
# `to_cpu` moves the ids to the CPU - presumably because this freshly created
# Embedding has not been moved to the GPU; TODO confirm.
# NOTE(review): this rebinds `user_factors`, previously a plain random tensor.
user_factors = Embedding(n_users, n_factors)
users = user_factors(to_cpu(x[:, 0]))
users.shape

torch.Size([64, 5])

In [86]:
movie_factors = Embedding(n_movies, n_factors)
movies = movie_factors(to_cpu(x[:, 1]))
movies.shape

torch.Size([64, 5])

In [87]:
(users * movies).sum(dim=1).shape

torch.Size([64])

In [88]:
model = DotProduct(n_users, n_movies, 50)
model

DotProduct(
  (user_factors): Embedding(944, 50)
  (movie_factors): Embedding(1665, 50)
)

In [89]:
# Train against mean squared error between predicted and actual ratings.
learn = Learner(dls, model, loss_func=MSELossFlat())

In [90]:
# 5 epochs on the one-cycle schedule with a peak learning rate of 5e-3.
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,1.348482,1.292598,00:06
1,1.071086,1.102618,00:06
2,0.950296,0.965836,00:06
3,0.84071,0.874548,00:06
4,0.799524,0.858594,00:06


In [91]:
class DotProduct(Module):
    """Dot-product collaborative-filtering model with a bounded output.

    Identical to the plain dot-product model except that the raw score is
    passed through `sigmoid_range` so predictions fall inside `y_range`.
    """

    def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.y_range = y_range

    def forward(self, x):
        """Return one bounded score per row of `x`; column 0 is the user id, column 1 the movie id."""
        user_ids, movie_ids = x[:, 0], x[:, 1]
        user_vecs = self.user_factors(user_ids)
        movie_vecs = self.movie_factors(movie_ids)
        raw_score = torch.sum(user_vecs * movie_vecs, dim=1)
        # Squash the unbounded dot product into the (low, high) interval.
        return sigmoid_range(raw_score, *self.y_range)

In [92]:
# Re-train with the range-limited DotProduct (default y_range) and 50 factors,
# using the same loss and schedule as the previous run.
model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.980007,0.991965,00:06
1,0.851325,0.892387,00:06
2,0.671646,0.860641,00:06
3,0.48319,0.865426,00:06
4,0.376567,0.869997,00:06


In [93]:
class DotProduct(Module):
    """Dot-product collaborative-filtering model with per-user and per-movie biases.

    The prediction is `dot(user, movie) + user_bias + movie_bias`, squashed
    into `y_range` by `sigmoid_range`.
    """

    def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
        # Creation order is kept (user factors, user bias, movie factors,
        # movie bias) so parameter registration and random init are unchanged.
        self.user_factors = Embedding(n_users, n_factors)
        self.user_bias = Embedding(n_users, 1)

        self.movie_factors = Embedding(n_movies, n_factors)
        self.movie_bias = Embedding(n_movies, 1)

        self.y_range = y_range

    def forward(self, x):
        """Return a (batch, 1) tensor of bounded scores; column 0 of `x` is the user id, column 1 the movie id."""
        user_ids, movie_ids = x[:, 0], x[:, 1]
        user_vecs = self.user_factors(user_ids)
        movie_vecs = self.movie_factors(movie_ids)
        # keepdim=True leaves a trailing axis of size 1, matching the
        # (batch, 1) output of the width-1 bias embeddings.
        interaction = torch.sum(user_vecs * movie_vecs, dim=1, keepdim=True)
        bias = self.user_bias(user_ids) + self.movie_bias(movie_ids)
        return sigmoid_range(interaction + bias, *self.y_range)

In [94]:
# Train the bias-augmented DotProduct with 50 factors; same loss and schedule as before.
model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.953001,0.931553,00:07
1,0.838029,0.849181,00:07
2,0.594066,0.850282,00:07
3,0.414609,0.873276,00:07
4,0.28705,0.879537,00:07


In [122]:
# Same model and schedule as the previous run, now with weight decay (wd=0.1).
# In the previous run the training loss kept falling while the validation loss
# rose after epoch 1, i.e. the model was overfitting.
model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.955867,0.940355,00:06
1,0.868366,0.864493,00:07
2,0.745591,0.820899,00:06
3,0.595089,0.80767,00:06
4,0.49497,0.808947,00:07


In [96]:
class T(Module):
    """Module whose only attribute is a plain tensor (not wrapped in `nn.Parameter`)."""

    def __init__(self):
        plain_tensor = torch.ones(3)
        self.a = plain_tensor

In [99]:
L(T().parameters())

(#0) []

In [100]:
class T(Module):
    """Module whose only attribute is a tensor wrapped in `nn.Parameter`."""

    def __init__(self):
        initial = torch.ones(3)
        self.a = nn.Parameter(initial)

In [101]:
L(T().parameters())

(#1) [Parameter containing:
tensor([1., 1., 1.], requires_grad=True)]

In [102]:
class T(Module):
    """Module whose only attribute is a bias-free linear layer (1 input feature, 3 outputs)."""

    def __init__(self):
        self.a = nn.Linear(in_features=1, out_features=3, bias=False)

In [104]:
t = T()
L(t.parameters())

(#1) [Parameter containing:
tensor([[-0.8586],
        [ 0.8295],
        [-0.7262]], requires_grad=True)]

In [105]:
type(t.a.weight)

torch.nn.parameter.Parameter

In [106]:
def create_params(size):
    """Create a trainable parameter of shape `size`.

    `size` is an iterable of dimensions (it is unpacked into `torch.zeros`).
    Values are drawn from a normal distribution with mean 0 and standard
    deviation 0.01, and the tensor is wrapped in `nn.Parameter`.
    """
    weights = torch.zeros(*size)
    # In-place fill with small random values.
    weights.normal_(mean=0, std=0.01)
    return nn.Parameter(weights)

In [110]:
create_params([3, 5]).shape

torch.Size([3, 5])

In [125]:
class DotProductBias(Module):
    """Dot-product model with biases, built from raw parameters instead of `Embedding` layers.

    Factor tables are (n, n_factors) parameters and bias tables are 1-D
    parameters of length n, all produced by `create_params`. Look-ups are
    done by plain tensor indexing.
    """

    def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
        # Creation order is kept (user factors, user bias, movie factors,
        # movie bias) so parameter registration and random init are unchanged.
        self.user_factors = create_params([n_users, n_factors])
        self.user_bias = create_params([n_users])

        self.movie_factors = create_params([n_movies, n_factors])
        self.movie_bias = create_params([n_movies])

        self.y_range = y_range

    def forward(self, x):
        """Return one bounded score per row of `x`; column 0 is the user id, column 1 the movie id."""
        user_ids, movie_ids = x[:, 0], x[:, 1]
        user_vecs = self.user_factors[user_ids]
        movie_vecs = self.movie_factors[movie_ids]
        interaction = torch.sum(user_vecs * movie_vecs, dim=1)
        # Biases are 1-D, so indexing them gives one value per row - the same
        # shape as `interaction`.
        bias = self.user_bias[user_ids] + self.movie_bias[movie_ids]
        return sigmoid_range(interaction + bias, *self.y_range)

In [126]:
# Train the raw-parameter model with the same settings as the weight-decay run
# above (50 factors, 5 epochs, peak lr 5e-3, wd=0.1).
model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.948911,0.932214,00:07
1,0.857541,0.861588,00:07
2,0.737865,0.824385,00:07
3,0.568469,0.812904,00:07
4,0.456174,0.813951,00:07


In [127]:
# Learned per-movie bias, one value per movie class. DotProductBias already
# stores it as a 1-D parameter, so `squeeze()` has no size-1 axis to drop here.
movie_bias = learn.model.movie_bias.squeeze()
movie_bias.shape

torch.Size([1665])

In [130]:
# argsort is ascending by default, so these are the indices of the 5 lowest biases.
idxs = movie_bias.argsort()[:5]
idxs

tensor([ 295,  850, 1001,  561,  140], device='cuda:0')

In [131]:
movie_bias[idxs]

tensor([-0.3588, -0.3142, -0.2913, -0.2910, -0.2799], device='cuda:0',
       grad_fn=<IndexBackward>)

In [133]:
[dls.classes['title'][i] for i in idxs]

['Children of the Corn: The Gathering (1996)',
 'Lawnmower Man 2: Beyond Cyberspace (1996)',
 'Mortal Kombat: Annihilation (1997)',
 'Free Willy 3: The Rescue (1997)',
 'Beautician and the Beast, The (1997)']

In [134]:
# Descending sort: indices of the 5 highest biases.
idxs = movie_bias.argsort(descending=True)[:5]
idxs

tensor([1399, 1501,   99, 1282, 1318], device='cuda:0')

In [135]:
movie_bias[idxs]

tensor([0.5826, 0.5511, 0.5257, 0.5125, 0.4976], device='cuda:0',
       grad_fn=<IndexBackward>)

In [136]:
[dls.classes['title'][i] for i in idxs]

['Star Wars (1977)',
 'Titanic (1997)',
 'As Good As It Gets (1997)',
 "Schindler's List (1993)",
 'Shawshank Redemption, The (1994)']

## Using fastai.collab

In [19]:
# fastai's built-in collaborative-filtering learner with 50 factors and the same
# y_range; trained with the same schedule and weight decay as the hand-written
# model so the results are comparable.
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.961796,0.928058,00:07
1,0.866128,0.851206,00:07
2,0.722969,0.813654,00:08
3,0.593162,0.803783,00:07
4,0.469238,0.804604,00:07


In [20]:
learn.model

EmbeddingDotBias(
  (u_weight): Embedding(944, 50)
  (i_weight): Embedding(1665, 50)
  (u_bias): Embedding(944, 1)
  (i_bias): Embedding(1665, 1)
)

In [21]:
# `i_bias` is an Embedding with one column (see the model printout above);
# squeezing its weight gives a flat vector with one bias per movie class.
movie_bias = learn.model.i_bias.weight.squeeze()
movie_bias.shape

torch.Size([1665])

In [22]:
idxs = movie_bias.argsort(descending=True)[:5]
[dls.classes['title'][i] for i in idxs]

['Titanic (1997)',
 "Schindler's List (1993)",
 'Star Wars (1977)',
 'Silence of the Lambs, The (1991)',
 'Shawshank Redemption, The (1994)']

In [23]:
movie_factors = learn.model.i_weight.weight
movie_factors.shape

torch.Size([1665, 50])

In [24]:
idx = dls.classes['title'].o2i['Silence of the Lambs, The (1991)']
idx

1330

In [25]:
movie_factors[idx][None].shape

torch.Size([1, 50])

In [26]:
# NOTE(review): despite the name, `distances` holds cosine *similarities*
# (higher = more alike) between every movie's factor vector and the chosen
# movie's. `[None]` adds a leading axis so the single vector broadcasts
# against the full factor matrix.
distances = nn.CosineSimilarity(dim=1)(movie_factors, movie_factors[idx][None])
distances.shape

torch.Size([1665])

In [27]:
# Highest similarity first, so the top 10 are the closest movies; the query
# movie itself comes out first.
idxs = distances.argsort(descending=True)[:10]
[dls.classes['title'][i] for i in idxs]

['Silence of the Lambs, The (1991)',
 'Traveller (1997)',
 'As Good As It Gets (1997)',
 'Shawshank Redemption, The (1994)',
 'Glory (1989)',
 'Last Time I Saw Paris, The (1954)',
 'When We Were Kings (1996)',
 'Alice in Wonderland (1951)',
 'Casablanca (1942)',
 'Love in the Afternoon (1957)']

## Deep Learning for Collaborative Filtering

In [31]:
# One (number of classes, embedding width) pair per categorical column of `dls`:
# first the user column, then the item column.
embs = get_emb_sz(dls)
embs

[(944, 74), (1665, 102)]

In [35]:
class CollabNN(Module):
    """Neural-network collaborative-filtering model.

    Instead of taking a dot product, the user and item embeddings are
    concatenated and fed through a small MLP (Linear -> ReLU -> Linear) that
    outputs one value per row, squashed into `y_range`.

    `user_sz` and `item_sz` are (number of classes, embedding width) pairs;
    `n_act` is the width of the hidden layer.
    """

    def __init__(self, user_sz, item_sz, y_range=(0, 5.5), n_act=100):
        self.user_factors = Embedding(*user_sz)
        self.item_factors = Embedding(*item_sz)
        # The MLP input is the two embeddings laid side by side.
        n_in = user_sz[1] + item_sz[1]
        self.layers = nn.Sequential(
            nn.Linear(n_in, n_act),
            nn.ReLU(),
            nn.Linear(n_act, 1),
        )
        self.y_range = y_range

    def forward(self, x):
        """Return a (batch, 1) tensor of bounded scores; column 0 of `x` is the user id, column 1 the item id."""
        user_emb = self.user_factors(x[:, 0])
        item_emb = self.item_factors(x[:, 1])
        joined = torch.cat((user_emb, item_emb), dim=1)
        return sigmoid_range(self.layers(joined), *self.y_range)

In [38]:
# `embs` holds two size pairs; unpacking passes them as `user_sz` and `item_sz`.
model = CollabNN(*embs)
model

CollabNN(
  (user_factors): Embedding(944, 74)
  (item_factors): Embedding(1665, 102)
  (layers): Sequential(
    (0): Linear(in_features=176, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=1, bias=True)
  )
)

In [41]:
batch = dls.one_batch()
batch[0].shape, batch[1].shape

(torch.Size([64, 2]), torch.Size([64, 1]))

In [44]:
# Train the hand-written CollabNN: MSE loss, 5 one-cycle epochs, peak lr 5e-3.
# NOTE(review): wd=0.01 here, whereas the dot-product runs above used wd=0.1.
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.01)

epoch,train_loss,valid_loss,time
0,0.918025,0.932091,00:08
1,0.877373,0.89366,00:08
2,0.858593,0.863332,00:08
3,0.811172,0.849933,00:08
4,0.785799,0.854178,00:08


In [46]:
# fastai's built-in neural-net variant (`use_nn=True`), configured with
# layers=[100, 50] - presumably two hidden layers of those widths; see the
# collab_learner docs to confirm.
learn = collab_learner(dls, use_nn=True, y_range=(0, 5.5), layers=[100, 50])
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.967553,0.959536,00:09
1,0.913775,0.917067,00:10
2,0.890464,0.864495,00:09
3,0.810445,0.844975,00:10
4,0.769348,0.846648,00:09
