###Looking at Collabrative filtering (Recomender systems)

In [1]:
!pip install fastai -U

Collecting fastai
[?25l  Downloading https://files.pythonhosted.org/packages/d4/cf/9140964d3544d904cc718e519e1632a8a42e1eaaf2fafdaaa14716c42336/fastai-2.0.10-py3-none-any.whl (354kB)
[K     |█                               | 10kB 7.2MB/s eta 0:00:01[K     |█▉                              | 20kB 1.5MB/s eta 0:00:01[K     |██▊                             | 30kB 2.0MB/s eta 0:00:01[K     |███▊                            | 40kB 1.6MB/s eta 0:00:01[K     |████▋                           | 51kB 1.9MB/s eta 0:00:01[K     |█████▌                          | 61kB 2.2MB/s eta 0:00:01[K     |██████▌                         | 71kB 2.4MB/s eta 0:00:01[K     |███████▍                        | 81kB 2.6MB/s eta 0:00:01[K     |████████▎                       | 92kB 2.8MB/s eta 0:00:01[K     |█████████▎                      | 102kB 2.7MB/s eta 0:00:01[K     |██████████▏                     | 112kB 2.7MB/s eta 0:00:01[K     |███████████                     | 122kB 2.7MB/s eta 0:00

In [2]:
from fastai.collab import *

In [3]:
from fastai.vision.all import *
from fastai.tabular.all import *
path = untar_data(URLs.ML_100k)

In [4]:
ratings = pd.read_csv(path / 'u.data', sep="\t", header=None)
ratings.columns = ["user", 'movie', 'rating', 'timestamp']
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [5]:
movies = pd.read_csv(path / 'u.item', delimiter='|', encoding='latin-1',
                     usecols=(0,1), names=('movie', 'title'), header=None)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [6]:
ratings = ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [7]:
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()

Unnamed: 0,user,title,rating
0,342,Sunset Blvd. (1950),5
1,621,Cliffhanger (1993),2
2,637,Fargo (1996),4
3,104,"Very Brady Sequel, A (1996)",3
4,49,Sleepers (1996),4
5,94,"Man Without a Face, The (1993)",3
6,62,"Professional, The (1994)",5
7,269,Pulp Fiction (1994),5
8,267,Star Trek V: The Final Frontier (1989),2
9,771,Sleepless in Seattle (1993),4


In [8]:
dls.classes

{'title': (#1665) ['#na#',"'Til There Was You (1997)",'1-900 (1994)','101 Dalmatians (1996)','12 Angry Men (1957)','187 (1997)','2 Days in the Valley (1996)','20,000 Leagues Under the Sea (1954)','2001: A Space Odyssey (1968)','3 Ninjas: High Noon At Mega Mountain (1998)'...],
 'user': (#944) ['#na#',1,2,3,4,5,6,7,8,9...]}

In [9]:
n_users = len(dls.classes['user'])
n_movies = len(dls.classes['title'])

user_factors = torch.randn((n_users,5))
movie_factors = torch.randn((n_movies,5))
user_factors.shape, movie_factors.shape

(torch.Size([944, 5]), torch.Size([1665, 5]))

In [10]:
### Using One hot Encoded Vector as an index
one_hot_3 = one_hot(3, n_movies).float()
one_hot_3

tensor([0., 0., 0.,  ..., 0., 0., 0.])

In [11]:
movie_factors.T @ (one_hot_3) 

tensor([ 0.0885,  0.2509,  0.1606, -0.1559,  0.4368])

In [12]:
movie_factors[3]

tensor([ 0.0885,  0.2509,  0.1606, -0.1559,  0.4368])

### Embedding layer optimizes indexing 

In [13]:
class DotProduct(Module):
    def __init__(self, n_users, n_movies, n_factors):
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)

    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        return (users * movies).sum(dim=1)

In [14]:
x,y = dls.one_batch()
x.shape

torch.Size([64, 2])

In [15]:
model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())

In [16]:
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,1.338458,1.293732,00:09
1,1.103731,1.0933,00:09
2,0.933753,0.974164,00:10
3,0.83799,0.882733,00:09
4,0.783526,0.866786,00:10


In [17]:
#Model without the bias term
class DotProduct(Module):
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        self.user_factors = Embedding(n_users,n_factors)
        self.movie_factoes = Embedding(n_movies,n_factors)
        self.y_range = y_range
    
    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factoes(x[:,1])
        return sigmoid_range((users * movies).sum(dim=1), *self.y_range)

In [18]:
model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.998785,0.984357,00:10
1,0.849171,0.887387,00:10
2,0.668018,0.855017,00:10
3,0.477904,0.858316,00:10
4,0.370119,0.8625,00:10


In [19]:
#Model with the bias
class DotProduct(Module):
    def __init__(self, n_movies, n_users, n_factors, y_range=(0,5.5)):
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.user_bias = Embedding(n_users, 1)
        self.movie_bias = Embedding(n_movies, 1)
        self.y_range = y_range

    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        res = (users * movies).sum(dim=1, keepdim=True)
        res += self.user_bias(x[:,0]) + self.movie_bias(x[:,1])
        return sigmoid_range(res, *self.y_range)

In [20]:
# model = DotProduct(n_movies, n_users, 50)
# learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(6, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.463546,0.876988,00:10
1,0.591635,0.880581,00:10
2,0.544066,0.882186,00:09
3,0.444556,0.871387,00:10
4,0.350507,0.869312,00:10
5,0.275518,0.870386,00:10


In [21]:
## Creating an embedding module
class T(Module):
    def __init__(self): self.a = torch.ones(3)

L(T().parameters())

(#0) []

In [22]:
##Wrapping the initialization with nn.Parameter so it starts keeping track of the gradients

class T(Module):
    def __init__(self): self.a = nn.Parameter(torch.ones(3))

L(T().parameters())

(#1) [Parameter containing:
tensor([1., 1., 1.], requires_grad=True)]

In [23]:
class T(Module):
    def __init__(self): self.a = nn.Linear(1, 3, bias=False)

t = T()
L(t.parameters())

(#1) [Parameter containing:
tensor([[ 0.1552],
        [-0.5753],
        [ 0.7817]], requires_grad=True)]

In [24]:
type(t.a.weight)

torch.nn.parameter.Parameter

In [25]:
# Creating a tensor parameter with random initialization
def create_params(size):
    return nn.Parameter(torch.zeros(*size).normal_(0,0.01))

In [26]:
class DotProductBias(Module):
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        self.user_factors = create_params((n_users, n_factors))
        self.movie_factors = create_params((n_movies, n_factors))
        self.user_bias = create_params((n_users, 1))
        self.movie_bias = create_params((n_movies, 1))
        self.y_range = y_range
    
    def forward(self, x):
        users = self.user_factors[x[:,0]]
        movies = self.movie_factors[x[:,1]]
        res = (users * movies).sum(dim=1, keepdim=True, )
        res += self.user_bias[x[:,0]] + self.movie_bias[x[:,1]]
        return sigmoid_range(res, *self.y_range)

In [27]:
model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.944233,0.935205,00:10
1,0.846389,0.864526,00:10
2,0.719292,0.816521,00:10
3,0.55958,0.808811,00:10
4,0.48202,0.809555,00:10


In [28]:
## Model imterpretation: Movies with highest bias 
movies_bias = learn.model.movie_bias.squeeze()
idxs = movies_bias.argsort()[-5:]
[dls.classes['title'][i] for i in idxs]

["Schindler's List (1993)",
 'Shawshank Redemption, The (1994)',
 'Silence of the Lambs, The (1991)',
 'Star Wars (1977)',
 'Titanic (1997)']

In [29]:
##Movies with lowest bias
idxs = movies_bias.argsort(descending=True)[-5:]
[dls.classes['title'][i] for i in idxs]

['Big Bully (1996)',
 'Crow: City of Angels, The (1996)',
 'Mortal Kombat: Annihilation (1997)',
 'Lawnmower Man 2: Beyond Cyberspace (1996)',
 'Children of the Corn: The Gathering (1996)']

In [30]:
##Using Fastai API
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.949048,0.9408,00:11
1,0.861725,0.864549,00:11
2,0.730251,0.822154,00:11
3,0.592759,0.807585,00:10
4,0.479891,0.808877,00:10


In [31]:
learn.model

EmbeddingDotBias(
  (u_weight): Embedding(944, 50)
  (i_weight): Embedding(1665, 50)
  (u_bias): Embedding(944, 1)
  (i_bias): Embedding(1665, 1)
)

In [32]:
movie_bias = learn.model.i_bias.weight.squeeze()
idxs = movie_bias.argsort(descending=True)[:5]
[dls.classes["title"][i] for i in idxs]

['Titanic (1997)',
 'Silence of the Lambs, The (1991)',
 'Star Wars (1977)',
 "Schindler's List (1993)",
 'Apt Pupil (1998)']

In [33]:
dls.classes['title'].o2i['Silence of the Lambs, The (1991)']

1330

In [34]:
##Using Deep-Learning

class CollabNN(Module):
    def __init__(self, user_sz, item_sz, y_range=(0,5.5), n_act=100):
        self.user_factors = Embedding(*user_sz)
        self.item_factors = Embedding(*item_sz)
        self.layers = nn.Sequential(
            nn.Linear(user_sz[1]+item_sz[1], n_act),
            nn.ReLU(),
            nn.Linear(n_act, 1))
        self.y_range = y_range

    def forward(self, x):
        embs = torch.cat((self.user_factors(x[:,0]), 
                          self.item_factors(x[:,1])), dim=1)
        x = self.layers(embs)
        return sigmoid_range(x, *self.y_range)

In [35]:
embs = get_emb_sz(dls)
embs

[(944, 74), (1665, 102)]

In [36]:
model = CollabNN(*embs)

In [37]:
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.911341,0.942013,00:12
1,0.917705,0.897171,00:12
2,0.871006,0.873613,00:12
3,0.847162,0.853907,00:12
4,0.789134,0.853683,00:12


In [38]:
## Using collab learning's api that uses NN
learn = collab_learner(dls, use_nn=True, y_range=(0,5.5), layers=[100,50])
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,1.013563,0.978419,00:16
1,0.908022,0.921574,00:16
2,0.865575,0.866939,00:16
3,0.810551,0.847082,00:16
4,0.763115,0.844307,00:16


In [39]:
model.user_factors(x[:,0])[0]

tensor([ 0.0521, -0.0996,  0.0163, -0.1211,  0.0241,  0.1465, -0.0387,  0.1354,
         0.0169, -0.1420,  0.0160, -0.0363,  0.0066, -0.1307, -0.1368,  0.0781,
        -0.0054,  0.1374,  0.0186,  0.0923, -0.0214, -0.0169, -0.0586,  0.0586,
         0.0799, -0.1191, -0.1391, -0.1136, -0.1771, -0.0085,  0.0401, -0.1357,
         0.0278,  0.0155, -0.0663, -0.1660,  0.0031, -0.0199,  0.0302,  0.0175,
         0.0751,  0.0398, -0.0285,  0.1679,  0.0149,  0.0497,  0.0189, -0.0066,
         0.1947,  0.0244, -0.1302,  0.0002, -0.1316, -0.1412, -0.1425, -0.0863,
        -0.0762, -0.0787, -0.1312,  0.0654,  0.1144, -0.1381,  0.1349, -0.0229,
        -0.0528,  0.0698, -0.1525, -0.1419,  0.1613,  0.1138,  0.1091, -0.0173,
        -0.1498,  0.0099], grad_fn=<SelectBackward>)

In [40]:
model.user_factors(x[:,0])[0]

tensor([ 0.0521, -0.0996,  0.0163, -0.1211,  0.0241,  0.1465, -0.0387,  0.1354,
         0.0169, -0.1420,  0.0160, -0.0363,  0.0066, -0.1307, -0.1368,  0.0781,
        -0.0054,  0.1374,  0.0186,  0.0923, -0.0214, -0.0169, -0.0586,  0.0586,
         0.0799, -0.1191, -0.1391, -0.1136, -0.1771, -0.0085,  0.0401, -0.1357,
         0.0278,  0.0155, -0.0663, -0.1660,  0.0031, -0.0199,  0.0302,  0.0175,
         0.0751,  0.0398, -0.0285,  0.1679,  0.0149,  0.0497,  0.0189, -0.0066,
         0.1947,  0.0244, -0.1302,  0.0002, -0.1316, -0.1412, -0.1425, -0.0863,
        -0.0762, -0.0787, -0.1312,  0.0654,  0.1144, -0.1381,  0.1349, -0.0229,
        -0.0528,  0.0698, -0.1525, -0.1419,  0.1613,  0.1138,  0.1091, -0.0173,
        -0.1498,  0.0099], grad_fn=<SelectBackward>)

In [41]:
nn.Linear??