In [1]:
!pip install fastai --upgrade
!pip install nbdev
!pip install azure-cognitiveservices-search-imagesearch
from utils import *

Collecting fastai
[?25l  Downloading https://files.pythonhosted.org/packages/d4/cf/9140964d3544d904cc718e519e1632a8a42e1eaaf2fafdaaa14716c42336/fastai-2.0.10-py3-none-any.whl (354kB)
[K     |█                               | 10kB 22.7MB/s eta 0:00:01[K     |█▉                              | 20kB 3.0MB/s eta 0:00:01[K     |██▊                             | 30kB 4.0MB/s eta 0:00:01[K     |███▊                            | 40kB 4.3MB/s eta 0:00:01[K     |████▋                           | 51kB 3.4MB/s eta 0:00:01[K     |█████▌                          | 61kB 3.9MB/s eta 0:00:01[K     |██████▌                         | 71kB 4.3MB/s eta 0:00:01[K     |███████▍                        | 81kB 4.5MB/s eta 0:00:01[K     |████████▎                       | 92kB 4.9MB/s eta 0:00:01[K     |█████████▎                      | 102kB 4.8MB/s eta 0:00:01[K     |██████████▏                     | 112kB 4.8MB/s eta 0:00:01[K     |███████████                     | 122kB 4.8MB/s eta 0:0

In [2]:
from fastai.collab import *
from fastai.tabular.all import *
# Fetch the dataset referenced by URLs.ML_100k; `path` is the base location the
# later cells read 'u.data' and 'u.item' from.
path = untar_data(URLs.ML_100k)

In [3]:
# Load the ratings table: tab-separated with no header row, so the four column
# names are supplied explicitly.
ratings = pd.read_csv(path/'u.data', delimiter='\t', header=None,
                      names=['user', 'movie', 'rating', 'timestamp'])

In [4]:
# Hand-written factor scores for one movie, in the order:
# science fiction, action, old movies
last_skywalker = np.array([0.98, 0.9, -0.9])

In [5]:
# Hand-written preferences for one user (assumed to follow the same factor
# order as last_skywalker: science fiction, action, old movies)
user1 = np.array([0.9, 0.8, -0.6])

In [6]:
# Match between user1 and last_skywalker, computed as the dot product of the two vectors
np.dot(last_skywalker, user1)

2.1420000000000003

In [7]:
# Dot product spelled out: multiply element-wise, then sum
# (gives the same value as np.dot in the previous cell)
(user1*last_skywalker).sum()

2.1420000000000003

In [8]:
# A second movie with hand-written factor scores (assumed to use the same
# factor order as last_skywalker)
casablanca = np.array([-0.99, -0.3, 0.8])

In [9]:
# Match between user1 and casablanca; the two vectors have opposite signs on
# every factor, so the dot product comes out negative
user1.dot(casablanca)

-1.611

In practice we don't know the latent factors in advance; they have to be learned from the ratings.

## Creating the DataLoaders

In [10]:
# Load the movie-id -> title lookup. The file is '|'-separated, latin-1 encoded
# and has no header row; only the first two columns are read and they are
# named 'movie' and 'title'.
movies = pd.read_csv(path/'u.item', delimiter='|', encoding='latin-1',
                     usecols=(0,1), names=('movie', 'title'), header=None)
# To preview the result, run: movies.head()

In [11]:
# Attach titles to the ratings. No `on=` is given, so the join key is whatever
# column(s) the two frames share -- 'movie' when `ratings` is freshly loaded.
# NOTE: `ratings` is rebound here, so after this cell it also has a 'title' column.
ratings = ratings.merge(movies)
# To preview the result, run: ratings.head()

In [12]:
# Build the collaborative-filtering DataLoaders from the merged frame, using the
# 'title' column as the item and a batch size of 64.
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
# Display a sample of rows (user, title, rating)
dls.show_batch()

Unnamed: 0,user,title,rating
0,542,My Left Foot (1989),4
1,422,Event Horizon (1997),3
2,311,"African Queen, The (1951)",4
3,595,Face/Off (1997),4
4,617,Evil Dead II (1987),1
5,158,Jurassic Park (1993),5
6,836,Chasing Amy (1997),3
7,474,Emma (1996),3
8,466,Jackie Chan's First Strike (1996),3
9,554,Scream (1996),3


In [13]:
# Class lists for 'title' and 'user'; in the recorded output both start with a '#na#' entry
dls.classes

{'title': (#1665) ['#na#',"'Til There Was You (1997)",'1-900 (1994)','101 Dalmatians (1996)','12 Angry Men (1957)','187 (1997)','2 Days in the Valley (1996)','20,000 Leagues Under the Sea (1954)','2001: A Space Odyssey (1968)','3 Ninjas: High Noon At Mega Mountain (1998)'...],
 'user': (#944) ['#na#',1,2,3,4,5,6,7,8,9...]}

In [14]:
# Sizes are taken from the DataLoaders' class lists (these include the '#na#' entry)
n_users = len(dls.classes['user']) # num user classes
n_movies = len(dls.classes['title']) # num title classes
n_factors = 5 # number of latent factors per user / movie in this demo

# randomly generated latent factors: one row per class, one column per factor
# (no seed is set in the visible cells, so the values differ between runs)
user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movies, n_factors)

# We can represent look up in an index as a matrix product!
# (n_factors x n_users) @ one-hot vector of length n_users -> the factors stored at index 3
one_hot_3 = one_hot(3, n_users).float()
user_factors.t() @ one_hot_3

tensor([-0.4586, -0.9915, -0.4052, -0.3621, -0.5908])

In [15]:
# Plain indexing returns the same values as the one-hot matrix product in the previous cell
user_factors[3]

tensor([-0.4586, -0.9915, -0.4052, -0.3621, -0.5908])

In [16]:
# The one-hot vector itself: zeros everywhere except a 1 at index 3
one_hot_3

tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,


In [17]:
# One entry per user class, i.e. n_users
len(one_hot_3)

944

In [18]:
# Shown again for comparison: the index lookup needs no length-n_users one-hot vector
user_factors[3]

tensor([-0.4586, -0.9915, -0.4052, -0.3621, -0.5908])

## Questions and Answers
* We are not working with sparse data in collaborative filtering.
* In practice, we tune the number of latent factors.

## Collaborative Filtering from Scratch

In [19]:
class Example:
  "Minimal class used to illustrate `__init__`, attributes and methods."

  def __init__(self, a):
    "Remember `a` on the instance so `say` can use it later."
    self.a = a

  def say(self, x):
    "Return a greeting that addresses `self.a` and appends `x`."
    greeting = f'Hello {self.a}, {x}'
    return greeting

In [20]:
# Create an instance (stores 'Sylvain' as `a`), then call its method
ex = Example('Sylvain')
ex.say('nice to meet you')

'Hello Sylvain, nice to meet you'

In [21]:
class DotProduct(Module):
  "Predict a rating as the dot product of a user embedding and a movie embedding, squashed into `y_range`."

  def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
    "Create one `n_factors`-wide embedding table for users and one for movies."
    self.user_factors = Embedding(n_users, n_factors)
    self.movie_factors = Embedding(n_movies, n_factors)
    self.y_range = y_range

  def forward(self, x):
    "`x` is indexed as `x[:,0]` (user ids) and `x[:,1]` (movie ids); returns one prediction per row."
    user_ids, movie_ids = x[:,0], x[:,1]
    user_vecs = self.user_factors(user_ids)
    movie_vecs = self.movie_factors(movie_ids)
    # Row-wise dot product: multiply matching factors, then sum across the factor axis
    dot = (user_vecs * movie_vecs).sum(dim=1)
    return sigmoid_range(dot, *self.y_range)

In [22]:
# Train the plain dot-product model with 50 latent factors and a mean-squared-error loss
model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
# 5 epochs (rows 0-4 in the table below), with 5e-3 passed as the learning-rate argument
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,1.005721,0.999734,00:08
1,0.885945,0.905953,00:08
2,0.693833,0.876002,00:08
3,0.484503,0.874067,00:08
4,0.369077,0.877741,00:08


In [23]:
class DotProductBias(Module):
  "Dot-product model with an extra learned bias per user and per movie, squashed into `y_range`."

  def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
    "Create factor embeddings (`n_factors` wide) and bias embeddings (1 wide) for users and movies."
    self.user_factors = Embedding(n_users, n_factors)
    self.user_bias = Embedding(n_users, 1)
    self.movie_factors = Embedding(n_movies, n_factors)
    self.movie_bias = Embedding(n_movies, 1)
    self.y_range = y_range

  def forward(self, x):
    "`x` is indexed as `x[:,0]` (user ids) and `x[:,1]` (movie ids); returns one prediction per row."
    user_ids, movie_ids = x[:,0], x[:,1]
    user_vecs = self.user_factors(user_ids)
    movie_vecs = self.movie_factors(movie_ids)
    # keepdim=True keeps a trailing axis of size 1 so the width-1 bias embeddings add directly
    score = (user_vecs * movie_vecs).sum(dim=1, keepdim=True)
    score += self.user_bias(user_ids) + self.movie_bias(movie_ids)
    return sigmoid_range(score, *self.y_range)

In [24]:
# Same training setup as before, now with the bias terms.
# In the recorded run train_loss keeps falling while valid_loss rises after epoch 1.
model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat()) # generic learner
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.928226,0.941926,00:09
1,0.821354,0.864699,00:09
2,0.61655,0.869891,00:09
3,0.410764,0.890642,00:09
4,0.292861,0.897089,00:09


In [25]:
# Pull one batch to inspect shapes; in the recorded output x is (64, 2) and y is (64, 1)
x, y = dls.one_batch()
x.shape, y.shape

(torch.Size([64, 2]), torch.Size([64, 1]))

## Weight Decay

In [26]:
# Retrain the bias model from scratch, this time passing wd=0.1 (weight decay).
# In the recorded run valid_loss ends at 0.8235, versus 0.8971 without wd.
model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.935395,0.949589,00:09
1,0.877538,0.875935,00:09
2,0.716325,0.835029,00:09
3,0.603237,0.822503,00:09
4,0.476814,0.823543,00:09


In [27]:
class T(Module):
  "Demo module that stores a plain tensor as an attribute."
  def __init__(self):
    # A bare tensor attribute; the next cell shows it is not listed among the parameters
    self.a = torch.ones(3)

In [28]:
# Collect the module's parameters into a list; empty for the plain-tensor version of T
L(T().parameters())

(#0) []

In [30]:
class T(Module):
  "Demo module whose attribute is wrapped in `nn.Parameter`."
  def __init__(self):
    # Wrapping the tensor in nn.Parameter makes it appear in parameters() (see output below)
    self.a = nn.Parameter(torch.ones(3))

# Redefines T from the earlier cell; now one parameter is listed
L(T().parameters())

(#1) [Parameter containing:
tensor([1., 1., 1.], requires_grad=True)]

In [31]:
class T(Module):
  "Demo module whose attribute is an `nn.Linear` layer without bias."
  def __init__(self):
    # The layer's weight shows up as the module's single parameter (3 rows x 1 column in the output below)
    self.a = nn.Linear(1, 3, bias=False)

# Keep the instance so the next cell can inspect the layer's weight
t = T()
L(t.parameters())

(#1) [Parameter containing:
tensor([[-0.1875],
        [ 0.0791],
        [-0.3082]], requires_grad=True)]

In [32]:
# The layer's weight is already an nn.Parameter, which is why it was listed above
type(t.a.weight)

torch.nn.parameter.Parameter