In [1]:
DATA_PATH = "D:/github/AML_DS_1/data/collaborative-filtering/"

In [2]:
import pandas as pd
import numpy as np

In [310]:
test = pd.read_csv(DATA_PATH + 'test.csv')
train = pd.read_csv(DATA_PATH + 'train.csv')

In [7]:
ratings = train['rating'].values
userIds = train['userId'].values
itemIds = train['movieId'].values

1. 
Implement a basic Collaborative Filtering model. Implement coordinate descend with the manual update of the parameters (use of tools that calculate gradients automatically is not allowed). Preferably use numpy and scipy.sparse. (50 points)

In [8]:
from scipy.sparse import coo_matrix

n_users = np.max(userIds) + 1
n_items = np.max(itemIds) + 1

R = coo_matrix((ratings, (userIds, itemIds)), shape=(n_users, n_items))

In [319]:
n_features = 15

P = np.random.random((n_users,n_features))
Q = np.random.random((n_items,n_features))

lr = 160
l2 = 0.000001

#train loop
for e in range(250):
    P_tau = P[userIds,:]
    Q_tau = Q[itemIds,:]
    pred = np.sum(P_tau * Q_tau, axis = 1)#np.inner(P_tau, Q_tau)
    R_hat = coo_matrix((pred, (userIds, itemIds)), shape=(n_users, n_items))
    MSE = np.sum(np.square(ratings - pred))/ratings.shape[0] + l2 * (np.sum(np.square(Q)) + np.sum(np.square(P)))
    P -= lr * ((R_hat - R) @ Q)/ratings.shape[0] + l2 * np.square(P)
    Q -= lr * ((R_hat - R).T @ P)/ratings.shape[0] + l2 * np.square(Q)
    lr *= 0.998
    print(f"epoch {e}: MSE = {MSE}, lr = {lr}")

epoch 0: MSE = 2.475704440397803, lr = 159.68
epoch 1: MSE = 1.9950555246905843, lr = 159.36064000000002
epoch 2: MSE = 1.8442224802676357, lr = 159.04191872
epoch 3: MSE = 1.7595724546028126, lr = 158.72383488256
epoch 4: MSE = 1.7027402143219432, lr = 158.40638721279487
epoch 5: MSE = 1.6612659968485277, lr = 158.08957443836928
epoch 6: MSE = 1.6289896252283829, lr = 157.77339528949256
epoch 7: MSE = 1.603070558439236, lr = 157.45784849891356
epoch 8: MSE = 1.5814257759039845, lr = 157.14293280191575
epoch 9: MSE = 1.5631063898598156, lr = 156.8286469363119
epoch 10: MSE = 1.5471443504771238, lr = 156.51498964243928
epoch 11: MSE = 1.5331799485246147, lr = 156.20195966315438
epoch 12: MSE = 1.5206794770324605, lr = 155.88955574382808
epoch 13: MSE = 1.5095087181452385, lr = 155.57777663234043
epoch 14: MSE = 1.4993393487733457, lr = 155.26662107907575
epoch 15: MSE = 1.4901298303935901, lr = 154.9560878369176
epoch 16: MSE = 1.481660712275804, lr = 154.64617566124375
epoch 17: MSE = 

epoch 139: MSE = 1.3238198056132817, lr = 120.89149906048958
epoch 140: MSE = 1.3235484235617596, lr = 120.6497160623686
epoch 141: MSE = 1.3232791469511822, lr = 120.40841663024386
epoch 142: MSE = 1.3230119235028288, lr = 120.16759979698338
epoch 143: MSE = 1.32274670233552, lr = 119.92726459738941
epoch 144: MSE = 1.3224834339191984, lr = 119.68741006819464
epoch 145: MSE = 1.322222070030357, lr = 119.44803524805825
epoch 146: MSE = 1.3219625637092212, lr = 119.20913917756214
epoch 147: MSE = 1.3217048692186089, lr = 118.97072089920701
epoch 148: MSE = 1.321448942004403, lr = 118.7327794574086
epoch 149: MSE = 1.3211947386575535, lr = 118.49531389849378
epoch 150: MSE = 1.3209422168775442, lr = 118.25832327069679
epoch 151: MSE = 1.3206913354372731, lr = 118.0218066241554
epoch 152: MSE = 1.3204420541492685, lr = 117.78576301090709
epoch 153: MSE = 1.320194333833193, lr = 117.55019148488527
epoch 154: MSE = 1.319948136284581, lr = 117.3150911019155
epoch 155: MSE = 1.319703424244755

In [491]:
ratings_test = test['rating'].values
userIds_test = test['userId'].values
itemIds_test = test['movieId'].values
P_tau = P[userIds_test,:]
Q_tau = Q[itemIds_test,:]
pred = np.sum(P_tau * Q_tau, axis = 1)
MSE_test_MF = np.sum(np.square(ratings_test - pred))/ratings_test.shape[0]


MSE on test dataset = 0.7666636992135833


Implement a Deep Learning based Collaborative Filtering model. (20 points)

In [349]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import pandas as pd

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

batch_sz = 128

In [351]:
n_samples = len(ratings)

In [353]:
batches = []

#Create batches
for i in range(0, n_samples, batch_sz):
    limit =  min(i + batch_sz, n_samples)
    users_batch, movies_batch, rates_batch = userIds[i: limit], itemIds[i: limit], ratings[i: limit]
    batches.append((torch.tensor(users_batch, dtype=torch.long), torch.tensor(movies_batch, dtype=torch.long),
                  torch.tensor(rates_batch, dtype=torch.float)))

In [397]:
class RecommenderNet(nn.Module):
    def __init__(self, n_users, n_movies, n_factors=50, embedding_dropout=0.02, dropout_rate=0.2):
        super().__init__()

        self.u = nn.Embedding(n_users, n_factors)
        self.m = nn.Embedding(n_movies, n_factors)
        self.drop = nn.Dropout(embedding_dropout)
        self.hidden = nn.Sequential(nn.Linear(2*n_factors, n_factors*4),
                                nn.ReLU(),
                                nn.Dropout(0.15),
                                nn.Linear(n_factors*4, 2*n_factors),
                                nn.ReLU())#,
#                                 nn.Dropout(0.15),
#                                 nn.Linear(150, n_factors*2),
#                                 nn.ReLU(),
#                                 nn.Dropout(0.15))
        self.fc = nn.Linear(n_factors*2, 1)
        self._init()

    def forward(self, users, movies, minmax=[1,5]):
        features = torch.cat([self.u(users), self.m(movies)], dim=1)
        x = self.drop(features)
        x = self.hidden(x)
        out = torch.sigmoid(self.fc(x))

        if minmax is not None: #Scale the output to [1,5]
            min_rating, max_rating = minmax
            out = (max_rating - min_rating)*out + min_rating
        return out

    def _init(self):
        """
        Initialize embeddings and hidden layers weights with xavier.
        """
        def init(m):
            if type(m) == nn.Linear:
                torch.nn.init.xavier_uniform_(m.weight)
                m.bias.data.fill_(0.01)

        self.u.weight.data.uniform_(-0.05, 0.05)
        self.m.weight.data.uniform_(-0.05, 0.05)
        self.hidden.apply(init)
        init(self.fc)

In [401]:
net = RecommenderNet(n_factors = 20, n_users=n_users, n_movies=n_items).to(device)
net

RecommenderNet(
  (u): Embedding(6744, 20)
  (m): Embedding(118697, 20)
  (drop): Dropout(p=0.02, inplace=False)
  (hidden): Sequential(
    (0): Linear(in_features=40, out_features=80, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=80, out_features=40, bias=True)
    (4): ReLU()
  )
  (fc): Linear(in_features=40, out_features=1, bias=True)
)

In [402]:
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(net.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.3, patience=2)

In [403]:
epochs = 20

for epoch in range(epochs):
    train_loss = 0
    c = 0
    for users_batch, movies_batch, rates_batch in batches:
        net.zero_grad()
        out = net(users_batch.to(device), movies_batch.to(device), [1, 5]).squeeze()
        loss = criterion(rates_batch.to(device), out)

        loss.backward()
        optimizer.step()
        train_loss += loss
        
        c += 1
    scheduler.step(loss)
    print("Loss at epoch {} = {}".format(epoch, train_loss/c))
#     print("Last Loss = {}".format(loss.item()))

Loss at epoch 0 = 0.8992513418197632
Loss at epoch 1 = 0.7728310823440552
Loss at epoch 2 = 0.7299784421920776
Loss at epoch 3 = 0.7046374678611755
Loss at epoch 4 = 0.6911947131156921
Loss at epoch 5 = 0.6751245856285095
Loss at epoch 6 = 0.6543981432914734
Loss at epoch 7 = 0.6439195275306702
Loss at epoch 8 = 0.6360467076301575
Loss at epoch 9 = 0.628823459148407
Loss at epoch 10 = 0.6223900318145752
Loss at epoch 11 = 0.6165963411331177
Loss at epoch 12 = 0.6112933158874512
Loss at epoch 13 = 0.6054704189300537
Loss at epoch 14 = 0.6001187562942505
Loss at epoch 15 = 0.5941504240036011
Loss at epoch 16 = 0.5902841091156006
Loss at epoch 17 = 0.5878979563713074
Loss at epoch 18 = 0.5860994458198547
Loss at epoch 19 = 0.5841062664985657


In [490]:
ratings_test = test['rating'].values
userIds_test = test['userId'].values
itemIds_test = test['movieId'].values
pred = net.forward(torch.tensor(userIds_test).to(device),torch.tensor(itemIds_test).to(device)).cpu().detach().numpy()
pred = np.array([s[0] for s in pred])
MSE_test_nn = np.sum(np.square(ratings_test - pred))/ratings_test.shape[0]


MSE on test dataset = 0.7211063910365202


Compare the Deep learning-based approach and basic Collaborative Filtering model. (10 points)

In [492]:
print(f"basic Collaborative Filtering model MSE on test dataset = {MSE_test_MF}")
print(f"Deep learning model MSE on test dataset = {MSE_test_nn}")

basic Collaborative Filtering model MSE on test dataset = 0.7666636992135833
Deep learning model MSE on test dataset = 0.7211063910365202


4. Implement a method that is going to take a user id as an input and return a top 5 recommended movies for that given user. (5 points)

In [481]:
def pred(user_id, method = 'MF'):
    if method == 'MF':
        p = P[user_id]
        scores = np.inner(p, Q)
    if method == 'NN':
#         scores = []
#         for mid in range(n_items):
        scores = net.forward(torch.tensor([user_id for _ in range(n_items)]).to(device),torch.tensor(list(range(n_items))).to(device)).cpu().detach().numpy()
        scores = np.array([s[0] for s in scores])
    ids = np.argsort(-np.array(scores))
    print(ids[:5])
    print(scores[ids][:5])
    return ids[:5]