# Homework 2 part a: Logistic MF with Pytorch

Given a dataset of Amazon book ratings, write PyTorch code that implements the following model:
$$\hat{y}_{ij} = \mathrm{sigmoid}(u_i \cdot v_j + b_i + c_j)$$

This is a binary dataset (ratings are 0s or 1s). The loss function for this model is log loss, also known as binary cross-entropy.
Also:
* Print training loss, validation loss and validation accuracy at every training iteration.

Write your code based on the notebook from the class https://github.com/yanneta/ML-notebooks/blob/master/MF_with_pytorch.ipynb

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [2]:
PATH = Path("data/")

In [3]:
# Load the training and validation rating tables from CSV files under PATH.
train = pd.read_csv(PATH.joinpath("train_books_ratings.csv"))
valid = pd.read_csv(PATH.joinpath("valid_books_ratings.csv"))

In [4]:
train.head()

Unnamed: 0,user,item,rating,timestamp
0,A2IIIDRK3PRRZY,0000000116,0,1395619200
1,A9KTKY6BUR8U6,0000013714,0,1357516800
2,A35OP02LIXZ84E,0000477141,0,1399939200
3,A9WX8DK93SN5,000100039X,0,1385683200
4,A36JQ1WC5JQPFQ,000100039X,0,1391990400


In [5]:
valid.head()

Unnamed: 0,user,item,rating,timestamp
0,A2E2F4MLVYDGEQ,000100039X,0,1393286400
1,A386A9WE42M4PG,000100039X,0,1371772800
2,A1OGQA984MTKBH,000100039X,0,1372118400
3,A1VVBHGM8DFIZ4,000100039X,0,1387152000
4,AD6E4Y092Y4KP,000100039X,0,1392336000


In [6]:
# Encode training users as contiguous integer indices, numbered in sorted
# order of the raw user id (np.unique already returns its values sorted).
train_user_ids = np.unique(train["user"].values)
num_users = train_user_ids.shape[0]
userid2idx = dict(zip(train_user_ids, range(num_users)))
train["user"] = train["user"].map(lambda raw_id: userid2idx[raw_id])

In [7]:
# Encode training items (books) as contiguous integer indices, numbered in
# sorted order of the raw item id (np.unique already returns sorted values).
train_movie_ids = np.unique(train["item"].values)
num_items = train_movie_ids.shape[0]
movieid2idx = dict(zip(train_movie_ids, range(num_items)))
train["item"] = train["item"].map(lambda raw_id: movieid2idx[raw_id])

In [8]:
# Encode validation users and items with the vocabularies built from the
# training set; ids that never appeared in training are mapped to -1.
valid["user"] = valid["user"].map(lambda raw_id: userid2idx.get(raw_id, -1))
valid["item"] = valid["item"].map(lambda raw_id: movieid2idx.get(raw_id, -1))

# Keep only rows whose user and item were both seen in training.
seen_in_training = (valid["user"] >= 0) & (valid["item"] >= 0)
valid = valid[seen_in_training].copy()

In [9]:
train.head()

Unnamed: 0,user,item,rating,timestamp
0,527409,0,0,1395619200
1,1059073,1,0,1357516800
2,750064,2,0,1399939200
3,1062362,3,0,1385683200
4,758289,3,0,1391990400


In [10]:
valid.head()

Unnamed: 0,user,item,rating,timestamp
4,1093526,3,0,1392336000
5,1172263,3,0,1022025600
6,1114879,13,0,1277337600
19,6532,34,0,1336867200
24,416115,34,0,1390176000


In [11]:
# Define the logistic matrix factorization model
class MF(nn.Module):
    """Logistic matrix factorization with per-user and per-item biases.

    The predicted probability for a (user, item) pair is
    ``sigmoid(u_i . v_j + b_i + c_j)``, where ``u_i`` and ``v_j`` are learned
    embedding vectors and ``b_i`` and ``c_j`` are learned scalar biases.

    Args:
        num_users: Number of rows in the user embedding and user bias tables.
        num_items: Number of rows in the item embedding and item bias tables.
        emb_size: Dimensionality of the user and item embedding vectors.
    """

    def __init__(self, num_users, num_items, emb_size=100):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_emb = nn.Embedding(num_items, emb_size)
        self.item_bias = nn.Embedding(num_items, 1)

        # Overwrite the default embedding initialization with small uniform
        # values: embeddings in [0, 0.05), biases in [-0.01, 0.01).
        nn.init.uniform_(self.user_emb.weight, 0, 0.05)
        nn.init.uniform_(self.user_bias.weight, -0.01, 0.01)
        nn.init.uniform_(self.item_emb.weight, 0, 0.05)
        nn.init.uniform_(self.item_bias.weight, -0.01, 0.01)

    def forward(self, u: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
        """Return the predicted probability for each (user, item) index pair.

        Args:
            u: Integer tensor of user indices.
            v: Integer tensor of item indices, with the same shape as ``u``.

        Returns:
            Tensor of probabilities in (0, 1) with the same shape as ``u``.
        """
        U = self.user_emb(u)
        V = self.item_emb(v)
        # Drop only the trailing size-1 bias dimension. A bare squeeze() would
        # also remove any other singleton dimension, such as a batch of one.
        b_u = self.user_bias(u).squeeze(-1)
        b_v = self.item_bias(v).squeeze(-1)
        # Reduce over the embedding (last) dimension so that index tensors of
        # any shape are supported, not just 1-D batches.
        z = (U * V).sum(-1) + b_u + b_v
        return torch.sigmoid(z)

In [12]:
# Define the validation loss and accuracy function
def valid_loss(model, data=None):
    """Compute binary cross-entropy loss and accuracy on a validation set.

    The model is put in eval mode and the forward pass runs under
    ``torch.no_grad()``, so no autograd graph is built for the evaluation.

    Args:
        model: Module called as ``model(users, items)`` with two integer
            index tensors; it must return one probability per row.
        data: DataFrame with ``user``, ``item`` and ``rating`` columns.
            Defaults to the module-level ``valid`` DataFrame.

    Returns:
        Tuple ``(loss, accuracy)``. ``loss`` is the mean binary cross-entropy
        as a Python float. ``accuracy`` is the fraction of rows whose rounded
        prediction equals the rating, as a 0-dim float tensor.
    """
    if data is None:
        data = valid

    # Prepare validation data
    model.eval()
    users = torch.tensor(data["user"].to_numpy(), dtype=torch.long)
    items = torch.tensor(data["item"].to_numpy(), dtype=torch.long)
    ratings = torch.tensor(data["rating"].to_numpy(), dtype=torch.float32)

    with torch.no_grad():
        # Predicted probabilities and validation loss
        y_hat = model(users, items)
        loss = F.binary_cross_entropy(y_hat, ratings)

        # Accuracy: round each probability to 0 or 1 and compare with the rating
        y_pred = torch.round(y_hat)
        num_correct = torch.sum(torch.eq(y_pred, ratings)).float()
        testaccuracy = torch.div(num_correct, ratings.size()[0])
    return loss.item(), testaccuracy

In [13]:
# Define the train function
def train_epocs(model, epochs=10, lr=0.01, wd=0.0):
    """Train ``model`` with full-batch Adam and print metrics after each epoch.

    Every epoch performs one gradient step on the entire module-level
    ``train`` DataFrame, then evaluates the model with ``valid_loss``.
    The printed training loss is the value computed before that epoch's
    optimizer step; the validation metrics are computed after it.

    Args:
        model: Module called as ``model(users, items)`` that returns one
            probability per row.
        epochs: Number of full-batch gradient steps to run.
        lr: Learning rate passed to Adam.
        wd: Weight decay passed to Adam.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

    # The training data is the same in every epoch, so build its tensors
    # once instead of converting the DataFrame on each iteration.
    train_users = torch.tensor(train["user"].to_numpy(), dtype=torch.long)
    train_items = torch.tensor(train["item"].to_numpy(), dtype=torch.long)
    train_ratings = torch.tensor(train["rating"].to_numpy(), dtype=torch.float32)

    for _ in range(epochs):
        # Switch back to train mode on every epoch
        model.train()

        # Get training predictions
        y_hat_train = model(train_users, train_items)
        loss = F.binary_cross_entropy(y_hat_train, train_ratings)

        # Zero gradients and update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate validation loss and accuracy
        testloss, testaccuracy = valid_loss(model)
        print("train loss: %.3f, valid loss: %.3f" % (loss.item(), testloss))
        print("valid accuracy: %.3f" % testaccuracy)
        print("---------------------------------------")

#### First, I'll try with a small learning rate (0.01)

In [14]:
# Instantiate a fresh model with 100-dimensional user and item embeddings
model = MF(num_users, num_items, emb_size=100)

In [15]:
# Train for 15 epochs with a small learning rate (0.01) and weight decay 1e-5
train_epocs(model, epochs=15, lr=0.01, wd=1e-5)

train loss: 0.694, valid loss: 0.689
valid accuracy: 0.534
---------------------------------------
train loss: 0.681, valid loss: 0.684
valid accuracy: 0.742
---------------------------------------
train loss: 0.679, valid loss: 0.680
valid accuracy: 0.754
---------------------------------------
train loss: 0.673, valid loss: 0.676
valid accuracy: 0.757
---------------------------------------
train loss: 0.666, valid loss: 0.673
valid accuracy: 0.759
---------------------------------------
train loss: 0.654, valid loss: 0.670
valid accuracy: 0.760
---------------------------------------
train loss: 0.651, valid loss: 0.668
valid accuracy: 0.762
---------------------------------------
train loss: 0.650, valid loss: 0.665
valid accuracy: 0.765
---------------------------------------
train loss: 0.649, valid loss: 0.663
valid accuracy: 0.768
---------------------------------------
train loss: 0.649, valid loss: 0.660
valid accuracy: 0.771
---------------------------------------
train loss

#### Next, I'll try with a larger learning rate (0.1).

In [16]:
# Re-create the model so the second run starts from newly initialized weights
model = MF(num_users, num_items, emb_size=100)

In [17]:
# Train for 10 epochs with a larger learning rate (0.1) and weight decay 1e-5
train_epocs(model, epochs=10, lr=0.1, wd=1e-5)

train loss: 0.694, valid loss: 0.722
valid accuracy: 0.429
---------------------------------------
train loss: 0.674, valid loss: 0.639
valid accuracy: 0.765
---------------------------------------
train loss: 0.625, valid loss: 0.639
valid accuracy: 0.742
---------------------------------------
train loss: 0.643, valid loss: 0.655
valid accuracy: 0.656
---------------------------------------
train loss: 0.664, valid loss: 0.641
valid accuracy: 0.675
---------------------------------------
train loss: 0.649, valid loss: 0.623
valid accuracy: 0.762
---------------------------------------
train loss: 0.632, valid loss: 0.617
valid accuracy: 0.756
---------------------------------------
train loss: 0.612, valid loss: 0.619
valid accuracy: 0.681
---------------------------------------
train loss: 0.604, valid loss: 0.619
valid accuracy: 0.678
---------------------------------------
train loss: 0.603, valid loss: 0.615
valid accuracy: 0.748
---------------------------------------


#### The smaller learning rate produced a more consistent, gradual trend towards a smaller loss and higher accuracy (as expected). 