In [1]:
import pandas as pd
import numpy as np
import math
import copy

from sklearn.model_selection import train_test_split

import torch
from torch import nn
from torch import optim
from torch import functional as F

# Datasets

In [2]:
ratings = pd.read_csv('./Desktop/UMass/Courses/STAT535/Dataset/ratings.csv')
users = pd.read_csv('./Desktop/UMass/Courses/STAT535/Dataset/users.csv')

In [3]:
def create_datasets(ratings):
    unique_users = ratings.userID.unique()
    user_index = {old: new for new, old in enumerate(unique_users)}
    new_users = ratings.userID.map(user_index)
    
    unique_movies = ratings.movieID.unique()
    movie_index = {old: new for new, old in enumerate(unique_movies)}
    new_movies = ratings.movieID.map(movie_index)
    
    num_users = unique_users.shape[0]
    num_movies = unique_movies.shape[0]
    
    X = pd.DataFrame({'userID': new_users, 'movieID': new_movies})
    y = ratings[['rating']]
    
    return num_users, num_movies, X, y, (user_index, movie_index)

In [4]:
class ReviewsIterator:    
    def __init__(self, X, y, batch_size=32, shuffle=True):
        X, y = np.asarray(X), np.asarray(y)
        
        if shuffle:
            index = np.random.permutation(X.shape[0])
            X, y = X[index], y[index]
            
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.n_batches = int(math.ceil(X.shape[0] // batch_size))
        self._current = 0
        
    def __iter__(self):
        return self
    
    def __next__(self):
        return self.next()
    
    def next(self):
        if self._current >= self.n_batches:
            raise StopIteration()
        k = self._current
        self._current += 1
        bs = self.batch_size
        return self.X[k*bs:(k + 1)*bs], self.y[k*bs:(k + 1)*bs]

In [5]:
def batches(X, y, bs=32, shuffle=True):
    for xb, yb in ReviewsIterator(X, y, bs, shuffle):
        xb = torch.LongTensor(xb)
        yb = torch.FloatTensor(yb)
        yield xb, yb.view(-1, 1)

In [6]:
n, m, X, y, _ = create_datasets(ratings)
minmax = ratings.rating.min(), ratings.rating.max()
print(f'Embeddings: {n} users, {m} movies')
print(f'Dataset shape: {X.shape}')
print(f'Target shape: {y.shape}')

Embeddings: 2353 users, 1465 movies
Dataset shape: (31620, 2)
Target shape: (31620, 1)


In [7]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)
datasets = {'train': (X_train, y_train), 'val': (X_valid, y_valid)}
dataset_sizes = {'train': len(X_train), 'val': len(X_valid)}

# Neural network

In [8]:
class EmbeddingNet(nn.Module):
    def __init__(self, n_users, n_movies, n_factors=50, embedding_dropout=0.02, hidden=10, dropouts=0.2):
        super().__init__()
        n_last = hidden[-1]
            
        self.u = nn.Embedding(n_users, n_factors)
        self.m = nn.Embedding(n_movies, n_factors)
        self.drop = nn.Dropout(embedding_dropout)
        self.hidden = nn.Sequential(*list(self.gen_layers(n_factors * 2, hidden, dropouts)))
        self.fc = self.xavier_init(nn.Linear(n_last, 1))
        self.xavier_all_hidden()
        
    def xavier_all_hidden(self):                
        self.u.weight.data.uniform_(-0.05, 0.05)
        self.m.weight.data.uniform_(-0.05, 0.05)
        self.hidden.apply(self.xavier_init)
        
    def xavier_init(self, layer):
        if type(layer) == nn.Linear:
            torch.nn.init.xavier_uniform_(layer.weight)
            layer.bias.data.fill_(0.01)
        return layer
    
    def gen_layers(self, n_in, hidden, dropouts):
        torch.manual_seed(134)
        for n_out, rate in zip(hidden, dropouts):
            yield nn.Linear(n_in, n_out)
            yield nn.ReLU()
            if rate is not None and rate > 0.:
                yield nn.Dropout(rate)
            n_in = n_out
        
    def forward(self, users, movies, minmax=None):
        features = torch.cat([self.u(users), self.m(movies)], dim=1)
        x = self.drop(features)
        x = self.hidden(x)
        out = torch.sigmoid(self.fc(x))
        if minmax is not None:
            min_rating, max_rating = minmax
            out = out*(max_rating - min_rating + 1) + min_rating - 0.5
        return out
#         return features

In [9]:
net = EmbeddingNet(n_users=n, n_movies=m, 
    n_factors=150, hidden=[500, 500, 500], 
    embedding_dropout=0.05, dropouts=[0.5, 0.5, 0.25])
net

EmbeddingNet(
  (u): Embedding(2353, 150)
  (m): Embedding(1465, 150)
  (drop): Dropout(p=0.05, inplace=False)
  (hidden): Sequential(
    (0): Linear(in_features=300, out_features=500, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=500, out_features=500, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=500, out_features=500, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.25, inplace=False)
  )
  (fc): Linear(in_features=500, out_features=1, bias=True)
)

In [28]:
torch.manual_seed(134)
np.random.seed(0)

lr = 1e-3
wd = 1e-5
bs = 2000
n_epochs = 100
patience = 10
no_improvements = 0
best_loss = np.inf
best_weights = None

net = EmbeddingNet(
    n_users=n, n_movies=m, 
    n_factors=150, hidden=[500, 500, 500], 
    embedding_dropout=0.05, dropouts=[0.5, 0.5, 0.25])

criterion = nn.MSELoss(reduction='sum')
optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=wd)
# iterations_per_epoch = int(math.ceil(dataset_sizes['train'] // bs))

for epoch in range(n_epochs):
    stats = {'epoch': epoch + 1, 'total': n_epochs}
    
    for phase in ('train', 'val'):
        training = phase == 'train'
        running_loss = 0.0
#         n_batches = 0
        
        for batch in batches(*datasets[phase], shuffle=training, bs=bs):
            x_batch, y_batch = batch
            optimizer.zero_grad()
        
            # compute gradients only during 'train' phase
            with torch.set_grad_enabled(training):
                outputs = net(x_batch[:, 0], x_batch[:, 1], minmax)
                loss = criterion(outputs, y_batch)
                
                # don't update weights and rates when in 'val' phase
                if training:
                    loss.backward()
                    optimizer.step()
            running_loss += loss.item()
            
        epoch_loss = running_loss / dataset_sizes[phase]
        stats[phase] = epoch_loss
        
        # early stopping: save weights of the best model so far
        if phase == 'val':
            if epoch_loss < best_loss:
                print('loss improvement on epoch: %d' % (epoch + 1))
                best_loss = epoch_loss
                best_weights = copy.deepcopy(net.state_dict())
                no_improvements = 0
            else:
                no_improvements += 1
                
    print('[{epoch:03d}/{total:03d}] train: {train:.4f} - val: {val:.4f}'.format(**stats))
    if no_improvements >= patience:
        print('early stopping after epoch {epoch:03d}'.format(**stats))
        break

loss improvement on epoch: 1
[001/100] train: 1.2046 - val: 1.1696
loss improvement on epoch: 2
[002/100] train: 1.0185 - val: 0.9674
loss improvement on epoch: 3
[003/100] train: 0.8493 - val: 0.9084
loss improvement on epoch: 4
[004/100] train: 0.7570 - val: 0.8597
[005/100] train: 0.7107 - val: 0.8602
loss improvement on epoch: 6
[006/100] train: 0.6836 - val: 0.8566
loss improvement on epoch: 7
[007/100] train: 0.6615 - val: 0.8546
[008/100] train: 0.6386 - val: 0.8713
[009/100] train: 0.6203 - val: 0.8701
[010/100] train: 0.6075 - val: 0.8801
[011/100] train: 0.5944 - val: 0.8758
[012/100] train: 0.5791 - val: 0.8869
[013/100] train: 0.5633 - val: 0.8952
[014/100] train: 0.5419 - val: 0.8930
[015/100] train: 0.5166 - val: 0.9362
[016/100] train: 0.4836 - val: 0.9382
[017/100] train: 0.4513 - val: 0.9664
early stopping after epoch 017


# Mynet

In [21]:
class myNet(nn.Module):
    def __init__(self):
        super(myNet, self).__init__()
        self.EMB_OUTPUT_DIM = 150
        self.CATEGORY_DIM = 6
        self.L1 = self.EMB_OUTPUT_DIM * 2
        self.L2 = 500
        self.L3 = 500
        self.L4 = 500
        self.L5 = 1
        self.dropout = nn.Dropout(p=0.02, inplace=False)
        
        self.emb1 = nn.Embedding(2353, self.EMB_OUTPUT_DIM)
        self.emb3 = nn.Embedding(1465, self.EMB_OUTPUT_DIM)
        self.hidden = nn.Sequential(
            nn.Linear(self.L1, self.L2),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(self.L2, self.L3),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(self.L3, self.L4),
            nn.ReLU(),
            nn.Dropout(p=0.25)
        )
        self.emb1.weight.data.uniform_(-0.05, 0.05)
        self.emb3.weight.data.uniform_(-0.05, 0.05) 
#         self.fc1 = self.dropout(nn.Linear(self.L1, self.L2))
#         self.fc2 = self.dropout(nn.Linear(self.L2, self.L3))
#         self.fc3 = nn.Linear(self.L3, self.L4)
        self.fc4 = self.init(nn.Linear(self.L4, 1))
        self.hidden.apply(self.init)
        
    def init(self, m):
        if type(m) == nn.Linear:
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)
        return m

    def forward(self, users, movies):
#         categorical_x = x[:, :self.CATEGORY_DIM].long()
#         numerical_x = x[:, self.CATEGORY_DIM:].float()
#         numerical_x.requires_grad = False
#         embedding_list = [self.embedding[i](categorical_x[:, i]) for i in range(6)]
        embedding_list = [self.emb1(users), self.emb3(movies)]
        embedding_x = torch.cat(embedding_list, dim=1)    
# #         feature_x = torch.cat((embedding_x, numerical_x), dim=-1)
        feature_x = self.dropout(embedding_x)
        output_x = self.hidden(feature_x)
        output_x = torch.sigmoid(self.fc4(output_x))
        output_x = output_x * (5 - 1 + 1) + 1 -0.5
        return output_x

#### mynet + non-dataloader + working training code

In [29]:
torch.manual_seed(134)
np.random.seed(0)

lr = 1e-3
wd = 1e-5
bs = 2000
n_epochs = 100
patience = 10
no_improvements = 0
best_loss = np.inf
best_weights = None

# net = EmbeddingNet(
#     n_users=n, n_movies=m, 
#     n_factors=150, hidden=[500, 500, 500], 
#     embedding_dropout=0.05, dropouts=[0.5, 0.5, 0.25])

net = myNet()

criterion = nn.MSELoss(reduction='sum')
optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=wd)
# iterations_per_epoch = int(math.ceil(dataset_sizes['train'] // bs))

for epoch in range(n_epochs):
    stats = {'epoch': epoch + 1, 'total': n_epochs}
    
    for phase in ('train', 'val'):
        training = phase == 'train'
        running_loss = 0.0
#         n_batches = 0
        
        for batch in batches(*datasets[phase], shuffle=training, bs=bs):
            x_batch, y_batch = batch
            optimizer.zero_grad()
        
            # compute gradients only during 'train' phase
            with torch.set_grad_enabled(training):
                outputs = net(x_batch[:, 0], x_batch[:, 1])
                loss = criterion(outputs, y_batch)
                
                # don't update weights and rates when in 'val' phase
                if training:
                    loss.backward()
                    optimizer.step()
            running_loss += loss.item()
            
        epoch_loss = running_loss / dataset_sizes[phase]
        stats[phase] = epoch_loss
        
        # early stopping: save weights of the best model so far
        if phase == 'val':
            if epoch_loss < best_loss:
                print('loss improvement on epoch: %d' % (epoch + 1))
                best_loss = epoch_loss
                best_weights = copy.deepcopy(net.state_dict())
                no_improvements = 0
            else:
                no_improvements += 1
                
    print('[{epoch:03d}/{total:03d}] train: {train:.4f} - val: {val:.4f}'.format(**stats))
    if no_improvements >= patience:
        print('early stopping after epoch {epoch:03d}'.format(**stats))
        break

loss improvement on epoch: 1
[001/100] train: 1.1934 - val: 1.1564
loss improvement on epoch: 2
[002/100] train: 0.9952 - val: 0.9370
loss improvement on epoch: 3
[003/100] train: 0.8261 - val: 0.8868
loss improvement on epoch: 4
[004/100] train: 0.7423 - val: 0.8570
[005/100] train: 0.7045 - val: 0.8611
loss improvement on epoch: 6
[006/100] train: 0.6769 - val: 0.8534
[007/100] train: 0.6523 - val: 0.8670
[008/100] train: 0.6335 - val: 0.8816
[009/100] train: 0.6162 - val: 0.8804
[010/100] train: 0.5978 - val: 0.8819
[011/100] train: 0.5845 - val: 0.8831
[012/100] train: 0.5643 - val: 0.9005
[013/100] train: 0.5444 - val: 0.8968
[014/100] train: 0.5210 - val: 0.9064
[015/100] train: 0.4843 - val: 0.9325
[016/100] train: 0.4500 - val: 0.9615
early stopping after epoch 016


#### mynet + non-dataloader + my training code

In [35]:
net = EmbeddingNet(n_users=2353, n_movies=1465, 
    n_factors=150, hidden=[500, 500, 500], 
    embedding_dropout=0.05, dropouts=[0.5, 0.5, 0.25])

criterion = nn.MSELoss()
learning_rate = 1e-3
EPOCHS = 300
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
# training
# start = time.time()

for epo in range(EPOCHS):
    if (epo + 1) % 10 == 0:
        learning_rate /= 2
    
    running_loss = 0
    running_loss_val = 0
    
    for phase in ('train', 'val'):
        training = phase == 'train'
        
        for i, batch in enumerate(batches(*datasets[phase], shuffle=training, bs=2000)):
    #         temp_training, temp_validation = _data
            # training inputs
    #         inputs, targets = temp_training
            x_batch, y_batch = batch

            optimizer.zero_grad()        
            outputs = net(x_batch[:,0], x_batch[:,1], minmax=[1,5])
            batch_loss = criterion(outputs, y_batch)

            # validation inputs
    #         val_inputs, val_targets = temp_validation

    #         val_outputs = net(val_inputs[:,0], val_inputs[:,1], minmax=[1,5])
    #         batch_loss_val = criterion(val_outputs, val_targets)

            # backpropagation
            if phase == 'train':
                batch_loss.backward()
                optimizer.step()
                running_loss += batch_loss.item()
            else:
                running_loss_val += batch_loss.item()


        epoch_loss = running_loss / (i+1)
        epoch_loss_val = running_loss_val / (i+1)
    #     end = time.time()
        print('Epoch %d: \ntraining loss: %.4f, validation loss: %.4f, time passed: .2fs\n'%
              (epo+1, epoch_loss, epoch_loss_val))

Epoch 1: 
training loss: 1.2541, validation loss: 0.0000, time passed: .2fs

Epoch 1: 
training loss: 5.0162, validation loss: 1.2233, time passed: .2fs

Epoch 2: 
training loss: 1.0679, validation loss: 0.0000, time passed: .2fs

Epoch 2: 
training loss: 4.2715, validation loss: 1.0115, time passed: .2fs

Epoch 3: 
training loss: 0.8914, validation loss: 0.0000, time passed: .2fs

Epoch 3: 
training loss: 3.5655, validation loss: 0.9509, time passed: .2fs

Epoch 4: 
training loss: 0.7982, validation loss: 0.0000, time passed: .2fs

Epoch 4: 
training loss: 3.1926, validation loss: 0.9102, time passed: .2fs

Epoch 5: 
training loss: 0.7497, validation loss: 0.0000, time passed: .2fs

Epoch 5: 
training loss: 2.9988, validation loss: 0.9111, time passed: .2fs

Epoch 6: 
training loss: 0.7180, validation loss: 0.0000, time passed: .2fs

Epoch 6: 
training loss: 2.8721, validation loss: 0.9057, time passed: .2fs

Epoch 7: 
training loss: 0.6951, validation loss: 0.0000, time passed: .2fs


KeyboardInterrupt: 