# RBM

### Setup notebook

In [None]:
# Importing the libraries
import sys, os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import torch.nn.functional as F

from mymods.lauthom import *

### Data

In [None]:
# Query the helper for the 'movies', 'users' and 'ratings' names using the '*/*' pattern.
# NOTE(review): get_path is not defined in this notebook; presumably it is provided by the
# `from mymods.lauthom import *` star import — confirm its signature and what it returns.
get_path('*/*', 'movies')
get_path('*/*', 'users')
get_path('*/*', 'ratings')

In [None]:
def read_file(filename, **kwargs):
    """Find `filename` below ``../../`` and load it with ``pd.read_csv``.

    The file is searched with the glob pattern ``*/*/<filename>`` relative to
    ``../../``; the first match yielded by the glob is the one that is read.

    Args:
        filename: file name (or relative path fragment) appended to ``*/*/``.
        **kwargs: forwarded unchanged to ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        IndexError: when the glob pattern matches nothing.
    """
    from pathlib import Path

    pattern = '*/*/' + str(filename)
    matches = list(Path('../../').glob(pattern))
    first_match = matches[0]  # IndexError here means no file matched
    return pd.read_csv(first_match, **kwargs)

In [None]:
# Importing the dataset
# The files use the two-character separator '::'. pandas treats separators longer than one
# character as regular expressions, which only the Python parsing engine supports; naming the
# engine explicitly avoids the silent fallback and its ParserWarning.
# NOTE(review): the 'unk*' column names are placeholders; their meaning is not established here.
# `movies` and `users` are rebound to plain sets further down in this notebook.
movies = read_file('movies.dat', sep='::', header=None, encoding='latin-1', engine='python',
                   names=['id', 'movie', 'cat'])
users = read_file('users.dat', sep='::', header=None, encoding='latin-1', engine='python',
                  names=['id', 'sex', 'unk1', 'unk2', 'unk3'])
ratings = read_file('ratings.dat', sep='::', header=None, encoding='latin-1', engine='python',
                    names=['user_id', 'movie_id', 'rating', 'unk'])

In [None]:
# Draw 10 random rows from each frame for a quick look at the data.
# In a notebook only the value of the last expression of a cell is displayed.
movies.sample(10)
users.sample(10)
ratings.sample(10)

In [None]:
# Print the column / dtype / non-null summary of each frame.
movies.info()
users.info()
ratings.info()

### Train test sets

In [None]:
# Query the helper for the 'u1' name using the '*/*' pattern.
# NOTE(review): get_path is not defined in this notebook — presumably from `mymods.lauthom`; confirm.
get_path('*/*', 'u1')

In [None]:
# Load the tab-separated u1.base (train) and u1.test (test) files with identical column names.
# NOTE(review): the fourth column is named 'unk'; its meaning is not established in this notebook.
df_train = read_file('../../_data/ml-100k/u1.base', delimiter='\t', header=None, names=['user_id', 'movie_id', 'rating', 'unk'])
df_test = read_file('../../_data/ml-100k/u1.test', delimiter='\t', header=None, names=['user_id', 'movie_id', 'rating', 'unk'])

In [None]:
# Tag every row with its origin so train and test rows can be told apart after they are concatenated.
df_train['test'] = False
df_test['test'] = True

In [None]:
# Stack the train rows on top of the test rows in a single frame, then preview 10 random rows.
df = pd.concat([df_train, df_test])
df.sample(10)

In [None]:
# Print the column / dtype / non-null summary of the combined frame.
df.info()

In [None]:
# example ratings for user = 1
user_id = 1
mask = df['user_id'] == user_id  # boolean row selector for this user
trn = df.loc[mask, :]            # all rows (train and test) of the selected user
trn.sample(10)                   # preview 10 random rows of that selection

### Unique users and movies in both train and test set

In [None]:
# Distinct user and movie ids over the combined train + test frame.
# This rebinds `users` and `movies`, which earlier held the frames read from users.dat / movies.dat.
users = set(df['user_id'])
movies = set(df['movie_id'])
nb_users, nb_movies = len(users), len(movies)
nb_users, nb_movies

In [None]:
# Prepare for pivot and split
# Shift the user ids of the training rows by 99000 so that the train and test rows of one
# user end up on different index values of the pivot below.
# assumes every original user_id is below 99000 — TODO confirm
df.loc[df['test']==False, 'user_id'] = df.loc[df['test']==False, 'user_id'].values + 99000

# Pivot for RBM model
# One row per (shifted) user id, one column per movie id, cell = rating; missing pairs become NaN.
pv = df.pivot(index='user_id', columns='movie_id', values='rating')

# Change rating: negative/positive:
# nan: -1, 1-2: 0, 3-5: 1
mask_null = pv.isnull()  # cells without a rating
mask_3 = pv>=3           # NaN compares False, so unrated cells are part of ~mask_3

# Order matters: ~mask_3 also covers the unrated cells, so they are first written as 0
# and only the final assignment turns them into -1.
pv[mask_3] = 1
pv[~mask_3] = 0
pv[mask_null] = -1

# Split train test
pv_train = pv.loc[pv.index > 99000, :]     # rows whose id was shifted above = training rows
pv_train.index = pv_train.index - 99000    # undo the shift to restore the original user ids
pv_test = pv.loc[pv.index < 99000, :]      # unshifted ids = test rows

pv_train.sample(10)
pv_test.sample(10)

In [None]:
# Print the column / dtype / non-null summary of both pivoted frames.
pv_train.info()
pv_test.info()

In [None]:
# Convert the pivoted frames to integer NumPy arrays (rows = users, columns = movies).
np_train = np.array(pv_train, dtype='int')
np_test = np.array(pv_test, dtype='int')

# Show the array shapes (only the last expression is displayed in a notebook cell).
np_train.shape
np_test.shape

#### Sanity check np.array & rating

In [None]:
# Count how often each encoded value occurs in the train and in the test matrix.
# The value -1 is reported under the label 'nan'.
# The "test" lines read np_test; they previously repeated the np_train counts by mistake.
print('rank train:', 'nan', (np_train == -1).sum())
print('rank test:', 'nan', (np_test == -1).sum())

for r in range(6):
    print('rank train:', r, (np_train == r).sum())
    print('rank test:', r, (np_test == r).sum())

### Converting the data into Torch tensors

In [None]:
# Wrap the integer arrays in float tensors for the models below.
training_set = torch.FloatTensor(np_train)
test_set = torch.FloatTensor(np_test)

In [None]:
# Show the tensor shapes (only the last expression is displayed in a notebook cell).
training_set.shape
test_set.shape

### Build model

args: nv = number of visible neurons (input layer), nh = number of hidden neurons

In [None]:
class RBM():
    """Restricted Boltzmann machine with `nv` visible and `nh` hidden units.

    Attributes:
        W: weight matrix of shape (nh, nv).
        a: hidden-layer bias of shape (1, nh).
        b: visible-layer bias of shape (1, nv).
    """

    def __init__(self, nv, nh):
        """Draw weights and both biases from a standard normal distribution."""
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)

    def probability(self, activation):
        """Return (sigmoid probabilities, Bernoulli sample drawn from them)."""
        probs = torch.sigmoid(activation)
        sample = torch.bernoulli(probs)
        return probs, sample

    def activation(self, inputs, weight, bias):
        """Return ``inputs @ weight`` with `bias` broadcast over the rows."""
        projected = torch.mm(inputs, weight)
        return projected + bias.expand_as(projected)

    def sample_h(self, x):
        """Return probabilities and a binary sample of the hidden layer given visible `x`."""
        pre_activation = self.activation(x, self.W.t(), self.a)
        return self.probability(pre_activation)

    def sample_v(self, y):
        """Return probabilities and a binary sample of the visible layer given hidden `y`."""
        pre_activation = self.activation(y, self.W, self.b)
        return self.probability(pre_activation)

    def train(self, v0, vk, ph0, phk):
        """Update W, b and a in place from the start state (v0, ph0) and the state after k steps (vk, phk)."""
        start_stats = torch.mm(v0.t(), ph0)
        final_stats = torch.mm(vk.t(), phk)
        delta = start_stats - final_stats  # shaped (nv, nh); transposed to match W
        self.W += delta.t()
        self.b += torch.sum(v0 - vk, 0)
        self.a += torch.sum(ph0 - phk, 0)

In [None]:
# Hyper-parameters of the RBM and its training loop.
N_VIS = len(training_set[0])  # visible units = length of one row of training_set
N_HID = 100                   # hidden units
N_EPOCH = 20                  # passes over all batches (rebound to 50 for the auto-encoder later on)
BATCH_SIZE = 100              # rows per mini-batch
N_WALKS = 15                  # sampling steps per batch in the training loop

# Bare tuple expression: it has no effect and is not displayed, because it is not the last line of the cell.
'W.shape:', N_HID, N_VIS

# Create the model; RBM defines no __repr__, so print shows the default object representation.
rbm = RBM(N_VIS, N_HID)
print(rbm)

### Train model

In [None]:
import random

# Shuffle the user row indices once and cut them into consecutive mini-batches.
id_ = list(range(nb_users))
ran_idx = random.sample(id_, len(id_))

# Ceiling division: the previous `len // BATCH_SIZE + 1` produced an extra, empty batch
# whenever the number of users was an exact multiple of BATCH_SIZE.
batches = range((len(ran_idx) + BATCH_SIZE - 1) // BATCH_SIZE)
loader = [ran_idx[b*BATCH_SIZE:(b+1)*BATCH_SIZE] for b in batches]

# Last batch
len(loader[-1]) if loader else 0

In [None]:
# train on rated movies - exclude unrated movies (ratings with value -1)
# loss = mean absolute difference between the original and the reconstructed
#        ratings, measured on the rated entries only
train_loss = 0

for epoch in range(1, N_EPOCH + 1):

    cum_train_loss = 0
    n_batches = 0  # batches that actually contributed to the loss in this epoch

    # batchwise
    for b in batches:
        batch_idx = loader[b]
        if not batch_idx:
            # an empty batch would make torch.mean return NaN and poison the epoch loss
            continue

        v0 = training_set[batch_idx]  # list indexing copies, so training_set is never modified
        vk = v0.clone()               # separate tensor that the sampling chain overwrites
        unrated = v0 < 0
        rated = v0 >= 0
        ph0, _ = rbm.sample_h(v0)

        # alternate hidden / visible sampling for N_WALKS steps
        for k in range(N_WALKS):
            _, hk = rbm.sample_h(vk)
            _, vk = rbm.sample_v(hk)
            vk[unrated] = v0[unrated]  # do not update vk for unrated movies

        # hidden probabilities of the final visible state
        phk, _ = rbm.sample_h(vk)

        # update weights
        rbm.train(v0, vk, ph0, phk)

        # vk is the reconstructed rating of the rated movies
        # loss = difference in ratings; |0-1|=1, |1-0|=1, |0-0|=0, |1-1|=0
        # 25% loss = 1 out of 4 rated movies is misclassified
        cum_train_loss += torch.mean(torch.abs(v0[rated] - vk[rated]))
        n_batches += 1

    # average over the batches that were processed; keep the previous value if there were none
    if n_batches:
        train_loss = cum_train_loss / n_batches
    print('epoch: {} train loss: {} u: {}'.format(epoch, train_loss, n_batches))

### Test model

In [None]:
# Testing the RBM
# For every user the model reconstructs the ratings from the user's training row and the
# result is compared with the user's test row on the entries that are rated in the test row.
# NOTE(review): this pairs row i of training_set with row i of test_set; that is only the
# same user when both pivots contain the same users in the same order — TODO confirm.
cum_test_loss = 0.
u = 0.  # number of users that contributed to the loss

for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    rated = vt >= 0

    # Only users with at least one rated movie in the test row contribute. Counting the
    # others would add the mean of an empty selection, which is NaN.
    if len(vt[rated]) > 0:
        _, h = rbm.sample_h(v)
        _, v = rbm.sample_v(h)

        # loss = reconstructed ratings -/- true test ratings, on the rated test entries
        cum_test_loss += torch.mean(torch.abs(vt[rated] - v[rated])).item()
        u += 1.

if u > 0:
    print('test loss: ' + str(cum_test_loss/u), u)
else:
    print('test loss: n/a (no rated test entries)', u)

# Stacked Auto Encoder

In [None]:
# Creating the architecture of the Stacked Auto Encoder
# inherit from Class nn
class SAE(nn.Module):
    """Stacked auto-encoder with three sigmoid hidden layers and a linear output layer.

    Args:
        n_hl1: width of the first hidden layer.
        n_hl2: width of the second hidden layer.
        n_hl3: width of the third hidden layer.
        n_inputs: width of the input and of the reconstructed output. Defaults to the
            module-level ``nb_movies`` when omitted, which is the previous behavior.
    """

    def __init__(self, n_hl1, n_hl2, n_hl3, n_inputs=None):
        super().__init__()

        if n_inputs is None:
            n_inputs = nb_movies

        # layers are created in a fixed order so that seeded initialisation stays reproducible
        self.fc1 = nn.Linear(n_inputs, n_hl1)
        self.fc2 = nn.Linear(n_hl1, n_hl2)
        self.fc3 = nn.Linear(n_hl2, n_hl3)
        self.fc4 = nn.Linear(n_hl3, n_inputs)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Return the reconstruction of `x`; the output layer has no activation."""
        for layer in (self.fc1, self.fc2, self.fc3):
            x = self.activation(layer(x))
        return self.fc4(x)


In [None]:
# create NN
# hidden layer widths: 20 -> 10 -> 20
sae = SAE(20, 10, 20)

# define loss function
# mean squared error between the reconstruction and the target
loss_fn = nn.MSELoss()

# define optimizer
def optimizer(name, params=None):
    """Build a fresh optimizer of the requested kind.

    Args:
        name: ``'RMS'`` for RMSprop or ``'Adam'`` for Adam.
        params: parameters to optimize. Defaults to ``sae.parameters()`` of the
            module-level model when omitted, which is the previous behavior.

    Returns:
        A newly constructed ``torch.optim`` optimizer.

    Raises:
        KeyError: if `name` is not one of the supported keys.
    """
    if params is None:
        params = sae.parameters()

    # Factories instead of eval'd code strings: nothing is built until it is selected.
    factories = {
        'RMS': lambda: optim.RMSprop(params, lr=0.001, alpha=0.99, eps=1e-08, weight_decay=0.5),
        'Adam': lambda: optim.Adam(params, lr=0.01, eps=1e-08, weight_decay=0.25),
    }
    return factories[name]()

In [None]:
# Train the SAE
N_EPOCH = 50

# Build the optimizer once. Re-creating it on every step discards Adam's running
# moment estimates, so each update would start from a blank optimizer state.
optimize = optimizer("Adam")

for epoch in range(1, N_EPOCH + 1):
    train_loss = 0
    s = 0.  # number of users that contributed to the loss in this epoch

    for id_user in range(nb_users):
        features = training_set[id_user].unsqueeze(0)  # one user as a batch of size 1
        target = features.detach().clone()             # target takes no part in gradient computation

        # train rated movies only
        # NOTE(review): the masks below treat 0 as "unrated" and > 0 as "rated", while
        # training_set is built earlier in this notebook with -1 for unrated, 0 for low
        # and 1 for high ratings — confirm which encoding this loop is meant to run on.
        n_rated = torch.sum(target.data > 0)
        if n_rated > 0:
            optimize.zero_grad()  # gradients accumulate across backward() calls unless cleared

            output = sae(features)
            output[target == 0] = 0  # masked entries contribute neither loss nor gradient

            # compute (MSE) loss
            # adjust trained/rated movies loss to all movies loss
            loss = loss_fn(output, target)
            mean_adjust = nb_movies/float(n_rated + 1e-10)
            train_loss += np.sqrt(loss.item() * mean_adjust)
            s += 1.

            # back-propagate and apply one optimizer step
            loss.backward()
            optimize.step()

    # guard against an epoch in which no user qualified for training
    print('epoch: '+str(epoch)+' loss: '+str(train_loss/s if s else float('nan')))