### Goal is to learn vector embeddings for users and movies via Neural Network and predict ratings

- Esha Singh
- Anirudh Agarwal

In [1]:
# Loading data file

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

ratings_df = pd.read_csv('ratings.dat', 
                         names=['UserID','MovieID','Rating','Time'], 
                         sep='::', engine='python')

# Label encoding for continuous IDs, required for embedding vector
ratings_df['UserID'] = LabelEncoder().fit_transform(ratings_df['UserID'])
ratings_df['MovieID'] = LabelEncoder().fit_transform(ratings_df['MovieID'])

ratings_df.head()

Unnamed: 0,UserID,MovieID,Rating,Time
0,0,1104,5,978300760
1,0,639,3,978302109
2,0,853,3,978301968
3,0,3177,4,978300275
4,0,2162,5,978824291


**Benchmarking RMSE value against FunkSVD**

*Code snippert derived from previous homework*

In [12]:
from lenskit import util
from lenskit.batch import predict
from lenskit import crossfold as cv
from lenskit.algorithms import funksvd
from lenskit.metrics.predict import rmse

# Get Recommendations/Predictions
def eval(aname, algo, train, test): 
    algoClone = util.clone(algo)
    algoClone.fit(train)
    #batch.predict returns a frame with columns user, item, and prediction containing the prediction results.
    #If pairs contains a rating column, this result will also contain a rating column.
    recs = predict(algoClone, test)
    recs['Algorithm'] = aname
    
    recs.fillna(0, inplace=True)
    return recs

# Calculating RMSE
def rmse_cal(data, kFolds, nSamples, algoName, algoObject): 
    test_data = []
    train_data = []
    rmseList =[]
    count = 1 # to keep track of the ith fold

    for train, test in cv.partition_users(data, kFolds, cv.SampleN(nSamples)):
        test_data.append(test)
        train_data.append(train)
        recs = eval(algoName, algoObject, train, test)
        rmseList.append( rmse(recs['prediction'], recs['rating']))
        print('RMSE of fold #', count, 'for', algoName ,'\t', rmseList[count-1])
        
        count = count+1 
    return rmseList

#got the best results for features = 20
algo_FunkSVD = funksvd.FunkSVD(20) 

kFolds = 5
nSamples =1
data = ratings_df.rename(columns={'UserID': 'user', 'MovieID': 'item', 'Rating': 'rating'})
rmseFunkSVD = rmse_cal(data, kFolds, nSamples, 'FunkSVD', algo_FunkSVD)
print('Mean RMSE of FunkSVD \t', np.mean(rmseFunkSVD))

RMSE of fold # 1 for FunkSVD 	 0.9078703112570627
RMSE of fold # 2 for FunkSVD 	 0.8852976347723788
RMSE of fold # 3 for FunkSVD 	 0.9310011969700549
RMSE of fold # 4 for FunkSVD 	 0.9019645033317578
RMSE of fold # 5 for FunkSVD 	 0.8920507731303983
Mean RMSE of FunkSVD 	 0.9036368838923305


**Initialization of User and Item latent vectors**

Using Embedding in Keras which maps discrete IDs into continuous vectors

In [2]:
from keras.layers.embeddings import Embedding

class GetVec:
    def __init__(self, input_dim, output_dim):
        '''
        returns Embedding of vectors
        '''
        self.input_dim = input_dim
        self.output_dim = output_dim
    
    def __call__(self, x):
        '''
        going with default initiaizer uniform and no regularizer
        '''
        x = Embedding(self.input_dim, self.output_dim)(x)
        # x = Reshape((self.n_factors,))(x)
        return x

Using TensorFlow backend.


**Creating a simple Neural Net**

Dot product of user and item vectors is optimized over MSE

In [3]:
from keras import optimizers
from keras.models import Model
from keras import backend as B
from keras.layers import Input, Reshape, Dot

# custom error function
def rmse(y_true, y_pred):
    return B.sqrt(B.mean(B.square(y_pred - y_true), axis=-1))

def basicNN(n_users, n_movies, dim_latent=20):
    '''
    Creates a graph for keras model
    '''
    # Creating partially known shaped tensors 
    I1 = Input(shape=(1,))
    I2 = Input(shape=(1,))

    # Getting vectors
    # default latent feature size=20, this is based on tuning in previous homework
    E1 = GetVec(n_users, dim_latent)(I1)
    E2 = GetVec(n_movies, dim_latent)(I2)

    # Reshaping embeddings as latent feature vectors
    V1 = Reshape((dim_latent,))(E1)
    V2 = Reshape((dim_latent,))(E2)

    # Performing dot product
    Y = Dot(axes=1)([V1, V2])

    # Model creation and compiling
    model = Model(inputs=[I1, I2], outputs=Y)
    sgd = optimizers.SGD(lr=0.5)
    model.compile(loss='mean_squared_error', optimizer=sgd, metrics=[rmse])

    return model

**BasicNN Training and evaluation**

Not running k-fold cross validation as running NN is expensive and time consuming.<br />
Evaluation done by splitting data into train, validation and test sets.<br />
Default configuration for optimiser works best.<br />
No of epochs decided on the basis of observation, when rmse stops changing.

In [4]:
from sklearn.model_selection import train_test_split

def dataTrainEval(DNfun, epochs=10):
    # data preprocessing
    n_users = ratings_df['UserID'].nunique()
    n_movies = ratings_df['MovieID'].nunique()

    # Splitting data into parts
    data = ratings_df[['UserID', 'MovieID']].values
    target = ratings_df['Rating'].values
    X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.1)

    # Flattening columns
    X_train = [X_train[:, 0], X_train[:, 1]]
    X_test = [X_test[:, 0], X_test[:, 1]]

    # Initializing model
    model = DNfun(n_users, n_movies)

    # Fitting 
    model.fit(x=X_train, y=y_train, epochs=epochs, verbose=1, validation_split=0.1)

    # Evaluation
    error = model.evaluate(x=X_test, y=y_test)
    return error

# Running function
print ('RMSE Error: ', dataTrainEval(basicNN, epochs=10)[1])

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 810169 samples, validate on 90019 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
RMSE Error:  0.7218112349510193


**Extra Credit**

Approach 1: Trying different optimiser configurations (This optimizer and learning rate works best)

In [24]:
from keras import optimizers

def confNN(n_users, n_movies, dim_latent=50):
    '''
    Creates a graph for keras model
    '''
    # Creating partially known shaped tensors 
    I1 = Input(shape=(1,))
    I2 = Input(shape=(1,))

    # Getting vectors
    # default latent feature size=20, this is based on tuning in previous homework
    E1 = GetVec(n_users, dim_latent)(I1)
    E2 = GetVec(n_movies, dim_latent)(I2)

    # Reshaping embeddings as latent feature vectors
    V1 = Reshape((dim_latent,))(E1)
    V2 = Reshape((dim_latent,))(E2)

    # Performing dot product
    Y = Dot(axes=1)([V1, V2])

    # Model creation and compiling
    opt = optimizers.Adam(lr=0.001)
    model = Model(inputs=[I1, I2], outputs=Y)
    model.compile(loss='mean_squared_error', optimizer=opt, metrics=[rmse])

    return model

# Running function
print ('RMSE Error: ', dataTrainEval(confNN, epochs=5)[1])

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 810169 samples, validate on 90019 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
RMSE Error:  0.7200255393981934


**Extra Credit**

Approach 2: Adding more Dense layers, making NN deeper <br />
Also, in the final layer we can introduce non-linearity in the form of 'Sigmoid'. <br />
Then scale the output in the 1-5 ratings scale.

In [6]:
from keras.layers import Add, Activation, Lambda, Dense

def deepNN(n_users, n_movies, dim_latent=20):
    '''
    Creates a graph for keras model
    '''
    # Creating partially known shaped tensors 
    I1 = Input(shape=(1,))
    I2 = Input(shape=(1,))

    # Getting vectors
    # default latent feature size=20this is based on tuning in previous homework
    E1 = GetVec(n_users, dim_latent)(I1)
    E2 = GetVec(n_movies, dim_latent)(I2)

    # Creating more layers (introducing more non-linearity)
    E1 = Dense(dim_latent, activation='sigmoid')(E1)
    E2 = Dense(dim_latent, activation='sigmoid')(E2)

    # Reshaping embeddings as latent feature vectors
    V1 = Reshape((dim_latent,))(E1)
    V2 = Reshape((dim_latent,))(E2)

    # Performing dot product
    Y = Dot(axes=1)([V1, V2])

    # Non-linearity and scaling (As we already know the scales)
    min_rat = 1 
    max_rat = 5
    Y = Activation('sigmoid')(Y)
    Y = Lambda(lambda x: x * (max_rat - min_rat) + min_rat)(Y)

    # Model creation and compiling
    opt = optimizers.Adam(lr=0.001)
    model = Model(inputs=[I1, I2], outputs=Y)
    model.compile(loss='mean_squared_error', optimizer=opt, metrics=[rmse])

    return model

# Running function
print ('RMSE Error: ', dataTrainEval(deepNN, epochs=5)[1])

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 810169 samples, validate on 90019 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
RMSE Error:  0.7195802927017212


**Extra Credit**

Approach 3: The performance of this Neural Net can be kicked up by adding user and item biases. <br />
These biases can internally help normalize ratings and are required as intercepts to balance weights.

In [26]:
from keras.layers import Add, Activation, Lambda

def biasNN(n_users, n_movies, dim_latent=20):
    '''
    Creates a graph for keras model
    '''
    # Creating partially known shaped tensors 
    I1 = Input(shape=(1,))
    I2 = Input(shape=(1,))

    # Getting vectors
    # default latent feature size=20this is based on tuning in previous homework
    E1 = GetVec(n_users, dim_latent)(I1)
    E2 = GetVec(n_movies, dim_latent)(I2)

    # One bias for each uesr and each movie
    E1_bias = GetVec(n_users, 1)(I1)
    E2_bias = GetVec(n_movies, 1)(I2)

    # Reshaping embeddings as latent feature vectors
    V1 = Reshape((dim_latent,))(E1)
    V2 = Reshape((dim_latent,))(E2)

    # Performing dot product
    Y = Dot(axes=1)([V1, V2])

    # Reshaping and adding biases
    E1_bias = Reshape((1,))(E1_bias)
    E2_bias = Reshape((1,))(E1_bias)
    Y = Add()([Y, E1_bias, E2_bias])

    # Non-linearity and scaling (As we already know the scales)
    min_rat = 1 
    max_rat = 5
    Y = Activation('sigmoid')(Y)
    Y = Lambda(lambda x: x * (max_rat - min_rat) + min_rat)(Y)

    # Model creation and compiling
    opt = optimizers.Adam(lr=0.001)
    model = Model(inputs=[I1, I2], outputs=Y)
    model.compile(loss='mean_squared_error', optimizer=opt, metrics=[rmse])

    return model

# Running function
print ('RMSE Error: ', dataTrainEval(biasNN, epochs=5)[1])

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 810169 samples, validate on 90019 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
RMSE Error:  0.6838966012001038


**Extra Credit**

Approach 4: Switching activation functions and learning rates in our deep NN approach

In [7]:
from keras.layers import Add, Activation, Lambda, Dense

def deepNN2(n_users, n_movies, dim_latent=20):
    '''
    Creates a graph for keras model
    '''
    # Creating partially known shaped tensors 
    I1 = Input(shape=(1,))
    I2 = Input(shape=(1,))

    # Getting vectors
    # default latent feature size=20this is based on tuning in previous homework
    E1 = GetVec(n_users, dim_latent)(I1)
    E2 = GetVec(n_movies, dim_latent)(I2)

    # Creating more layers (introducing more non-linearity)
    E1 = Dense(dim_latent, activation='relu')(E1)
    E2 = Dense(dim_latent, activation='relu')(E2)

    # Reshaping embeddings as latent feature vectors
    V1 = Reshape((dim_latent,))(E1)
    V2 = Reshape((dim_latent,))(E2)

    # Performing dot product
    Y = Dot(axes=1)([V1, V2])

    # Non-linearity and scaling (As we already know the scales)
    min_rat = 1 
    max_rat = 5
    Y = Activation('relu')(Y)
    Y = Lambda(lambda x: x * (max_rat - min_rat) + min_rat)(Y)

    # Model creation and compiling
    opt = optimizers.Adam(lr=0.001)
    model = Model(inputs=[I1, I2], outputs=Y)
    model.compile(loss='mean_squared_error', optimizer=opt, metrics=[rmse])

    return model

# Running function
print ('RMSE Error: ', dataTrainEval(deepNN2, epochs=5)[1])

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 810169 samples, validate on 90019 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
RMSE Error:  0.6887091994285583


#### Results:

- RMSE for FunkSVD: **0.904**
- RMSE for best config DeepNN: **0.684**

#### Further scope of improvement:

- Trying different activation functions in the outer layer
- Different initializers for the embedding
- Different learning rates, with clampers and momentum in optimizers

#### References
- https://keras.io/
- https://www.youtube.com/watch?v=UOEhojCzWrY&list=PLgJhDSE2ZLxaPX0jteHZG4skdj8ZrST9d
- https://towardsdatascience.com/building-a-book-recommendation-system-using-keras-1fba34180699, 
- https://medium.com/@jdwittenauer/deep-learning-with-keras-recommender-systems-e7b99cb29929