## Evaluate and compare the different models
Using the 10% of training-set records that Keras held out for validation, and the training histories I saved:

In [1]:
import pickle
import pandas as pd
import numpy as np

import os

#Keras
from keras.models import load_model
from keras import backend as K

# Tensorflow
import tensorflow as tf

from sklearn.metrics import mean_squared_error

Using TensorFlow backend.





### Set and Check GPUs

In [2]:
def set_check_gpu():
    """Pin Keras/TensorFlow to GPU 0 and print a summary of the visible devices.

    Steps, in order:
      1. Export ``CUDA_DEVICE_ORDER`` / ``CUDA_VISIBLE_DEVICES``.
      2. Create a TF1-style session restricted to GPU "0" and register it as
         the Keras session.
      3. Print the TensorFlow version and the physical devices it sees.
      4. Turn on device-placement logging and request memory growth on every
         physical GPU.

    Takes no arguments and returns nothing; its effects are the environment
    variables, the Keras session and the printed report.
    """
    # CUDA reads these variables when it is first initialised, so they have to
    # be exported before a session is created or any device is enumerated.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # For 8 GPUs use "0,1,2,3,4,5,6,7" here and in visible_device_list below.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Use the TensorFlow module imported by this notebook directly instead of
    # reaching it through the Keras backend (`K.tf`).
    cfg = tf.compat.v1.ConfigProto()
    cfg.gpu_options.per_process_gpu_memory_fraction = 1  # allow all of the GPU memory to be allocated
    cfg.gpu_options.visible_device_list = "0"
    # Optional knobs, left disabled:
    #   cfg.gpu_options.allow_growth -> allocate GPU memory as needed instead of up front
    #   cfg.log_device_placement     -> log on which device each operation ran
    sess = tf.compat.v1.Session(config=cfg)
    K.set_session(sess)  # set this TensorFlow session as the default session for Keras

    print("* TF version: ", [tf.__version__, tf.test.is_gpu_available()])
    print("* List of GPU(s): ", tf.config.experimental.list_physical_devices() )
    print("* Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

    # TF debugging option: log the device every operation is placed on.
    tf.debugging.set_log_device_placement(True)

    gpus = tf.config.experimental.list_physical_devices('GPU')

    if gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized
            print(e)

    print(tf.config.experimental.list_physical_devices('GPU'))
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [3]:
# Apply the GPU configuration and print the device summary before any model is loaded.
set_check_gpu()

* TF version:  ['1.15.2', True]
* List of GPU(s):  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
* Num GPUs Available:  1
1 Physical GPUs, 1 Logical GPUs
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Num GPUs Available:  1


## Name of trained model

In [6]:
from os import listdir
from os.path import isfile, join, splitext

mypath = './models'

# Keep only the saved Keras models (*.h5) and strip just the extension, so that
# other files in the folder are ignored and a '.h5' occurring inside a file
# name is left untouched.
onlyfiles = [splitext(f)[0] for f in listdir(mypath)
             if isfile(join(mypath, f)) and f.endswith('.h5')]
onlyfiles

['dense_2_Concatenate_10_embeddings_10_epochs',
 'dense_1_Multiply_50_embeddings_7_epochs',
 'dense_2_Concatenate_20_embeddings_25_epochs',
 'dense_1_Multiply_50_embeddings_4_epochs_dropout',
 'dense_2_Meta_Concatenate_15_embeddings_50_epochs-DropOut0.6',
 'matrix_facto_10_embeddings_3_epochs',
 'dense_1_Multiply_50_embeddings_20_epochs_dropout',
 'dense_4_Multiply_5_embeddings_7_epochs-DropOut-0.6-dropout-input',
 'dense_1_Multiply_50_embeddings_7_epochs_dropout',
 'dense_4_Multiply_5_embeddings_7_epochs-DropOut-0.8',
 'matrix_facto_10_embeddings_100_epochs',
 'dense_2_Meta_Concatenate_15_embeddings_30_epochs-DropOut0.6',
 'matrix_facto_10_embeddings_20_epochs',
 'dense_4_Multiply_5_embeddings_7_epochs',
 'dense_1_Multiply_50_embeddings_2_epochs_dropout']

In [9]:
# Names of the models whose pickled training history is compared in this notebook.
models_history = [
    'dense_2_Concatenate_10_embeddings_10_epochs',
    'dense_1_Multiply_50_embeddings_7_epochs',
    'dense_2_Concatenate_20_embeddings_25_epochs',
    'dense_1_Multiply_50_embeddings_4_epochs_dropout',
    'matrix_facto_10_embeddings_3_epochs',
    'dense_1_Multiply_50_embeddings_20_epochs_dropout',
    'dense_4_Multiply_5_embeddings_7_epochs-DropOut-0.6-dropout-input',
    'dense_1_Multiply_50_embeddings_7_epochs_dropout',
    'dense_4_Multiply_5_embeddings_7_epochs-DropOut-0.8',
    'matrix_facto_10_embeddings_100_epochs',
    'dense_2_Meta_Concatenate_15_embeddings_30_epochs-DropOut0.6',
    'matrix_facto_10_embeddings_20_epochs',
    'dense_4_Multiply_5_embeddings_7_epochs',
    'dense_1_Multiply_50_embeddings_2_epochs_dropout',
]

### Compare MSE validation error / Train error

In [10]:
hist_path = "./histories/"

# Read every pickled training history once, keyed by model name.
# NOTE: pickle.load can execute arbitrary code - only open history files you created yourself.
histories = {}
for val in models_history:
    with open(hist_path + val + '.pkl', 'rb') as file_pi:
        histories[val] = pickle.load(file_pi)

# Lowest loss reached over all recorded epochs, per model.
validation_error = pd.Series({name: np.min(hist["val_loss"]) for name, hist in histories.items()})
train_error = pd.Series({name: np.min(hist["loss"]) for name, hist in histories.items()})

print ("MSE validation error \n",validation_error.sort_values(ascending=True).head(20))
print ("\nTrain error \n",train_error.sort_values(ascending=True).head(20))

MSE validation error 
 dense_4_Multiply_5_embeddings_7_epochs                               1.542893
dense_1_Multiply_50_embeddings_7_epochs                              1.552457
dense_4_Multiply_5_embeddings_7_epochs-DropOut-0.8                   1.555063
dense_1_Multiply_50_embeddings_7_epochs_dropout                      1.574078
dense_1_Multiply_50_embeddings_20_epochs_dropout                     1.575834
dense_1_Multiply_50_embeddings_4_epochs_dropout                      1.576017
dense_2_Concatenate_20_embeddings_25_epochs                          1.580353
dense_2_Concatenate_10_embeddings_10_epochs                          1.598811
dense_4_Multiply_5_embeddings_7_epochs-DropOut-0.6-dropout-input     1.663208
dense_2_Meta_Concatenate_15_embeddings_30_epochs-DropOut0.6          1.672508
dense_1_Multiply_50_embeddings_2_epochs_dropout                      1.707937
matrix_facto_10_embeddings_20_epochs                                17.537834
matrix_facto_10_embeddings_100_epochs    

### We can notice the following points from the above:

- Performance was much better with the neural networks than with matrix factorization.

- When using neural network, I converge to the best model very quickly, sometimes after 2 epochs and after that the model starts overfitting or at least the validation error does not seem to go down anymore. Matrix factorization does not converge at all.

- Adding epochs leads to overfitting.

- Adding layers (over 3) does not help much and actually leads to overfitting

- Changing the number of hidden units does not help.

- Simplifying the model by reducing embedding size does not help either.

- Choosing large values of embedding has made a small improvement in the results.

- Multiplying or concatenating the user and item embeddings does not seem to matter much, though concatenation seems to give slightly better results.

- Training with Dropout seems to prevent some overfitting.

- Adding dense layers on top of the embeddings before the merge helps a bit.

- Adding some metadata leads to some improvement in the results.

- Running on a larger dataset does not help either, because the data in both datasets is very skewed.


In [11]:
# Show the working directory; the relative paths used in this notebook are resolved against it.
!pwd

/home/ec2-user/SageMaker/dse260-CapStone-Amazon/2-**Final-Keras-DeepRecommender


In [12]:
# List the contents of the working directory (long format, hidden entries included).
!ls -al

total 3704
drwxrwxr-x  8 ec2-user ec2-user   4096 May 27 04:20 .
drwxrwxr-x 14 ec2-user ec2-user   4096 May 26 15:19 ..
-rw-rw-r--  1 ec2-user ec2-user  11399 May 27 04:06 Amazon_Recommender_System.ipynb
drwxrwxr-x  5 ec2-user ec2-user   4096 May 26 16:08 data
-rw-rw-r--  1 ec2-user ec2-user 468696 May 27 01:29 DeepRecommendation_Keras.ipynb
-rw-rw-r--  1 ec2-user ec2-user 278681 May 27 04:19 *dense_1_Multiply_50_embeddings_4_epochs_dropout.ipynb
-rw-rw-r--  1 ec2-user ec2-user 285939 May 27 04:11 dense_1_Multiply_50_embeddings_7_epochs_dropout.ipynb
-rw-rw-r--  1 ec2-user ec2-user 191476 May 27 03:45 *dense_1_Multiply_50_embeddings_7_epochs.ipynb
-rw-rw-r--  1 ec2-user ec2-user 273534 May 26 21:26 *dense_2_Concatenate_10_embeddings_10_epochs.ipynb
-rw-rw-r--  1 ec2-user ec2-user 278732 May 26 22:02 *dense_2_Concatenate_20_embeddings_25_epochs.ipynb
-rw-rw-r--  1 ec2-user ec2-user 302811 May 27 03:36 *dense_2_Meta_Concatenate_15_embeddings_30_epochs-DropOut0.6.ipynb
-rw-rw-r--  1 ec2-u

## Predict - Verifying the performance on the test set.
- Check whether our results are reproducible on unseen data.
- Test on new data using previously saved models.
- I got the following results on the test set:

In [13]:
# Test ratings: the saved models are scored against these in the cells below.
ratings_test = pd.read_parquet('./data/ratings_test.parquet')
# Training ratings. NOTE(review): not referenced by the evaluation cells visible here - confirm it is still needed.
ratings_train = pd.read_parquet('./data/ratings_train.parquet')

In [14]:
# Saved models that are fed only the user id and the item id.
models = [
    'dense_2_Concatenate_10_embeddings_10_epochs',
    'dense_1_Multiply_50_embeddings_7_epochs',
    'dense_2_Concatenate_20_embeddings_25_epochs',
    'dense_1_Multiply_50_embeddings_4_epochs_dropout',
    'matrix_facto_10_embeddings_3_epochs',
    'dense_1_Multiply_50_embeddings_20_epochs_dropout',
    'dense_4_Multiply_5_embeddings_7_epochs-DropOut-0.6-dropout-input',
    'dense_1_Multiply_50_embeddings_7_epochs_dropout',
    'dense_4_Multiply_5_embeddings_7_epochs-DropOut-0.8',
    'matrix_facto_10_embeddings_100_epochs',
    'matrix_facto_10_embeddings_20_epochs',
    'dense_4_Multiply_5_embeddings_7_epochs',
    'dense_1_Multiply_50_embeddings_2_epochs_dropout',
]

# Saved models that are additionally fed the price and title ids.
models_with_Meta = [
    'dense_2_Meta_Concatenate_15_embeddings_50_epochs-DropOut0.6',
    'dense_2_Meta_Concatenate_15_embeddings_30_epochs-DropOut0.6',
]

In [15]:
load_path = "./models/"
perfs = {}

# Score every saved model on the test ratings: store its predictions in a
# 'preds_<model>' column and record the MSE against the real score.
for mod in models:
    pred_col = 'preds_' + mod
    model = load_model(load_path + mod + '.h5')
    inputs = [ratings_test['user_id'], ratings_test['item_id']]
    ratings_test[pred_col] = model.predict(inputs)
    perfs[mod] = mean_squared_error(ratings_test['score'], ratings_test[pred_col])

# Lowest test MSE first.
perfs = pd.Series(perfs)
perfs.sort_values(ascending=True).head(20)





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.









dense_1_Multiply_50_embeddings_7_epochs                              1.565703
dense_4_Multiply_5_embeddings_7_epochs-DropOut-0.8                   1.567906
dense_4_Multiply_5_embeddings_7_epochs                               1.572504
dense_2_Concatenate_20_embeddings_25_epochs                          1.575907
dense_1_Multiply_50_embeddings_7_epochs_dropout                      1.587513
dense_1_Multiply_50_embeddings_20_epochs_dropout                     1.587922
dense_1_Multiply_50_embeddings_4_epochs_dropout                      1.588667
dense_2_Concatenate_10_embeddings_10_epochs                          1.595932
dense_4_Multiply_5_embeddings_7_epochs-DropOut-0.6-dropout-input     1.677053
dense_1_Multiply_50_embeddings_2_epochs_dropout                      1.722337
matrix_facto_10_embeddings_100_epochs                               17.545488
matrix_facto_10_embeddings_20_epochs                                17.552762
matrix_facto_10_embeddings_3_epochs                             

In [16]:
perfs = {}

# Columns passed to the metadata-aware models, in the order used for predict().
meta_inputs = ["user_id", "item_id", "price_id", "title_id"]

for mod in models_with_Meta:
    pred_col = 'preds_' + mod
    model = load_model(load_path + mod + '.h5')
    ratings_test[pred_col] = model.predict([ratings_test[col] for col in meta_inputs])
    # MSE between the real score and the predicted score
    perfs[mod] = mean_squared_error(ratings_test['score'], ratings_test[pred_col])

perfs = pd.Series(perfs)
perfs


dense_2_Meta_Concatenate_15_embeddings_50_epochs-DropOut0.6    1.649172
dense_2_Meta_Concatenate_15_embeddings_30_epochs-DropOut0.6    1.686280
dtype: float64

### MSE on test data is very similar to what I got on the evaluation data
### The best result on the test set (and the second best on the internal Keras validation split) was achieved with 1 dense layer, multiplied embeddings of size 50 and 7 epochs
### I will use this model further for executing recommendations (dense_1_Multiply_50_embeddings_7_epochs)

## Conclusion
- In this work I created and compared 2 models for predicting users' ratings on top of Amazon's review data: a matrix factorization model and a deep network model, and used the models for recommending items to users.

- I showed that using deep neural networks can achieve better performance than using matrix factorization. 

- Going deeper (more than 3 layers) seems to lead to overfitting and not to further improvement.

- Adding epochs, reducing the embedding size or changing the number of hidden units does not help either.

- Running on a larger dataset does not help either, because the data in both datasets is very skewed.

- Choosing large values of embedding (50) and adding dense layers on top of the embeddings before concatenating helps a bit.

- Adding metadata and training with Dropout lead to some improvement in the results.

- The fact that the data is so sparse and skewed has a huge impact on the ability to model the recommendation problem and to achieve a smaller test MSE.

