In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings


# Silence every Python warning for the rest of the session
# (note that this also hides deprecation notices from the libraries used below).
warnings.filterwarnings('ignore')
# Render matplotlib figures inside the notebook output.
%matplotlib inline


# Reload imported modules automatically before each cell executes, so edits
# to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Neural Network with RMSE loss function

In [13]:
from helpers import load_data

# File locations: the training ratings and the sample-submission file.
DATA_TRAIN_PATH = "data/data_train.csv"
DATA_TEST_PATH = "data/sampleSubmission.csv"

# Both files are read with the same project helper.
data = load_data(DATA_TRAIN_PATH)
samples = load_data(DATA_TEST_PATH)

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Count the ratings given by each user and keep the 15 most frequent raters.
ratings_per_user = data.groupby('user_id')['rating'].count()
top_users = ratings_per_user.sort_values(ascending=False).head(15)

# Count the ratings received by each movie and keep the 15 most rated movies.
ratings_per_movie = data.groupby('movie_id')['rating'].count()
top_movies = ratings_per_movie.sort_values(ascending=False).head(15)

# Inner joins keep only the ratings that a top user gave to a top movie.
# Both count Series are named 'rating', so each join needs its own suffix:
# reusing a single suffix for the second join would collide with the count
# column added by the first one and leave `top_r` with a duplicated column name.
top_r = data.join(top_users, rsuffix='_user_count', how='inner', on='user_id')
top_r = top_r.join(top_movies, rsuffix='_movie_count', how='inner', on='movie_id')

# User x movie grid of ratings; pairs without a rating show up as NaN.
# The aggregation is passed by name ("sum") because handing the NumPy callable
# to pandas is deprecated; the result is the same.
pd.crosstab(top_r.user_id, top_r.movie_id, top_r.rating, aggfunc="sum")

movie_id,134,14,156,178,256,46,471,495,594,596,6,60,608,668,978
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1000,5.0,5.0,,5.0,,5.0,5.0,,5.0,5.0,,,,5.0,3.0
1570,5.0,,,,,,,5.0,,5.0,5.0,5.0,,,5.0
1830,,,,,5.0,5.0,,5.0,,5.0,5.0,5.0,5.0,,
1878,5.0,5.0,,5.0,5.0,5.0,5.0,,,,,,,5.0,
2038,5.0,5.0,,,,,,,5.0,,5.0,5.0,,5.0,
4600,5.0,,5.0,5.0,5.0,,5.0,5.0,,5.0,,,,,5.0
5289,5.0,,5.0,5.0,5.0,5.0,5.0,,5.0,,5.0,5.0,5.0,5.0,
5512,,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,,,5.0,,5.0
7014,5.0,5.0,5.0,,,5.0,,5.0,,,,5.0,,,
8575,5.0,,5.0,,,5.0,5.0,5.0,5.0,,5.0,5.0,5.0,,5.0


In [15]:
# Replace the raw user ids with consecutive integer labels (0 .. n_users - 1).
user_enc = LabelEncoder()
data['user'] = user_enc.fit_transform(data['user_id'].values)
n_users = data['user'].nunique()

# Same treatment for the movie ids.
item_enc = LabelEncoder()
data['movie'] = item_enc.fit_transform(data['movie_id'].values)
n_movies = data['movie'].nunique()

# Cast ratings to integers. The builtin `int` is used because the `np.int`
# alias was deprecated in NumPy 1.20 and removed in 1.24, where it raises
# AttributeError; `np.int` was only ever an alias of the builtin.
data['rating'] = data['rating'].values.astype(int)

# Vectorised min/max instead of iterating the Series with the builtins;
# wrapped in int() so the bounds stay plain Python integers.
min_rating = int(data['rating'].min())
max_rating = int(data['rating'].max())

# Displayed as the cell result: sizes of both id spaces and the rating range.
n_users, n_movies, max_rating, min_rating

(10000, 1000, 5, 1)

In [16]:
# Model inputs are the encoded (user, movie) pairs; the target is the rating.
X = data[['user', 'movie']].values
y = data['rating']

# Hold out 10% of the rows; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [17]:
# Number of factors handed to the network builder in a later cell.
n_factors = 50

# Split each two-column matrix into a list of its columns, in the order
# [user indices, movie indices].
X_train_array = [X_train[:, col] for col in (0, 1)]
X_test_array = [X_test[:, col] for col in (0, 1)]

In [18]:
from keras.models import Model,load_model
from keras.layers import Input, Reshape, Dot
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.layers import Concatenate, Dense, Dropout, Add, Activation, Lambda

In [19]:
from DummyNet import recommenderNet
# Build the network from the project's builder and print its layer table.
model = recommenderNet(n_users, n_movies, n_factors, min_rating, max_rating)
model.summary()

if not os.path.exists('RecommenderNet.h5'):
    # No saved model on disk yet: train, persist the result, and plot the
    # loss measured on the held-out split after every epoch.
    history = model.fit(
        x=X_train_array,
        y=y_train,
        batch_size=10000,
        epochs=10,
        verbose=1,
        validation_data=(X_test_array, y_test),
    )
    model.save('RecommenderNet.h5')
    plt.plot(history.history['val_loss'])
    plt.xlabel("Epochs")
    plt.ylabel("Test Error")
else:
    # A saved model exists: use it in place of the freshly built one.
    model = load_model('RecommenderNet.h5')

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 1, 50)        500000      input_3[0][0]                    
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 1, 50)        50000       input_4[0][0]                    
____________________________________________________________________________________________

## Optimize Layers, Neurons, Dropouts & Models

In [None]:
from keras.models import Sequential
from keras.callbacks import EarlyStopping

from OptimizerLayerNeuronDropoutModel import optimizerLayersNeuronsDropoutsModels

# Calls the project's optimisation helper and rebinds `model` to whatever it
# returns (presumably the best model found over layers/neurons/dropouts, going
# by the section heading — confirm in OptimizerLayerNeuronDropoutModel).
# NOTE(review): the helper is given the full X and y, which still contain the
# rows split off as X_test / y_test, and the next cell scores `model` on
# X_test_array. Confirm the helper holds out data on its own; otherwise that
# score is not an out-of-sample estimate.
model = optimizerLayersNeuronsDropoutsModels(X, y)

In [None]:
from sklearn.metrics import mean_squared_error

# Predict on the held-out inputs and keep the first entry of every output row,
# giving a flat array aligned with y_test.
raw_predictions = model.predict(x=X_test_array)
prediction_test = np.array([row[0] for row in raw_predictions])

# Mean squared error between the true ratings and the predictions.
print (mean_squared_error(y_test, prediction_test))