In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings
import tensorflow as tf
from keras.callbacks import *

warnings.filterwarnings('ignore')
%matplotlib inline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
from helpersNeuralNet import*

# Locations of the training ratings and of the submission template
DATA_TRAIN_PATH = 'data/data_train.csv'
DATA_TEST_PATH = 'data/sampleSubmission.csv'

# Read both files with the project's load_data helper:
# `data` holds the training ratings, `samples` the entries to predict.
data = load_data(DATA_TRAIN_PATH)
samples = load_data(DATA_TEST_PATH)

In [8]:
# Count the distinct user and movie ids present in the training data
n_users, n_movies = (data[id_column].nunique() for id_column in ('user_id', 'movie_id'))

In [11]:
from optimizer import*
from model_generation import*

## Layers & Neurons Optimization

In [11]:
# Alternating search over the number of layers and the number of neurons.
# Each step tunes one hyper-parameter while the others are held at their best
# known value; a candidate is kept only if its accuracy is at least as good as
# the best accuracy seen so far.

# Initialization
prev_neurons = 100
prev_embedding = 50
prev_dropout = 0.05

# Maximal number of tested values
max_nb_layers = 20
max_nb_neurons = 200

# Maximal number of iterations & initialization
max_iter = 50
n_iter = 0
break_ind = 0  # NOTE(review): never read in this notebook; kept so kernel state is unchanged
max_accurracy = 0

for n_iter in range(max_iter):
    # Compute the optimal number of layers
    next_layer, accurracy_layer = layers(max_nb_layers, prev_neurons, prev_dropout, prev_embedding, data)
    if accurracy_layer >= max_accurracy:
        max_accurracy = accurracy_layer
        prev_layer = next_layer
    print('Max Accuracy :{}\n'.format(max_accurracy))
    # Compute the optimal number of neurons for the best layer count retained so
    # far (prev_layer). Searching on next_layer would tune the neurons for a
    # layer count that may just have been rejected, so that max_accurracy and
    # prev_neurons would describe a configuration the final model never uses.
    next_neurons, accurracy_neurons = neurons(prev_layer, max_nb_neurons, prev_dropout, prev_embedding, data)
    if accurracy_neurons >= max_accurracy:
        max_accurracy = accurracy_neurons
        prev_neurons = next_neurons
    print('Max Accuracy :{}\n'.format(max_accurracy))
    print ('\n New iteration\n')

# Build and store the model for the best (layers, neurons) pair that was found
model = generate(prev_layer, prev_neurons, prev_dropout, n_users, n_movies, prev_embedding)
model.save('model_LayersNeurons.h5')
print ('\n Simulation finished \n')

## Layers, Neurons, Dropout & Embedding Layer Optimization

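Run the previous section first: this cell reuses the variables it defines (`max_iter`, `max_nb_layers`, `max_nb_neurons`, `max_accurracy` and the `prev_*` values).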

In [None]:
# Alternating search over embedding size, dropout, number of layers and number
# of neurons. Relies on max_iter, max_nb_layers, max_nb_neurons, max_accurracy
# and the prev_* values already present in the kernel. Each step tunes one
# hyper-parameter with the others held at their best known value and keeps the
# candidate only if its accuracy is at least as good as the best seen so far.
max_nb_embedding = 150

n_iter = 0

for n_iter in range(max_iter):
    # Compute the optimal factor for embedding layers
    next_embedding, accurracy_embeddinglayers = embeddinglayer(prev_layer, prev_neurons, prev_dropout, max_nb_embedding, data)
    if accurracy_embeddinglayers >= max_accurracy:
        max_accurracy = accurracy_embeddinglayers
        prev_embedding = next_embedding
    print('Max Accuracy :{}\n'.format(max_accurracy))
    # Compute the optimal number for dropout
    next_dropout, accurracy_dropout = optimize_dropout(prev_layer, prev_neurons, prev_embedding, data)
    if accurracy_dropout >= max_accurracy:
        max_accurracy = accurracy_dropout
        prev_dropout = next_dropout
    print('Max Accuracy :{}\n'.format(max_accurracy))
    # Compute the optimal number of layers
    next_layer, accurracy_layer = layers(max_nb_layers, prev_neurons, prev_dropout, prev_embedding, data)
    if accurracy_layer >= max_accurracy:
        max_accurracy = accurracy_layer
        prev_layer = next_layer
    print('Max Accuracy :{}\n'.format(max_accurracy))
    # Compute the optimal number of neurons for the best layer count retained so
    # far (prev_layer). Searching on next_layer would tune the neurons for a
    # layer count that may just have been rejected, so that max_accurracy and
    # prev_neurons would describe a configuration the final model never uses.
    next_neurons, accurracy_neurons = neurons(prev_layer, max_nb_neurons, prev_dropout, prev_embedding, data)
    if accurracy_neurons >= max_accurracy:
        max_accurracy = accurracy_neurons
        prev_neurons = next_neurons
    print('Max Accuracy :{}\n'.format(max_accurracy))
    print ('\n New iteration\n')

# Build and store the model for the best hyper-parameter combination found
model = generate(prev_layer, prev_neurons, prev_dropout, n_users, n_movies, prev_embedding)
model.save('model_LayersNeuronsDropEmbed.h5')
print ('\n Simulation Successfully Finished\n')

## Optimal Neural Network
- RMSE : 1.329
- Secondary : -0.413
- ID : 26039

In [12]:
# Hard-coded hyper-parameters for the final network; they replace whatever
# values the optimization cells left in the kernel.
prev_layer = 2
prev_neurons = 80
prev_dropout = 0.45
prev_embedding = 10
# Build the network from these settings and the user/movie counts.
model = generate(prev_layer, prev_neurons, prev_dropout, n_users, n_movies, prev_embedding)
# NOTE(review): this is the same file name the "Layers & Neurons" optimization
# cell saves to, so running both overwrites that file. The save also happens
# before the model.fit call further down — confirm that this is intended.
model.save('model_LayersNeurons.h5')

In [14]:
# Build the model inputs/targets from the full training frame; the *_test
# pieces are later passed to model.fit as validation data.
# NOTE(review): the last two values are unpacked as (n_movies, n_users) and
# overwrite the counts computed earlier from `data` — confirm that this matches
# the return order of setDataSet.
X_train_array, X_test_array, y_train, y_test, n_movies, n_users = setDataSet(data)

In [23]:
# Train for 10 epochs with batches of 1024 samples; (X_test_array, y_test) is
# passed as validation data only. The value returned by fit is kept in `history`.
history = model.fit(x=X_train_array, y=y_train,  batch_size=1024, 
                             epochs=10,verbose=1,validation_data=(X_test_array, y_test))

Train on 941561 samples, validate on 235391 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [25]:
# Encode the submission ids with encoders fitted on the TRAINING ids, so that
# every user/movie gets the integer index derived from the data the model was
# trained on. Fitting the encoders on `samples` itself only gives the same
# mapping when both files contain exactly the same set of ids; otherwise the
# indices silently shift and the wrong embeddings are looked up.
# NOTE(review): assumes setDataSet encodes the training ids with a LabelEncoder
# (sorted unique ids -> 0..n-1) — confirm against helpersNeuralNet.
user_enc = LabelEncoder().fit(data['user_id'].values)
item_enc = LabelEncoder().fit(data['movie_id'].values)
# transform() raises ValueError for ids that were never seen in training,
# which is preferable to predicting with a mismatched index.
samples['user'] = user_enc.transform(samples['user_id'].values)
samples['movie'] = item_enc.transform(samples['movie_id'].values)
X_samples = samples[['user', 'movie']].values
# The model is fed the user and movie index columns as two separate inputs
X_samples_array = [X_samples[:,0], X_samples[:,1]]

#make predictions with model
sample_pred = model.predict(X_samples_array)

In [27]:
print(sample_pred)
# Take the column with the highest score in each prediction row; argmax is
# 0-based, so add 1 to obtain the rating, then store it as a plain Python list.
samples['rating'] = rating_samples = (np.asarray(sample_pred).argmax(axis=1) + 1).tolist()

[[0.05152712 0.16593911 0.4203472  0.24062006 0.12156641]
 [0.05441142 0.17377752 0.42906865 0.23178619 0.11095628]
 [0.01176349 0.05259233 0.28243184 0.41827196 0.23494038]
 ...
 [0.11927024 0.19266436 0.2946778  0.19256788 0.20081976]
 [0.11528388 0.17552482 0.25898227 0.17678757 0.27342156]
 [0.02192065 0.06789187 0.24458118 0.32865798 0.33694834]]


In [28]:
# Write the submission file: pass the samples frame (now carrying the
# predicted 'rating' column) to the project's create_csv helper.
PATH_SUBMISSION = "data/OptimizeSimplePrediction.csv"
create_csv(PATH_SUBMISSION, samples)