# Recommender System - Neural Network CNN

Source : https://missinglink.ai/guides/keras/keras-conv1d-working-1d-convolutional-neural-networks-keras/

## 1. Prerequisites

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings


warnings.filterwarnings('ignore')
%matplotlib inline


import scipy
import scipy.io
import scipy.sparse as sp

from sklearn.preprocessing import LabelEncoder
from keras import layers

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
from helpersNeuralNet import load_data
from sklearn.model_selection import train_test_split

DATA_TRAIN_PATH = "data/data_train.csv"
ratings = load_data(DATA_TRAIN_PATH)

DATA_TEST_PATH = "data/sampleSubmission.csv"
samples = load_data(DATA_TEST_PATH)

n_users = len(ratings.user_id.unique())
n_books = len(ratings.movie_id.unique())

train, test = train_test_split(ratings, test_size=0.1, random_state=42)

Using TensorFlow backend.


In [3]:
samples.head()

Unnamed: 0,user_id,movie_id,rating
0,37,1,3
1,73,1,3
2,156,1,3
3,160,1,3
4,248,1,3


## 2. Data preprocessing

In [4]:
user_enc = LabelEncoder()
ratings['user'] = user_enc.fit_transform(ratings['user_id'].values)
n_users = ratings['user'].nunique()

item_enc = LabelEncoder()
ratings['movie'] = item_enc.fit_transform(ratings['movie_id'].values)
n_movies = ratings['movie'].nunique()

ratings['rating'] = ratings['rating'].values.astype(np.int)
min_rating = min(ratings['rating'])
max_rating = max(ratings['rating'])

In [5]:
X = ratings[['user', 'movie']].values
y = ratings['rating']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [71]:
n_factors = 25

X_train_array = [X_train[:,0], X_train[:,1]]
X_test_array = [X_test[:,0], X_test[:,1]]

## 3. Neural network CNN

In [70]:
from keras.layers import Add, Activation, Lambda
from keras.layers import Concatenate, Dense, Dropout, Input, Reshape, Dot
from keras.models import Model,load_model
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
from keras.regularizers import l2
from keras_radam import RAdam

class EmbeddingLayer:
    def __init__(self, n_items, n_factors):
        self.n_items = n_items
        self.n_factors = n_factors
    
    def __call__(self, x):
        x = Embedding(self.n_items, self.n_factors, embeddings_initializer='he_normal', embeddings_regularizer=l2(1e-5))(x)
        x = Reshape((self.n_factors,))(x)
        return x
    
    
def create_CNN(n_users, n_movies, n_factors):
    
    user = Input(shape=(1,))
    u = EmbeddingLayer(n_users, n_factors)(user)
    
    movie = Input(shape=(1,))
    m = EmbeddingLayer(n_movies, n_factors)(movie)
    
    x = Concatenate()([u, m])
    x = Dropout(0.05)(x)
#    x = layers.SpatialDropout1D(0.3)(x)
    
    # Add the convolutional Layer
    batch_input_shape=(None, 64, 1)
    conv_layer = layers.Conv1D(50, 1, activation="relu", batch_input_shape=batch_input_shape)(x)

    # Add the pooling Layer
    pooling_layer = layers.GlobalMaxPool1D()(conv_layer)

    # Add the output Layers
    output_layer1 = layers.Dense(50, activation="relu")(pooling_layer)
    output_layer1 = layers.Dropout(0.25)(output_layer1)
    output_layer2 = layers.Dense(1, activation="sigmoid")(output_layer1) 
    
    cnn_model = Model(inputs=input_layer, outputs=output_layer2)
    cnn_model.compile(optimizer=optimizers.RAdam(), loss='categorical_crossentropy', metrics=['mse', 'accuracy'])
    
    return cnn_model

In [73]:
model = create_CNN(n_users, n_movies, n_factors)
model.summary()

if os.path.exists('NeuralNet_CNN.h5'):
    model = load_model('NeuralNet_CNN.h5')
else:
    history = model.fit(x=X_train_array, y=y_train,  batch_size=10000, epochs=20, verbose=1, validation_data=(X_test_array, y_test))
    model.save('NeuralNet_CNN.h5')
    plt.plot(history.history['val_loss'])
    plt.xlabel("Epochs")
    plt.ylabel("Test Error")

ValueError: Input 0 is incompatible with layer conv1d_15: expected ndim=3, found ndim=2

In [74]:
import keras
from keras.models import Sequential
from keras.layers import Dense, InputLayer, Dropout, Flatten, BatchNormalization, Conv1D

N = len(X_train_array)
batch_input_shape=(None, 64, 1)
nb_classes = 5

print(N)

model = Sequential([
    InputLayer(batch_input_shape=batch_input_shape), 
    Conv1D(64, 1),
    Dense(nb_classes, activation='softmax')
])
model.compile(loss=keras.losses.mean_squared_error,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()
model.fit(x, y_train, epochs=25, verbose=True)

2
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_16 (Conv1D)           (None, 64, 64)            128       
_________________________________________________________________
dense_10 (Dense)             (None, 64, 5)             325       
Total params: 453
Trainable params: 453
Non-trainable params: 0
_________________________________________________________________


AttributeError: 'Reshape' object has no attribute 'ndim'

In [None]:
trainX.shape[1]

In [79]:
from keras.layers import MaxPooling1D

def evaluate_model(trainX, trainy, testX, testy):
    verbose, epochs, batch_size = 0, 10, 100
    
    n_timesteps, n_features, n_outputs = trainX.shape[0], trainX.shape[1], trainy.shape[0]
    
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', bach_input_shape=(n_timesteps, n_features)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['mse', 'accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    return accuracy

def summarize_results(scores):
    print(scores)
    m, s = mean(scores), std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))
 

scores = list()
score = evaluate_model(X_train, y_train, X_test, y_test)
score = score * 100.0
print('>#%d: %.3f' % (r+1, score))
scores.append(score)
# summarize results
summarize_results(scores)

ValueError: Error when checking input: expected conv1d_21_input to have 3 dimensions, but got array with shape (1059256, 2)

### 5.2 Model validation

In [49]:
from sklearn.metrics import mean_squared_error

prediction_test = np.array([a[0] for a in model.predict(x=X_test_array)])

print (mean_squared_error(y_test, prediction_test))

0.9997292606535696


## 6. Building real test data

In [None]:
user_enc = LabelEncoder()
samples['user'] = user_enc.fit_transform(samples['user_id'].values)
n_users = samples['user'].nunique()

item_enc = LabelEncoder()
samples['movie'] = item_enc.fit_transform(samples['movie_id'].values)
n_movies = samples['movie'].nunique()

samples['rating'] = samples['rating'].values.astype(np.int)

X = samples[['user', 'movie']].values
X_array = [X[:,0], X[:,1]]

prediction = np.array([a[0] for a in model.predict(x=X_array)])

samples.drop('rating',axis = 1, inplace = True)
samples['rating'] = prediction

In [None]:
from helpersNeuralNet import create_csv

DATA_SUBMISSION = "data/submission_neuralnet_base.csv"
create_csv(DATA_SUBMISSION, samples)