# Recommender System - Tensor Flow

## 1. Prerequisites

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings


warnings.filterwarnings('ignore')
%matplotlib inline


import tensorflow as tf
import scipy
import scipy.io
import scipy.sparse as sp

from keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from keras.models import Model

%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


In [2]:
from helpers import load_data

DATA_TRAIN_PATH = "data/data_train.csv"
ratings = load_data(DATA_TRAIN_PATH)

DATA_TEST_PATH = "data/sampleSubmission.csv"
samples = load_data(DATA_TEST_PATH)

number of items: 10000, number of users: 1000
number of items: 10000, number of users: 1000


In [3]:
from sklearn.model_selection import train_test_split
from helpers import convert_train


data, n_users, n_movies = convert_train(ratings)
train, test = train_test_split(data, test_size=0.1, random_state=42)

submission,_,_=convert_train(samples)


In [21]:
data.head()

Unnamed: 0,user_id,movie_id,rating
0,0,9,5
1,0,60,5
2,0,67,4
3,0,83,4
4,0,205,2


## 2. Creating Neural Network

In [4]:
# Creating movie embedding path
movie_input = Input(shape = [1], name = "Movies-Input")
movie_embedding = Embedding(n_movies + 1, 5, name = "Movies-Embedding")(movie_input)
movie_vec = Flatten(name = "Flatten-Movies")(movie_embedding)

# Creating user embedding path
user_input = Input(shape = [1], name = "User-Input")
user_embedding = Embedding(n_users + 1, 5, name = "User-Embedding")(user_input)
user_vec = Flatten(name = "Flatten-Users")(user_embedding)

# Concatenate features
conc = Concatenate()([movie_vec, user_vec])

# Add fully-connected-layers
fc1 = Dense(128, activation = 'relu') (conc)
fc2 = Dense(32, activation = 'relu') (fc1)
out = Dense(1)(fc2)

# Create model and compile
model = Model([user_input, movie_input], out)
model.compile('adam', 'mean_squared_error')

In [5]:
from keras.models import load_model

if os.path.exists('regression_model_neural_network.h5'):
    model = load_model('regression_model_neural_network.h5')
else:
    history = model.fit([train.user_id, train.movie_id], train.rating, epochs=150,batch_size=200 ,verbose=1)
    model.save('regression_model_neural_network.h5')
    plt.plot(history.history['loss'])
    plt.xlabel("Epochs")
    plt.ylabel("Training Error")




In [6]:
scores = model.evaluate([test.user_id, test.movie_id], test.rating)


print("Accuracy: %.2f%%" % (scores*100))

predictions_dot= model.predict([submission.user_id, submission.movie_id])

Accuracy: 106.61%


In [10]:
prediction_test = np.array([a[0] for a in model.predict([test.user_id, test.movie_id])])
prediction_test = np.round(prediction_test)

prediction_test = np.where(prediction_test < 0, 0, prediction_test)
prediction_test = np.where(prediction_test > 5, 5, prediction_test)

Y = np.mean(np.where(prediction_test == test.rating, 1, 0))
print (np.where(prediction_test == test.rating, 1, 0))
print (Y)
test.head()

[1 0 0 ... 0 0 0]
0.3783646003262643


Unnamed: 0,user_id,movie_id,rating
904556,7906,412,4
171326,1513,590,4
190893,1697,25,3
1082109,9264,833,1
881877,7730,403,3


In [7]:
predictions_dot = np.array([a[0] for a in predictions_dot])
prediction_dot = np.rint(predictions_dot)

prediction_dot = np.where(prediction_dot < 0, 0, prediction_dot)
prediction_dot = np.where(prediction_dot > 5, 5, prediction_dot)



submission.drop('rating',axis = 1, inplace = True)
submission['rating'] = prediction_dot

In [8]:
from helpers import load_csv
export_csv = load_csv('data/prediction_neural_network',submission)