In [1]:
#import dependencies
import numpy as np
import pandas as pd
import warnings

from keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from keras.models import Model
from sklearn.model_selection import train_test_split

warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [2]:
#set debug to true for faster processing
is_debug = False

In [3]:
#load data
if is_debug==True:
    dataset = pd.read_csv('ml-20m/ratings.csv', nrows=100)
else:
    dataset = pd.read_csv('ml-20m/ratings.csv')

#get count of users and movies for embedding layers
n_users = len(dataset.userId.unique())
n_movies = len(dataset.movieId.unique())

#split data
train, test = train_test_split(dataset, test_size=0.2, random_state=42)

In [4]:
#create embedding path
movie_input = Input(shape=[1], name="Movie-Input")
movie_embedding = Embedding(n_movies+1, 5, name="Movie-Embedding")(movie_input)
movie_vec = Flatten(name="Flatten-Movies")(movie_embedding)

#create user embedding path
user_input = Input(shape=[1], name="User-Input")
user_embedding = Embedding(n_users+1, 5, name="User-Embedding")(user_input)
user_vec = Flatten(name="Flatten-Users")(user_embedding)

#concatenate features
conc = Concatenate()([movie_vec, user_vec])

#fully connected layers
fc1 = Dense(128, activation='relu')(conc)
fc2 = Dense(32, activation='relu')(fc1)
out = Dense(1)(fc2)

#create and compile model
model = Model([user_input, movie_input], out)
model.compile('adam','mean_squared_error')
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Movie-Input (InputLayer)        (None, 1)            0                                            
__________________________________________________________________________________________________
User-Input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
Movie-Embedding (Embedding)     (None, 1, 5)         133725      Movie-Input[0][0]                
__________________________________________________________________________________________________
User-Embedding (Embedding)      (None, 1, 5)         692470      User-Input[0][0]                 
_____________________________________

In [5]:
if is_debug==True:
    epochs=1
    batch_size=len(dataset)
else:
    epochs=5
    batch_size=128

model.fit([train.userId, train.movieId], train.rating, batch_size=128, epochs=epochs, verbose=1)
model.save('NN_Movie_Model.h5')

model.evaluate([test.userId, test.movieId], test.rating)

Instructions for updating:
Use tf.cast instead.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


0.6848732901450331

In [23]:
#data set for first user
movie_data = np.array(list(set(dataset.movieId)))
user = np.array([2 for i in range(len(movie_data))])

#predict movies for first user
predictions = model.predict([user, movie_data])
predictions = np.array([a[0] for a in predictions])
#keep top 5 predictions
recommended_movie_ids = pd.DataFrame((-predictions).argsort()[:5],columns=['movieId'])

print(recommended_movie_ids)

   movieId
0     7044
1     5856
2      317
3      259
4     4900


In [15]:
#join movie data set to get movie titles
movie_dataset = pd.read_csv('ml-20m/movies.csv')
print(movie_dataset.head(5))

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


In [24]:
recommended_df = pd.merge(recommended_movie_ids, movie_dataset, how='left', 
                left_on='movieId', right_on='movieId')

In [25]:
print(recommended_df['title'])

0                                 Wild at Heart (1990)
1    Do You Remember Dolly Bell? (Sjecas li se, Dol...
2                             Santa Clause, The (1994)
3                                 Kiss of Death (1995)
4                                      Out Cold (2001)
Name: title, dtype: object
