## Data Pre-Processing

In [None]:
import pandas as pd

In [2]:
# Load the MovieLens "ml-latest-small" ratings table. Despite the variable
# name, each row is one (userId, movieId, rating, timestamp) record.
movies = pd.read_csv(filepath_or_buffer='../data/movies/ml-latest-small/ratings.csv')

In [3]:
# Preview the first five rating records as loaded from disk.
movies.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [4]:
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [5]:
# Number of distinct movie ids, then distinct user ids, shown as a tuple.
tuple(len(movies[col].unique()) for col in ('movieId', 'userId'))

(9724, 610)

So the dataset contains 9,724 distinct movies and 610 distinct users.

Let's map the user and movie IDs to contiguous categorical codes (0 … n−1), so that they can be used directly as indices into the embedding layers.

In [6]:
# Replace the raw ids with their pandas category codes (contiguous integers
# starting at 0). Movie ids are encoded first, then user ids.
for id_col in ('movieId', 'userId'):
    movies[id_col] = movies[id_col].astype('category').cat.codes.values

In [7]:
# Preview the first five rows again to confirm the ids are now category codes.
movies.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,0,0,4.0,964982703
1,0,2,4.0,964981247
2,0,5,4.0,964982224
3,0,43,5.0,964983815
4,0,46,5.0,964982931


In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# Hold out 20% of the rating records for evaluation. The fixed random_state
# makes the shuffle deterministic, so the split (and every result derived from
# it) is reproducible under Restart Kernel -> Run All.
train, test = train_test_split(movies, test_size=0.2, random_state=42)

In [10]:
# Preview the first five rows of the training split.
train.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
17585,110,6613,2.5,1516143236
69383,447,3433,2.0,1178981559
66506,427,1971,2.0,1111487454
66199,425,322,5.0,1451080787
42694,287,2248,3.0,978467651


In [11]:
# Preview the first five rows of the held-out test split.
test.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
20561,134,2438,5.0,1009691897
90509,588,592,2.0,856038894
67414,435,365,4.0,833530513
37810,255,4648,4.0,1446580806
20974,138,5159,2.0,1453924912


## Matrix Factorisation For Recommendation

In [13]:
import keras
from IPython.display import SVG
from keras.optimizers import Adam
from keras.utils.vis_utils import model_to_dot

In [14]:
# Vocabulary sizes used to dimension the two embedding tables.
n_users = len(movies['userId'].unique())
n_movies = len(movies['movieId'].unique())

In [15]:
# Size of each latent vector; passed as the output dimension of both the movie
# and the user Embedding layers below.
n_embeddings = 30

In [16]:
from keras import backend as K
# Display the GPU devices the Keras TensorFlow backend can see.
# NOTE(review): `_get_available_gpus` is a private helper of the backend
# module and may be missing in other Keras versions — confirm before upgrading.
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0']

In [17]:
# Movie branch: one integer movie code in, one flat latent vector out.
# NOTE(review): the table has n_movies + 1 rows although the category codes
# run from 0 to n_movies - 1, so the extra row looks unused — confirm.
movie_input = keras.layers.Input(shape=(1,), name='Movie')
movie_embedding = keras.layers.Embedding(
    input_dim=n_movies + 1,
    output_dim=n_embeddings,
    name='Movie-Embeddings',
)(movie_input)
movie_vec = keras.layers.Flatten(name='FlattenMovies')(movie_embedding)

# User branch: mirrors the movie branch with its own embedding table.
user_input = keras.layers.Input(shape=(1,), name='User')
user_embedding = keras.layers.Embedding(
    input_dim=n_users + 1,
    output_dim=n_embeddings,
    name='User-Embeddings',
)(user_input)
user_vec = keras.layers.Flatten(name='FlattenUsers')(user_embedding)

In [None]:
# Predicted rating = dot product of the movie and user latent vectors.
# `keras.layers.merge(..., mode='dot')` is the Keras 1 API; in Keras 2 the
# functional `keras.layers.dot` is the supported equivalent. axes=1 contracts
# the embedding dimension, leaving one scalar per (user, movie) pair.
prod = keras.layers.dot([movie_vec, user_vec], axes=1, name='DotProduct')

# Inputs are ordered [user, movie]; fit/predict must pass arrays in that order.
model = keras.Model([user_input, movie_input], prod)

# Adam optimizer with mean squared error between predicted and actual ratings.
model.compile('adam', 'mean_squared_error')