## Import necessary modules

In [32]:
import pandas as pd
import numpy as np
import helpers
from tensorflow.python.keras.layers import Input, Embedding, Flatten, Dot
from tensorflow.python.keras.layers import Reshape, Add, Concatenate, Dense, Dropout
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.keras.optimizers import Adam

In [33]:
#train_path = helpers.get_train_file_path()
ratings = pd.read_csv('NCF_3.csv')# helpers.get_test_file_path())
ratings.head()

Unnamed: 0,index,userId,movieId,rating
0,0,44,1,4
1,1,61,1,3
2,2,67,1,4
3,3,72,1,3
4,4,86,1,5


In [34]:
len(ratings)

1176952

In [35]:
n_users = ratings.userId.nunique()
print(n_users)

10000


In [36]:
n_movies = ratings.movieId.nunique()
print(n_movies)

1000


In [37]:
g = ratings.groupby('userId')['rating'].count()
print(g[:10])

userId
1      23
2     149
3      86
4      20
5     134
6     169
7      62
8      26
9     178
10    107
Name: rating, dtype: int64


In [38]:
topUsers = g.sort_values(ascending=False)[:15]
print(topUsers)

userId
5512    522
9711    487
8706    484
966     470
9377    458
8575    452
1000    438
1878    437
9827    433
4600    433
2038    429
1830    427
1570    425
7014    418
5289    417
Name: rating, dtype: int64


In [39]:
g = ratings.groupby('movieId')['rating'].count()
topMovies = g.sort_values(ascending=False)[:15]
top_r = ratings.join(topUsers, rsuffix='_r', how='inner', on='userId')
print(top_r[:25])
#rating_r column is the number of movies that user with userId has rated

       index  userId  movieId  rating  rating_r
37        37     966        1       5       470
1944    1944     966        4       5       470
4938    4938     966        5       5       470
6484    6484     966        6       5       470
10520  10520     966        7       4       470
14999  14999     966        9       5       470
18173  18173     966       11       5       470
19304  19304     966       12       2       470
19808  19808     966       13       5       470
20569  20569     966       14       5       470
24336  24336     966       15       5       470
26540  26540     966       17       2       470
30279  30279     966       21       5       470
33417  33417     966       23       5       470
34113  34113     966       24       5       470
37845  37845     966       27       5       470
39089  39089     966       29       5       470
39970  39970     966       30       5       470
40708  40708     966       31       3       470
41146  41146     966       32       5   

In [40]:
top_r = top_r.join(topMovies, rsuffix='_x', how='inner', on='movieId')
print(top_r[:20])
# rating_x is the total number of users that ratied the movie with id movieId

         index  userId  movieId  rating  rating_r  rating_x
6484      6484     966        6       5       470      4347
6741      6741    1570        6       5       425      4347
6853      6853    1830        6       5       427      4347
6945      6945    2038        6       5       429      4347
8334      8334    5289        6       5       417      4347
9821      9821    8706        6       5       484      4347
10137    10137    9377        6       5       458      4347
10343    10343    9827        6       5       433      4347
9763      9763    8575        6       5       452      4347
20569    20569     966       14       5       470      3986
20925    20925    1878       14       5       437      3986
20988    20988    2038       14       5       429      3986
22317    22317    5512       14       5       522      3986
22939    22939    7014       14       5       418      3986
23649    23649    8706       14       5       484      3986
23940    23940    9377       14       5 

In [41]:
pd.crosstab(top_r.userId, top_r.movieId, top_r.rating, aggfunc=np.sum)

movieId,6,14,46,60,134,156,178,256,471,495,594,596,608,668,978
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
966,5.0,5.0,,,,,5.0,5.0,5.0,5.0,5.0,,,5.0,5.0
1000,,5.0,5.0,,5.0,,5.0,,5.0,,5.0,5.0,,5.0,3.0
1570,5.0,,,5.0,5.0,,,,,5.0,,5.0,,,5.0
1830,5.0,,5.0,5.0,,,,5.0,,5.0,,5.0,5.0,,
1878,,5.0,5.0,,5.0,,5.0,5.0,5.0,,,,,5.0,
2038,5.0,5.0,,5.0,5.0,,,,,,5.0,,,5.0,
4600,,,,,5.0,5.0,5.0,5.0,5.0,5.0,,5.0,,,5.0
5289,5.0,,5.0,5.0,5.0,5.0,5.0,5.0,5.0,,5.0,,5.0,5.0,
5512,,5.0,5.0,,,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,,5.0
7014,,5.0,5.0,5.0,5.0,5.0,,,,5.0,,,,,


In [42]:
# split train and validation
msk = np.random.rand(len(ratings)) < 0.8
train = ratings[msk]
valid = ratings[~msk]
print(len(train), len(valid))

941771 235181


## Dot Product

In [43]:
print(n_users)
print(n_movies)

10000
1000


In [44]:
n_factors = 128

### Embeddings

In [45]:
user_in = Input(shape=(1,), dtype='int64', name='user_in')
u = Embedding(n_users, n_factors, input_length=1, embeddings_regularizer=None, embeddings_initializer='glorot_normal')(user_in)
u = Reshape((n_factors,))(u)
movie_in = Input(shape=(1,), dtype='int64', name='movie_in')
m = Embedding(n_movies, n_factors, input_length=1, embeddings_regularizer=None, embeddings_initializer='glorot_normal')(movie_in)
m = Reshape((n_factors,))(m)

In [51]:
x = Dot(axes=(0))([u, m])
x = Flatten()(x)
model = Model([user_in, movie_in], x)
model.compile(Adam(0.000001), loss='mse')

In [52]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_in (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
movie_in (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 1, 128)       1280000     user_in[0][0]                    
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 1, 128)       128000      movie_in[0][0]                   
__________________________________________________________________________________________________
reshape_3 

In [53]:
batch_size=64

In [54]:
def fit_model(epochs=1):
    for i in range(epochs):
        model.fit(
            [train.userId, train.movieId], train.rating, batch_size=batch_size,
            validation_data=([valid.userId, valid.movieId], valid.rating))

In [55]:
fit_model()

Train on 941771 samples, validate on 235181 samples
Epoch 1/1


InvalidArgumentError: Incompatible shapes: [128,1] vs. [64,1]
	 [[Node: training_2/Adam/gradients/loss_2/flatten_3_loss/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@loss_2/flatten_3_loss/sub"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training_2/Adam/gradients/loss_2/flatten_3_loss/sub_grad/Shape, training_2/Adam/gradients/loss_2/flatten_3_loss/sub_grad/Shape_1)]]

Caused by op 'training_2/Adam/gradients/loss_2/flatten_3_loss/sub_grad/BroadcastGradientArgs', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 345, in run_forever
    self._run_once()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 1312, in _run_once
    handle._run()
  File "/usr/lib/python3.5/asyncio/events.py", line 125, in _run
    self._callback(*self._args)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2909, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-55-e240893ee132>", line 1, in <module>
    fit_model()
  File "<ipython-input-54-0860a6c78567>", line 5, in fit_model
    validation_data=([valid.userId, valid.movieId], valid.rating))
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 1647, in fit
    self._make_train_function()
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 980, in _make_train_function
    params=self._collected_trainable_weights, loss=self.total_loss)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/optimizers.py", line 444, in get_updates
    grads = self.get_gradients(loss, params)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/optimizers.py", line 95, in get_gradients
    grads = K.gradients(loss, params)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/backend.py", line 2593, in gradients
    loss, variables, colocate_gradients_with_ops=True)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 609, in gradients
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 375, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 609, in <lambda>
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/math_grad.py", line 764, in _SubGrad
    rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 528, in _broadcast_gradient_args
    "BroadcastGradientArgs", s0=s0, s1=s1, name=name)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
    op_def=op_def)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

...which was originally created as op 'loss_2/flatten_3_loss/sub', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
[elided 22 identical lines from previous traceback]
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-51-fb40edbb6516>", line 4, in <module>
    model.compile(Adam(0.000001), loss='mse')
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 849, in compile
    output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 454, in weighted
    score_array = fn(y_true, y_pred)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/losses.py", line 29, in mean_squared_error
    return K.mean(K.square(y_pred - y_true), axis=-1)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 907, in binary_op_wrapper
    return func(x, y, name=name)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py", line 4856, in _sub
    "Sub", x=x, y=y, name=name)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
    op_def=op_def)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Incompatible shapes: [128,1] vs. [64,1]
	 [[Node: training_2/Adam/gradients/loss_2/flatten_3_loss/sub_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@loss_2/flatten_3_loss/sub"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training_2/Adam/gradients/loss_2/flatten_3_loss/sub_grad/Shape, training_2/Adam/gradients/loss_2/flatten_3_loss/sub_grad/Shape_1)]]


In [31]:
fit_model(10)

Train on 941554 samples, validate on 235398 samples
Epoch 1/1
  1536/941554 [..............................]  1536/941554 [..............................] - ETA: 6:05 - loss: 16.1226

InvalidArgumentError: indices[38,0] = 1000 is not in [0, 1000)
	 [[Node: embedding_2/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](embedding_1/embeddings/read, embedding_2/Cast)]]

Caused by op 'embedding_2/Gather', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 345, in run_forever
    self._run_once()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 1312, in _run_once
    handle._run()
  File "/usr/lib/python3.5/asyncio/events.py", line 125, in _run
    self._callback(*self._args)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-25-f9bfe250d611>", line 5, in <module>
    m = Embedding(n_movies, n_factors, input_length=1, embeddings_regularizer=None, embeddings_initializer='glorot_normal')(movie_in)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/engine/topology.py", line 258, in __call__
    output = super(Layer, self).__call__(inputs, **kwargs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 652, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/layers/embeddings.py", line 158, in call
    out = K.gather(self.embeddings, inputs)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/keras/_impl/keras/backend.py", line 1402, in gather
    return array_ops.gather(reference, indices)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py", line 2585, in gather
    params, indices, validate_indices=validate_indices, name=name)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1864, in gather
    validate_indices=validate_indices, name=name)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
    op_def=op_def)
  File "/home/lucien/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): indices[38,0] = 1000 is not in [0, 1000)
	 [[Node: embedding_2/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](embedding_1/embeddings/read, embedding_2/Cast)]]
