## Experiment 2: Explicit Feedback

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd

### Ratings file
Each line contains a rated video game:

- a user
- an item
- a rating from 1 to 5

In [3]:
raw_ratings = pd.read_csv('ratings_latest.csv', sep=',')

raw_ratings.head()

Unnamed: 0,id,rate,appid
0,76561197960265729,1.0,10
1,76561197960265729,1.0,20
2,76561197960265729,1.0,30
3,76561197960265729,1.0,40
4,76561197960265729,1.0,50


### Metadata file

This file contains information about each game, specifically:
- item
- name
- genres

In [4]:
items = pd.read_csv('metadata.csv', sep=',')

items.head()

Unnamed: 0,appid,appname,genres
0,10,Counter-Strike,1/
1,20,Team Fortress Classic,1/
2,30,Day of Defeat,1/
3,40,Deathmatch Classic,1/
4,50,Half-Life: Opposing Force,1/


In [5]:
all_ratings = pd.merge(items, raw_ratings)

all_ratings.head()

Unnamed: 0,appid,appname,genres,id,rate
0,10,Counter-Strike,1/,76561197960265729,1.0
1,10,Counter-Strike,1/,76561197960265730,2.05
2,10,Counter-Strike,1/,76561197960265731,1.0
3,10,Counter-Strike,1/,76561197960265733,1.4
4,10,Counter-Strike,1/,76561197960265734,1.0


In [68]:
max_user_id = all_ratings['id'].max()
max_user_id

76561198800607700

In [70]:
max_item_id = all_ratings['appid'].max()
max_item_id

787370

In [6]:
from sklearn.model_selection import train_test_split

ratings_train, ratings_test = train_test_split(
    all_ratings, test_size=0.2, random_state=0)

user_id_train = ratings_train['id']
item_id_train = ratings_train['appid']
rating_train = ratings_train['rate']

user_id_test = ratings_test['id']
item_id_test = ratings_test['appid']
rating_test = ratings_test['rate']

### Supervised Ratings Prediction with Explicit Feedback

In [54]:
import tensorflow as tf
def dot_mode(inputs):
    """Work around for Keras bug with merge([...], mode='dot').

    https://github.com/fchollet/keras/issues/2626

    The dot product of 2 embeddings can be used as an unnormalized
    approximation to the cosine similarity.
    """
    latent_codes_1, latent_codes_2 = inputs
    return tf.reduce_sum(latent_codes_1 * latent_codes_2, axis=-1)

In [63]:
from keras.layers import Input, Embedding, Flatten, merge, Dense, Dropout, Lambda, Dot, Reshape
from keras.models import Model
import keras.backend as K

In [60]:
# # For each sample we input the integer identifiers
# # of a single user and a single item
# user_id_input = Input(shape=[1], name='user')
# item_id_input = Input(shape=[1], name='item')

# embedding_size = 30
# user_embedding = Embedding(output_dim=embedding_size, input_dim=max_user_id + 1,
#                            input_length=1, name='user_embedding')(user_id_input)
# item_embedding = Embedding(output_dim=embedding_size, input_dim=max_item_id + 1,
#                            input_length=1, name='item_embedding')(item_id_input)

# # reshape from shape: (batch_size, input_length, embedding_size)
# # to shape: (batch_size, input_length * embedding_size) which is
# # equal to shape: (batch_size, embedding_size)
# user_vecs = Flatten()(user_embedding)
# item_vecs = Flatten()(item_embedding)

# # y = merge([user_vecs, item_vecs], mode=dot_mode, output_shape=(1,))

# y = Dot(-1, normalize=False)([user_vecs, item_vecs])

# model = Model(inputs=[user_id_input, item_id_input], outputs=y)


# #model = Model(input=[user_id_input, item_id_input], output=y)
# model.compile(optimizer='adam', loss='mae')

In [74]:
all_ratings.id.unique().shape[0]

676668

In [76]:
user_id_input = Input(shape=[1], name='user')
item_id_input = Input(shape=[1], name='item')

embedding_size = 30
user_embedding = Embedding(output_dim=embedding_size, input_dim=all_ratings.id.unique().shape[0],
                           input_length=1, name='user_embedding')(user_id_input)
item_embedding = Embedding(output_dim=embedding_size, input_dim=max_item_id + 1,
                           input_length=1, name='item_embedding')(item_id_input)

user_vecs = Reshape([embedding_size])(user_embedding)
item_vecs = Reshape([embedding_size])(item_embedding)

y = Dot(1, normalize=False)([user_vecs, item_vecs])

model = Model(inputs=[user_id_input, item_id_input], outputs=y)

model.compile(loss='mse',
              optimizer="adam"
             )

In [77]:
initial_train_preds = model.predict([user_id_train, item_id_train])
# initial_train_preds.shape

InvalidArgumentError: Shape [76561198800607701,30] is too large (more than 1099511627776 entries)
	 [[Node: user_embedding_8/embeddings = VariableV2[container="", dtype=DT_FLOAT, shape=[76561198800607701,30], shared_name="", _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'user_embedding_8/embeddings', defined at:
  File "/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-72-49b237b33b10>", line 6, in <module>
    input_length=1, name='user_embedding')(user_id_input)
  File "/anaconda3/lib/python3.6/site-packages/keras/engine/topology.py", line 590, in __call__
    self.build(input_shapes[0])
  File "/anaconda3/lib/python3.6/site-packages/keras/layers/embeddings.py", line 105, in build
    dtype=self.dtype)
  File "/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/anaconda3/lib/python3.6/site-packages/keras/engine/topology.py", line 414, in add_weight
    constraint=constraint)
  File "/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 392, in variable
    v = tf.Variable(value, dtype=tf.as_dtype(dtype), name=name)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 197, in __init__
    expected_shape=expected_shape)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 294, in _init_from_args
    name=name)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 128, in variable_op_v2
    shared_name=shared_name)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 708, in _variable_v2
    name=name)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Shape [76561198800607701,30] is too large (more than 1099511627776 entries)
	 [[Node: user_embedding_8/embeddings = VariableV2[container="", dtype=DT_FLOAT, shape=[76561198800607701,30], shared_name="", _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
