In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split



In [2]:
rating = pd.read_csv('rating.csv')
anime = pd.read_csv('anime.csv')
#number of users
n_users = rating.user_id.unique().shape[0]
# drop rows with rating -1
rating = rating[rating.rating != -1]

genres = set()
for i in anime['genre']:
    if type(i) != float:
        for j in i.split(', '):
            genres.add(j)
genres = list(genres)
anime = anime.drop(['type', 'episodes', 'rating', 'members'], axis=1)
# add generes list as columns in anime dataframe
ad = pd.DataFrame(columns=genres)
anime = pd.concat([anime, ad], axis=1)
# add 1 if anime has that genre else 0
for i in range(len(anime)):
    if type(anime['genre'][i]) != float:
        for j in anime['genre'][i].split(', '):
            anime[j][i] = 1
# drop unnecessary columns
anime = anime.drop(['genre'], axis=1)
# fillna
anime = anime.fillna(0)
print(anime.head())

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anime[j][i] = 1


   anime_id                              name  Military  Shounen Ai  Josei  \
0     32281                    Kimi no Na wa.         0           0      0   
1      5114  Fullmetal Alchemist: Brotherhood         1           0      0   
2     28977                          Gintama°         0           0      0   
3      9253                       Steins;Gate         0           0      0   
4      9969                     Gintama&#039;         0           0      0   

   Action  Vampire  Yuri  Music  Space  ...  Romance  Ecchi  Drama  Harem  \
0       0        0     0      0      0  ...        1      0      1      0   
1       1        0     0      0      0  ...        0      0      1      0   
2       1        0     0      0      0  ...        0      0      0      0   
3       0        0     0      0      0  ...        0      0      0      0   
4       1        0     0      0      0  ...        0      0      0      0   

   Historical  Mystery  Shounen  Kids  Seinen  Hentai  
0           

In [3]:
# merge anime and rating
merged = pd.merge(rating, anime, on='anime_id')
merged = merged.drop(['name'], axis=1)
merged


Unnamed: 0,user_id,anime_id,rating,Military,Shounen Ai,Josei,Action,Vampire,Yuri,Music,...,Romance,Ecchi,Drama,Harem,Historical,Mystery,Shounen,Kids,Seinen,Hentai
0,1,8074,10,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,3,8074,6,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,5,8074,2,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,12,8074,6,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,14,8074,6,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6337234,69964,23585,7,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
6337235,69964,33659,6,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6337236,72800,30738,4,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
6337237,73135,8723,5,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# normalize rating
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
merged['rating'] = scaler.fit_transform(merged['rating'].values.reshape(-1,1))
merged.head()


Unnamed: 0,user_id,anime_id,rating,Military,Shounen Ai,Josei,Action,Vampire,Yuri,Music,...,Romance,Ecchi,Drama,Harem,Historical,Mystery,Shounen,Kids,Seinen,Hentai
0,1,8074,1.0,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,3,8074,0.555556,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,5,8074,0.111111,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,12,8074,0.555556,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,14,8074,0.555556,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0


In [8]:
# create embeddings for users and anime
from keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate, Dropout
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

n_latent_factors_user = 5
n_latent_factors_anime = 8

# anime embedding
anime_input = Input(shape=[1], name="Anime-Input")
anime_embedding = Embedding(12294 + 1, n_latent_factors_anime, name="Anime-Embedding")(anime_input)
anime_vec = Flatten(name="Flatten-Anime")(anime_embedding)

# user embedding
user_input = Input(shape=[1], name="User-Input")
user_embedding = Embedding(73515 + 1, n_latent_factors_user, name="User-Embedding")(user_input)
user_vec = Flatten(name="Flatten-Users")(user_embedding)

# concatenate
concat = Concatenate()([anime_vec, user_vec])
concat_dropout = Dropout(0.2)(concat)
dense = Dense(200, name="Fully-Connected", activation="relu")(concat)
dropout_1 = Dropout(0.2,name="Dropout1")(dense)
dense_2 = Dense(100,name="Fully-Connected-1",activation="relu")(dropout_1)
dropout_2 = Dropout(0.2,name="Dropout2")(dense_2)
dense_3 = Dense(50,name="Fully-Connected-2",activation="relu")(dropout_2)
dropout_3 = Dropout(0.2,name="Dropout3")(dense_3)
dense_4 = Dense(20,name="Fully-Connected-3",activation="relu")(dropout_3)

result = Dense(1, activation='sigmoid')(dense_4)
model = Model([user_input, anime_input], result)
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

# # split data into train and test
# train, test = train_test_split(merged, test_size=0.3, random_state=42)
# print(train.shape)
# print(test.shape)




Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Anime-Input (InputLayer)       [(None, 1)]          0           []                               
                                                                                                  
 User-Input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 Anime-Embedding (Embedding)    (None, 1, 8)         98360       ['Anime-Input[0][0]']            
                                                                                                  
 User-Embedding (Embedding)     (None, 1, 5)         367580      ['User-Input[0][0]']             
                                                                                            

In [10]:
model.fit([merged.user_id, merged.anime_id], merged.rating, epochs=10, verbose=1)

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node 'model_1/Anime-Embedding/embedding_lookup' defined at (most recent call last):
    File "c:\Python310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Python310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\traitlets\config\application.py", line 982, in launch_instance
      app.start()
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "c:\Python310\lib\asyncio\base_events.py", line 600, in run_forever
      self._run_once()
    File "c:\Python310\lib\asyncio\base_events.py", line 1896, in _run_once
      handle._run()
    File "c:\Python310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\ipykernel\ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 2940, in run_cell
      result = self._run_cell(
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 2995, in _run_cell
      return runner(coro)
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3194, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3373, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\Dell Latitude E5470\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3433, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Dell Latitude E5470\AppData\Local\Temp\ipykernel_9828\3353970990.py", line 1, in <module>
      model.fit([merged.user_id, merged.anime_id], merged.rating, epochs=10, verbose=1)
    File "c:\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Python310\lib\site-packages\keras\engine\training.py", line 1685, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Python310\lib\site-packages\keras\engine\training.py", line 1284, in train_function
      return step_function(self, iterator)
    File "c:\Python310\lib\site-packages\keras\engine\training.py", line 1268, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Python310\lib\site-packages\keras\engine\training.py", line 1249, in run_step
      outputs = model.train_step(data)
    File "c:\Python310\lib\site-packages\keras\engine\training.py", line 1050, in train_step
      y_pred = self(x, training=True)
    File "c:\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Python310\lib\site-packages\keras\engine\training.py", line 558, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Python310\lib\site-packages\keras\engine\base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Python310\lib\site-packages\keras\engine\functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "c:\Python310\lib\site-packages\keras\engine\functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "c:\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Python310\lib\site-packages\keras\engine\base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Python310\lib\site-packages\keras\layers\core\embedding.py", line 272, in call
      out = tf.nn.embedding_lookup(self.embeddings, inputs)
Node: 'model_1/Anime-Embedding/embedding_lookup'
indices[7,0] = 14513 is not in [0, 12295)
	 [[{{node model_1/Anime-Embedding/embedding_lookup}}]] [Op:__inference_train_function_2189]