# 1. Environment Setup

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense
from tensorflow.keras.models import Model

# 2. Load and Preprocess the Anime Recommendations Database


In [None]:
# Read the raw ratings table, turn every -1 into NaN, and keep only the rows
# that have no missing value left.
ratings = (
    pd.read_csv('/content/rating.csv')
    .replace({-1: np.nan})
    .dropna()
)

# 3. Encode Users and Anime IDs

In [None]:
# Distinct raw ids, in order of first appearance in the ratings table.
user_ids = ratings["user_id"].unique().tolist()
anime_ids = ratings["anime_id"].unique().tolist()

# Lookup tables: position -> raw id, and the inverse raw id -> position.
userencoded2user = dict(enumerate(user_ids))
user2user_encoded = {raw_id: idx for idx, raw_id in userencoded2user.items()}
anime_encoded2anime = dict(enumerate(anime_ids))
anime2anime_encoded = {raw_id: idx for idx, raw_id in anime_encoded2anime.items()}

# Store the contiguous 0-based indices next to the raw ids.
ratings["anime"] = ratings["anime_id"].map(anime2anime_encoded)
ratings["user"] = ratings["user_id"].map(user2user_encoded)

# 4. Split the Data into Training and Testing Sets

In [None]:
# Number of distinct users / anime (sizes of the lookup tables).
num_users, num_animes = len(user2user_encoded), len(anime_encoded2anime)

# Store the target as float32.
ratings["rating"] = ratings["rating"].to_numpy().astype(np.float32)

# Features are the (user index, anime index) pairs; the target is the rating.
y = ratings["rating"].to_numpy()
X = ratings[["user", "anime"]].to_numpy()

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so the randomly initialised embeddings are repeatable when the
# notebook is re-run from a fresh kernel.
tf.keras.utils.set_random_seed(42)

# Length of the embedding vector learned for each user and each anime.
embedding_size = 50

# User branch: one integer user index -> embedding vector, flattened from
# (batch, 1, embedding_size) to (batch, embedding_size).
user_input = Input(shape=(1,), name="user_input")
user_embedding = Embedding(num_users, embedding_size, name="user_embedding")(user_input)
user_vec = Flatten(name="flatten_users")(user_embedding)

# Anime branch: same structure, with its own embedding table.
anime_input = Input(shape=(1,), name="anime_input")
anime_embedding = Embedding(num_animes, embedding_size, name="anime_embedding")(anime_input)
anime_vec = Flatten(name="flatten_animes")(anime_embedding)

# The model output is the dot product of the two embedding vectors.
dot_product = Dot(name="dot_product", axes=1)([user_vec, anime_vec])
model = Model(inputs=[user_input, anime_input], outputs=dot_product)

In [None]:
# Fit with Adam on squared error for five epochs.
# NOTE: the test split is also passed as validation data here.
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(
    x=[X_train[:, 0], X_train[:, 1]],
    y=y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
    validation_data=([X_test[:, 0], X_test[:, 1]], y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Recommend the ten not-yet-rated anime with the highest predicted rating
# for a single user (here: the first user in `user_ids`).
user_id = user_ids[0]
user_enc = user2user_encoded[user_id]

# Encoded indices of every anime this user has already rated.
user_anime_ids = ratings.loc[ratings["user_id"] == user_id, "anime_id"].values
user_anime_ids = [anime2anime_encoded[x] for x in user_anime_ids]

# Candidates: every encoded anime index the user has NOT rated. Sorted so the
# candidate order (and therefore tie-breaking) is deterministic.
all_anime_ids = np.array(
    sorted(set(range(num_animes)) - set(user_anime_ids)), dtype=np.int64
)
user_encs = np.full(len(all_anime_ids), user_enc, dtype=np.int64)
all_anime_ids = all_anime_ids.reshape(-1, 1)
ratings_pred = model.predict([user_encs, all_anime_ids])

# argsort yields POSITIONS within the candidate array, not encoded anime
# indices, so each position must be mapped back through `all_anime_ids`
# before it is decoded to a raw anime id.
top_10_indices = ratings_pred.flatten().argsort()[-10:][::-1]
recommended_anime_ids = [
    anime_encoded2anime[int(all_anime_ids[pos, 0])] for pos in top_10_indices
]
print("Recommended anime ids:", recommended_anime_ids)

Recommended anime ids: [25731, 1462, 32438, 10491, 9471, 9041, 2404, 2213, 10016, 3784]


In [None]:
# Evaluate on the held-out split: predict a rating for every (user, anime)
# pair in X_test and compare it with the true rating.
# `mean_squared_error` comes from the notebook's import cell.
y_pred = model.predict([X_test[:, 0], X_test[:, 1]])

# Mean squared error of the predictions, and its square root (RMSE).
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)

Mean Squared Error: 1.6484803
Root Mean Squared Error: 1.2839316


In [None]:
# Print a summary of the ratings frame: index range, per-column non-null
# counts and dtypes, and memory usage.
ratings.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 836438 entries, 47 to 1026767
Data columns (total 5 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   user_id   836438 non-null  int64  
 1   anime_id  836438 non-null  int64  
 2   rating    836438 non-null  float32
 3   user      836438 non-null  int64  
 4   anime     836438 non-null  int64  
dtypes: float32(1), int64(4)
memory usage: 35.1 MB
