In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, Concatenate,Dot
from tensorflow.keras.models import Model
from tensorflow.keras import mixed_precision
import tensorflow.data
from tensorflow.keras.regularizers import L2
from joblib import load
import pydot
from datetime import datetime

In [2]:
rating_df = pd.read_csv('data\\users-score-2023.csv')
anime_metadata_df = pd.read_csv('data\\anime-dataset-2023.csv')

In [3]:
model=tf.keras.models.load_model('models/merged_model.keras')

In [4]:
anime_metadata_df.drop(["Name",'English name','Other name','Synopsis','Premiered','Episodes','Duration','Rating','Popularity',	'Members',	'Favorites','Status','Scored By','Image URL'	,	'Licensors','Producers' , 'Aired' , 'Rank'],axis=1,inplace=True)

In [5]:
anime_metadata_df = anime_metadata_df[anime_metadata_df['anime_id'].isin([21,48,320])]
anime_metadata_df

Unnamed: 0,anime_id,Score,Genres,Type,Studios,Source
11,21,8.69,"Action, Adventure, Fantasy",TV,Toei Animation,Manga
29,48,6.95,"Adventure, Fantasy, Mystery",TV,Bee Train,Original
296,320,6.54,"Action, Drama, Hentai",OVA,Arms,Original


In [6]:
user_mean_score=7.37
user_age=39
user_gender='Male'

In [7]:
mean_score_scaler=load('intermediate_data_all_embeddings/mean_score_scaler.pkl')

rating_scaler=load('intermediate_data_all_embeddings/rating_scaler.pkl')

score_scaler=load('intermediate_data_all_embeddings/score_scaler.pkl')

type_encoder=load('intermediate_data_all_embeddings/Type_encoder.pkl')


gender_encoder=load('intermediate_data_all_embeddings/gender_encoder.pkl')

user_input_shape=6

anime_input_shape=8

studio_max_length = 10

genre_max_length=9

source_max_length=1



In [8]:
@tf.keras.utils.register_keras_serializable()
def split_func(input_str):
  return tf.strings.split(input_str, sep=", ")

In [9]:

studio_vectorize_layer_model=tf.keras.models.load_model('intermediate_data_all_embeddings/studio_vectorize_layer_model')
studio_vectorize_layer = studio_vectorize_layer_model.layers[0]

num_studios=len(studio_vectorize_layer.get_vocabulary())
num_studios



914

In [10]:
genre_vectorize_layer_model=tf.keras.models.load_model('intermediate_data_all_embeddings/genre_vectorize_layer_model')
genre_vectorize_layer = genre_vectorize_layer_model.layers[0]

num_genres=len(genre_vectorize_layer.get_vocabulary())
num_genres



23

In [11]:
source_vectorize_layer_model=tf.keras.models.load_model('intermediate_data_all_embeddings/source_vectorize_layer_model')
source_vectorize_layer = source_vectorize_layer_model.layers[0]


num_sources=len(source_vectorize_layer.get_vocabulary())
num_sources



19

In [12]:
type_encoder=load('intermediate_data_all_embeddings/Type_encoder.pkl')

In [13]:
anime_metadata_df.loc[:,'Score']=score_scaler.transform(anime_metadata_df[['Score']]).astype(np.float32)



In [14]:
type(anime_metadata_df['Score'].iloc[0])

float

In [15]:
encoded_Types = type_encoder.transform(anime_metadata_df[['Type']])  # Fit and transform the column
encoded_df = pd.DataFrame(encoded_Types.toarray(), columns=type_encoder.categories_[0],index=anime_metadata_df.index)  # Create a DataFrame from the encoded columns

anime_metadata_df=pd.concat([anime_metadata_df, encoded_df], axis=1)  # Concatenate the original DataFrame with the encoded DataFrame
anime_metadata_df.drop(['Type'],axis=1,inplace=True)

In [16]:
anime_metadata_df

Unnamed: 0,anime_id,Score,Genres,Studios,Source,Movie,Music,ONA,OVA,Special,TV,UNKNOWN
11,21,2.587209,"Action, Adventure, Fantasy",Toei Animation,Manga,0,0,0,0,0,1,0
29,48,0.602828,"Adventure, Fantasy, Mystery",Bee Train,Original,0,0,0,0,0,1,0
296,320,0.153191,"Action, Drama, Hentai",Arms,Original,0,0,0,1,0,0,0


In [17]:
sequences=studio_vectorize_layer(anime_metadata_df.loc[:,'Studios'])
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=studio_max_length, padding='post')

# Create separate columns for each index in the sequence
for i in range(studio_max_length):
    anime_metadata_df[f'Studio_Index_{i+1}'] = padded_sequences[:, i]

anime_metadata_df.drop('Studios', axis=1, inplace=True)

In [18]:
sequences=genre_vectorize_layer(anime_metadata_df.loc[:,'Genres'])

padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=genre_max_length, padding='post')

# Create separate columns for each index in the sequence
for i in range(genre_max_length):
    anime_metadata_df[f'Genre_Index_{i+1}'] = padded_sequences[:, i]

anime_metadata_df.drop('Genres', axis=1, inplace=True)

In [19]:
sequences=source_vectorize_layer(anime_metadata_df.loc[:,'Source'])

padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=source_max_length, padding='post')

# Create separate columns for each index in the sequence
for i in range(source_max_length):
    anime_metadata_df[f'Source_Index_{i+1}'] = padded_sequences[:, i]

anime_metadata_df.drop('Source', axis=1, inplace=True)

In [20]:
anime_metadata_df

Unnamed: 0,anime_id,Score,Movie,Music,ONA,OVA,Special,TV,UNKNOWN,Studio_Index_1,...,Genre_Index_1,Genre_Index_2,Genre_Index_3,Genre_Index_4,Genre_Index_5,Genre_Index_6,Genre_Index_7,Genre_Index_8,Genre_Index_9,Source_Index_1
11,21,2.587209,0,0,0,0,0,1,0,2,...,3,5,4,0,0,0,0,0,0,3
29,48,0.602828,0,0,0,0,0,1,0,90,...,5,4,13,0,0,0,0,0,0,2
296,320,0.153191,0,0,0,1,0,0,0,23,...,3,7,9,0,0,0,0,0,0,2


In [21]:
user_df=pd.DataFrame([[user_gender,user_mean_score,user_age]],columns=['Gender','Mean Score','age'])

In [22]:
user_df['age'].fillna(33.691945637815515, inplace=True)

In [23]:
user_df

Unnamed: 0,Gender,Mean Score,age
0,Male,7.37,39


In [24]:
encoded_gender = gender_encoder.transform(user_df[['Gender']])  # Fit and transform the column
encoded_df = pd.DataFrame(encoded_gender.toarray(), columns=gender_encoder.categories_[0])  # Create a DataFrame from the encoded columns

user_df=pd.concat([user_df, encoded_df], axis=1)  # Concatenate the original DataFrame with the encoded DataFrame
user_df.drop(['Gender'],axis=1,inplace=True)

In [25]:
user_df

Unnamed: 0,Mean Score,age,Female,Male,Non-Binary,Unknown_Gender
0,7.37,39,0,1,0,0


In [26]:
user_df.loc[:,'Mean Score']=mean_score_scaler.transform(user_df[['Mean Score']])



In [27]:
user_df

Unnamed: 0,Mean Score,age,Female,Male,Non-Binary,Unknown_Gender
0,-0.696093,39,0,1,0,0


In [28]:
anime_metadata_df.shape[0]

3

In [29]:
user_df=user_df.reindex(user_df.index.repeat(anime_metadata_df.shape[0]))

In [30]:
user_input=user_df.to_numpy()

In [34]:
anime_columns=anime_metadata_df.iloc[:,1:1+anime_input_shape].to_numpy(dtype='float32')
anime_studio_embedding_columns = anime_metadata_df.iloc[:,1+anime_input_shape:1+anime_input_shape+studio_max_length].to_numpy()
anime_genre_embedding_columns= anime_metadata_df.iloc[:,1+anime_input_shape+studio_max_length:1+anime_input_shape+studio_max_length+genre_max_length].to_numpy()
anime_source_embedding_columns = anime_metadata_df.iloc[:,-1:].to_numpy()

In [35]:
anime_columns

array([[2.5872092 , 0.        , 0.        , 0.        , 0.        ,
        0.        , 1.        , 0.        ],
       [0.60282815, 0.        , 0.        , 0.        , 0.        ,
        0.        , 1.        , 0.        ],
       [0.15319087, 0.        , 0.        , 0.        , 1.        ,
        0.        , 0.        , 0.        ]], dtype=float32)

In [36]:
anime_genre_embedding_columns

array([[ 3,  5,  4,  0,  0,  0,  0,  0,  0],
       [ 5,  4, 13,  0,  0,  0,  0,  0,  0],
       [ 3,  7,  9,  0,  0,  0,  0,  0,  0]])

In [37]:
anime_studio_embedding_columns

array([[ 2,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [90,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [23,  0,  0,  0,  0,  0,  0,  0,  0,  0]])

In [38]:
anime_source_embedding_columns

array([[3],
       [2],
       [2]])

In [39]:
input={'user_input':user_input, 'anime_input':anime_columns,'anime_genre_embedding_input':anime_genre_embedding_columns,'anime_studio_embedding_input':anime_studio_embedding_columns,'anime_source_embedding_input':anime_source_embedding_columns}

In [40]:
ratings=model(input)

In [41]:
ratings

<tf.Tensor: shape=(3, 1), dtype=float32, numpy=
array([[ 0.2614975 ],
       [-0.5352173 ],
       [-0.59625983]], dtype=float32)>

In [42]:
rating_scaler.inverse_transform(ratings)

array([[8.18147972],
       [6.93125094],
       [6.82658651]])

In [43]:
(rating_df[rating_df['anime_id'].isin([21,48,320])])[rating_df['user_id']==1]

  (rating_df[rating_df['anime_id'].isin([21,48,320])])[rating_df['user_id']==1]


Unnamed: 0,user_id,Username,anime_id,Anime Title,rating
0,1,Xinil,21,One Piece,9
1,1,Xinil,48,.hack//Sign,7
2,1,Xinil,320,A Kite,5
