In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import warnings
warnings.filterwarnings(action='ignore')
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dot, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from wordcloud import WordCloud
from collections import defaultdict
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [3]:
df =pd.read_csv('anime-dataset-2023.csv')
df_score =pd.read_csv('users-score-2023.csv')
df_user =pd.read_csv('users-details-2023.csv')


In [4]:
scores = (df['Score'][df['Score'] != 'UNKNOWN']).astype('float')
mean= round(scores.mean() , 2)
df['Score'] = df['Score'].replace('UNKNOWN', mean)
df['Score'] = df['Score'].astype('float')

In [5]:
df['Genres'] = df['Genres'].replace('UNKNOWN',np.nan)
df['Genres'].value_counts()

Genres
Comedy                                           2279
Fantasy                                          1341
Hentai                                           1181
Drama                                             624
Slice of Life                                     623
                                                 ... 
Avant Garde, Fantasy, Romance                       1
Action, Comedy, Romance, Supernatural, Hentai       1
Drama, Horror, Supernatural, Ecchi                  1
Adventure, Comedy, Romance, Sports                  1
Action, Adventure, Fantasy, Horror                  1
Name: count, Length: 1005, dtype: int64

In [6]:
df['Rank'] = df['Rank'].replace('UNKNOWN',0)
df['Rank'] = df['Rank'].astype('float')

In [7]:
filtered_popularity = df[df['Popularity'] > 0]

top_10_popular = filtered_popularity.sort_values(by='Popularity').head(10)

fig = px.bar(top_10_popular, x='Name', y='Popularity',
             labels={'Name': 'Anime Title', 'Popularity': 'Popularity'},
             title='Top 10 Most Popular Animes',
             color='Name')

fig.show()

In [8]:
top_10_scored = df.sort_values(by='Members', ascending=False).head(10)

fig = px.bar(top_10_scored, x='Name', y='Members',
             labels={'Members':'Number of Users', 'Name':'Anime Title'},
             color='Name',
             title='Top 10 Animes by Number of Users')

fig.show()

In [9]:
df['Genres'] = df['Genres'].fillna('').astype(str)

genre_counts = df[df['Genres'] != "UNKNOWN"]['Genres'].apply(lambda x: x.split(', ')).explode().value_counts()

# Create a bar chart
fig = px.bar(genre_counts, x=genre_counts.index, y=genre_counts.values,
             labels={'x':'Genre', 'y':'Count'},
             title='Count of Anime Titles by Genre',
             color=genre_counts.index)

fig.show()

In [10]:
import plotly.graph_objects as go
correlation_matrix = df[['Score', 'Popularity', 'Rank']].corr()

fig = go.Figure(data=go.Heatmap(
        z=correlation_matrix.values,
        x=correlation_matrix.columns,
        y=correlation_matrix.columns,
        colorscale='RdBu',
        colorbar=dict(title='Correlation')
))

fig.update_layout(
    title='Correlation Heatmap of Score, Popularity, and Rank',
    xaxis=dict(title='Features'),
    yaxis=dict(title='Features')
)

fig.show()

In [11]:
df_score= df_score[["user_id","anime_id","rating"]]
df_score.head()

Unnamed: 0,user_id,anime_id,rating
0,1,21,9
1,1,48,7
2,1,320,5
3,1,49,8
4,1,304,8


In [13]:
min_rating = min(df_score['rating'])
max_rating = max(df_score['rating'])
df_score['rating'] = df_score["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values.astype(np.float64)

AvgRating = np.mean(df_score['rating'])
print('Avg', AvgRating)

Avg 0.7208891076033028


In [14]:
user_encoder = LabelEncoder()
df_score["user_encoded"] = user_encoder.fit_transform(df_score["user_id"])
num_users = len(user_encoder.classes_)

anime_encoder = LabelEncoder()
df_score["anime_encoded"] = anime_encoder.fit_transform(df_score["anime_id"])
num_animes = len(anime_encoder.classes_)

print(f"{num_users} users, {num_animes} anime ")


12628 users, 12319 anime 


In [15]:
df = shuffle(df, random_state=100)

X = df_score[['user_encoded', 'anime_encoded']].values
y = df_score["rating"].values

print( X.shape)
print( y.shape)

(1459031, 2)
(1459031,)


In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=10000, random_state=42)

In [17]:
X_train_array = [X_train[:, 0], X_train[:, 1]]
X_test_array = [X_test[:, 0], X_test[:, 1]]

In [18]:
def RecommenderNet(num_users, num_animes, embedding_size=128):
    # User input layer and embedding layer
    user = Input(name='user_encoded', shape=[1])
    user_embedding = Embedding(name='user_embedding', input_dim=num_users, output_dim=embedding_size)(user)

    # Anime input layer and embedding layer
    anime = Input(name='anime_encoded', shape=[1])
    anime_embedding = Embedding(name='anime_embedding', input_dim=num_animes, output_dim=embedding_size)(anime)

    # Dot product of user and anime embeddings
    dot_product = Dot(name='dot_product', normalize=True, axes=2)([user_embedding, anime_embedding])
    flattened = Flatten()(dot_product)

    # Dense layers for prediction
    dense = Dense(16, activation='relu')(flattened)
    output = Dense(1, activation='sigmoid')(dense)

    # Create and compile the model
    model = Model(inputs=[user, anime], outputs=output)
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=["mae", "mse"])

    return model
model = RecommenderNet(num_users, num_animes)

# Printing my model summary
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_encoded (InputLayer)   [(None, 1)]                  0         []                            
                                                                                                  
 anime_encoded (InputLayer)  [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 128)               1616384   ['user_encoded[0][0]']        
                                                                                                  
 anime_embedding (Embedding  (None, 1, 128)               1576832   ['anime_encoded[0][0]']       
 )                                                                                            

In [19]:
## Import necessary callbacks
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping

# Define the initial learning rate, minimum learning rate, maximum learning rate, and batch size
start_lr = 0.00001
min_lr = 0.00001
max_lr = 0.00005
batch_size = 10000


# Define the number of epochs for ramp-up, sustain, and exponential decay
rampup_epochs = 5
sustain_epochs = 0
exp_decay = .8

# Learning rate schedule function
def lrfn(epoch):
    if epoch < rampup_epochs:
        return (max_lr - start_lr) / rampup_epochs * epoch + start_lr
    elif epoch < rampup_epochs + sustain_epochs:
        return max_lr
    else:
        return (max_lr - min_lr) * exp_decay**(epoch - rampup_epochs - sustain_epochs) + min_lr

# Learning rate scheduler callback
lr_callback = LearningRateScheduler(lambda epoch: lrfn(epoch), verbose=0)

# File path for saving the model weights
checkpoint_filepath = 'C:/Users/about/Downloads/animemyanimeweights.h5'

# Model checkpoint callback to save the best weights
model_checkpoints = ModelCheckpoint(filepath=checkpoint_filepath,
                                    save_weights_only=True,
                                    monitor='val_loss',
                                    mode='min',
                                    save_best_only=True)

# Early stopping callback to prevent overfitting
early_stopping = EarlyStopping(patience=3, monitor='val_loss', mode='min', restore_best_weights=True)

# Define the list of callbacks
my_callbacks = [
    model_checkpoints,
    lr_callback,
    early_stopping
]

In [20]:
history = model.fit(
    x=X_train_array,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    verbose=1,
    validation_data=(X_test_array, y_test),
    callbacks=my_callbacks
)

model.load_weights(checkpoint_filepath)



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [21]:
def extract_weights(name, model):
    weight_layer = model.get_layer(name)
    weights = weight_layer.get_weights()[0]
    weights = weights / np.linalg.norm(weights, axis=1).reshape((-1, 1))

    return weights


anime_weights = extract_weights('anime_embedding', model)
user_weights = extract_weights('user_embedding', model)

In [24]:
def find_similar_animes(name, n=10, return_dist=False, neg=False):
    try:
        anime_row = df[df['Name'] == name].iloc[0]
        index = anime_row['anime_id']
        encoded_index = anime_encoder.transform([index])[0]
        weights = anime_weights
        dists = np.dot(weights, weights[encoded_index])
        sorted_dists = np.argsort(dists)
        n = n + 1
        if neg:
            closest = sorted_dists[:n]
        else:
            closest = sorted_dists[-n:]
        print('Animes closest to {}'.format(name))
        if return_dist:
            return dists, closest
        SimilarityArr = []

        for close in closest:
            decoded_id = anime_encoder.inverse_transform([close])[0]
            anime_frame = df[df['anime_id'] == decoded_id]

            anime_name = anime_frame['Name'].values[0]
            english_name = anime_frame['English name'].values[0]
            name = english_name if english_name != "UNKNOWN" else anime_name
            genre = anime_frame['Genres'].values[0]
            Synopsis = anime_frame['Synopsis'].values[0]
            similarity = dists[close]
            similarity = "{:.2f}%".format(similarity * 100)
            SimilarityArr.append({"Name": name, "Similarity": similarity, "Genres": genre, "Synopsis":Synopsis})
            Frame = pd.DataFrame(SimilarityArr).sort_values(by="Similarity", ascending=False)
        return Frame[Frame.Name != name]
    except:
        print('{} not found in Anime list'.format(name))

pd.set_option('display.max_colwidth', None)

In [25]:
find_similar_animes('One Punch Man', n=5, neg=False)

Animes closest to One Punch Man


Unnamed: 0,Name,Similarity,Genres,Synopsis
4,Wolf's Rain,43.11%,"Action, Adventure, Drama, Fantasy, Mystery, Sci-Fi","In a dying world, there exists an ancient legend: when the world ends, the gateway to paradise will be opened. This utopia is the sole salvation for the remnants of life in this barren land, but the legend also dictates that only wolves can find their way to this mythical realm. Though long thought to be extinct, wolves still exist and live amongst humans, disguising themselves through elaborate illusions. \n\nA lone wolf named Kiba finds himself drawn by an intoxicating scent to Freeze City, an impoverished town under the rule of the callous Lord Orkham. Here, Kiba discovers that wolves Hige, Tsume, and Toboe have been drawn in by the same aroma. By following the fragrance of ""Lunar Flowers,"" said to be the key to opening the door to their ideal world, the wolves set off on a journey across desolate landscapes and crumbling cities to find their legendary promised land. However, they are not the only ones seeking paradise, and those with more sinister intentions will do anything in their power to reach it first."
3,Berserk: The Golden Age Arc I - The Egg of the King,41.34%,"Action, Adventure, Drama, Fantasy, Horror","In the Kingdom of Midland, a mercenary named Guts wanders the land, preferring a life of conflict over a life of peace. Despite the odds never being in his favor, he is an unstoppable force that overcomes every opponent, wielding a massive sword larger than himself.\n\nOne day, Griffith, the mysterious leader of the mercenary group Band of the Hawk, witnesses the warrior's battle prowess and invites the wandering swordsman to join his squadron. Rejecting the offer, Guts challenges Griffith to a duel—and, much to the former's surprise, is subsequently defeated and forced to join.\n\nNow, Guts must fight alongside Griffith and his crew to help Midland defeat the Empire of Chuder. However, Griffith seems to harbor ulterior motives, desiring something much larger than just settling the war..."
2,Hajime no Ippo: Mashiba vs. Kimura,41.27%,Sports,"Tatsuya Kimura is beginning to feel left behind. With his friends Ippo Makunouchi and Mamoru Takamura holding the championship belt in their respective weight classes, Kimura's inability to become Japan's Junior Lightweight boxing champion eats away at him. Scheduled to fight with the current champion Ryo Mashiba, Kimura begins to train with Ippo's rival Ichirou Miyata, who previously suffered a crushing defeat from Mashiba's signature ""Hitman"" style of boxing.\n\nBut Mashiba has his own desire to hold onto the belt, hoping to soon shoot for the World title and secure financial stability for his sister Kumi. Will Kimura's overwhelming drive to prove his strength as a boxer overcome Mashiba's desperate wish to provide a better life for Kumi?"
1,Vinland Saga,40.28%,"Action, Adventure, Drama","Young Thorfinn grew up listening to the stories of old sailors that had traveled the ocean and reached the place of legend, Vinland. It's said to be warm and fertile, a place where there would be no need for fighting—not at all like the frozen village in Iceland where he was born, and certainly not like his current life as a mercenary. War is his home now. Though his father once told him, ""You have no enemies, nobody does. There is nobody who it's okay to hurt,"" as he grew, Thorfinn knew that nothing was further from the truth.\n\nThe war between England and the Danes grows worse with each passing year. Death has become commonplace, and the viking mercenaries are loving every moment of it. Allying with either side will cause a massive swing in the balance of power, and the vikings are happy to make names for themselves and take any spoils they earn along the way. Among the chaos, Thorfinn must take his revenge and kill Askeladd, the man who murdered his father. The only paradise for the vikings, it seems, is the era of war and death that rages on."
0,Darker than Black,40.11%,"Action, Mystery, Sci-Fi","It has been 10 years since Heaven's Gate appeared in South America and Hell's Gate appeared in Japan, veiling the once familiar night sky with an oppressive skyscape. Their purposes unknown, these Gates are spaces in which the very laws of physics are ignored. With the appearance of the Gates emerged Contractors, who, in exchange for their humanity, are granted supernatural abilities.\n\nIn the Japanese city surrounding Hell’s Gate, Section 4 Chief Misaki Kirihara finds herself at odds with an infamous Contractor codenamed Hei. Called ""Black Reaper"" in the underground world, Hei, like his associates, undertakes missions for the mysterious and ruthless Syndicate while slowly peeling back the dark layers covering a nefarious plot that threatens the very existence of Contractors.\n\nFrom the mind of Tensai Okamura comes a sci-fi thriller taking the form of a subtle exposé on a war in which political positions and justice have no sway—a war waged exclusively in the shadows."


In [28]:
find_similar_animes('Shigatsu wa Kimi no Uso', n=5, neg=False)

Animes closest to Shigatsu wa Kimi no Uso


Unnamed: 0,Name,Similarity,Genres,Synopsis
4,Mobile Suit Gundam 0083: Stardust Memory Picture Drama - The Mayfly of Space 2,37.46%,Action,"Picture drama included in the Blu-ray release of 0083.\n\nUniversal Century 0083. Inside the Zanzibar-class mobile cruiser Lili Marleen, Cima Garahau, the former Zeon M.A.U. (Marine Amphibious Unit) Commander, was battling her recurring nightmare of being abandoned by Zeon military during One Year War and drifting in Space in the past 4 years. This picture drama also recounts Cima's past and present encounter with Anavel Gato, as well as introducing a new character, the Zeon fugitive Geil Hunt.\n\n(Source: Gundam Wiki)"
3,Cheonnyeon-yeowoo Yeowoobi,34.26%,"Drama, Fantasy, Sci-Fi","Yobi, The Five Tailed Fox revolves around a kumiho, a nine-tailed fox that can assume different forms including human. Kumiho are familiar figures in Korean folk tales, and they are usually depicted as magical creatures that take female form to lure and prey on humans. This time, however, our protagonist is a young, kind-hearted fox with only five tails, instead of nine.\n\nAfter losing her family to fox hunters, five-tailed Yobi lives in the forest with some shipwrecked aliens, far away from the humans. When one of her alien friends gets captured by a villager, Yobi has no choice but to adventure into the human world to rescue him. At the village, Yobi meets many humans, including Geum Yee who studies at a school for maladjusted children. Interested in Geum Yee, Yobi joins the students and revels in the fun of human life, but both a fox hunter and a mysterious shadow man are on her trail.\n\n(Source: YesAsia)"
2,Candy Boy: Nonchalant Talk of the Certain Twin Sisters in Daily Life,33.95%,"Girls Love, Romance","Yukino and Kanade Sakurai are twin sisters that live together in a high school dorm. Although they are twins, they tend to act romantic toward each other, holding hands and going on dates. One day, Kanade sees Yukino hanging out with a new girl, Sakuya Kamiyama. Realizing how much time these two spend together, Kanade assumes they are dating, and she begins to distance herself from her sister. Yukino promptly notices and pleads with Kanade not to leave. In reality, Sakuya has only been asking for advice about Kanade, revealing her obsessive love. How will this love triangle play out?"
1,Renkin San-kyuu Magical? Pokaan Specials,33.72%,"Comedy, Ecchi",DVD specials.
0,Rozen Maiden,33.05%,"Action, Comedy, Drama","Traumatized by school, Jun Sakurada spends his days at home as a shut-in, purchasing things online, only to send them back before the free trial period ends. So when a note appears on his desk, asking whether or not he would wind something, he assumes it was something he ordered and carelessly circles ""yes,"" changing his life forever.\n \nA box arrives with a wind up doll inside, but this is no ordinary toy: after Jun winds her up, she begins walking and talking as if a normal person. With a haughty attitude, she introduces herself as Shinku, the fifth doll in the Rozen Maiden collection, a group of special dolls made by the legendary dollmaker Rozen. These sisters must battle each other in a competition called the Alice Game with the help of a human to ensure victory. The winner becomes Alice, a real girl who is worthy of meeting their creator.\n\nAs more sentient dolls end up taking residence in Jun's house, and a foe from Shinku's past makes her appearance, Jun's life becomes far more complicated than he ever thought possible."
