In [39]:
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation,BatchNormalization,Input,Embedding,Dot,Dense,Flatten
from tensorflow.keras.callbacks import ModelCheckpoint,LearningRateScheduler,TensorBoard,EarlyStopping

# from wordcloud import WordCloud
# %matplotlib inline

### READING ANIMELIST.CSV

In [40]:
import os

In [41]:
INPUT_DIR = os.path.join("..","artifacts","raw")

In [42]:
rating_df = pd.read_csv(INPUT_DIR+"/animelist.csv" , low_memory=True,usecols=["user_id","anime_id","rating"])

In [43]:
rating_df.head()

Unnamed: 0,user_id,anime_id,rating
0,0,67,9
1,0,6702,7
2,0,242,10
3,0,4898,0
4,0,21,10


In [44]:
len(rating_df)

9000000

#### DATA PROCESSING

In [45]:
n_ratings = rating_df["user_id"].value_counts()

In [46]:
n_ratings[n_ratings>200].index

Index([20807, 11100, 22022, 16869, 10255,  4773, 28205,  6852, 18355, 26654,
       ...
        3667, 16843,  3893, 26483, 25134, 16540,  8834, 13287, 16905, 18756],
      dtype='int64', name='user_id', length=14480)

In [47]:


rating_df = rating_df[rating_df["user_id"].isin(n_ratings[n_ratings>=400].index)].copy()

In [48]:
user_ids = rating_df["user_id"].unique().tolist()
user_ids


[2,
 6,
 12,
 16,
 17,
 19,
 21,
 41,
 42,
 44,
 47,
 53,
 55,
 60,
 66,
 73,
 74,
 85,
 89,
 90,
 94,
 98,
 102,
 108,
 111,
 112,
 120,
 121,
 122,
 135,
 145,
 146,
 147,
 153,
 155,
 156,
 172,
 174,
 184,
 190,
 193,
 194,
 198,
 204,
 205,
 209,
 214,
 219,
 222,
 227,
 228,
 235,
 238,
 240,
 243,
 248,
 251,
 252,
 257,
 264,
 267,
 272,
 274,
 275,
 284,
 285,
 286,
 290,
 291,
 293,
 300,
 301,
 306,
 308,
 310,
 313,
 314,
 316,
 320,
 321,
 324,
 325,
 326,
 327,
 330,
 336,
 340,
 345,
 346,
 349,
 350,
 366,
 367,
 371,
 372,
 375,
 381,
 382,
 386,
 389,
 398,
 405,
 406,
 413,
 414,
 418,
 423,
 426,
 428,
 431,
 432,
 436,
 437,
 438,
 440,
 442,
 444,
 445,
 446,
 455,
 457,
 459,
 464,
 467,
 468,
 469,
 478,
 481,
 483,
 484,
 486,
 493,
 498,
 500,
 516,
 517,
 519,
 524,
 526,
 529,
 531,
 538,
 542,
 547,
 549,
 559,
 563,
 564,
 566,
 569,
 571,
 577,
 590,
 593,
 596,
 601,
 603,
 608,
 613,
 614,
 617,
 620,
 629,
 631,
 639,
 642,
 643,
 644,
 649,
 652,
 655

In [49]:
user2user_encoded = {x : i for i , x in enumerate(user_ids)}
user2user_encoded


{2: 0,
 6: 1,
 12: 2,
 16: 3,
 17: 4,
 19: 5,
 21: 6,
 41: 7,
 42: 8,
 44: 9,
 47: 10,
 53: 11,
 55: 12,
 60: 13,
 66: 14,
 73: 15,
 74: 16,
 85: 17,
 89: 18,
 90: 19,
 94: 20,
 98: 21,
 102: 22,
 108: 23,
 111: 24,
 112: 25,
 120: 26,
 121: 27,
 122: 28,
 135: 29,
 145: 30,
 146: 31,
 147: 32,
 153: 33,
 155: 34,
 156: 35,
 172: 36,
 174: 37,
 184: 38,
 190: 39,
 193: 40,
 194: 41,
 198: 42,
 204: 43,
 205: 44,
 209: 45,
 214: 46,
 219: 47,
 222: 48,
 227: 49,
 228: 50,
 235: 51,
 238: 52,
 240: 53,
 243: 54,
 248: 55,
 251: 56,
 252: 57,
 257: 58,
 264: 59,
 267: 60,
 272: 61,
 274: 62,
 275: 63,
 284: 64,
 285: 65,
 286: 66,
 290: 67,
 291: 68,
 293: 69,
 300: 70,
 301: 71,
 306: 72,
 308: 73,
 310: 74,
 313: 75,
 314: 76,
 316: 77,
 320: 78,
 321: 79,
 324: 80,
 325: 81,
 326: 82,
 327: 83,
 330: 84,
 336: 85,
 340: 86,
 345: 87,
 346: 88,
 349: 89,
 350: 90,
 366: 91,
 367: 92,
 371: 93,
 372: 94,
 375: 95,
 381: 96,
 382: 97,
 386: 98,
 389: 99,
 398: 100,
 405: 101,
 406: 102,
 

In [50]:
user2user_decoded = {i : x for i , x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)


In [51]:
rating_df.sample(100)

Unnamed: 0,user_id,anime_id,rating,user
6470556,21200,514,9,5411
6526106,21377,30806,0,5462
5868831,19246,37569,7,4923
2141986,7215,32866,6,1815
6802820,22310,6880,5,5680
...,...,...,...,...
8850650,28902,37980,5,7400
5601822,18369,39326,7,4683
4167299,13746,24,0,3502
8023720,26188,1195,7,6706


In [52]:
len(rating_df)

5866296

In [53]:
min_rating =min(rating_df["rating"])

In [54]:
max_rating =max(rating_df["rating"])

In [55]:
max_rating

10

In [56]:
avg_rating =np.mean(rating_df["rating"])

In [57]:
avg_rating

np.float64(4.096097435247045)

In [58]:
rating_df["rating"] = rating_df["rating"].apply(lambda x: (x-min_rating)/(max_rating-min_rating)).values.astype(np.float64)

In [59]:
rating_df.duplicated().sum()

np.int64(0)

In [60]:
rating_df.isnull().sum()

user_id     0
anime_id    0
rating      0
user        0
dtype: int64

In [61]:
user_ids = rating_df["user_id"].unique().tolist()
user2user_encoded = {x : i for i , x in enumerate(user_ids)}
user2user_decoded = {i : x for i , x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)


In [62]:
n_users = len(user2user_encoded)

In [63]:
n_users

7528

In [64]:
## 11054 : 12  -> user2user encoded
## 12 : 11054 ---> usser2userdecocded

In [65]:
anime_ids = rating_df["anime_id"].unique().tolist()
anime2anime_encoded = {x : i for i , x in enumerate(anime_ids)}
anime2anime_decoded = {i : x for i , x in enumerate(anime_ids)}
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)

In [66]:
n_anime = len(anime2anime_encoded)

In [67]:
n_anime

17554

In [68]:
rating_df = rating_df.sample(frac=1,random_state=43).reset_index(drop=True)

In [69]:
rating_df.head(2)

Unnamed: 0,user_id,anime_id,rating,user,anime
0,6624,23321,0.8,1658,1193
1,6359,12549,0.0,1603,1302


In [70]:
X = rating_df[["user","anime"]].values
y = rating_df["rating"]

In [71]:
test_size = 1000
train_indices = rating_df.shape[0] - test_size

In [72]:
X_train , X_test , y_train , y_test = (
    X[:train_indices],
    X[train_indices :],
    y[:train_indices],
    y[train_indices:],
)

In [73]:
len(X_train)

5865296

In [74]:
len(X_test)

1000

In [77]:
type(X_train)

numpy.ndarray

In [78]:
X_train_array = [X_train[: , 0] , X_train[: ,1]]
X_test_array = [X_test[: , 0] , X_test[: ,1]]

In [79]:
type(X_test_array)

list

In [80]:
type(X_test_array[0])

numpy.ndarray

#### MODEL ARCHITECTURE

In [81]:
def RecommenderNet():
    embedding_size =128

    user = Input(name="user",shape=[1])

    user_embedding = Embedding(name="user_embedding",input_dim=n_users,output_dim=embedding_size)(user)

    anime = Input(name="anime",shape=[1])

    anime_embedding = Embedding(name="anime_embedding",input_dim=n_anime,output_dim=embedding_size)(anime)

    x = Dot(name="dot_product" , normalize=True , axes=2)([user_embedding,anime_embedding])

    x = Flatten()(x)

    x = Dense(1,kernel_initializer='he_normal')(x)
    x = BatchNormalization()(x)
    x = Activation("sigmoid")(x)

    model = Model(inputs=[user,anime], outputs=x)
    model.compile(loss="binary_crossentropy",metrics=["mae","mse"],optimizer='Adam')
    return model

In [82]:
model = RecommenderNet()

In [83]:
model.summary()

In [84]:
start_lr = 0.00001
min_lr = 0.0001
max_lr = 0.00005
batch_size = 10000

ramup_epochs = 5
sustain_epochs = 0
exp_decay = 0.8

def lrfn(epoch):
    if epoch<ramup_epochs:
        return (max_lr-start_lr)/ramup_epochs*epoch + start_lr
    elif epoch<ramup_epochs+sustain_epochs:
        return max_lr
    else:
        return (max_lr-min_lr) * exp_decay ** (epoch-ramup_epochs-sustain_epochs)+min_lr

In [85]:
lr_callback = LearningRateScheduler(lambda epoch:lrfn(epoch) , verbose=0)
checkpoint_filepath = './weights.weights.h5'

model_checkpoint = ModelCheckpoint(filepath=checkpoint_filepath,save_weights_only=True,monitor="val_loss",mode="min",save_best_only=True)

early_stopping = EarlyStopping(patience=3,monitor="val_loss",mode="min",restore_best_weights=True)

In [86]:
my_callbacks = [model_checkpoint,lr_callback,early_stopping]

In [87]:
history = model.fit(
    x=X_train_array,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    verbose=1,
    validation_data = (X_test_array,y_test),
    callbacks=my_callbacks
)

Epoch 1/20
[1m587/587[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 84ms/step - loss: 0.7901 - mae: 0.3806 - mse: 0.1943 - val_loss: 0.7657 - val_mae: 0.3798 - val_mse: 0.1872 - learning_rate: 1.0000e-05
Epoch 2/20
[1m587/587[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 84ms/step - loss: 0.7853 - mae: 0.3791 - mse: 0.1924 - val_loss: 0.7883 - val_mae: 0.3844 - val_mse: 0.1950 - learning_rate: 1.8000e-05
Epoch 3/20
[1m587/587[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 85ms/step - loss: 0.7772 - mae: 0.3763 - mse: 0.1892 - val_loss: 0.7834 - val_mae: 0.3831 - val_mse: 0.1931 - learning_rate: 2.6000e-05
Epoch 4/20
[1m587/587[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 85ms/step - loss: 0.7654 - mae: 0.3719 - mse: 0.1845 - val_loss: 0.7750 - val_mae: 0.3803 - val_mse: 0.1899 - learning_rate: 3.4000e-05


In [88]:
model.load_weights(checkpoint_filepath)

In [89]:
metrics = ["loss", "mae", "mse"]

# Create subplots
fig, axes = plt.subplots(len(metrics), 1, figsize=(8, len(metrics) * 4))

for i, metric in enumerate(metrics):
    ax = axes[i]
    ax.plot(history.history[metric][0:-2], marker="o", label=f"train {metric}")
    ax.plot(history.history[f"val_{metric}"][0:-2], marker="o", label=f"test {metric}")
    ax.set_title(f"Model {metric.capitalize()}")
    ax.set_ylabel(metric.capitalize())
    ax.set_xlabel("Epoch")
    ax.legend(loc="upper left")
    ax.grid(True)

plt.tight_layout()
plt.show()

NameError: name 'plt' is not defined

In [90]:
def extract_weights(name,model):
    weight_layer = model.get_layer(name)
    weights = weight_layer.get_weights()[0]
    weights = weights/np.linalg.norm(weights,axis=1).reshape((-1,1))
    return weights

In [91]:
anime_weights = extract_weights("anime_embedding",model)

In [92]:
user_weights = extract_weights("user_embedding",model)

#### READING ANIME.CSV

In [93]:
df = pd.read_csv(INPUT_DIR+"/anime.csv",low_memory=True)
df.head(2)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0


In [94]:
df = df.replace("Unknown",np.nan)

In [95]:
def getAnimeName(anime_id):
    try:
        name = df[df.anime_id == anime_id].eng_version.values[0]
        if name is np.nan:
            name = df[df.anime_id == anime_id].Name.values[0]
    except:
        print("Error")
    return name

In [96]:
df["anime_id"] = df["MAL_ID"]
df["eng_version"] = df["English name"]
df["eng_version"] = df.anime_id.apply(lambda x:getAnimeName(x))

In [97]:
getAnimeName(6702)

'Fairy Tail'

In [98]:
df.sort_values(by=["Score"],
               inplace=True,
               ascending=False,
               kind="quicksort",
               na_position="last")

In [99]:
df.columns

Index(['MAL_ID', 'Name', 'Score', 'Genres', 'English name', 'Japanese name',
       'Type', 'Episodes', 'Aired', 'Premiered', 'Producers', 'Licensors',
       'Studios', 'Source', 'Duration', 'Rating', 'Ranked', 'Popularity',
       'Members', 'Favorites', 'Watching', 'Completed', 'On-Hold', 'Dropped',
       'Plan to Watch', 'Score-10', 'Score-9', 'Score-8', 'Score-7', 'Score-6',
       'Score-5', 'Score-4', 'Score-3', 'Score-2', 'Score-1', 'anime_id',
       'eng_version'],
      dtype='object')

In [100]:
df = df[["anime_id" , "eng_version","Score","Genres","Episodes","Type","Premiered","Members"]]

In [101]:
df.head()

Unnamed: 0,anime_id,eng_version,Score,Genres,Episodes,Type,Premiered,Members
3971,5114,Fullmetal Alchemist:Brotherhood,9.19,"Action, Military, Adventure, Comedy, Drama, Ma...",64,TV,Spring 2009,2248456
15926,40028,Attack on Titan Final Season,9.17,"Action, Military, Mystery, Super Power, Drama,...",16,TV,Winter 2021,733260
5683,9253,Steins;Gate,9.11,"Thriller, Sci-Fi",24,TV,Spring 2011,1771162
14963,38524,Attack on Titan Season 3 Part 2,9.1,"Action, Drama, Fantasy, Military, Mystery, Sho...",10,TV,Spring 2019,1073626
9913,28977,Gintama Season 4,9.1,"Action, Comedy, Historical, Parody, Samurai, S...",51,TV,Spring 2015,404121


In [102]:
def getAnimeFrame(anime,df):
    if isinstance(anime,int):
        return df[df.anime_id == anime]
    if isinstance(anime,str):
        return df[df.eng_version == anime]
    

In [105]:
getAnimeFrame('Attack on Titan Final Season' , df)

Unnamed: 0,anime_id,eng_version,Score,Genres,Episodes,Type,Premiered,Members
15926,40028,Attack on Titan Final Season,9.17,"Action, Military, Mystery, Super Power, Drama,...",16,TV,Winter 2021,733260


In [106]:
getAnimeFrame('Attack on Titan Final Season' , df).anime_id.values[0]

np.int64(40028)

In [107]:
anime2anime_encoded.get(40028)

1666

In [108]:
weights = anime_weights


In [115]:
# Compute the similarity distances
dists = np.dot(weights, weights[1666])  # Ensure weights[encoded_index] is a 1D array
dists[17228]
    

np.float32(0.3113764)

In [110]:
sorted_dists = np.argsort(dists)
sorted_dists

array([10430, 12053, 12738, ..., 11201, 15462,  1666], shape=(17554,))

In [111]:
closest = sorted_dists[-5:]

In [112]:
closest

array([17228,  1532, 11201, 15462,  1666])

In [113]:
SimilarityArr = []
for close in closest:
    decoded_id = anime2anime_decoded.get(close)

    anime_frame = getAnimeFrame(decoded_id, df)
    print(anime_frame)
    anime_name = anime_frame.eng_version.values[0]
    print(anime_name)
    genre = anime_frame.Genres.values[0]
    similarity = dists[close]
    print(similarity)

    SimilarityArr.append({
            "anime_id": decoded_id,
            "name": anime_name,
            "similarity": similarity,
            "genre": genre,
    })

       anime_id eng_version Score    Genres Episodes   Type Premiered  Members
15185     38868   Blow Up 2   NaN  Dementia        1  Movie       NaN       70
Blow Up 2
0.3113764
       anime_id          eng_version Score                         Genres  \
14055     37105  Grand Blue Dreaming  8.41  Slice of Life, Comedy, Seinen   

      Episodes Type    Premiered  Members  
14055       12   TV  Summer 2018   421403  
Grand Blue Dreaming
0.316614
      anime_id                  eng_version Score  \
2104      2301  Tekkaman Blade:Missing Link  6.05   

                              Genres Episodes Type Premiered  Members  
2104  Action, Mecha, Sci-Fi, Shounen        1  OVA       NaN     1674  
Tekkaman Blade:Missing Link
0.31672096
      anime_id               eng_version Score Genres Episodes   Type  \
5697      9292  Kiki to Lala no Aoi Tori   NaN   Kids        1  Movie   

     Premiered  Members  
5697       NaN      329  
Kiki to Lala no Aoi Tori
0.35014546
       anime_id          

#### ANIME_WITH_SYNOPSIS.CSV 

In [116]:
cols = ["MAL_ID","Name","Genres","sypnopsis"]

In [117]:
synopsis_df = pd.read_csv(INPUT_DIR+"/anime_with_synopsis.csv",usecols=cols)

In [128]:
synopsis_df.head(1)

Unnamed: 0,MAL_ID,Name,Genres,sypnopsis
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as ""Cowboys."" The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member's dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebop is a space Western classic and an homage to the smooth and improvised music it is named after."


In [129]:
synopsis_df.columns

Index(['MAL_ID', 'Name', 'Genres', 'sypnopsis'], dtype='object')

In [130]:
def getSynopsis(anime,synopsis_df):
    if isinstance(anime,int):
        return synopsis_df[synopsis_df.MAL_ID == anime].sypnopsis.values[0]
    if isinstance(anime,str):
        return synopsis_df[synopsis_df.Name == anime].sypnopsis.values[0]
    

In [131]:
getSynopsis(40028 , synopsis_df)

"Gabi Braun and Falco Grice have been training their entire lives to inherit one of the seven titans under Marley's control and aid their nation in eradicating the Eldians on Paradis. However, just as all seems well for the two cadets, their peace is suddenly shaken by the arrival of Eren Yeager and the remaining members of the Survey Corps. Having finally reached the Yeager family basement and learned about the dark history surrounding the titans, the Survey Corps has at long last found the answer they so desperately fought to uncover. With the truth now in their hands, the group set out for the world beyond the walls. In Shingeki no Kyojin: The Final Season , two utterly different worlds collide as each party pursues its own agenda in the long-awaited conclusion to Paradis' fight for freedom."

In [132]:
getSynopsis("Steins;Gate",synopsis_df)

'The self-proclaimed mad scientist Rintarou Okabe rents out a room in a rickety old building in Akihabara, where he indulges himself in his hobby of inventing prospective "future gadgets" with fellow lab members: Mayuri Shiina, his air-headed childhood friend, and Hashida Itaru, a perverted hacker nicknamed "Daru." The three pass the time by tinkering with their most promising contraption yet, a machine dubbed the "Phone Microwave," which performs the strange function of morphing bananas into piles of green gel. Though miraculous in itself, the phenomenon doesn\'t provide anything concrete in Okabe\'s search for a scientific breakthrough; that is, until the lab members are spurred into action by a string of mysterious happenings before stumbling upon an unexpected success—the Phone Microwave can send emails to the past, altering the flow of history. Adapted from the critically acclaimed visual novel by 5pb. and Nitroplus, Steins;Gate takes Okabe through the depths of scientific theory 

### CONTENT/ITEM BASED RECOMMENDATION

In [133]:
pd.set_option("max_colwidth",None)

In [134]:
def find_similar_animes(name, anime_weights, anime2anime_encoded, anime2anime_decoded, df, synopsis_df, n=10, return_dist=False, neg=False):
    # Get the anime_id for the given name
    index = getAnimeFrame(name, df).anime_id.values[0]
    encoded_index = anime2anime_encoded.get(index)

    if encoded_index is None:
        raise ValueError(f"Encoded index not found for anime ID: {index}")

    weights = anime_weights

    # Compute the similarity distances
    dists = np.dot(weights, weights[encoded_index])  # Ensure weights[encoded_index] is a 1D array
    sorted_dists = np.argsort(dists)

    n = n + 1

    # Select closest or farthest based on 'neg' flag
    if neg:
        closest = sorted_dists[:n]
    else:
        closest = sorted_dists[-n:]

    # Return distances and closest indices if requested
    if return_dist:
        return dists, closest

    # Build the similarity array
    SimilarityArr = []
    for close in closest:
        decoded_id = anime2anime_decoded.get(close)
       

       
        anime_frame = getAnimeFrame(decoded_id, df)

        anime_name = anime_frame.eng_version.values[0]
        genre = anime_frame.Genres.values[0]
        similarity = dists[close]
   

        SimilarityArr.append({
            "anime_id": decoded_id,
            "name": anime_name,
            "similarity": similarity,
            "genre": genre,
        })
       

    # Create a DataFrame with results and sort by similarity
    Frame = pd.DataFrame(SimilarityArr).sort_values(by="similarity", ascending=False)
    return Frame[Frame.anime_id != index].drop(['anime_id'], axis=1)


In [135]:
find_similar_animes(
    "Steins;Gate",
    anime_weights,
    anime2anime_encoded,
    anime2anime_decoded,
    df,
    synopsis_df,
)

Unnamed: 0,name,similarity,genre
9,Minna Tomodachi,0.345735,"Comedy, Fantasy, Kids"
8,Kimi ni Todoke:From Me To You 2,0.325021,"Slice of Life, Drama, Romance, School, Shoujo"
7,Yume Hakonda Randoseru,0.313089,"Music, Slice of Life, Kids"
6,Broots,0.310526,"Sci-Fi, Mecha"
5,Ojiichan no Tomato,0.307859,"Drama, Kids"
4,What Is Not Romance?,0.303482,Drama
3,Yume no Tochuu de,0.298758,"Kids, Music"
2,Luo Xiao Hei Zhan Ji: Fan Wai,0.296493,"Comedy, Music, Parody, School"
1,Lupin III Episode 0:The First Contact,0.292195,"Action, Adventure, Mystery, Comedy, Seinen"
0,Charamaru-kun to Dokumaru-kun,0.289313,"Kids, Super Power"


### USER BASED RECOMMENDATION

In [136]:
def find_similar_users(item_input , user_weights , user2user_encoded , user2user_decoded, n=10 , return_dist=False,neg=False):
    try:
        index=item_input
        encoded_index = user2user_encoded.get(index)

        weights = user_weights

        dists = np.dot(weights,weights[encoded_index])
        sorted_dists = np.argsort(dists)

        n=n+1

        if neg:
            closest = sorted_dists[:n]
        else:
            closest = sorted_dists[-n:]
            

        if return_dist:
            return dists,closest
        
        SimilarityArr = []

        for close in closest:
            similarity = dists[close]

            if isinstance(item_input,int):
                decoded_id = user2user_decoded.get(close)
                SimilarityArr.append({
                    "similar_users" : decoded_id,
                    "similarity" : similarity
                })
        similar_users = pd.DataFrame(SimilarityArr).sort_values(by="similarity",ascending=False)
        similar_users = similar_users[similar_users.similar_users != item_input]
        return similar_users
    except Exception as e:
        print("Error Occured",e)

        
        

In [137]:
find_similar_users(int(11880),user_weights,user2user_encoded,user2user_decoded)

Unnamed: 0,similar_users,similarity
9,18491,0.351586
8,10818,0.334305
7,10777,0.312984
6,20924,0.308878
5,18520,0.293058
4,20984,0.287794
3,27436,0.278564
2,16713,0.277292
1,5207,0.276962
0,13970,0.263403


In [138]:
def showWordCloud(all_genres):
    genres_cloud = WordCloud(width=700,height=400,background_color='white',colormap='gnuplot').generate_from_frequencies(all_genres)
    plt.figure(figsize=(10,8))
    plt.imshow(genres_cloud,interpolation="bilinear")
    plt.axis("off")
    plt.show()

In [139]:
from collections import defaultdict

In [140]:
df.head(1)

Unnamed: 0,anime_id,eng_version,Score,Genres,Episodes,Type,Premiered,Members
3971,5114,Fullmetal Alchemist:Brotherhood,9.19,"Action, Military, Adventure, Comedy, Drama, Magic, Fantasy, Shounen",64,TV,Spring 2009,2248456


In [141]:
def getFavGenre(frame , plot=False):
    frame.dropna(inplace=False)
    all_genres = defaultdict(int)

    genres_list = []
    for genres in frame["Genres"]:
        if isinstance(genres,str):
            for genre in genres.split(','):
                genres_list.append(genre)
                all_genres[genre.strip()] += 1

    if plot:
        showWordCloud(all_genres)
    
    return genres_list



In [142]:
rating_df.head(2)

Unnamed: 0,user_id,anime_id,rating,user,anime
0,6624,23321,0.8,1658,1193
1,6359,12549,0.0,1603,1302


In [143]:
def get_user_preferences(user_id , rating_df , df ,plot=False):

    animes_watched_by_user = rating_df[rating_df.user_id == user_id]

    user_rating_percentile = np.percentile(animes_watched_by_user.rating , 75)

    animes_watched_by_user = animes_watched_by_user[animes_watched_by_user.rating >= user_rating_percentile]

    top_animes_user = (
        animes_watched_by_user.sort_values(by="rating" , ascending=False).anime_id.values
    )

    anime_df_rows = df[df["anime_id"].isin(top_animes_user)]
    anime_df_rows = anime_df_rows[["eng_version","Genres"]]

    if plot:
        getFavGenre(anime_df_rows,plot)


    return anime_df_rows




In [144]:
get_user_preferences(11880 , rating_df, df , plot=True)

NameError: name 'WordCloud' is not defined

In [145]:
def get_user_recommendations(similar_users , user_pref ,df , synopsis_df, rating_df, n=10):

    recommended_animes = []
    anime_list = []

    for user_id in similar_users.similar_users.values:
        pref_list = get_user_preferences(int(user_id) , rating_df, df)

        pref_list = pref_list[~pref_list.eng_version.isin(user_pref.eng_version.values)]

        if not pref_list.empty:
            anime_list.append(pref_list.eng_version.values)

    if anime_list:
            anime_list = pd.DataFrame(anime_list)

            sorted_list = pd.DataFrame(pd.Series(anime_list.values.ravel()).value_counts()).head(n)

            for i,anime_name in enumerate(sorted_list.index):
                n_user_pref = sorted_list[sorted_list.index == anime_name].values[0][0]

                if isinstance(anime_name,str):
                    frame = getAnimeFrame(anime_name,df)
                    anime_id = frame.anime_id.values[0]
                    genre = frame.Genres.values[0]
                    synopsis = getSynopsis(int(anime_id),synopsis_df)

                    recommended_animes.append({
                        "n" : n_user_pref,
                        "anime_name" : anime_name,
                        "Genres" : genre,
                        "Synopsis": synopsis
                    })
    return pd.DataFrame(recommended_animes).head(n)
            



    

In [175]:
similar_users =find_similar_users(int(11880),user_weights,user2user_encoded,user2user_decoded)
print(similar_users)

   similar_users  similarity
9          18491    0.351586
8          10818    0.334305
7          10777    0.312984
6          20924    0.308878
5          18520    0.293058
4          20984    0.287794
3          27436    0.278564
2          16713    0.277292
1           5207    0.276962
0          13970    0.263403


In [176]:
user_pref = get_user_preferences(11880 , rating_df, df , plot=False)
print(user_pref)

                          eng_version  \
3971  Fullmetal Alchemist:Brotherhood   
9913                 Gintama Season 4   
6474                  Hunter x Hunter   
6006                 Gintama Season 2   
741     Legend of the Galactic Heroes   
...                               ...   
2575              Shootfighter Tekken   
8064                 Samurai Flamenco   
510                        Venus Wars   
6864                Saint Seiya Omega   
9796                Garo:Crimson Moon   

                                                                   Genres  
3971  Action, Military, Adventure, Comedy, Drama, Magic, Fantasy, Shounen  
9913         Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen  
6474                     Action, Adventure, Fantasy, Shounen, Super Power  
6006         Action, Sci-Fi, Comedy, Historical, Parody, Samurai, Shounen  
741                                        Military, Sci-Fi, Space, Drama  
...                                                

In [177]:
recommended_animes = []
anime_list = []

for user_id in similar_users.similar_users.values:
    pref_list = get_user_preferences(int(user_id) , rating_df, df)
    print(user_id)
    print(pref_list)

    pref_list = pref_list[~pref_list.eng_version.isin(user_pref.eng_version.values)]
    print(pref_list)


18491
                                                             eng_version  \
3971                                     Fullmetal Alchemist:Brotherhood   
5683                                                         Steins;Gate   
9886                                                      A Silent Voice   
12898                              March Comes In Like A Lion 2nd Season   
3537                                               Clannad ~After Story~   
...                                                                  ...   
1020                                           Pokemon:Jirachi Wishmaker   
12453                         Death March to the Parallel World Rhapsody   
14822                                                 Demon Lord, Retry!   
15705  That Time I Got Reincarnated as a Slime:Tales - Veldora's Journal   
11608                                                    Eromanga Sensei   

                                                                    Genres  
3971

In [178]:
if not pref_list.empty:
    anime_list.append(pref_list.eng_version.values)
    print(anime_list)



[array(['Steins;Gate', 'A Silent Voice', 'Your Name.', 'Spirited Away',
       'Kizumonogatari III: Reiketsu-hen', 'Your Lie in April',
       'Kaguya-sama:Love is War Season 2', 'Princess Mononoke',
       'Rascal Does Not Dream of a Dreaming Girl', "Howl's Moving Castle",
       'The Promised Neverland', 'Death Note',
       'Kizumonogatari Part 2:Nekketsu', 'Steins;Gate 0',
       'Saenai Heroine no Sodatekata Fine', 'Attack on Titan',
       'Owarimonogatari', 'Kaguya-sama:Love is War',
       'Weathering With You', 'Kizumonogatari Part 1:Tekketsu',
       'Rascal Does Not Dream of Bunny Girl Senpai', 'Shirobako',
       'Shelter', 'My Neighbor Totoro',
       "KonoSuba:God's Blessing on This Wonderful World! 2",
       'Re:ZERO -Starting Life in Another World-',
       'Steins;Gate: Kyoukaimenjou no Missing Link - Divide By Zero',
       'Hinamatsuri', 'No Game, No Life', 'Asobi Asobase:Workshop of Fun',
       "Girls' Last Tour", 'Nisemonogatari',
       "KonoSuba:God's Blessing 

In [179]:
anime_list = pd.DataFrame(anime_list)
sorted_list = pd.DataFrame(pd.Series(anime_list.values.ravel()).value_counts()).head(2)
print(sorted_list)
for i,anime_name in enumerate(sorted_list.index):
    print(anime_name)
    n_user_pref = sorted_list[sorted_list.index == anime_name].values[0][0]
    print(n_user_pref)
    if isinstance(anime_name,str):
        frame = getAnimeFrame(anime_name,df)
        anime_id = frame.anime_id.values[0]
        genre = frame.Genres.values[0]
        synopsis = getSynopsis(int(anime_id),synopsis_df)
        recommended_animes.append({
                        "n" : n_user_pref,
                        "anime_name" : anime_name,
                        "Genres" : genre,
                        "Synopsis": synopsis
            })
recommended_animes=pd.DataFrame(recommended_animes).head()

                count
Steins;Gate         1
A Silent Voice      1
Steins;Gate
1
A Silent Voice
1


In [180]:
recommended_animes

Unnamed: 0,n,anime_name,Genres,Synopsis
0,1,Steins;Gate,"Thriller, Sci-Fi","The self-proclaimed mad scientist Rintarou Okabe rents out a room in a rickety old building in Akihabara, where he indulges himself in his hobby of inventing prospective ""future gadgets"" with fellow lab members: Mayuri Shiina, his air-headed childhood friend, and Hashida Itaru, a perverted hacker nicknamed ""Daru."" The three pass the time by tinkering with their most promising contraption yet, a machine dubbed the ""Phone Microwave,"" which performs the strange function of morphing bananas into piles of green gel. Though miraculous in itself, the phenomenon doesn't provide anything concrete in Okabe's search for a scientific breakthrough; that is, until the lab members are spurred into action by a string of mysterious happenings before stumbling upon an unexpected success—the Phone Microwave can send emails to the past, altering the flow of history. Adapted from the critically acclaimed visual novel by 5pb. and Nitroplus, Steins;Gate takes Okabe through the depths of scientific theory and practicality. Forced across the diverging threads of past and present, Okabe must shoulder the burdens that come with holding the key to the realm of time."
1,1,A Silent Voice,"Drama, School, Shounen","s a wild youth, elementary school student Shouya Ishida sought to beat boredom in the cruelest ways. When the deaf Shouko Nishimiya transfers into his class, Shouya and the rest of his class thoughtlessly bully her for fun. However, when her mother notifies the school, he is singled out and blamed for everything done to her. With Shouko transferring out of the school, Shouya is left at the mercy of his classmates. He is heartlessly ostracized all throughout elementary and middle school, while teachers turn a blind eye. Now in his third year of high school, Shouya is still plagued by his wrongdoings as a young boy. Sincerely regretting his past actions, he sets out on a journey of redemption: to meet Shouko once more and make amends. Koe no Katachi tells the heartwarming tale of Shouya's reunion with Shouko and his honest attempts to redeem himself, all while being continually haunted by the shadows of his past."


In [181]:
similar_users =find_similar_users(int(11880),user_weights,user2user_encoded,user2user_decoded)

In [182]:
user_pref = get_user_preferences(11880 , rating_df, df , plot=False)

In [183]:
get_user_recommendations(similar_users,user_pref,df, synopsis_df,rating_df,n=2)

Unnamed: 0,n,anime_name,Genres,Synopsis
0,8,Death Note,"Mystery, Police, Psychological, Supernatural, Thriller, Shounen","shinigami, as a god of death, can kill any person—provided they see their victim's face and write their victim's name in a notebook called a Death Note. One day, Ryuk, bored by the shinigami lifestyle and interested in seeing how a human would use a Death Note, drops one into the human realm. High school student and prodigy Light Yagami stumbles upon the Death Note and—since he deplores the state of the world—tests the deadly notebook by writing a criminal's name in it. When the criminal dies immediately following his experiment with the Death Note, Light is greatly surprised and quickly recognizes how devastating the power that has fallen into his hands could be. With this divine capability, Light decides to extinguish all criminals in order to build a new world where crime does not exist and people worship him as a god. Police, however, quickly discover that a serial killer is targeting criminals and, consequently, try to apprehend the culprit. To do this, the Japanese investigators count on the assistance of the best detective in the world: a young and eccentric man known only by the name of L."
1,7,Attack on Titan,"Action, Military, Mystery, Super Power, Drama, Fantasy, Shounen","Centuries ago, mankind was slaughtered to near extinction by monstrous humanoid creatures called titans, forcing humans to hide in fear behind enormous concentric walls. What makes these giants truly terrifying is that their taste for human flesh is not born out of hunger but what appears to be out of pleasure. To ensure their survival, the remnants of humanity began living within defensive barriers, resulting in one hundred years without a single titan encounter. However, that fragile calm is soon shattered when a colossal titan manages to breach the supposedly impregnable outer wall, reigniting the fight for survival against the man-eating abominations. After witnessing a horrific personal loss at the hands of the invading creatures, Eren Yeager dedicates his life to their eradication by enlisting into the Survey Corps, an elite military unit that combats the merciless humanoids outside the protection of the walls. Based on Hajime Isayama's award-winning manga, Shingeki no Kyojin follows Eren, along with his adopted sister Mikasa Ackerman and his childhood friend Armin Arlert, as they join the brutal war against the titans and race to discover a way of defeating them before the last walls are breached."


### HYBRID RECOMMENDER SYSTEM

In [319]:
def hybrid_recommendation(user_id , user_weight=0.5, content_weight =0.5):

    ## User Recommndation

    similar_users =find_similar_users(user_id,user_weights,user2user_encoded,user2user_decoded)
    user_pref = get_user_preferences(user_id , rating_df, df)
    user_recommended_animes =get_user_recommendations(similar_users,user_pref,df, synopsis_df,rating_df)
    

    user_recommended_anime_list = user_recommended_animes["anime_name"].tolist()
    print(user_recommended_anime_list)

    #### Content recommendation
    content_recommended_animes = []

    for anime in user_recommended_anime_list:
        similar_animes = find_similar_animes(anime, anime_weights, anime2anime_encoded, anime2anime_decoded, df, synopsis_df)

        if similar_animes is not None and not similar_animes.empty:
            content_recommended_animes.extend(similar_animes["name"].tolist())
        else:
            print(f"No similar anime found {anime}")
    
    combined_scores = {}

    for anime in user_recommended_anime_list:
        combined_scores[anime] = combined_scores.get(anime,0) + user_weight

    for anime in content_recommended_animes:
        combined_scores[anime] = combined_scores.get(anime,0) + content_weight  

    sorted_animes = sorted(combined_scores.items() , key=lambda x:x[1] , reverse=True)

    return [anime for anime , score in sorted_animes[:10]] 



In [320]:
hybrid_recommendation(11880)

['Death Note', 'Angel Beats!', 'Clannad', 'The Girl Who Leapt Through Time', 'Code Geass:Lelouch of the Rebellion R2', 'Clannad ~After Story~', 'Hotarubi no Mori e', 'Nisemonogatari', 'No Game No Life: Zero', 'A Silent Voice']


['Death Note',
 'Angel Beats!',
 'Clannad',
 'The Girl Who Leapt Through Time',
 'Code Geass:Lelouch of the Rebellion R2',
 'Clannad ~After Story~',
 'Hotarubi no Mori e',
 'Nisemonogatari',
 'No Game No Life: Zero',
 'A Silent Voice']