# Check recommendation model

In [7]:
from website.embedding_model import RecommenderModel, Model, InitializedModel
from tensorflow import keras
import tensorflow as tf
import joblib
import numpy as np
import pandas as pd

In [8]:
%%time
# Build the project's ready-to-use recommender wrapper; per the log output
# below, construction loads the layers and the trained weights.
init_model = InitializedModel()


[INFO] Calling model to load layers...


[INFO] Loaded weights.


[INFO] Model instance is initialized!

Wall time: 379 ms


In [13]:
%%time
# 34572 is the MAL_ID that the run below reports as "Black Clover".
# The call returns a pair; the second element (`rec_df`) is previewed here.
anime_row, rec_df = init_model.get_recommendation(34572)
rec_df.head()

Recommending animes for Black Clover
Wall time: 31 ms


Unnamed: 0,MAL_ID,Name,img_url,Score,new_synopsis,Genres
0,34566,Boruto: Naruto Next Generations,https://cdn.myanimelist.net/images/anime/9/844...,5.81,Following the successful end of the Fourth Shi...,"Action, Adventure, Super Power, Martial Arts, ..."
1,37885,Super Dragon Ball Heroes,https://cdn.myanimelist.net/images/anime/1480/...,5.29,"In May 2018, V-Jump announced a promotional an...","Action, Comedy, Super Power, Martial Arts, Fan..."
2,40748,Jujutsu Kaisen (TV),https://cdn.myanimelist.net/images/anime/1171/...,8.54,Idly indulging in baseless paranormal activiti...,"Action, Demons, Supernatural, School, Shounen"
3,36516,Beatless,https://cdn.myanimelist.net/images/anime/1986/...,6.2,"With great advancements in technology, mankind...","Action, Drama, Romance, Sci-Fi"
4,21,One Piece,https://cdn.myanimelist.net/images/anime/6/732...,8.52,"Gol D. Roger was known as the ""Pirate King,"" t...","Action, Adventure, Comedy, Super Power, Drama,..."


# Understanding the weights of embeddings

In [20]:
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load encoder files that come from a trusted source.
ENCODERS_DICT = joblib.load('assets/encoder_dicts.joblib')
# MAL_ID -> embedding row index (used below to index the anime embedding matrix)
anime_id_to_idx = ENCODERS_DICT['anime_id_to_idx']
# Embedding row index -> MAL_ID (inverse lookup used when building recommendations)
anime_idx_to_id = ENCODERS_DICT['anime_idx_to_id']
# Presumably user ID -> user embedding row index; not used in the cells shown here.
user_id_to_idx = ENCODERS_DICT['user_id_to_idx']

In [48]:
# Anime metadata table (one row per anime, keyed by MAL_ID); the lookup
# helpers defined later in this notebook read from it.
anime_df = pd.read_csv('data/full_anime_info.csv')
anime_df.head()

Unnamed: 0,MAL_ID,Name,English name,Score,Genres,Type,Episodes,Aired,Popularity,new_synopsis,img_url
0,20707,"""0""",Unknown,4.71,Music,Music,1,"Oct 23, 2013",7736,This music video tells how a shy girl with a secret love and curiosity about what surrounds her works.,https://cdn.myanimelist.net/images/anime/6/54815.jpg
1,25627,"""Aesop"" no Ohanashi yori: Ushi to Kaeru, Yokubatta Inu",Unknown,,Kids,Movie,1,"Mar 21, 1970",13082,Based on Aesop's Fables.,https://cdn.myanimelist.net/images/anime/3/65151.jpg
2,7669,"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",Unknown,6.91,"Comedy, Fantasy, School",OVA,1,"Dec 26, 2009",3876,"Short episode bundled with the limited edition release of ""Bungaku Shoujo"" Minarai no, Shoushin .",https://cdn.myanimelist.net/images/anime/2/79900.jpg
3,8481,"""Bungaku Shoujo"" Memoire",Unknown,7.34,"Drama, Romance, School",OVA,3,"Jun 25, 2010 to Dec 24, 2010",3259,"Episodes which depict the background stories of ""Bungaku Shoujo"" Movie 's main heroines.",https://cdn.myanimelist.net/images/anime/6/26770.jpg
4,6408,"""Bungaku Shoujo"" Movie",Unknown,7.41,"Mystery, Drama, Romance, School",Movie,1,"May 1, 2010",2049,"The protagonist of the story, Konoha Inoue, is a seemingly normal senior high 2nd year student. His high school life, other than a hinted incident 2 years ago, can be summed up as normal- if one can dismiss the secret fact that he used to be a female bestselling romance author. Due to that incident, however, he has now vowed never to write again. This continued on until he was forced to join the literary club by the literary club president, the 3rd year female student Amano Tooko, a beautiful girl who has a taste for eating literary works. Now he has been tasked with writing her snack every day after school. (Source: To Say Nothing of the Dog)",https://cdn.myanimelist.net/images/anime/8/81162.jpg


In [41]:
# Show full cell contents (e.g. long synopses) instead of truncating them.
# The fully qualified option name is used because abbreviated names rely on
# pandas' partial matching, which breaks if a similarly named option is added.
pd.set_option("display.max_colwidth", None)

# Every distinct value of the `Type` column; check_anime_types validates
# user-supplied type filters against this set.
all_anime_types = set(anime_df['Type'].unique())

def check_anime_types(types):
    """Validate that every requested anime type is a known type.

    Args:
        types: A single type name, or an iterable of type names.

    Raises:
        Exception: For the first requested type that is not contained in
            the module-level ``all_anime_types`` set.
    """
    # A bare string is one type name, not a collection of characters.
    requested = {types} if isinstance(types, str) else set(types)
    for candidate in requested:
        if candidate in all_anime_types:
            continue
        raise Exception(f'Anime type "{candidate}" is not valid!')

def get_anime_rows(df, anime_query, exact_name=False, types=None):
    """Select the rows of ``df`` that match an anime query.

    Args:
        df: DataFrame with ``MAL_ID`` and ``Name`` columns (and a ``Type``
            column when ``types`` is given). It is not modified.
        anime_query: An integer (Python or NumPy) is matched against
            ``MAL_ID``; any other value is matched against ``Name``.
        exact_name: For name queries, require an exact match instead of a
            case-insensitive substring match.
        types: Optional type filter: a single type name or an iterable of
            type names.

    Returns:
        A new DataFrame holding the matching rows (original index kept).

    Raises:
        Exception: Propagated from ``check_anime_types`` when a requested
            type is unknown.
    """
    # IDs read back from a DataFrame are NumPy integers, not Python ints.
    if isinstance(anime_query, (int, np.integer)):
        mask = df.MAL_ID == anime_query
    elif exact_name:
        mask = df.Name == anime_query
    else:
        # na=False: rows with a missing name simply do not match; without it
        # the mask would contain NaN and boolean indexing would fail.
        mask = df.Name.str.contains(anime_query, case=False, regex=False, na=False)
    matches = df[mask]

    if types:
        # Series.isin rejects a bare string, so wrap a single type in a list.
        types = [types] if isinstance(types, str) else list(types)
        check_anime_types(types)
        matches = matches[matches.Type.isin(types)]

    return matches.copy()

In [14]:
# fastai's rule of thumb recommends embedding sizes of (600, 363);
# much smaller ones are used here to keep the model simple.
USER_EMB_SIZE = 128
ANIME_EMB_SIZE = 128
EMBEDDING_SIZES = USER_EMB_SIZE, ANIME_EMB_SIZE

# Number of distinct users / animes passed to RecommenderModel.
n_users = 145_311
n_animes = 17_562

# Short alias for the Keras backend module.
K = keras.backend

def get_model():
    """Create and compile a fresh ``RecommenderModel``.

    Global Keras state is cleared first, then the model is built from the
    module-level ``n_users``, ``n_animes`` and ``EMBEDDING_SIZES`` settings.

    Returns:
        The compiled model (binary cross-entropy loss, Adam optimizer,
        MAE and MSE metrics).
    """
    print("[INFO] Using Subclassing API Model")

    # Drop all state generated by Keras so earlier models are cleared.
    K.clear_session()

    recommender = RecommenderModel(n_users, n_animes, EMBEDDING_SIZES)
    recommender.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['mae', 'mse'],
    )
    return recommender

In [15]:
# Trained weights saved in HDF5 format.
WEIGHTS_PATH = 'assets/weights.h5'

model = get_model()
print('Calling model to load layers...')
# Call the model once on a dummy (1, 2) batch before loading weights, so the
# layers exist when load_weights runs. The two columns are presumably a
# (user index, anime index) pair — TODO confirm against RecommenderModel.call.
_ = model(tf.ones((1, 2)))
model.load_weights(WEIGHTS_PATH)
print('Loaded weights.')

[INFO] Using Subclassing API Model
Calling model to load layers...
Loaded weights.


## Inspecting weights and similarity

In [16]:
# Fetch the anime embedding layer by name and take its first weight array,
# i.e. the embedding matrix with one row per anime index.
weight_layer = model.get_layer('anime_embedding')
weights = weight_layer.get_weights()[0]

In [17]:
# Expected to equal (n_animes, ANIME_EMB_SIZE).
weights.shape

(17562, 128)

In [18]:
# Overall mean of all embedding values (quick sanity check of the loaded weights).
weights.mean()

-0.0003077352

In [132]:
# The model's Dot layer appears to use normalize=True (TODO confirm in
# RecommenderModel), so reproduce that step here: scale every embedding row
# to unit L2 length.
weights_norm = weights / np.linalg.norm(weights, axis=1, keepdims=True)
weights_norm.shape

(17562, 128)

According to the Keras `Dot` layer documentation, `normalize=True` L2-normalizes the samples along the dot-product axis before taking the dot product. <br>
With normalization enabled, the output of the dot product is the **cosine proximity** (cosine similarity) between the two samples, <br>
which makes it easier to compare similar items (animes in this case).

Formula applied to each sample when `normalize=True` (the same one `tf.math.l2_normalize` uses): <br>
`output = x / sqrt(max(sum(x**2), epsilon))`

In [135]:
# Manual implementation of the normalize=True formula:
#   output = x / sqrt(max(sum(x**2), epsilon))
x = weights
epsilon = 1e-12
# Clamp the squared row norm at epsilon, as the formula requires, so an
# all-zero embedding row is divided by a tiny positive number instead of zero.
norm = np.sqrt(np.maximum(np.sum(x**2, axis=1, keepdims=True), epsilon))
output = x / norm
output.shape

(17562, 128)

In [139]:
# Same row-wise L2 normalization done with TensorFlow's helper; this is what
# normalize=True in the Dot layer does before performing the dot product.
weights_norm_tf = tf.math.l2_normalize(weights, axis=1).numpy()
weights_norm_tf.shape

(17562, 128)

In [138]:
# Spot check on row 0 only: the NumPy, TensorFlow and manual normalizations
# give the same mean up to float rounding, so they are treated as equivalent.
weights_norm[0].mean(), weights_norm_tf[0].mean(), output[0].mean()

(0.011004937, 0.011004936, 0.011004937)

In [140]:
# MAL_ID used for the Black Clover examples in this notebook.
anime_id = 34572
# NOTE(review): .get returns None for an ID without an embedding; indexing
# with None below would add an axis rather than raise.
anime_idx = anime_id_to_idx.get(anime_id)

# Dot product of every unit-length embedding row with the query's row, i.e.
# cosine similarity (higher = more similar, despite the name `distances`).
distances = np.dot(weights_norm_tf, weights_norm_tf[anime_idx])

In [141]:
# One similarity score per embedding row.
distances.shape

(17562,)

In [142]:
# Indices of the five highest similarity scores, best first.
np.argsort(distances)[::-1][:5]

array([1394, 1396, 4371,  230, 1706], dtype=int64)

In [143]:
# Scores at the first three indices of the argsort output above (indices are
# hardcoded from that output).
distances[1394], distances[1396], distances[4371]

(0.9999998, 0.7089701, 0.5757308)

In [144]:
def get_recommendation(anime_query, k=10, weights=weights, exact_name=False, types=None):
    """Recommend the animes whose embeddings are most similar to a query anime.

    Similarity is the dot product between embedding rows, so passing
    L2-normalized ``weights`` yields cosine similarity.

    Args:
        anime_query: MAL_ID (integer) or (part of) an anime name; the first
            row returned by ``get_anime_rows`` is used as the query anime.
        k: Number of recommendations to return.
        weights: Embedding matrix with one row per anime index.
        exact_name: Require an exact name match for name queries.
        types: Optional filter on the recommended animes' ``Type``: a single
            type name or an iterable of type names. The query anime itself
            is not filtered by type.

    Returns:
        DataFrame with columns ``Anime_id``, ``Name``, ``Similarity``,
        ``Score``, ``Type`` and ``Genre``, ordered from most to least similar.

    Raises:
        Exception: If a requested type is unknown, no anime matches the
            query, or the matched anime has no embedding index.
    """
    if types:
        # A bare string would turn the `in` test below into a substring check.
        types = [types] if isinstance(types, str) else list(types)
        check_anime_types(types)
    anime_rows = get_anime_rows(anime_df, anime_query,
                                exact_name=exact_name)
    if len(anime_rows) == 0:
        raise Exception(f'Anime not found for {anime_query}')
    anime_row = anime_rows.iloc[[0]]
    anime_id = anime_row.MAL_ID.values[0]
    anime_name = anime_row.Name.values[0]
    anime_idx = anime_id_to_idx.get(anime_id)
    if anime_idx is None:
        # weights[None] would silently add an axis instead of failing.
        raise Exception(f'No embedding found for {anime_name} (MAL_ID {anime_id})')

    distances = np.dot(weights, weights[anime_idx])
    sorted_dists_ind = np.argsort(distances)[::-1]

    print(f'Recommending animes for {anime_name}')
    display(anime_row.loc[:, 'MAL_ID': 'Aired'])

    anime_list = []
    for idx in sorted_dists_ind:
        # Skip the query itself by index: with un-normalized weights it is
        # not guaranteed to be the top-ranked row.
        if idx == anime_idx:
            continue
        rec_id = anime_idx_to_id.get(idx)
        if rec_id is None:
            continue
        rec_row = anime_df[anime_df.MAL_ID == rec_id]
        if rec_row.empty:
            # Embedding exists but the metadata table has no row for it.
            continue
        rec_type = rec_row.Type.values[0]
        if types and rec_type not in types:
            continue

        anime_list.append({"Anime_id": rec_id,
                           "Name": rec_row.Name.values[0],
                           "Similarity": distances[idx],
                           "Score": rec_row.Score.values[0],
                           "Type": rec_type,
                           "Genre": rec_row.Genres.values[0]})
        if len(anime_list) == k:
            # enough number of recommendations
            break
    rec_df = pd.DataFrame(anime_list)
    return rec_df

In [56]:
# Raw (un-normalized) embeddings: similarity is a plain dot product.
get_recommendation(34572, weights=weights)

Recommending animes for Black Clover


Unnamed: 0,MAL_ID,Name,English name,Score,Genres,Type,Episodes,Aired
1371,34572,Black Clover,Black Clover,7.38,"Action, Comedy, Magic, Fantasy, Shounen",TV,170,"Oct 3, 2017 to Mar 30, 2021"


Unnamed: 0,Anime_id,Name,Similarity,Score,Type,Genre
0,34566,Boruto: Naruto Next Generations,0.754695,5.81,TV,"Action, Adventure, Super Power, Martial Arts, Shounen"
1,40748,Jujutsu Kaisen (TV),0.584814,8.54,TV,"Action, Demons, Supernatural, School, Shounen"
2,37885,Super Dragon Ball Heroes,0.572704,5.29,ONA,"Action, Comedy, Super Power, Martial Arts, Fantasy, Shounen"
3,36516,Beatless,0.489075,6.2,TV,"Action, Drama, Romance, Sci-Fi"
4,21,One Piece,0.486404,8.52,TV,"Action, Adventure, Comedy, Super Power, Drama, Fantasy, Shounen"
5,40028,Shingeki no Kyojin: The Final Season,0.485405,9.17,TV,"Action, Military, Mystery, Super Power, Drama, Fantasy, Shounen"
6,36934,Captain Tsubasa (2018),0.460407,7.42,TV,"Action, Sports, Shounen"
7,32105,Sousei no Onmyouji,0.458422,7.32,TV,"Action, Supernatural, Romance, Fantasy, Shounen"
8,39535,Mushoku Tensei: Isekai Ittara Honki Dasu,0.45824,8.37,TV,"Drama, Magic, Fantasy"
9,34279,Grancrest Senki,0.45642,7.25,TV,"Action, Drama, Fantasy, Romance"


In [145]:
# NumPy-normalized embeddings: similarity is now cosine similarity.
get_recommendation(34572, weights=weights_norm)

Recommending animes for Black Clover


Unnamed: 0,MAL_ID,Name,English name,Score,Genres,Type,Episodes,Aired
1371,34572,Black Clover,Black Clover,7.38,"Action, Comedy, Magic, Fantasy, Shounen",TV,170,"Oct 3, 2017 to Mar 30, 2021"


Unnamed: 0,Anime_id,Name,Similarity,Score,Type,Genre
0,34566,Boruto: Naruto Next Generations,0.70897,5.81,TV,"Action, Adventure, Super Power, Martial Arts, Shounen"
1,37885,Super Dragon Ball Heroes,0.575731,5.29,ONA,"Action, Comedy, Super Power, Martial Arts, Fantasy, Shounen"
2,40748,Jujutsu Kaisen (TV),0.564877,8.54,TV,"Action, Demons, Supernatural, School, Shounen"
3,36516,Beatless,0.47659,6.2,TV,"Action, Drama, Romance, Sci-Fi"
4,21,One Piece,0.462426,8.52,TV,"Action, Adventure, Comedy, Super Power, Drama, Fantasy, Shounen"
5,34279,Grancrest Senki,0.458046,7.25,TV,"Action, Drama, Fantasy, Romance"
6,39535,Mushoku Tensei: Isekai Ittara Honki Dasu,0.452521,8.37,TV,"Drama, Magic, Fantasy"
7,40028,Shingeki no Kyojin: The Final Season,0.448825,9.17,TV,"Action, Military, Mystery, Super Power, Drama, Fantasy, Shounen"
8,32105,Sousei no Onmyouji,0.447679,7.32,TV,"Action, Supernatural, Romance, Fantasy, Shounen"
9,40594,Tatoeba Last Dungeon Mae no Mura no Shounen ga Joban no Machi de Kurasu Youna Monogatari,0.439266,6.75,TV,"Adventure, Fantasy"


In [146]:
# TensorFlow-normalized embeddings: should reproduce the weights_norm results.
get_recommendation(34572, weights=weights_norm_tf)

Recommending animes for Black Clover


Unnamed: 0,MAL_ID,Name,English name,Score,Genres,Type,Episodes,Aired
1371,34572,Black Clover,Black Clover,7.38,"Action, Comedy, Magic, Fantasy, Shounen",TV,170,"Oct 3, 2017 to Mar 30, 2021"


Unnamed: 0,Anime_id,Name,Similarity,Score,Type,Genre
0,34566,Boruto: Naruto Next Generations,0.70897,5.81,TV,"Action, Adventure, Super Power, Martial Arts, Shounen"
1,37885,Super Dragon Ball Heroes,0.575731,5.29,ONA,"Action, Comedy, Super Power, Martial Arts, Fantasy, Shounen"
2,40748,Jujutsu Kaisen (TV),0.564877,8.54,TV,"Action, Demons, Supernatural, School, Shounen"
3,36516,Beatless,0.47659,6.2,TV,"Action, Drama, Romance, Sci-Fi"
4,21,One Piece,0.462426,8.52,TV,"Action, Adventure, Comedy, Super Power, Drama, Fantasy, Shounen"
5,34279,Grancrest Senki,0.458046,7.25,TV,"Action, Drama, Fantasy, Romance"
6,39535,Mushoku Tensei: Isekai Ittara Honki Dasu,0.452521,8.37,TV,"Drama, Magic, Fantasy"
7,40028,Shingeki no Kyojin: The Final Season,0.448825,9.17,TV,"Action, Military, Mystery, Super Power, Drama, Fantasy, Shounen"
8,32105,Sousei no Onmyouji,0.447679,7.32,TV,"Action, Supernatural, Romance, Fantasy, Shounen"
9,40594,Tatoeba Last Dungeon Mae no Mura no Shounen ga Joban no Machi de Kurasu Youna Monogatari,0.439266,6.75,TV,"Adventure, Fantasy"
