In [3]:
import pandas as pd
import numpy as np

from scipy.sparse import csr_matrix, load_npz
from sklearn.neighbors import NearestNeighbors

In [4]:
# Root of the project's data directory, relative to this notebook. Kept in one
# place so the three loads below cannot drift apart if the layout changes.
DATA_DIR = "../data"

# Cleaned anime table. Later cells look rows up by position using neighbour
# indices computed on the two matrices below, so all three are presumably
# row-aligned — TODO confirm against the notebook that produced them.
anime = pd.read_csv(f"{DATA_DIR}/dataset/anime_clean.csv")
# Feature matrix stored as a NumPy .npy file (used as the "metadata" vectors).
anime_metadata = np.load(f"{DATA_DIR}/binary/anime_metadata.npy")
# SciPy sparse matrix stored as .npz (used as the "synopsis" vectors).
animeFeaturesTfidf = load_npz(f"{DATA_DIR}/binary/animeFeaturesTfidf.npz")

In [5]:
def animeSearch(df_, nameQuery, n=5, sortByScore=True):
    """Return the rows of ``df_`` whose ``Name`` contains ``nameQuery``, ignoring case.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Table with a ``Name`` column, plus a ``Score`` column when
        ``sortByScore`` is true. The frame is not modified.
    nameQuery : str
        Text to look for. It is matched as a literal, case-insensitive
        substring, so titles containing characters such as ``[``, ``(`` or
        ``?`` can be searched verbatim.
    n : int or str, default 5
        Maximum number of rows to return. The string ``"all"`` (any casing)
        returns every match.
    sortByScore : bool, default True
        When true, matches are ordered by ``Score`` descending before the
        ``n`` cut-off is applied.

    Returns
    -------
    pandas.DataFrame
        The matching rows, without the ``Features`` column if it was present.

    Notes
    -----
    Side effect: the global pandas option ``display.max_rows`` is set to ``n``
    (or to the number of matches when ``n`` is ``"all"``).
    """
    query = nameQuery.lower()

    # `.str.lower()` propagates missing names as NaN instead of raising the way
    # a plain `x.lower()` does; `na=False` then counts them as non-matches.
    # `regex=False` keeps the lower-cased query a literal substring.
    mask = df_["Name"].str.lower().str.contains(query, na=False, regex=False)

    # `drop` returns a new frame, so the caller's data is never touched;
    # errors="ignore" lets the search work on frames without `Features`.
    nameContains = df_.loc[mask].drop(columns=["Features"], errors="ignore")

    if sortByScore:
        nameContains = nameContains.sort_values(by="Score", ascending=False)

    if isinstance(n, str) and n.lower() == "all":
        pd.set_option('display.max_rows', len(nameContains))
        return nameContains

    pd.set_option('display.max_rows', n)
    return nameContains[:n]


def getSimilar(df=anime, vector=None, query_index=None, n=50):
    """Return the rows of ``df`` closest to row ``query_index`` of ``vector``.

    A cosine-distance ``NearestNeighbors`` model is fitted on ``vector`` and
    queried with the row at ``query_index``. The query row itself is removed
    from the neighbours, so the result usually holds ``n - 1`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Table the neighbour positions are looked up in with ``iloc``. Its rows
        are assumed to be aligned with the rows of ``vector`` — TODO confirm.
        The default is the ``anime`` frame as bound when this function was
        defined.
    vector : numpy.ndarray or scipy sparse matrix
        Feature matrix, one row per item. Required.
    query_index : int (or length-1 index)
        Row position of the item to find neighbours for. Required.
    n : int, default 50
        Number of neighbours requested from the model, query row included.

    Returns
    -------
    pandas.DataFrame
        Neighbour rows in the order returned by ``kneighbors``, with the
        original column dtypes preserved.

    Raises
    ------
    ValueError
        If ``vector`` or ``query_index`` is not supplied.

    Notes
    -----
    Side effect: sets the global pandas option ``display.max_rows`` to the
    number of returned rows.
    """
    if vector is None or query_index is None:
        raise ValueError("getSimilar requires both `vector` and `query_index`")

    # `np.float` was removed in NumPy 1.24; `np.float64` is the dtype it aliased.
    model_knn = NearestNeighbors(metric='cosine', n_neighbors=n)
    model_knn.fit(csr_matrix(vector.astype(np.float64)))

    _, indices = model_knn.kneighbors(
        vector[query_index, :].reshape(1, -1), n_neighbors=n)

    # Accept either a scalar position or a length-1 index for the query row.
    query_positions = set(np.atleast_1d(np.asarray(query_index)).ravel().tolist())
    neighbour_positions = [
        int(position) for position in indices.ravel()
        if int(position) not in query_positions
    ]

    # A single positional lookup keeps column dtypes and still yields a frame
    # with the right columns when no neighbours are left.
    results_df = df.iloc[neighbour_positions]
    pd.set_option('display.max_rows', len(results_df))
    return results_df


def mostSimilarByIndex(query_index, n=50, showAll=True):
    """Combine metadata-based and synopsis-based neighbours of one anime.

    ``getSimilar`` is run once on ``anime_metadata`` and once on
    ``animeFeaturesTfidf``; the two result sets are stacked, de-duplicated,
    sorted by ``Score`` (highest first) and returned without ``Features``.

    Parameters
    ----------
    query_index : int
        Row position of the query anime in ``anime`` and in both feature
        matrices.
    n : int, default 50
        Number of neighbours requested from each of the two searches.
    showAll : bool, default True
        When true, ``display.max_rows`` is set to the size of the result so
        the whole table renders; otherwise it is set to 10.

    Returns
    -------
    pandas.DataFrame
        The merged neighbour table.

    Notes
    -----
    Side effects: changes the global pandas option ``display.max_rows`` and
    prints a one-line summary of the result's shape.
    """
    Meta = getSimilar(
        df=anime, vector=anime_metadata, query_index=query_index, n=n)
    Synop = getSimilar(
        df=anime, vector=animeFeaturesTfidf, query_index=query_index, n=n)

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # frames in the same order (synopsis matches first, then metadata matches).
    MetaSynop = (
        pd.concat([Synop, Meta])
        .drop_duplicates()
        .sort_values(by="Score", ascending=False)
        .drop(columns=['Features'])
    )

    if showAll:
        pd.set_option('display.max_rows', len(MetaSynop))
        label = "total dataframe"
    else:
        pd.set_option('display.max_rows', 10)
        label = "dataframe"
    print(
        f"Generated {label} with {MetaSynop.shape[0]} rows and {MetaSynop.shape[1]} columns")
    return MetaSynop

def mostSimilarByName(name, n=50, showAll=True):
    """Find the first anime whose name contains ``name`` and list its neighbours.

    The lookup uses ``animeSearch`` without score sorting, so the match is the
    first qualifying row of ``anime``. The neighbour table is produced by
    ``mostSimilarByIndex`` so both entry points share one implementation.

    Parameters
    ----------
    name : str
        Case-insensitive text to look for in the anime names.
    n : int, default 50
        Number of neighbours requested from each similarity search.
    showAll : bool, default True
        Forwarded to ``mostSimilarByIndex``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The one-row frame describing the matched anime, and the merged
        neighbour table.

    Raises
    ------
    ValueError
        If no anime name contains ``name``.
    """
    query = animeSearch(nameQuery=name, df_=anime, n=1, sortByScore=False)
    if query.empty:
        # Fail early with a clear message rather than letting an empty index
        # reach the nearest-neighbour query.
        raise ValueError(f"No anime name contains {name!r}")

    # The index label is used as a row position; this presumes `anime` keeps
    # the default 0..N-1 index it was loaded with — TODO confirm.
    query_index = int(query.index[0])
    MetaSynop = mostSimilarByIndex(query_index=query_index, n=n, showAll=showAll)
    return query, MetaSynop

In [6]:
# Highest-scored title whose name contains "jojo" (the match ignores case).
animeResults = animeSearch(df_=anime, nameQuery="Jojo", n=1, sortByScore=True)
animeResults

Unnamed: 0,MAL_ID,Name,Score,Genres,Synopsis,Type,Episodes,Premiered,Studios,Source,Rating,Ranked,Popularity,Favorites
13401,37991,JoJo no Kimyou na Bouken Part 5: Ougon no Kaze,8.61,"Action, Adventure, Shounen","In the coastal city of Naples, corruption is t...",TV,39,Fall 2018,David Production,Manga,R - 17+ (violence & profanity),63.0,211,21884


In [7]:
# testa: the matched anime row; testb: its merged metadata + synopsis neighbours.
testa, testb = mostSimilarByName(name='Jojo', n=20, showAll=True)

Generated total dataframe with 36 rows and 15 columns


In [9]:
# Same neighbour search, addressed by row position in `anime` instead of by name.
mostSimilarByIndex(10213, n=20, showAll=True)

Generated total dataframe with 33 rows and 14 columns


Unnamed: 0,MAL_ID,Name,Score,Genres,Synopsis,Type,Episodes,Premiered,Studios,Source,Rating,Ranked,Popularity,Favorites
11021,33902,Suntory Minami Alps no Tennensui,6.0,"Drama, School",Collaboration commercials with Suntory and Kim...,Special,3,Unknown,Unknown,Original,G - All Ages,8238.0,4605,11659
15772,42785,Hadakanbou,0.0,"Kids, Music",usic video for the song Hadakanbou by Rumi Shi...,Music,1,Unknown,CoMix Wave Films,Original,G - All Ages,14223.0,17360,54
13906,38826,Tenki no Ko,8.41,"Slice of Life, Drama, Romance, Fantasy",Tokyo is currently experiencing rain showers t...,Movie,1,Unknown,CoMix Wave Films,Original,PG-13 - Teens 13 or older,150.0,259,475388
13015,37396,Shikioriori,7.17,"Drama, Romance, Slice of Life","The rigorous city life of China, while bustlin...",Movie,3,Unknown,CoMix Wave Films,Original,PG-13 - Teens 13 or older,2879.0,1436,97673
1470,1689,Byousoku 5 Centimeter,7.73,"Drama, Romance, Slice of Life",hat happens when two people love each other bu...,Movie,3,Unknown,CoMix Wave Films,Original,PG-13 - Teens 13 or older,949.0,136,695375
8284,25143,"Kono Danshi, Sekka ni Nayandemasu.",6.97,"Drama, School, Shounen Ai","umu Tamari suffers from a condition known as ""...",OVA,1,Unknown,CoMix Wave Films,Original,PG-13 - Teens 13 or older,3756.0,3716,19508
6086,13283,One Off,6.44,Slice of Life,slice of life of high school girls in the earl...,OVA,4,Unknown,TYO Animations,Original,PG-13 - Teens 13 or older,6205.0,4327,13646
7808,22839,Cross Road,7.4,"Slice of Life, School","s college entrance examinations draw near, two...",Special,1,Unknown,CoMix Wave Films,Original,G - All Ages,1889.0,2097,56490
5127,9760,Hoshi wo Ou Kodomo,7.58,"Adventure, Romance, Fantasy",If you could turn all your memories into a son...,Movie,1,Unknown,CoMix Wave Films,Original,PG-13 - Teens 13 or older,1333.0,909,164081
6688,17121,Dareka no Manazashi,7.39,"Drama, Slice of Life","In near-future Japan, Aya Okamura lives alone ...",Movie,1,Unknown,CoMix Wave Films,Original,G - All Ages,1944.0,2070,57283
