In [1]:
import pandas as pd
import numpy as np
import mysql.connector as mysql

from mysql.connector import Error
from getpass import getpass
from scipy.sparse import csr_matrix, load_npz
from sklearn.neighbors import NearestNeighbors

def connections(host='localhost', user='root', password=getpass()):
    try:
        print("Success connect to MySQL!")
        return mysql.connect(host=host, user=user, password=password)
    except:
        print("Error connecting to MySQL")

def connect_to_db(conn, db_name):
    try:
        if conn.is_connected():
            cursor = conn.cursor()
            cursor.execute(f"create database if not exists {db_name};")
            print(f"Database: {db_name} is created")
            cursor.execute(f"use {db_name};")
            print(f"You're connected to database: {db_name}")
            return
    except Error as e:
        print("Error while connecting to MySQL", e)
conn = connections()

Success connect to MySQL!


In [6]:
anime = pd.read_sql(sql="select * from animedb.anime;", con=conn, index_col='animeIndex')
anime.index = anime.index - 1
anime_metadata = np.load("../data/binary/anime_metadata.npy")
animeFeaturesTfidf = load_npz("../data/binary/animeFeaturesTfidf.npz")

True

In [19]:
def animeSearch(df_, nameQuery, n=5, sortByScore=True):
    df = df_.copy()
    nameQuery = nameQuery.lower()

    nameContains = df.loc[df.animeNameLower.str.contains(nameQuery, na=False)].drop(columns=['animeFeatures',
                                                                                         'animeNameLower'])

    if sortByScore:
        nameContains = nameContains.sort_values(by="animeScore", ascending=False)

    if n in ['all', 'All']:
        pd.set_option('display.max_rows', len(nameContains))
    else:
        pd.set_option('display.max_rows', n)
        nameContains = nameContains[:n]
        return nameContains
    return nameContains


def getSimilar(df=anime, vector=None, query_index=None, n=50):
    model_knn = NearestNeighbors(metric='cosine', n_neighbors=n)
    model_knn.fit(csr_matrix(vector.astype(np.float)))

    distances, indices = model_knn.kneighbors(
        vector[query_index, :].reshape(1, -1), n_neighbors=n)
    result, score = [], []
    for i in range(0, len(distances.flatten())):
        index = indices.flatten()[i]
        if index == query_index:
            continue
        result.append(df.iloc[index])
    results_df = pd.DataFrame(result)#.sort_values(by="Score", ascending=False)
    pd.set_option('display.max_rows', len(results_df))
    return results_df


def mostSimilarByIndex(mal_id, n=50, showAll=True):
    query = anime[anime.MAL_ID == mal_id]
    query_index = query.index
    vectorMeta = anime_metadata
    vectorSynop = animeFeaturesTfidf
    Meta = getSimilar(
        df=anime, vector=vectorMeta, query_index=query_index, n=n)
    Synop = getSimilar(
        df=anime, vector=vectorSynop, query_index=query_index, n=n)
    MetaSynop = Synop.append(Meta)
    MetaSynop = MetaSynop.drop_duplicates().sort_values(by="animeScore", ascending=False)
    MetaSynop = MetaSynop.drop(columns=['animeFeatures'])
    if showAll:
        pd.set_option('display.max_rows', len(MetaSynop))
        print(
            f"Generated total dataframe with {MetaSynop.shape[0]} rows and {MetaSynop.shape[1]} columns")
        return MetaSynop
    pd.set_option('display.max_rows', 10)
    print(
        f"Generated dataframe with {MetaSynop.shape[0]} rows and {MetaSynop.shape[1]} columns")
    return MetaSynop

def mostSimilarByName(name, n=50, showAll=True):
    query = animeSearch(nameQuery=name, df_=anime, n=1, sortByScore=False)
    query_index = query.index
    vectorMeta = anime_metadata
    vectorSynop = animeFeaturesTfidf
    Meta = getSimilar(
        df=anime, vector=vectorMeta, query_index=query_index, n=n)
    Synop = getSimilar(
        df=anime, vector=vectorSynop, query_index=query_index, n=n)
    MetaSynop = Synop.append(Meta)
    MetaSynop = MetaSynop.drop_duplicates().sort_values(by="animeScore", ascending=False)
    MetaSynop = MetaSynop.drop(columns=['animeFeatures'])
    if showAll:
        pd.set_option('display.max_rows', len(MetaSynop))
        print(
            f"Generated total dataframe with {MetaSynop.shape[0]} rows and {MetaSynop.shape[1]} columns")
        return query, MetaSynop
    pd.set_option('display.max_rows', 10)
    print(
        f"Generated dataframe with {MetaSynop.shape[0]} rows and {MetaSynop.shape[1]} columns")
    return query, MetaSynop

In [20]:
animeResults = animeSearch(anime, nameQuery="Jojo", n=1)
animeResults

Unnamed: 0_level_0,animeID,animeName,animeScore,animeGenres,animeSynopsis,animeType,animeEpisodes,animePremiered,animeStudios,animeSource,animeRating,animeRanked,animePopularity,animeFavorites
animeIndex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
13402,37991,JoJo no Kimyou na Bouken Part 5: Ougon no Kaze,8.61,"Action, Adventure, Shounen","In the coastal city of Naples, corruption is t...",TV,39,Fall 2018,David Production,Manga,R - 17+ (violence & profanity),63,211,21884


In [21]:
real, predicted = mostSimilarByName('Jojo', n=20)

Generated total dataframe with 37 rows and 15 columns


In [22]:
real

Unnamed: 0_level_0,animeID,animeName,animeScore,animeGenres,animeSynopsis,animeType,animeEpisodes,animePremiered,animeStudios,animeSource,animeRating,animeRanked,animePopularity,animeFavorites
animeIndex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
609,665,JoJo no Kimyou na Bouken: Adventure,7.18,"Adventure, Drama, Fantasy, Horror, Shounen, Su...","Kujo Jotaro is a normal, popular Japanese high...",OVA,7,Unknown,APPP,Manga,R+ - Mild Nudity,2855,2102,338


In [23]:
predicted

Unnamed: 0,animeID,animeName,animeScore,animeGenres,animeSynopsis,animeType,animeEpisodes,animePremiered,animeStudios,animeSource,animeRating,animeRanked,animePopularity,animeFavorites,animeNameLower
10021,31758,Kizumonogatari III: Reiketsu-hen,8.82,"Action, Mystery, Supernatural, Vampire",fter helping revive the legendary vampire Kiss...,Movie,1,Unknown,Shaft,Light novel,R - 17+ (violence & profanity),23,448,4898,kizumonogatari iii: reiketsu-hen
26,44,Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...,8.73,"Action, Historical, Drama, Romance, Martial Ar...",hen mankind's savagery surpasses his fear of d...,OVA,4,Unknown,Studio Deen,Manga,R - 17+ (violence & profanity),37,637,6111,rurouni kenshin: meiji kenkaku romantan - tsui...
10020,31757,Kizumonogatari II: Nekketsu-hen,8.61,"Action, Mystery, Supernatural, Vampire","No longer truly human, Koyomi Araragi decides ...",Movie,1,Unknown,Shaft,Light novel,R - 17+ (violence & profanity),64,460,1847,kizumonogatari ii: nekketsu-hen
13402,37991,JoJo no Kimyou na Bouken Part 5: Ougon no Kaze,8.61,"Action, Adventure, Shounen","In the coastal city of Naples, corruption is t...",TV,39,Fall 2018,David Production,Manga,R - 17+ (violence & profanity),63,211,21884,jojo no kimyou na bouken part 5: ougon no kaze
10092,31933,JoJo no Kimyou na Bouken Part 4: Diamond wa Ku...,8.51,"Action, Adventure, Comedy, Supernatural, Drama...","The year is 1999. Morioh, a normally quiet and...",TV,39,Spring 2016,David Production,Manga,R - 17+ (violence & profanity),100,163,23817,jojo no kimyou na bouken part 4: diamond wa ku...
8426,26055,JoJo no Kimyou na Bouken Part 3: Stardust Crus...,8.45,"Action, Adventure, Supernatural, Drama, Shounen",Joutarou Kuujou and his allies have finally ma...,TV,24,Winter 2015,David Production,Manga,R - 17+ (violence & profanity),122,159,13852,jojo no kimyou na bouken part 3: stardust crus...
2282,2685,Tsubasa: Tokyo Revelations,8.31,"Action, Adventure, Drama, Romance, Fantasy, Sh...",Continuing their journey from Record Country (...,OVA,3,Unknown,Production I.G,Manga,R - 17+ (violence & profanity),225,1628,899,tsubasa: tokyo revelations
2879,3702,Detroit Metal City,8.14,"Music, Comedy, Seinen","Dominating the world of indie music, Detroit M...",OVA,12,Unknown,Studio 4°C,Manga,R - 17+ (violence & profanity),372,847,3100,detroit metal city
7397,20899,JoJo no Kimyou na Bouken Part 3: Stardust Crus...,8.12,"Action, Adventure, Supernatural, Drama, Shounen",Years after an ancient evil was salvaged from ...,TV,24,Spring 2014,David Production,Manga,R - 17+ (violence & profanity),390,136,14056,jojo no kimyou na bouken part 3: stardust crus...
7917,23317,Kuroshitsuji: Book of Murder,8.12,"Action, Mystery, Comedy, Historical, Demons, S...","he behest of the Queen, Earl Ciel Phantomhive ...",OVA,2,Unknown,A-1 Pictures,Manga,R - 17+ (violence & profanity),395,822,1217,kuroshitsuji: book of murder


In [17]:
mostSimilarByIndex(query_index=10213, n=20)

TypeError: mostSimilarByIndex() got an unexpected keyword argument 'query_index'