In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the synopsis dataset; the source CSV spells the column 'sypnopsis', so rename it.
animeSynopsis = pd.read_csv("data/anime_with_synopsis.csv").rename(columns={'sypnopsis': 'Synopsis'})
# Drop rows with any missing value and renumber the rows 0..n-1, so that row labels
# and row positions coincide for the positional lookups done later in the notebook.
animeSynopsis = animeSynopsis.dropna().reset_index(drop=True)
print(animeSynopsis.shape)
print(animeSynopsis.columns)
# Sanity check: True if ANY column still contains a missing value (expected: False).
# `.any().all()` would only be True when EVERY column had a missing value.
print(animeSynopsis.isna().any().any())
animeSynopsis.head()

(16206, 5)
Index(['MAL_ID', 'Name', 'Score', 'Genres', 'Synopsis'], dtype='object')
False


Unnamed: 0,MAL_ID,Name,Score,Genres,Synopsis
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...


In [3]:
# Build the TF-IDF feature matrix from the synopsis text.
# Each synopsis (document) becomes one row; each column is a vocabulary term and
# each value is that term's TF-IDF weight in the document (a float, not an integer id).

matVet = TfidfVectorizer(stop_words='english', analyzer='word') # drop common English words ('english' is the only built-in stop-word list) and build features from word tokens
tfidMatrix = matVet.fit_transform(animeSynopsis["Synopsis"])

# Pairwise cosine similarity between all synopsis vectors: cosSim[i] holds the
# similarity of document i to every document (including itself).
# NOTE(review): the result is a dense n_docs x n_docs array, so memory grows quadratically with the number of rows.

cosSim = cosine_similarity(tfidMatrix)

In [64]:
def animeSearch(df, nameContains, case=True, regex=True):
    """Return the rows of ``df`` whose ``Name`` contains ``nameContains``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string ``Name`` column.
    nameContains : str
        Text (or regular expression, see ``regex``) to look for in ``Name``.
    case : bool, default True
        If False, match case-insensitively.
    regex : bool, default True
        If True, ``nameContains`` is treated as a regular expression, so
        characters such as ``.`` or ``(`` are pattern syntax. Pass False to
        search for titles containing those characters literally.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels. Rows whose
        ``Name`` is missing never match.
    """
    matches = df["Name"].str.contains(
        nameContains, case=case, regex=regex, na=False)
    return df.loc[matches]


def getHighestScoreAnime(df, n=5, query=None):
    """Return the ``n`` highest-scored rows of ``df``, optionally filtered.

    Rows whose ``Score`` is the string ``'Unknown'`` are discarded and the
    remaining scores are converted to ``float64`` before ranking. The input
    frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Score`` column.
    n : int, default 5
        Maximum number of rows to return.
    query : str, optional
        Expression passed to ``DataFrame.query`` (e.g. ``"Score > 9"``).
        ``None`` or an empty string means no extra filtering.

    Returns
    -------
    pandas.DataFrame
        At most ``n`` rows, sorted by ``Score`` in descending order.

    Notes
    -----
    Side effects: sets the global pandas option ``display.max_rows`` to ``n``
    and prints the shape of the result.
    """
    # Global display setting: affects how every later DataFrame is rendered.
    pd.set_option('display.max_rows', n)
    ranked = df[df.Score != 'Unknown'].copy()
    ranked["Score"] = ranked["Score"].astype("float64")
    ranked = ranked.sort_values(by="Score", ascending=False)
    # Truthiness check: an empty query string would make DataFrame.query raise.
    if query:
        ranked = ranked.query(query)
    ranked = ranked[:n]
    print(
        f"Generated {ranked.shape[0]} Rows, and {ranked.shape[1]} Columns")
    return ranked


def filterByScore(df, query=None, showAll=False, sort=True, ascending=False):
    """Filter ``df`` by a query on its numeric ``Score``.

    Rows whose ``Score`` is the string ``'Unknown'`` are discarded and the
    remaining scores are converted to ``float64`` before the query runs. The
    input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Score`` column.
    query : str, optional
        Expression passed to ``DataFrame.query`` (e.g. ``"Score > 9"``).
        ``None`` or an empty string keeps every row with a known score.
    showAll : bool, default False
        If True, raise the global pandas option ``display.max_rows`` to the
        size of the result so it is rendered without truncation.
    sort : bool, default True
        Whether to sort the result by ``Score``.
    ascending : bool, default False
        Sort direction used when ``sort`` is True.

    Returns
    -------
    pandas.DataFrame
        The filtered (and optionally sorted) rows.

    Notes
    -----
    Side effects: prints the shape of the result; with ``showAll=True`` it
    also changes the global ``display.max_rows`` option.
    """
    scored = df[df.Score != 'Unknown'].copy()
    scored["Score"] = scored["Score"].astype("float64")
    # The default query=None must not reach DataFrame.query, which rejects it.
    if query:
        scored = scored.query(query)
    if sort:
        scored = scored.sort_values(by="Score", ascending=ascending)
    if showAll:
        pd.set_option('display.max_rows', len(scored))
    print(f"Generated {scored.shape[0]} Rows, and {scored.shape[1]} Columns")
    return scored


def filterByGenre(df, genreContains, query=None, sortByScore=False, ascending=False, showAll=False):
    """Return the rows of ``df`` whose ``Genres`` contains ``genreContains``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Genres`` and ``Score`` columns. It is not modified.
    genreContains : str
        Pattern looked up in ``Genres`` with ``Series.str.contains``; rows
        with a missing ``Genres`` never match.
    query : str, optional
        Expression passed to ``DataFrame.query`` after the genre filter
        (e.g. ``"Score > 8"``).
    sortByScore : bool, default False
        If True, sort the result by ``Score``.
    ascending : bool, default False
        Sort direction used when ``sortByScore`` is True.
    showAll : bool, default False
        If True, raise the global pandas option ``display.max_rows`` to the
        size of the result so it is rendered without truncation.

    Returns
    -------
    pandas.DataFrame
        The matching rows. When ``query`` or ``sortByScore`` is used, rows
        whose ``Score`` is ``'Unknown'`` are dropped and ``Score`` is
        ``float64``; otherwise ``Score`` is returned as stored in ``df``.

    Notes
    -----
    Side effects: prints a progress line and the shape of the result; with
    ``showAll=True`` it also changes the global ``display.max_rows`` option.
    """
    matched = df.loc[df.Genres.str.contains(genreContains, na=False)].copy()
    if query or sortByScore:
        # Both features need numeric scores. Convert once, on an explicit copy,
        # so the column assignment never targets a slice of another frame.
        matched = matched[matched.Score != 'Unknown'].copy()
        matched["Score"] = matched["Score"].astype("float64")
    if query:
        matched = matched.query(query)
    if sortByScore:
        matched = matched.sort_values(by="Score", ascending=ascending)
    if showAll:
        pd.set_option('display.max_rows', len(matched))
    print(f"Filterting {genreContains} ...")
    print(
        f"Generated {matched.shape[0]} Rows, and {matched.shape[1]} Columns")
    return matched

In [16]:
def getSimilarAnime(df, index, model=None, n=10):
    """Return the ``n`` rows of ``df`` most similar to the row at ``index``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``MAL_ID`` column whose row order matches ``model``.
    index : int
        Row position (in ``df`` and in ``model``) of the item to compare with.
    model : 2-D array-like, optional
        Pairwise similarity matrix; ``model[index]`` must give the similarity
        of row ``index`` to every row. Defaults to the notebook-level
        ``cosSim``, looked up when the function is called so a recomputed
        matrix is picked up without redefining this function.
    n : int, default 10
        Maximum number of similar rows to return.

    Returns
    -------
    pandas.DataFrame
        The matching rows indexed by ``MAL_ID`` with an added ``Similarity``
        column, sorted from most to least similar. The query row itself and
        rows with a similarity of zero or less are excluded.
    """
    if model is None:
        model = cosSim
    scores = pd.Series(model[index], name="Similarity")
    # Remove the query row explicitly rather than assuming it sorts first:
    # an identical synopsis ties with it at 1.0, and an all-stop-word synopsis
    # has self-similarity 0.
    scores = scores.drop(scores.index[index])
    scores = scores[scores > 0]
    # Stable sort keeps the ordering of tied scores deterministic.
    scores = scores.sort_values(ascending=False, kind="mergesort").head(n)
    # Attach scores by position so the result is correct even when df does
    # not carry a default 0..n-1 index.
    similar = df.iloc[scores.index].copy()
    similar["Similarity"] = scores.to_numpy()
    return similar.set_index('MAL_ID')

In [65]:
# Look up titles containing "Gintama"; the row labels in the leftmost column of the
# result are what getSimilarAnime expects as its `index` argument.
animeResults = animeSearch(animeSynopsis, nameContains="Gintama")
animeResults

Unnamed: 0,MAL_ID,Name,Score,Genres,Synopsis
811,918,Gintama,8.96,"Action, Comedy, Historical, Parody, Samurai, S...","The Amanto, aliens from outer space, have inva..."
2470,2951,Gintama: Nanigoto mo Saiyo ga Kanjin nano de T...,8.08,"Action, Sci-Fi, Comedy, Historical, Parody, Sa...",This special is a set of short comedy stories ...
4161,6945,Gintama: Shiroyasha Koutan,8.34,"Action, Sci-Fi, Comedy, Historical, Parody","ccording to Aniplex, a 10min short anime of Gi..."
4333,7472,Gintama Movie 1: Shinyaku Benizakura-hen,8.52,"Action, Sci-Fi, Comedy, Historical, Parody, Sa...","Gintoki and his Yorozuya friends (or rather, e..."
...,...,...,...,...,...
13074,37491,Gintama.: Shirogane no Tamashii-hen - Kouhan-sen,8.86,"Action, Sci-Fi, Comedy, Historical, Parody, Sa...",Second Season of the final arc of Gintama .
14361,39486,Gintama: The Final,8.88,"Action, Sci-Fi, Comedy, Historical, Parody, Dr...",New Gintama movie.
14797,40323,Gintama: Monster Strike-hen,7.35,"Action, Comedy, Parody, Samurai",Collaboration anime between Gintama and Monste...
16004,44087,Gintama: The Semi-Final,8.58,"Action, Comedy, Historical, Parody, Samurai, S...",New special that will tie into the third new G...


In [66]:
# 811 is the row label of "Gintama" (MAL_ID 918) in the search result above; request its 20 closest synopses.
getSimilarAnime(df=animeSynopsis, index=811, n=20)

Unnamed: 0_level_0,Name,Score,Genres,Synopsis,Similarity
MAL_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
28977,Gintama°,9.1,"Action, Comedy, Historical, Parody, Samurai, S...","Gintoki, Shinpachi, and Kagura return as the f...",0.190739
7472,Gintama Movie 1: Shinyaku Benizakura-hen,8.52,"Action, Sci-Fi, Comedy, Historical, Parody, Sa...","Gintoki and his Yorozuya friends (or rather, e...",0.187055
15417,Gintama': Enchousen,9.04,"Action, Comedy, Historical, Parody, Samurai, S...","hile Gintoki Sakata was away, the Yorozuya fou...",0.135824
35843,Gintama.: Porori-hen,8.53,"Action, Comedy, Historical, Parody, Samurai, S...","Following the grim events of Iga, Kokujou Isla...",0.132422
...,...,...,...,...,...
10213,Maji de Watashi ni Koi Shinasai!,6.77,"Harem, Comedy, Super Power, Romance, Ecchi, Ma...",The samurai are a very important part of Japan...,0.091945
19261,Gintama x Mameshiba,6.73,"Comedy, Parody",d as a commercial to promote the Gintama: Kank...,0.091263
9335,Megane na Kanojo,6.68,"Comedy, Romance, School, Shounen","For most people, wearing glasses is a necessit...",0.089676
282,Angel Heart,7.3,"Action, Mystery, Drama, Romance, Seinen","oung Taiwanese assassin codenamed ""Glass Heart...",0.088638


In [63]:
# All titles whose Genres contains "Historical", best score first (ascending defaults to False).
filterByGenre(df=animeSynopsis, genreContains='Historical', sortByScore=True)

Filterting Historical ...
Generated 713 Rows, and 5 Columns


Unnamed: 0,MAL_ID,Name,Score,Genres,Synopsis
8879,28977,Gintama°,9.10,"Action, Comedy, Historical, Parody, Samurai, S...","Gintoki, Shinpachi, and Kagura return as the f..."
5234,9969,Gintama',9.08,"Action, Sci-Fi, Comedy, Historical, Parody, Sa...","fter a one-year hiatus, Shinpachi Shimura retu..."
6377,15417,Gintama': Enchousen,9.04,"Action, Comedy, Historical, Parody, Samurai, S...","hile Gintoki Sakata was away, the Yorozuya fou..."
11109,34096,Gintama.,8.99,"Action, Comedy, Historical, Parody, Samurai, S...",fter joining the resistance against the bakufu...
...,...,...,...,...,...
1265,1451,Shuranosuke Zanmaken: Shikamamon no Otoko,4.74,"Action, Supernatural, Historical, Thriller",Shurannosuke Sakaki is a masterless samurai wi...
11263,34411,Taishou Mebiusline: Chicchai-san,4.73,"Historical, Supernatural",Hiiragi Kyouichirou is a boy who travels from ...
4340,7485,Urashima Tarou,4.73,Historical,Urashima Taro is based on a well-known Japanes...
977,1102,Urda,4.59,"Action, Historical, Military, Sci-Fi",This fictional story took place in Europe arou...


In [52]:
# Titles scored above 9; showAll=True sets pandas' display.max_rows to the result size so no row is elided.
filterByScore(df=animeSynopsis, query="Score > 9", showAll=True)

Generated 9 Rows, and 5 Columns


Unnamed: 0,MAL_ID,Name,Score,Genres,Synopsis
3446,5114,Fullmetal Alchemist: Brotherhood,9.19,"Action, Military, Adventure, Comedy, Drama, Ma...","""In order for something to be obtained, someth..."
14644,40028,Shingeki no Kyojin: The Final Season,9.17,"Action, Military, Mystery, Super Power, Drama,...",Gabi Braun and Falco Grice have been training ...
4953,9253,Steins;Gate,9.11,"Thriller, Sci-Fi",The self-proclaimed mad scientist Rintarou Oka...
5660,11061,Hunter x Hunter (2011),9.1,"Action, Adventure, Fantasy, Shounen, Super Power",Hunter x Hunter is set in a world where Hunter...
8879,28977,Gintama°,9.1,"Action, Comedy, Historical, Parody, Samurai, S...","Gintoki, Shinpachi, and Kagura return as the f..."
13717,38524,Shingeki no Kyojin Season 3 Part 2,9.1,"Action, Drama, Fantasy, Military, Mystery, Sho...",Seeking to restore humanity's diminishing hope...
5234,9969,Gintama',9.08,"Action, Sci-Fi, Comedy, Historical, Parody, Sa...","fter a one-year hiatus, Shinpachi Shimura retu..."
723,820,Ginga Eiyuu Densetsu,9.07,"Military, Sci-Fi, Space, Drama",The 150-year-long stalemate between the two in...
6377,15417,Gintama': Enchousen,9.04,"Action, Comedy, Historical, Parody, Samurai, S...","hile Gintoki Sakata was away, the Yorozuya fou..."


In [45]:
# Up to 15 top-scored titles with Score > 9 (fewer are returned if fewer match); also sets display.max_rows to 15.
getHighestScoreAnime(df=animeSynopsis, n=15, query="Score > 9")

Generated 9 Rows, and 5 Columns


Unnamed: 0,MAL_ID,Name,Score,Genres,Synopsis
3446,5114,Fullmetal Alchemist: Brotherhood,9.19,"Action, Military, Adventure, Comedy, Drama, Ma...","""In order for something to be obtained, someth..."
14644,40028,Shingeki no Kyojin: The Final Season,9.17,"Action, Military, Mystery, Super Power, Drama,...",Gabi Braun and Falco Grice have been training ...
4953,9253,Steins;Gate,9.11,"Thriller, Sci-Fi",The self-proclaimed mad scientist Rintarou Oka...
13717,38524,Shingeki no Kyojin Season 3 Part 2,9.1,"Action, Drama, Fantasy, Military, Mystery, Sho...",Seeking to restore humanity's diminishing hope...
8879,28977,Gintama°,9.1,"Action, Comedy, Historical, Parody, Samurai, S...","Gintoki, Shinpachi, and Kagura return as the f..."
5660,11061,Hunter x Hunter (2011),9.1,"Action, Adventure, Fantasy, Shounen, Super Power",Hunter x Hunter is set in a world where Hunter...
5234,9969,Gintama',9.08,"Action, Sci-Fi, Comedy, Historical, Parody, Sa...","fter a one-year hiatus, Shinpachi Shimura retu..."
723,820,Ginga Eiyuu Densetsu,9.07,"Military, Sci-Fi, Space, Drama",The 150-year-long stalemate between the two in...
6377,15417,Gintama': Enchousen,9.04,"Action, Comedy, Historical, Parody, Samurai, S...","hile Gintoki Sakata was away, the Yorozuya fou..."
