In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer, MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from sklearn.decomposition import PCA

pd.set_option("max_colwidth", None)

In [12]:
from jikanpy import Jikan
jikan = Jikan()

mushishi = jikan.anime(457)
mushishi_with_eps = jikan.anime(457, extension='episodes')

search_result = jikan.search('anime', 'Mushishi', page=2)

winter_2018_anime = jikan.season(year=2018, season='winter')

archive = jikan.season_archive()

In [26]:
print(jikan.user(username='Xinil', request='animelist')['anime'][0]['score'])

9


In [2]:
usecols = ["MAL_ID", "Name", "Score", "Genres", "Type", "Episodes", "Premiered",
           "Studios", "Source", "Rating", "Members"]

anime_data=pd.read_csv('anime.csv',usecols=usecols)

anime_data.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Members
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,Spring 1998,Sunrise,Original,R - 17+ (violence & profanity),1251960
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Movie,1,Unknown,Bones,Original,R - 17+ (violence & profanity),273145
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",TV,26,Spring 1998,Madhouse,Manga,PG-13 - Teens 13 or older,558913
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, Magic",TV,26,Summer 2002,Sunrise,Original,PG-13 - Teens 13 or older,94683
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",TV,52,Fall 2004,Toei Animation,Manga,PG - Children,13224


In [3]:
def process_multilabel(series):
    series = series.split(",")
    if "Unknown" in series:
        series.remove("Unknown")
    return series

anime_data["Genres"] = anime_data["Genres"].map(process_multilabel)
anime_data["Studios"] = anime_data["Studios"].map(process_multilabel)
anime_data["Score"] = anime_data["Score"].replace("Unknown", 0).astype(float)
anime_data["Episodes"] = anime_data["Episodes"].replace("Unknown", 0).astype(int)

In [4]:
def preprocessing_category(df, column, is_multilabel=False):
    # Binarise labels
    lb = LabelBinarizer()
    if is_multilabel:
        lb = MultiLabelBinarizer()
        
    expandedLabelData = lb.fit_transform(df[column])
    labelClasses = lb.classes_

    # Create a pandas.DataFrame from our output
    category_df = pd.DataFrame(expandedLabelData, columns=labelClasses)
    del df[column]
    return pd.concat([df, category_df], axis=1)

anime_metadata = anime_data.copy()
anime_metadata = preprocessing_category(anime_metadata, "Type")
anime_metadata = preprocessing_category(anime_metadata, "Premiered")
anime_metadata = preprocessing_category(anime_metadata, "Studios", is_multilabel=True)
anime_metadata = preprocessing_category(anime_metadata, "Source")
anime_metadata = preprocessing_category(anime_metadata, "Rating")

Genders = anime_metadata["Genres"]
ID_NAME = anime_metadata[["MAL_ID", "Name"]]

del anime_metadata["Genres"]
del anime_metadata["MAL_ID"]
del anime_metadata["Name"]
del anime_metadata["Unknown"]

anime_metadata[["Score", "Episodes", "Members"]] = MinMaxScaler().fit_transform(anime_metadata[["Score", "Episodes", "Members"]])
anime_metadata = anime_metadata.values

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfv = TfidfVectorizer(min_df=3,  max_features=None, 
            strip_accents='unicode', analyzer='word',token_pattern=r'\w{1,}',
            ngram_range=(1, 3),
            stop_words = 'english')

# Filling NaNs with empty string
genres_original = anime_data['Genres'].fillna('').astype(str)
genres_vector_tf_idf = tfv.fit_transform(genres_original)

genres_vector_one_hot = preprocessing_category(pd.DataFrame(Genders), "Genres", True).values

In [6]:
def get_recommended(vector, query_index, n_neighbors=10):
    model_knn = NearestNeighbors(metric='cosine', n_neighbors=n_neighbors)
    model_knn.fit(csr_matrix(vector))

    distances, indices = model_knn.kneighbors(vector[query_index,:].reshape(1, -1), n_neighbors = n_neighbors)
    result = []
    for i in range(0, len(distances.flatten())):
        index = indices.flatten()[i]
        if index == query_index:
            continue
        result.append(anime_data.iloc[index])
        
    return pd.DataFrame(result)

In [7]:
query_index1 = ID_NAME[ID_NAME.MAL_ID == 1].index[0]
anime_data.iloc[[query_index1]]

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Members
0,1,Cowboy Bebop,8.78,"[Action, Adventure, Comedy, Drama, Sci-Fi, Space]",TV,26,Spring 1998,[Sunrise],Original,R - 17+ (violence & profanity),1251960


In [8]:
get_recommended(anime_metadata, query_index1, 5)

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Members
2656,2904,Code Geass: Hangyaku no Lelouch R2,8.91,"[Action, Military, Sci-Fi, Super Power, Drama, Mecha]",TV,25,Spring 2008,[Sunrise],Original,R - 17+ (violence & profanity),1268320
1431,1575,Code Geass: Hangyaku no Lelouch,8.72,"[Action, Military, Sci-Fi, Super Power, Drama, Mecha, School]",TV,25,Fall 2006,[Sunrise],Original,R - 17+ (violence & profanity),1583882
2368,2581,Mobile Suit Gundam 00,8.14,"[Action, Military, Sci-Fi, Space, Drama, Mecha]",TV,25,Fall 2007,[Sunrise],Original,R - 17+ (violence & profanity),183997
10880,31251,Mobile Suit Gundam: Iron-Blooded Orphans,8.02,"[Action, Drama, Mecha, Sci-Fi, Space]",TV,25,Fall 2015,[Sunrise],Original,R - 17+ (violence & profanity),165393


In [9]:
query_index2 = ID_NAME[ID_NAME.MAL_ID == 7].index[0]
anime_data.iloc[[query_index2]]

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Members
3,7,Witch Hunter Robin,7.27,"[Action, Mystery, Police, Supernatural, Drama, Magic]",TV,26,Summer 2002,[Sunrise],Original,PG-13 - Teens 13 or older,94683


In [10]:
get_recommended(anime_metadata, query_index2, 5)

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Members
663,721,Princess Tutu,8.15,"[Comedy, Drama, Fantasy, Magic, Mystery, Romance]",TV,38,Summer 2002,[Hal Film Maker],Original,PG-13 - Teens 13 or older,129950
5988,9941,Tiger & Bunny,7.94,"[Action, Mystery, Comedy, Super Power]",TV,25,Spring 2011,[Sunrise],Original,PG-13 - Teens 13 or older,182872
64,85,Mobile Suit Zeta Gundam,7.92,"[Military, Sci-Fi, Space, Drama, Romance, Mecha]",TV,50,Spring 1985,[Sunrise],Original,PG-13 - Teens 13 or older,53822
59,80,Mobile Suit Gundam,7.78,"[Action, Military, Sci-Fi, Space, Mecha]",TV,43,Spring 1979,[Sunrise],Original,PG-13 - Teens 13 or older,97216


In [11]:
query_index3 = ID_NAME[ID_NAME.MAL_ID == 100].index[0]
anime_data.iloc[[query_index3]]

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Members
79,100,Shin Shirayuki-hime Densetsu Prétear,7.2,"[Comedy, Drama, Fantasy, Magic, Romance, Shoujo, Super Power]",TV,13,Spring 2001,[Hal Film Maker],Manga,PG-13 - Teens 13 or older,53508


In [12]:
get_recommended(anime_metadata, query_index3, 5)

Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Members
3793,4722,Skip Beat!,8.13,"[Comedy, Drama, Romance, Shoujo]",TV,25,Fall 2008,[Hal Film Maker],Manga,PG-13 - Teens 13 or older,219475
229,253,Jungle wa Itsumo Hare nochi Guu,7.84,"[Slice of Life, Comedy, School]",TV,26,Spring 2001,[Shin-Ei Animation],Manga,PG-13 - Teens 13 or older,29653
1568,1728,Super GALS! Kotobuki Ran,7.42,"[Comedy, Slice of Life, Shoujo]",TV,52,Spring 2001,[Studio Pierrot],Manga,PG-13 - Teens 13 or older,15854
1834,2014,Taiho Shichau zo: Second Season,7.41,"[Action, Comedy, Police, Shounen]",TV,26,Spring 2001,[Studio Deen],Manga,PG-13 - Teens 13 or older,8412


In [19]:
query_index3 = ID_NAME[ID_NAME.Name=='Boku no Pico'].index[0]
print(anime_data.iloc[[query_index3]])
get_recommended(anime_metadata, query_index3, 5)

      MAL_ID          Name  Score           Genres Type  Episodes Premiered  \
1490    1639  Boku no Pico   4.37  [Hentai,  Yaoi]  OVA         1   Unknown   

                     Studios    Source       Rating  Members  
1490  [Sugar Boy,  Blue Cat]  Original  Rx - Hentai   137560  


Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes,Premiered,Studios,Source,Rating,Members
4113,5391,Pico to Chico,4.49,"[Hentai, Yaoi]",OVA,1,Unknown,"[Sugar Boy, Blue Cat]",Original,Rx - Hentai,58893
3862,4866,Pico x CoCo x Chico,4.46,"[Hentai, Yaoi]",OVA,1,Unknown,"[Sugar Boy, Blue Cat]",Original,Rx - Hentai,55155
2728,2990,Mini Skirt Gakuen,4.54,[Hentai],OVA,1,Unknown,[],Original,Rx - Hentai,1270
3114,3541,Kouin Tenshi: Haitoku no Lycéenne,4.91,[Hentai],OVA,1,Unknown,[],Original,Rx - Hentai,1106
