In [1]:
import pandas as pd
from fuzzywuzzy import fuzz
from sklearn.neighbors import NearestNeighbors
import numpy as np
import time
anime = pd.read_csv('anime.csv')
anime = anime.dropna()
print(anime.shape)
anime.head()

(12017, 7)


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [2]:
genre_vector = anime.genre.values
genre = []
for i in range(genre_vector.shape[0]):
    try:
        genre.append('|'.join(i.strip()for i in genre_vector[i].split(',')))
    except:
        genre.append('')
anime.genre=genre

In [3]:
list_genres=list(filter(None, list(set('|'.join(anime.genre.values.tolist()).split('|')))))
list_genres

['School',
 'Super Power',
 'Yuri',
 'Parody',
 'Game',
 'Samurai',
 'Music',
 'Fantasy',
 'Ecchi',
 'Comedy',
 'Slice of Life',
 'Space',
 'Thriller',
 'Mecha',
 'Psychological',
 'Historical',
 'Police',
 'Cars',
 'Dementia',
 'Sci-Fi',
 'Mystery',
 'Vampire',
 'Magic',
 'Drama',
 'Josei',
 'Yaoi',
 'Shounen Ai',
 'Shoujo Ai',
 'Shoujo',
 'Hentai',
 'Adventure',
 'Romance',
 'Demons',
 'Martial Arts',
 'Action',
 'Supernatural',
 'Shounen',
 'Military',
 'Horror',
 'Seinen',
 'Sports',
 'Harem',
 'Kids']

In [4]:
list_types = anime.type.unique().tolist()
list_types

['Movie', 'TV', 'OVA', 'Special', 'Music', 'ONA']

![alt text](CodeCogsEqn.png)

In [5]:
gaussian_filter_2d = lambda x,y,sigma: np.exp((-x**2 -y**2)/(2 * sigma ** 2))

In [6]:
def add_dimension(df, variables, columns):
    shape_len = df.shape[1]
    df = np.hstack([df, np.zeros((df.shape[0], len(variables)))])
    for i in columns:
        for k in range(df.shape[0]):
            if not df[k,i]:
                continue
            try:
                for s in df[k,i].split('|'):
                    if s in variables:
                        df[k,shape_len+variables.index(s)] += 1
            except Exception as e:
                print(e)
                continue
    return df

In [7]:
def recommend(chosen_index, count_neigh):
    df_copy = anime.copy().values   
    variables = list_genres + list_types
    df_copy = add_dimension(df_copy, variables, [2, 3])
    df_copy = df_copy[:,anime.shape[1]:].astype('float')
    df_copy = df_copy[~np.isnan(df_copy).any(axis=1)]
    neighbors = NearestNeighbors(n_neighbors=count_neigh+1, algorithm='auto', metric='sqeuclidean').fit(df_copy)
    xtest = df_copy[chosen_index, :]
    xtest = xtest.reshape(1, -1)
    distances, indices = neighbors.kneighbors(xtest)
    results = indices[0][:].tolist()
    try:
        del results[results.index(chosen_index)]
    except:
        results=results[:count_neigh]
        pass
    return results

In [8]:
def sequel(title_1, title_2):    
    if fuzz.ratio(title_1, title_2) > 60 or fuzz.token_set_ratio(title_1, title_2) > 60:
        return True
    else:
        return False

def sort_distribution(title_main, std_users, title, score, user):
    if score:
        factor_e = gaussian_filter_2d(score, user, std_users)
    else:
        factor_e = -999999999
    if sequel(title_main, title):
        gaussian = 0
    else:
        gaussian = (1 / (np.pi * 2 * std_users ** 2)) * factor_e
    return gaussian

In [9]:
def extract_ratings(chosen_index, list_films, count_neigh):
    parametres_anime = ['_' for _ in range(count_neigh)]
    users = []
    for i, index in enumerate(list_films):
        parametres_anime[i] = anime[['name','rating','members']].iloc[index].tolist()
        parametres_anime[i].append(index)
        users.append(parametres_anime[i][2])
    
    title_main = anime['name'].iloc[chosen_index]
    users.append(anime['members'].iloc[chosen_index])
    std_users = np.std(users)
    results_gaussian = [sort_distribution(title_main,std_users,x[0],x[1],x[2]) for x in parametres_anime]
    results = np.array(parametres_anime)[(np.array(results_gaussian).argsort())]
    return results[:,-1].tolist()

In [10]:
dragonball_index = np.argmax([fuzz.ratio('dragon ball z', i) for i in anime.name.values])
print(dragonball_index)
suggest_number = 10
print('random selected:')
print(anime.iloc[dragonball_index,:])
recommended = recommend(dragonball_index, suggest_number)
sorted_recommended = np.array(extract_ratings(dragonball_index,recommended,suggest_number)).astype('int')

206
random selected:
anime_id                                                  813
name                                            Dragon Ball Z
genre       Action|Adventure|Comedy|Fantasy|Martial Arts|S...
type                                                       TV
episodes                                                  291
rating                                                   8.32
members                                                375662
Name: 206, dtype: object


## without sorting

In [11]:
anime.iloc[recommended, :]

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
588,6033,Dragon Ball Kai,Action|Adventure|Comedy|Fantasy|Martial Arts|S...,TV,97,7.95,116832
1930,30694,Dragon Ball Super,Action|Adventure|Comedy|Fantasy|Martial Arts|S...,TV,Unknown,7.4,111443
515,22777,Dragon Ball Kai (2014),Action|Adventure|Comedy|Fantasy|Martial Arts|S...,TV,61,8.01,42666
346,223,Dragon Ball,Adventure|Comedy|Fantasy|Martial Arts|Shounen|...,TV,153,8.16,316102
1796,238,Rekka no Honoo,Action|Adventure|Martial Arts|Shounen|Super Power,TV,42,7.44,35258
4701,4427,Tatakae!! Ramenman,Action|Adventure|Comedy|Martial Arts|Shounen,TV,35,6.66,431
4275,6714,Dragon Ball Z: Atsumare! Gokuu World,Action|Adventure|Comedy|Fantasy|Martial Arts|S...,OVA,1,6.76,10044
841,20,Naruto,Action|Comedy|Martial Arts|Shounen|Super Power,TV,220,7.81,683297
4595,11385,Digimon Xros Wars: Toki wo Kakeru Shounen Hunt...,Action|Adventure|Comedy|Fantasy|Shounen,TV,25,6.68,11963
3203,22695,Dragon Ball Z: Summer Vacation Special,Action|Adventure|Comedy|Fantasy|Martial Arts|S...,Special,1,7.05,4251


## with gaussian sorting

In [12]:
anime.iloc[sorted_recommended,:]

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
588,6033,Dragon Ball Kai,Action|Adventure|Comedy|Fantasy|Martial Arts|S...,TV,97,7.95,116832
1930,30694,Dragon Ball Super,Action|Adventure|Comedy|Fantasy|Martial Arts|S...,TV,Unknown,7.4,111443
515,22777,Dragon Ball Kai (2014),Action|Adventure|Comedy|Fantasy|Martial Arts|S...,TV,61,8.01,42666
346,223,Dragon Ball,Adventure|Comedy|Fantasy|Martial Arts|Shounen|...,TV,153,8.16,316102
4275,6714,Dragon Ball Z: Atsumare! Gokuu World,Action|Adventure|Comedy|Fantasy|Martial Arts|S...,OVA,1,6.76,10044
3203,22695,Dragon Ball Z: Summer Vacation Special,Action|Adventure|Comedy|Fantasy|Martial Arts|S...,Special,1,7.05,4251
841,20,Naruto,Action|Comedy|Martial Arts|Shounen|Super Power,TV,220,7.81,683297
1796,238,Rekka no Honoo,Action|Adventure|Martial Arts|Shounen|Super Power,TV,42,7.44,35258
4595,11385,Digimon Xros Wars: Toki wo Kakeru Shounen Hunt...,Action|Adventure|Comedy|Fantasy|Shounen,TV,25,6.68,11963
4701,4427,Tatakae!! Ramenman,Action|Adventure|Comedy|Martial Arts|Shounen,TV,35,6.66,431


In [13]:
outputs, i = [], 0
while len(outputs) < suggest_number:
    recommended = recommend(dragonball_index, suggest_number * (i+2))
    if not sequel(anime.name.iloc[dragonball_index], anime.name.iloc[recommended[i]]) and recommended[i] not in outputs:
        outputs.append(recommended[i])
    i += 1
sorted_recommended = np.array(extract_ratings(dragonball_index,outputs,suggest_number)).astype('int')

## remove sequel

In [14]:
anime.iloc[sorted_recommended,:]

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
841,20,Naruto,Action|Comedy|Martial Arts|Shounen|Super Power,TV,220,7.81,683297
615,1735,Naruto: Shippuuden,Action|Comedy|Martial Arts|Shounen|Super Power,TV,Unknown,7.94,533578
74,21,One Piece,Action|Adventure|Comedy|Drama|Fantasy|Shounen|...,TV,Unknown,8.58,504862
989,10033,Toriko,Action|Adventure|Comedy|Fantasy|Shounen,TV,147,7.74,54790
2723,461,One Piece Movie 3: Chinjuu-jima no Chopper Oukoku,Action|Adventure|Comedy|Fantasy|Shounen|Super ...,Movie,1,7.18,36558
1796,238,Rekka no Honoo,Action|Adventure|Martial Arts|Shounen|Super Power,TV,42,7.44,35258
4960,12929,Saint Seiya Omega,Action|Adventure|Fantasy|Shounen,TV,97,6.59,22684
2913,2335,Kenyuu Densetsu Yaiba,Action|Comedy|Fantasy|Martial Arts|Shounen,TV,52,7.13,5158
2934,961,Virtua Fighter,Action|Adventure|Comedy|Martial Arts|Shounen,TV,35,7.13,3875
9673,27943,Nano Invaders,Action|Adventure|Shounen|Super Power,TV,52,7.08,519


In [15]:
def suggest_me_anime(keyword, suggest_count, remove_sequel=True):
    lasttime = time.time()
    your_index = np.argmax([fuzz.ratio(keyword, i) for i in anime.name.values])
    print('your selection:')
    print(anime.iloc[your_index,:])
    if remove_sequel:
        outputs, i = [], 0
        while len(outputs) < suggest_count:
            recommended = recommend(your_index, suggest_count * (i+2))
            if not sequel(anime.name.iloc[your_index], anime.name.iloc[recommended[i]]) and recommended[i] not in outputs:
                outputs.append(recommended[i])
            i += 1
        results = np.array(extract_ratings(your_index,outputs,suggest_count)).astype('int')
    else:
        recommended = recommend(your_index, suggest_count)
        results = np.array(extract_ratings(your_index,recommended,suggest_count)).astype('int')
    print('time taken to suggest:', time.time()-lasttime, 'seconds')
    return results

In [16]:
anime.iloc[suggest_me_anime('kimi no nawa', 20, remove_sequel=True),:]

your selection:
anime_id                                32281
name                           Kimi no Na wa.
genre       Drama|Romance|School|Supernatural
type                                    Movie
episodes                                    1
rating                                   9.37
members                                200630
Name: 0, dtype: object
time taken to suggest: 2.4941344261169434 seconds


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
60,10408,Hotarubi no Mori e,Drama|Romance|Shoujo|Supernatural,Movie,1,8.61,197439
2103,1723,Clannad Movie,Drama|Fantasy|Romance|School,Movie,1,7.35,99506
1959,713,Air Movie,Drama|Romance|Supernatural,Movie,1,7.39,44179
1697,31245,Zutto Mae kara Suki deshita.: Kokuhaku Jikkou ...,Romance|School,Movie,1,7.47,35058
1111,14669,Aura: Maryuuin Kouga Saigo no Tatakai,Comedy|Drama|Romance|School|Supernatural,Movie,1,7.67,22599
3530,9988,Otome wa Boku ni Koishiteru: Futari no Elder,Drama|Romance|School,OVA,3,6.96,10579
6394,546,Wind: A Breath of Heart (TV),Drama|Romance|School|Supernatural,TV,13,6.14,7778
6315,22847,Kara no Kyoukai: Mirai Fukuin - Manner Movie,Drama,Movie,1,6.17,4260
3914,17585,Myself ; Yourself Specials,Drama|Romance|School,Special,2,6.85,4128
3908,18053,Koi to Senkyo to Chocolate: Ikenai Hazuki-sensei,Drama|Romance|School,Special,1,6.85,3526


In [17]:
anime.iloc[suggest_me_anime('doraemon', 20, remove_sequel=True),:]

your selection:
anime_id                                      501
name                                     Doraemon
genre       Adventure|Comedy|Fantasy|Kids|Shounen
type                                           TV
episodes                                       26
rating                                       7.49
members                                      9366
Name: 1587, dtype: object
time taken to suggest: 2.4843499660491943 seconds


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
998,1606,Kekkaishi,Adventure|Comedy|Fantasy|Shounen,TV,52,7.73,61884
2057,4028,Slayers Revolution,Adventure|Comedy|Fantasy,TV,13,7.37,23679
3454,2363,Pokemon Housoukyoku,Adventure|Comedy|Fantasy|Kids,TV,16,6.99,15461
5906,445,Tales of Eternia The Animation,Adventure|Comedy|Fantasy|Romance|Shounen,TV,13,6.32,11582
5583,33314,Digimon Universe: Appli Monsters,Action|Adventure|Comedy|Fantasy|Kids,TV,Unknown,6.41,3676
2031,2813,Chiisana Ahiru no Ooki na Ai no Monogatari: Ah...,Adventure|Comedy|Drama|Fantasy|Kids,TV,52,7.37,3363
1230,4060,Grimm Masterpiece Theater,Adventure|Comedy|Fantasy|Kids|Magic,TV,24,7.62,1938
4885,1960,Sore Ike! Anpanman,Comedy|Fantasy|Kids,TV,Unknown,6.61,940
3351,2578,TaoTao Ehonkan Sekai Doubutsu Banashi,Adventure|Comedy|Fantasy|Kids,TV,52,7.02,656
5118,1872,Saru Getchu: On Air,Adventure|Comedy|Fantasy|Kids|Romance|Shounen,TV,26,6.55,632


In [18]:
anime.iloc[suggest_me_anime('doraemon', 20, remove_sequel=False),:]

your selection:
anime_id                                      501
name                                     Doraemon
genre       Adventure|Comedy|Fantasy|Kids|Shounen
type                                           TV
episodes                                       26
rating                                       7.49
members                                      9366
Name: 1587, dtype: object
time taken to suggest: 0.13530182838439941 seconds


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
4107,2653,Doraemon: Ganbare! Gian!!,Adventure|Comedy|Fantasy|Kids|Shounen,Movie,1,6.8,524
929,2471,Doraemon (1979),Adventure|Comedy|Fantasy|Kids|Sci-Fi|Shounen,TV,1787,7.76,14233
998,1606,Kekkaishi,Adventure|Comedy|Fantasy|Shounen,TV,52,7.73,61884
1508,19291,Pokemon XY,Action|Adventure|Comedy|Fantasy|Kids,TV,93,7.52,30722
3454,2363,Pokemon Housoukyoku,Adventure|Comedy|Fantasy|Kids,TV,16,6.99,15461
4878,17873,Pokemon Best Wishes! Season 2: Decolora Adventure,Action|Adventure|Comedy|Fantasy|Kids,TV,20,6.61,12499
6587,1098,Samurai Girl Real Bout High School,Adventure|Comedy|Fantasy|Shounen,TV,13,6.05,6689
5673,2789,B-Densetsu! Battle Bedaman,Adventure|Fantasy|Kids|Shounen,TV,52,6.38,6545
5347,4667,B-Densetsu! Battle Bedaman Enkon,Adventure|Comedy|Kids|Shounen,TV,51,6.48,2512
1230,4060,Grimm Masterpiece Theater,Adventure|Comedy|Fantasy|Kids|Magic,TV,24,7.62,1938
