In [1]:
import pandas as pd
from collections import defaultdict
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import apriori, association_rules 

# 1. Frequent patterns on anime

In [2]:
anime_selected = pd.read_csv("./Data/animeList_selected.csv", index_col=0)
anime_selected = anime_selected.reset_index(drop=True)
anime_selected.head(5)
username_and_anime_id = anime_selected.drop(columns=["my_watched_episodes", "my_score", "my_status", "my_rewatching", "my_rewatching_ep", "my_tags"])
list_anime_id = anime_selected['anime_id'].unique().tolist()


In [3]:
userToAnimeList = defaultdict(list)

for idx, row in tqdm(username_and_anime_id.iterrows()):
    userToAnimeList[row["username"]].append(row["anime_id"])
    userToAnimeList[row["username"]].sort()
#Dictionary: Key as username, value as list of anime id


0it [00:00, ?it/s]

7433047it [10:46, 11492.12it/s]


In [None]:
frequency_matrix = defaultdict(list)
for id in tqdm(list_anime_id):
    for v in userToAnimeList.values():
        if id in v:
            frequency_matrix[id].append(1)
        else:
            frequency_matrix[id].append(0)

100%|██████████| 200/200 [00:24<00:00,  8.13it/s]


In [5]:
anime_cleaned = pd.read_csv("./Data/anime_cleaned.csv", index_col=0)
anime_cleaned.head(5)
id_to_title = {}
for idx, row in tqdm(anime_cleaned.iterrows()):
    id_to_title[idx] = row["title"]
print(id_to_title)

6668it [00:00, 11606.48it/s]

{11013: 'Inu x Boku SS', 2104: 'Seto no Hanayome', 5262: 'Shugo Chara!! Doki', 721: 'Princess Tutu', 12365: 'Bakuman. 3rd Season', 6586: 'Yume-iro Pâtissière', 178: 'Ultra Maniac', 2787: 'Shakugan no Shana II (Second)', 4477: 'Nodame Cantabile: Paris-hen', 853: 'Ouran Koukou Host Club', 4814: 'Junjou Romantica 2', 7054: 'Kaichou wa Maid-sama!', 11123: 'Sekaiichi Hatsukoi 2', 14227: 'Tonari no Kaibutsu-kun', 269: 'Bleach', 59: 'Chobits', 6045: 'Kimi ni Todoke', 1735: 'Naruto: Shippuuden', 210: 'Ranma ½', 4224: 'Toradora!', 10030: 'Bakuman. 2nd Season', 74: 'Gakuen Alice', 4722: 'Skip Beat!', 14397: 'Chihayafuru 2', 1557: 'Shounen Onmyouji', 10800: 'Chihayafuru', 3731: 'Itazura na Kiss', 9513: 'Beelzebub', 5835: 'Hanasakeru Seishounen', 9863: 'SKET Dance', 7817: 'B-gata H-kei', 966: 'Crayon Shin-chan', 120: 'Fruits Basket', 957: 'Saiunkoku Monogatari', 21: 'One Piece', 1974: 'Glass no Kamen (2005)', 857: 'Air Gear', 1914: 'Saiunkoku Monogatari 2nd Season', 249: 'InuYasha', 6645: 'Chu-Bra




In [None]:
#Now, we have the frequency matrix for 200 animes
frequency_matrix_named = {}
for k, v in frequency_matrix.items():
    frequency_matrix_named[id_to_title[k]] = v
frequency_df = pd.DataFrame.from_dict(frequency_matrix_named)
frequency_df

Unnamed: 0,One Piece,Chobits,Fruits Basket,InuYasha,Bleach,Ouran Koukou Host Club,Air Gear,Naruto: Shippuuden,Seto no Hanayome,Shakugan no Shana II (Second),...,Gekkan Shoujo Nozaki-kun,Ansatsu Kyoushitsu,Overlord,Kono Subarashii Sekai ni Shukufuku wo!,Kokoro Connect,Mahouka Koukou no Rettousei,Shokugeki no Souma,Koe no Katachi,Nichijou,Watashi ga Motenai no wa Dou Kangaetemo Omaera ga Warui!
0,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
3,1,1,0,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
4,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108291,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
108292,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
108293,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
108294,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
results = apriori(frequency_df, min_support=0.35, use_colnames=True, max_len=4, verbose=1, low_memory=True)
results = association_rules(results, metric = "lift", min_threshold = 0.05)
results = results.sort_values(by = "lift", ascending = False)



Processing 5 combinations | Sampling itemset size 4 3


In [None]:
#1. Normally, people will watch by sequences of animeS
results

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
534,"(Clannad, Toradora!)",(Clannad: After Story),0.433275,0.438760,0.350364,0.808640,1.843010,0.160260,2.932897,0.807111
535,(Clannad: After Story),"(Clannad, Toradora!)",0.438760,0.433275,0.350364,0.798531,1.843010,0.160260,2.812962,0.814998
536,(Clannad),"(Clannad: After Story, Toradora!)",0.551747,0.358656,0.350364,0.635008,1.770522,0.152476,1.757146,0.970869
533,"(Clannad: After Story, Toradora!)",(Clannad),0.358656,0.551747,0.350364,0.976880,1.770522,0.152476,19.388187,0.678567
725,"(Death Note, Clannad)",(Clannad: After Story),0.458410,0.438760,0.355701,0.775945,1.768493,0.154569,2.504916,0.802354
...,...,...,...,...,...,...,...,...,...,...
98,(Angel Beats!),(Naruto),0.572145,0.583872,0.363928,0.636078,1.089413,0.029869,1.143452,0.191827
23,(Bleach),(Shingeki no Kyojin),0.561304,0.575534,0.351740,0.626647,1.088810,0.028690,1.136904,0.185930
22,(Shingeki no Kyojin),(Bleach),0.575534,0.561304,0.351740,0.611154,1.088810,0.028690,1.128199,0.192163
5,(Bleach),(Toradora!),0.561304,0.584306,0.354390,0.631369,1.080544,0.026416,1.127668,0.169914


In [None]:
# 2. 
temp_results = results[results["antecedents"].apply(lambda x: all("Clannad" not in item for item in list(x)))]
temp_results

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
976,"(Death Note, Code Geass: Hangyaku no Lelouch R2)","(Code Geass: Hangyaku no Lelouch, Fullmetal Al...",0.451910,0.456065,0.355507,0.786678,1.724925,0.149407,2.549826,0.766780
977,"(Code Geass: Hangyaku no Lelouch, Fullmetal Al...","(Death Note, Code Geass: Hangyaku no Lelouch R2)",0.456065,0.451910,0.355507,0.779510,1.724925,0.149407,2.485784,0.772637
414,(Sword Art Online II),(Sword Art Online),0.360263,0.577196,0.357945,0.993567,1.721368,0.150003,65.719938,0.655061
415,(Sword Art Online),(Sword Art Online II),0.577196,0.360263,0.357945,0.620145,1.721368,0.150003,1.684160,0.991160
445,(Naruto: Shippuuden),"(Bleach, Naruto)",0.472215,0.440764,0.357723,0.757543,1.718704,0.149588,2.306538,0.792304
...,...,...,...,...,...,...,...,...,...,...
98,(Angel Beats!),(Naruto),0.572145,0.583872,0.363928,0.636078,1.089413,0.029869,1.143452,0.191827
23,(Bleach),(Shingeki no Kyojin),0.561304,0.575534,0.351740,0.626647,1.088810,0.028690,1.136904,0.185930
22,(Shingeki no Kyojin),(Bleach),0.575534,0.561304,0.351740,0.611154,1.088810,0.028690,1.128199,0.192163
5,(Bleach),(Toradora!),0.561304,0.584306,0.354390,0.631369,1.080544,0.026416,1.127668,0.169914


In [None]:
temp_results = temp_results[temp_results["antecedents"].apply(lambda x: all("Code Geass" not in item for item in list(x)))]
temp_results


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
414,(Sword Art Online II),(Sword Art Online),0.360263,0.577196,0.357945,0.993567,1.721368,0.150003,65.719938,0.655061
415,(Sword Art Online),(Sword Art Online II),0.577196,0.360263,0.357945,0.620145,1.721368,0.150003,1.684160,0.991160
445,(Naruto: Shippuuden),"(Bleach, Naruto)",0.472215,0.440764,0.357723,0.757543,1.718704,0.149588,2.306538,0.792304
444,"(Bleach, Naruto)",(Naruto: Shippuuden),0.440764,0.472215,0.357723,0.811598,1.718704,0.149588,2.801374,0.747746
479,"(Death Note, Naruto)",(Naruto: Shippuuden),0.499806,0.472215,0.390799,0.781902,1.655817,0.154783,2.419942,0.791830
...,...,...,...,...,...,...,...,...,...,...
98,(Angel Beats!),(Naruto),0.572145,0.583872,0.363928,0.636078,1.089413,0.029869,1.143452,0.191827
23,(Bleach),(Shingeki no Kyojin),0.561304,0.575534,0.351740,0.626647,1.088810,0.028690,1.136904,0.185930
22,(Shingeki no Kyojin),(Bleach),0.575534,0.561304,0.351740,0.611154,1.088810,0.028690,1.128199,0.192163
5,(Bleach),(Toradora!),0.561304,0.584306,0.354390,0.631369,1.080544,0.026416,1.127668,0.169914


In [None]:
temp_results = temp_results[temp_results["antecedents"].apply(lambda x: all("Code Geass" not in item for item in list(x)))]
temp_results.head(10)   

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
414,(Sword Art Online II),(Sword Art Online),0.360263,0.577196,0.357945,0.993567,1.721368,0.150003,65.719938,0.655061
415,(Sword Art Online),(Sword Art Online II),0.577196,0.360263,0.357945,0.620145,1.721368,0.150003,1.68416,0.99116
445,(Naruto: Shippuuden),"(Bleach, Naruto)",0.472215,0.440764,0.357723,0.757543,1.718704,0.149588,2.306538,0.792304
444,"(Bleach, Naruto)",(Naruto: Shippuuden),0.440764,0.472215,0.357723,0.811598,1.718704,0.149588,2.801374,0.747746
479,"(Death Note, Naruto)",(Naruto: Shippuuden),0.499806,0.472215,0.390799,0.781902,1.655817,0.154783,2.419942,0.79183
482,(Naruto: Shippuuden),"(Death Note, Naruto)",0.472215,0.499806,0.390799,0.827588,1.655817,0.154783,2.901147,0.750436
393,(Another),(Mirai Nikki (TV)),0.452417,0.499215,0.371269,0.820635,1.64385,0.145416,2.791984,0.715275
392,(Mirai Nikki (TV)),(Another),0.499215,0.452417,0.371269,0.743706,1.64385,0.145416,2.136544,0.782116
924,"(Steins;Gate, Sword Art Online)",(Mirai Nikki (TV)),0.428381,0.499215,0.351029,0.81943,1.641437,0.137174,2.773357,0.683634
925,(Mirai Nikki (TV)),"(Steins;Gate, Sword Art Online)",0.499215,0.428381,0.351029,0.703161,1.641437,0.137174,1.925686,0.78033


In [None]:
anime_cleaned["genre"]

anime_id
11013      Comedy, Supernatural, Romance, Shounen
2104     Comedy, Parody, Romance, School, Shounen
5262                Comedy, Magic, School, Shoujo
721        Comedy, Drama, Magic, Romance, Fantasy
12365             Comedy, Drama, Romance, Shounen
                           ...                   
37405                                      Hentai
37886               Slice of Life, Drama, Romance
37255                                 Music, Kids
35229                                        Kids
36315                                        Kids
Name: genre, Length: 6668, dtype: object

# 2. Frequent patterns on genre

In [6]:
# anime_cleaned['anime_id'] = anime_cleaned.index
# anime_cleaned.reset_index(inplace=True)
user_genre = username_and_anime_id.merge(anime_cleaned, how='left', on='anime_id')
user_genre = user_genre[["username", "genre"]]

In [7]:
all_genres = set()
for item in tqdm(user_genre["genre"].to_list()):
    all_genres.update(set(item.split(', ')))
all_genres = list(all_genres)
print(all_genres)

100%|██████████| 7433047/7433047 [00:12<00:00, 610225.41it/s]

['Comedy', 'Ecchi', 'Demons', 'Harem', 'Fantasy', 'Romance', 'Shoujo', 'Vampire', 'Police', 'Mecha', 'Music', 'Sports', 'Thriller', 'Supernatural', 'Adventure', 'Action', 'Space', 'Dementia', 'Magic', 'Game', 'Samurai', 'Parody', 'Mystery', 'Military', 'Sci-Fi', 'Seinen', 'Martial Arts', 'Super Power', 'Psychological', 'Kids', 'Historical', 'Shounen', 'Drama', 'Horror', 'Slice of Life', 'Josei', 'School']





In [8]:
genre_dict = {genre: user_genre["genre"].apply(lambda x: genre in x.split(', ')).values.tolist() for genre in tqdm(all_genres)}

genre_df = pd.DataFrame(genre_dict)

100%|██████████| 37/37 [02:40<00:00,  4.35s/it]


In [11]:
results = apriori(genre_df, min_support=0.1, use_colnames=True, max_len=4, verbose=1, low_memory=True)
results = association_rules(results, metric = "lift", min_threshold = 0.05)
results = results.sort_values(by = "lift", ascending = False)

Processing 24 combinations | Sampling itemset size 3


In [12]:
results

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
75,(Adventure),"(Fantasy, Action)",0.244890,0.175682,0.109959,0.449013,2.555834,0.066936,1.496076,0.806159
70,"(Fantasy, Action)",(Adventure),0.175682,0.244890,0.109959,0.625899,2.555834,0.066936,2.018466,0.738475
62,(Adventure),"(Comedy, Action)",0.244890,0.221647,0.130658,0.533535,2.407142,0.076378,1.668621,0.774152
59,"(Comedy, Action)",(Adventure),0.221647,0.244890,0.130658,0.589486,2.407142,0.076378,1.839424,0.751034
19,(Adventure),(Fantasy),0.244890,0.249535,0.144909,0.591729,2.371323,0.083800,1.838154,0.765842
...,...,...,...,...,...,...,...,...,...,...
48,(Drama),(Action),0.343222,0.492132,0.138317,0.402997,0.818879,-0.030593,0.850695,-0.251927
5,(Supernatural),(Comedy),0.361625,0.487764,0.126160,0.348869,0.715243,-0.050228,0.786688,-0.384106
4,(Comedy),(Supernatural),0.487764,0.361625,0.126160,0.258650,0.715243,-0.050228,0.861098,-0.437327
13,(Drama),(Comedy),0.343222,0.487764,0.119225,0.347369,0.712167,-0.048187,0.784879,-0.380949
