# [Manga and Anime Dataset](https://www.kaggle.com/datasets/duongtruongbinh/manga-and-anime-dataset)

Source: MyAnimeList

In [4]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt


In [5]:
anime_data = pd.read_csv('data/anime.csv')
manga_data = pd.read_csv('data/manga.csv')

# Data Cleaning
# replacing obj values as int in manga.csv
manga_data['Members'] = manga_data['Members'].replace(',', '', regex=True).astype(int)
manga_data['Favorite'] = manga_data['Favorite'].replace(',', '', regex=True).astype(int)

# Display dfs
anime_data.drop_duplicates(subset=['Title', 'Score', 'Vote', 'Popularity'], inplace=True)
print('Anime df')
display(anime_data.head())
print('Manga df:')
display(manga_data.head())


Anime df


Unnamed: 0,Title,Score,Vote,Ranked,Popularity,Episodes,Status,Aired,Premiered,Producers,Licensors,Studios,Source,Duration,Rating
0,Sousou no FrierenFrieren: Beyond Journey's End,9.14,128768,1,508,28,Currently Airing,"Sep 29, 2023 to Mar 2024",Fall 2023,"['Aniplex', 'Dentsu', 'Shogakukan-Shueisha Pro...","None found, add some",Madhouse,Manga,24 min. per ep.,PG-13 - Teens 13 or older
1,Fullmetal Alchemist: Brotherhood,9.09,2080863,2,3,64,Finished Airing,"Apr 5, 2009 to Jul 4, 2010",Spring 2009,"['Aniplex', 'Square Enix', 'Mainichi Broadcast...","Funimation, Aniplex of America",Bones,Manga,24 min. per ep.,R - 17+ (violence & profanity)
2,Steins;Gate,9.07,1375512,3,13,24,Finished Airing,"Apr 6, 2011 to Sep 14, 2011",Spring 2011,"['Frontier Works', 'Media Factory', 'Kadokawa ...",Funimation,White Fox,Visual novel,24 min. per ep.,PG-13 - Teens 13 or older
3,Gintama°Gintama Season 4,9.06,246431,4,337,51,Finished Airing,"Apr 8, 2015 to Mar 30, 2016",Spring 2015,"['TV Tokyo', 'Aniplex', 'Dentsu']","Funimation, Crunchyroll",Bandai Namco Pictures,Manga,24 min. per ep.,PG-13 - Teens 13 or older
4,Shingeki no Kyojin Season 3 Part 2Attack on Ti...,9.05,1545108,5,21,10,Finished Airing,"Apr 29, 2019 to Jul 1, 2019",Spring 2019,"['Production I.G', 'Dentsu', 'Mainichi Broadca...",Funimation,Wit Studio,Manga,23 min. per ep.,R - 17+ (violence & profanity)


Manga df:


Unnamed: 0,Title,Score,Vote,Ranked,Popularity,Members,Favorite,Volumes,Chapters,Status,Published,Genres,Themes,Demographics,Serialization,Author
0,Berserk,9.47,334154,1,1,670559,123574,Unknown,Unknown,Publishing,"Aug 25, 1989 to ?","['Action', 'Adventure', 'Award Winning', 'Dram...","['Gore', 'Military', 'Mythology', 'Psychologic...",['Seinen'],Young Animal,"Miura, Kentarou (Story & Art), Studio Gaga (Art)"
1,JoJo no Kimyou na Bouken Part 7: Steel Ball Run,9.3,157522,2,26,257957,43113,24,96,Finished,"Jan 19, 2004 to Apr 19, 2011","['Action', 'Adventure', 'Mystery', 'Supernatur...",[],"['Seinen', 'Shounen']",Ultra Jump,"Araki, Hirohiko (Story & Art)"
2,Vagabond,9.25,138009,3,15,368332,40575,37,327,On Hiatus,"Sep 3, 1998 to May 21, 2015","['Action', 'Adventure', 'Award Winning']","['Historical', 'Samurai']",['Seinen'],Morning,"Inoue, Takehiko (Story & Art), Yoshikawa, Eiji..."
3,One Piece,9.22,368951,4,3,603122,115123,Unknown,Unknown,Publishing,"Jul 22, 1997 to ?","['Action', 'Adventure', 'Fantasy']",[],['Shounen'],Shounen Jump (Weekly),"Oda, Eiichiro (Story & Art)"
4,Monster,9.15,94806,5,29,238291,20674,18,162,Finished,"Dec 5, 1994 to Dec 20, 2001","['Award Winning', 'Drama', 'Mystery']","['Adult Cast', 'Psychological']",['Seinen'],Big Comic Original,"Urasawa, Naoki (Story & Art)"


In [6]:
anime_data.dtypes

Title          object
Score         float64
Vote            int64
Ranked          int64
Popularity      int64
Episodes       object
Status         object
Aired          object
Premiered      object
Producers      object
Licensors      object
Studios        object
Source         object
Duration       object
Rating         object
dtype: object

## Anime df

### Most popular anime

In [7]:
# number of anime
len(anime_data)

7356

In [8]:
# ranking anime by weighted averages based on score and number of votes
weights = {'Score': 1/2, 'Vote': 1/2}

anime_stats = anime_data[['Title', 'Score', 'Vote']].set_index('Title')
anime_norm = (anime_stats - anime_stats.min()) / (anime_stats.max() - anime_stats.min())
anime_ranks = anime_norm['Score'] * weights['Score'] + anime_norm['Vote'] * weights['Vote'] # + anime_norm['Popularity'] * weights['Popularity']
anime_ranks = anime_ranks.sort_values(ascending=True).\
    reset_index()\
    .drop_duplicates(subset=['Title'])\
    .rename(columns={0:'score'})\
    .sort_values(by = 'score', ascending = False)\
    .reset_index()\
    .drop(['index'], axis = 1)
anime_ranks

Unnamed: 0,Title,score
0,Death Note,0.880055
1,Shingeki no KyojinAttack on Titan,0.871795
2,Fullmetal Alchemist: Brotherhood,0.868337
3,Hunter x Hunter (2011)Hunter x Hunter,0.791651
4,Kimi no Na wa.Your Name.,0.775676
...,...,...
4992,Crayon Shin-chan Movie 30: Mononoke Ninja Chin...,0.002190
4993,Meme Iroiro Yume no TabiThe Many Dream Journey...,0.002172
4994,Sue Cat,0.002159
4995,Hai! Akko desu,0.002158


In [9]:
#ranking anime
anime_ranks['rank'] = anime_ranks['score'].rank(ascending=False)
anime_ranks

Unnamed: 0,Title,score,rank
0,Death Note,0.880055,1.0
1,Shingeki no KyojinAttack on Titan,0.871795,2.0
2,Fullmetal Alchemist: Brotherhood,0.868337,3.0
3,Hunter x Hunter (2011)Hunter x Hunter,0.791651,4.0
4,Kimi no Na wa.Your Name.,0.775676,5.0
...,...,...,...
4992,Crayon Shin-chan Movie 30: Mononoke Ninja Chin...,0.002190,4993.0
4993,Meme Iroiro Yume no TabiThe Many Dream Journey...,0.002172,4994.0
4994,Sue Cat,0.002159,4995.0
4995,Hai! Akko desu,0.002158,4996.0


In [94]:
#save as csv fil
anime_ranks.to_csv('data/anime_ranks.csv')

### Studio Analysis

In [18]:
# number of unique studios
len(anime_data['Studios'].unique())

545

In [19]:
studios_votes = anime_data.groupby('Studios')['Vote'].sum().reset_index().sort_values(by = 'Vote')
# studios_popularity = anime_data.groupby('Studios')['Popularity'].mean().reset_index().sort_values(by = 'Popularity')
studios_avg_score = anime_data.groupby('Studios')['Score'].mean().reset_index()

# merging dfs
# df1_merge = pd.merge(studios_votes, studios_popularity, on='Studios', how='inner')
studios_stats = pd.merge(studios_avg_score, studios_votes, on='Studios', how = 'inner').sort_values(by = 'Score', ascending = False)
studios_stats

Unnamed: 0,Studios,Score,Vote
183,"K-Factory, Kitty Film Mitaka Studio",9.020,77708
476,"Toei Animation, DandeLion Animation Studio",8.810,9854
269,"OLM, TOHO animation STUDIO",8.800,95119
305,"Pierrot, Studio Signpost",8.775,61769
267,"OLM, P.I.C.S.",8.670,206103
...,...,...,...
251,Namu Animation,6.820,62692
281,"Ordet, SANZIGEN",6.810,175137
198,Knack Productions,6.810,21908
385,Staple Entertainment,6.810,212719


In [20]:
# ranking studios by weighted average based on score and vote
weights = {'Score': 1/2, 'Vote': 1/2}
studios_stats = studios_stats.set_index('Studios')
# normalizing values
studios_stats = (studios_stats - studios_stats.min()) / (studios_stats.max() - studios_stats.min())

In [21]:
rank = studios_stats['Score'] * weights['Score'] + studios_stats['Vote'] * weights['Vote'] #+ studios_stats['Popularity'] * weights['Popularity']
rank.sort_values(ascending=False).reset_index()\
    .rename(columns={0:'rank'})\
    .head(20)

Unnamed: 0,Studios,rank
0,A-1 Pictures,0.659726
1,Bones,0.644222
2,Madhouse,0.618768
3,J.C.Staff,0.533191
4,"K-Factory, Kitty Film Mitaka Studio",0.500962
5,Production I.G,0.460705
6,"Toei Animation, DandeLion Animation Studio",0.452609
7,"OLM, TOHO animation STUDIO",0.451404
8,Kyoto Animation,0.448835
9,"Pierrot, Studio Signpost",0.445334


## Manga df

In [22]:
manga_data.head()

Unnamed: 0,Title,Score,Vote,Ranked,Popularity,Members,Favorite,Volumes,Chapters,Status,Published,Genres,Themes,Demographics,Serialization,Author
0,Berserk,9.47,334154,1,1,670559,123574,Unknown,Unknown,Publishing,"Aug 25, 1989 to ?","['Action', 'Adventure', 'Award Winning', 'Dram...","['Gore', 'Military', 'Mythology', 'Psychologic...",['Seinen'],Young Animal,"Miura, Kentarou (Story & Art), Studio Gaga (Art)"
1,JoJo no Kimyou na Bouken Part 7: Steel Ball Run,9.3,157522,2,26,257957,43113,24,96,Finished,"Jan 19, 2004 to Apr 19, 2011","['Action', 'Adventure', 'Mystery', 'Supernatur...",[],"['Seinen', 'Shounen']",Ultra Jump,"Araki, Hirohiko (Story & Art)"
2,Vagabond,9.25,138009,3,15,368332,40575,37,327,On Hiatus,"Sep 3, 1998 to May 21, 2015","['Action', 'Adventure', 'Award Winning']","['Historical', 'Samurai']",['Seinen'],Morning,"Inoue, Takehiko (Story & Art), Yoshikawa, Eiji..."
3,One Piece,9.22,368951,4,3,603122,115123,Unknown,Unknown,Publishing,"Jul 22, 1997 to ?","['Action', 'Adventure', 'Fantasy']",[],['Shounen'],Shounen Jump (Weekly),"Oda, Eiichiro (Story & Art)"
4,Monster,9.15,94806,5,29,238291,20674,18,162,Finished,"Dec 5, 1994 to Dec 20, 2001","['Award Winning', 'Drama', 'Mystery']","['Adult Cast', 'Psychological']",['Seinen'],Big Comic Original,"Urasawa, Naoki (Story & Art)"


In [23]:
manga_data.dtypes

Title             object
Score            float64
Vote               int64
Ranked             int64
Popularity         int64
Members            int64
Favorite           int64
Volumes           object
Chapters          object
Status            object
Published         object
Genres            object
Themes            object
Demographics      object
Serialization     object
Author            object
dtype: object

### Most popular manga

In [24]:
# number of manga
len(manga_data)


10000

In [25]:
# ranking by weighted average
weights = {'Score': 0.25, 'Vote': 0.25, 'Favorite': 0.25, 'Members': 0.25}

manga_stats = manga_data[['Title', 'Score', 'Vote', 'Favorite', 'Members']].set_index('Title')
manga_norm = (manga_stats - manga_stats.min())/(manga_stats.max() - manga_stats.min())
manga_ranks = manga_norm['Score'] * weights['Score'] + manga_norm['Vote'] * weights['Vote'] + manga_norm['Favorite'] * weights['Favorite'] + manga_norm['Members'] * weights['Members'] 
manga_ranks = manga_ranks.reset_index().rename(columns={0:'score'})\
    .sort_values(by = 'score', ascending = False)\
    .reset_index()
manga_ranks

Unnamed: 0,index,Title,score
0,0,Berserk,0.957892
1,3,One Piece,0.912982
2,111,Shingeki no Kyojin (Attack on Titan),0.791294
3,44,Chainsaw Man,0.789310
4,129,Tokyo Ghoul,0.597951
...,...,...,...
9995,9947,Ohitorisama Monogatari (Story of Herself),0.001058
9996,9942,Bingo!,0.001025
9997,9980,Acaria,0.001013
9998,9967,Heart no Okurimono (Heart of Gift),0.000992


### Publisher analysis

In [28]:
# number of publishers
len(manga_data['Serialization'].unique())

645

In [29]:
publisher_votes = manga_data.groupby('Serialization')['Vote'].sum()
publisher_avg_score = manga_data.groupby('Serialization')['Score'].mean()
publisher_favourites = manga_data.groupby('Serialization')['Favorite'].sum()
publisher_members = manga_data.groupby('Serialization')['Members'].sum()

# merging dfs
df1_merge = pd.merge(publisher_avg_score, publisher_votes, on = 'Serialization', how = 'inner')
df2_merge = pd.merge(df1_merge, publisher_favourites, on = 'Serialization', how = 'inner')
publisher_stats = pd.merge(df2_merge, publisher_members, on = 'Serialization', how = 'inner')
publisher_stats

Unnamed: 0_level_0,Score,Vote,Favorite,Members
Serialization,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
.Bloom,7.331667,4166,69,8560
.hack//G.U. The World,7.235000,6004,220,13678
4-koma Nano Ace,7.057500,3202,104,10630
@vitamin,7.492000,5302,220,15602
ARIA,7.338235,99961,4812,252656
...,...,...,...,...
pocopoco,7.175000,2771,89,6549
super Robot Magazine,7.520000,937,19,1893
twi4,7.405000,75742,4113,180552
u17,7.230000,2832,83,10474


In [30]:
# ranking by weighted average
weights = {'Score': 0.25, 'Vote': 0.25, 'Favorite': 0.25, 'Members': 0.25}

# normalizing values
publisher_norm = (publisher_stats - publisher_stats.min()) / (publisher_stats.max() - publisher_stats.min())
publisher_norm

# ranking
publisher_rank = publisher_norm['Score'] * weights['Score'] + publisher_norm['Vote'] * weights['Vote'] + publisher_norm['Favorite'] * weights['Favorite'] + publisher_norm['Members'] * weights['Members'] 
publisher_rank.sort_values(ascending=False).reset_index()\
    .rename(columns={0: 'rank'})\
    .head(10)

Unnamed: 0,Serialization,rank
0,Shounen Jump (Weekly),0.863458
1,Shounen Magazine (Weekly),0.395743
2,Young Jump,0.337416
3,Shounen Jump+,0.288738
4,Kindai Mahjong Gold,0.250345
5,Comica,0.248732
6,Big Comic Spirits,0.242962
7,Shounen Sunday,0.234382
8,For Mrs.,0.229408
9,Canna Comics,0.227579


In [31]:
from flask import jsonify
manga = manga_data.sample(1).to_dict('list')
title = manga['Title']
output = (['TITLE:', title, 'ANIME:', manga])

In [58]:
anime = anime_data.sample(1).to_dict('records')
anime

[{'Title': 'Shuumatsu no Walküre IIRecord of Ragnarok II',
  'Score': 7.51,
  'Vote': 59249,
  'Ranked': 1763,
  'Popularity': 1946,
  'Episodes': '10',
  'Status': 'Finished Airing',
  'Aired': 'Jan 26, 2023',
  'Premiered': nan,
  'Producers': "['Mainichi Broadcasting System', 'Warner Bros. Japan', 'KlockWorx', 'Coamix', 'Sammy']",
  'Licensors': 'None found, add some',
  'Studios': 'Yumeta Company,       Graphinica',
  'Source': 'Manga',
  'Duration': '24 min. per ep.',
  'Rating': 'R - 17+ (violence & profanity)'}]