# Scrape MyAnimeList (MAL) data

In [62]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from jikanpy import Jikan
import numpy as np

## Grab data from MAL API

In [63]:
# MAL id -> short display label for every anime covered by this notebook.
label_map = {
    # Love Live! Sunshine!!
    32526: "Sunshine S1",
    34973: "Sunshine S2",
    37027: "Sunshine Movie",
    # Love Live! School Idol Project
    15051: "School Idol Project S1",
    19111: "School Idol Project S2",
    24997: "School Idol Project Movie",
    # Later Love Live! entries
    41169: "Superstar",
    40879: "Nijigasaki",
    # THE iDOLM@STER
    10278: "The iDOLM@STER",
    17437: "The iDOLM@STER Movie",
    30344: "The iDOLM@STER CG S2",
    23587: "The iDOLM@STER CG S1",
    # BanG Dream!
    33573: "Bandori S1",
    37869: "Bandori S2",
    37870: "Bandori S3",
    39619: "Bandori Film Live",
}

# Group name -> MAL ids of the animes belonging to that group ("generation").
# Every id listed here must also be a key of label_map.
group_ids = {
    "Love Live!": [15051, 24997, 19111],
    "Love Live! Sunshine!!": [32526, 34973, 37027],
    "Love Live! Nijigasaki": [40879],
    "Love Live! Superstar!!": [41169],
    "iM@S": [10278, 17437],
    "iM@S CG": [30344, 23587],
    "Bandori": [33573, 37869, 37870, 39619],
}

In [69]:
# Fetch the MAL statistics of every anime and derive two results:
#   drop_ratio: anime label -> 'dropped' as a percentage of 'total'
#   grp_stats:  one dict per group holding the mean, std and count of all
#               score votes pooled over the animes of that group
drop_ratio = {}
jikan = Jikan()

grp_stats = []
for grp, ids in group_ids.items():
    print('Processing group:', grp)
    # One array of repeated scores per (anime, score) pair. They are joined
    # once per group below: np.append would re-copy the whole accumulated
    # sample on every call.
    sample_parts = []
    for anime_id in ids:
        label = label_map[anime_id]
        print('\tProcessing anime:', label)
        # NOTE(review): the result of this first call is discarded; presumably
        # it primes the entry before its stats are requested -- confirm it is
        # still needed.
        jikan.anime(anime_id)   # Bootstrap
        response = jikan.anime(anime_id, extension='stats')
        if not response:
            print('\tFailed for:', label)
            continue
        # Calculate drop ratio (in percent)
        drop_ratio[label] = int(response['dropped'])/int(response['total']) * 100
        # Expand the vote histogram into one sample value per vote
        for score, entry in response['scores'].items():
            sample_parts.append(np.repeat(int(score), entry['votes']))
    grp_sample = np.concatenate(sample_parts) if sample_parts else np.array([])
    # A group without any vote (e.g. every request failed) has no defined
    # mean/std; record NaN explicitly instead of triggering numpy's
    # empty-slice RuntimeWarning.
    has_votes = grp_sample.size > 0
    grp_stats.append({
        'group': grp,
        'mean': np.mean(grp_sample) if has_votes else np.nan,
        'std': np.std(grp_sample) if has_votes else np.nan,
        'count': len(grp_sample),
    })

Processing group: Love Live!
	Processing anime: School Idol Project S1
	Processing anime: School Idol Project Movie
	Processing anime: School Idol Project S2
Processing group: Love Live! Sunshine!!
	Processing anime: Sunshine S1
	Processing anime: Sunshine S2
	Processing anime: Sunshine Movie
Processing group: Love Live! Nijigasaki
	Processing anime: Nijigasaki
Processing group: Love Live! Superstar!!
	Processing anime: Superstar
Processing group: iM@S
	Processing anime: The iDOLM@STER
	Processing anime: The iDOLM@STER Movie
Processing group: iM@S CG
	Processing anime: The iDOLM@STER CG S2
	Processing anime: The iDOLM@STER CG S1
Processing group: Bandori
	Processing anime: Bandori S1
	Processing anime: Bandori S2
	Processing anime: Bandori S3
	Processing anime: Bandori Film Live


In [None]:
# Export the per-anime drop ratios, sorted by ascending ratio. The anime
# labels form the index, which to_csv writes out by default.
drop_df = pd.DataFrame.from_dict(drop_ratio, orient='index', columns=['ratio'])
drop_df = drop_df.sort_values(by='ratio')
drop_df.to_csv('drop_ratio.csv')

# Export the per-group score statistics, one row per group, without the
# positional index.
score_df = pd.DataFrame(grp_stats)
score_df.to_csv('scores.csv', index=False)