# Scrape MAL data

In [1]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from jikanpy import Jikan
import numpy as np

## Fetch data from the MAL API (via Jikan)

In [2]:
# MyAnimeList (MAL) anime id -> short display label for every title studied.
# Entries are grouped by franchise; dict insertion order is preserved, so this
# is also the order in which the titles are iterated.
label_map = {
    # Love Live: Sunshine
    32526: 'Sunshine S1',
    34973: 'Sunshine S2',
    37027: 'Sunshine Movie',
    # Love Live: School Idol Project
    15051: 'School Idol Project S1',
    19111: 'School Idol Project S2',
    24997: 'School Idol Project Movie',
    # Love Live: newer series
    41169: 'Superstar',
    40879: 'Nijigasaki',
    # The iDOLM@STER
    10278: 'The iDOLM@STER',
    17437: 'The iDOLM@STER Movie',
    30344: 'The iDOLM@STER CG S2',
    23587: 'The iDOLM@STER CG S1',
    # Bandori
    33573: 'Bandori S1',
    37869: 'Bandori S2',
    37870: 'Bandori S3',
    39619: 'Bandori Film Live',
}

In [3]:
# Fetch the MAL statistics of every anime in label_map and summarise each one:
# drop ratio (dropped / total) plus mean, std and count of the user scores.
jikan = Jikan()
# One summary dict per successfully fetched anime, in label_map order.
stats = []
for anime_id in label_map:
    print('Processing anime:', label_map[anime_id])
    # Bootstrap: the response is discarded.
    # NOTE(review): presumably primes the API before the stats request -- confirm it is still needed.
    jikan.anime(anime_id)
    response = jikan.anime(anime_id, extension='stats')
    if not response:
        print('\tFailed for:', label_map[anime_id])
        continue

    # Rebuild the per-vote sample from the score histogram in one step: each
    # score value is repeated once per vote it received. Building both arrays
    # up front avoids re-copying a growing sample with np.append.
    scores = response['scores']
    score_values = np.array([int(score) for score in scores], dtype=float)
    vote_counts = np.array([scores[score]['votes'] for score in scores], dtype=np.int64)
    anime_sample = np.repeat(score_values, vote_counts)

    # Without any votes the mean/std are undefined; record NaN explicitly
    # rather than relying on numpy's empty-slice warnings.
    if anime_sample.size:
        mean = np.mean(anime_sample)
        std = np.std(anime_sample)  # numpy default: population std (ddof=0)
    else:
        mean = std = float('nan')

    # Guard the ratio: a total of zero would otherwise raise ZeroDivisionError
    # and abort the whole scrape.
    dropped = int(response['dropped'])
    total = int(response['total'])
    drop_ratio = dropped / total if total else float('nan')

    data_row = {
        'id': anime_id,
        'name': label_map[anime_id],
        'drop_ratio': drop_ratio,
        'std': std,
        'mean': mean,
        'count': len(anime_sample)
    }
    stats.append(data_row)

Processing anime: Sunshine S1
Processing anime: Sunshine S2
Processing anime: Sunshine Movie
Processing anime: School Idol Project S1
Processing anime: School Idol Project S2
Processing anime: School Idol Project Movie
Processing anime: Superstar
Processing anime: Nijigasaki
Processing anime: The iDOLM@STER
Processing anime: The iDOLM@STER Movie
Processing anime: The iDOLM@STER CG S2
Processing anime: The iDOLM@STER CG S1
Processing anime: Bandori S1
Processing anime: Bandori S2
Processing anime: Bandori S3
Processing anime: Bandori Film Live


In [4]:
# Turn the per-anime summary dicts into a table keyed by MAL id.
stats_df = pd.DataFrame(data=stats).set_index(keys='id')
# Persist the table next to the notebook so it can be reused without re-scraping.
stats_df.to_csv(path_or_buf='scores.csv')
# Trailing expression: rendered as the cell's output.
stats_df

Unnamed: 0_level_0,name,drop_ratio,std,mean,count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
32526,Sunshine S1,0.039016,1.671071,7.368589,77528
34973,Sunshine S2,0.024927,1.672406,7.584746,47058
37027,Sunshine Movie,0.006827,1.682756,7.639397,14204
15051,School Idol Project S1,0.034777,1.660956,7.397653,199156
19111,School Idol Project S2,0.018094,1.545763,7.78431,130882
24997,School Idol Project Movie,0.006421,1.568111,7.951345,63734
41169,Superstar,0.014702,1.653526,8.107192,5103
40879,Nijigasaki,0.035694,1.655863,7.562497,20057
10278,The iDOLM@STER,0.061465,1.61489,7.491589,49338
17437,The iDOLM@STER Movie,0.008054,1.552832,7.623547,10150
