In [12]:
from jikanpy import Jikan
import json 
import pandas as pd 
import numpy as np
from time import sleep

In [13]:
# Client object whose season()/season_archive() methods are called in the cells below.
jikan = Jikan()

In [20]:
# Fetch the season archive (presumably the list of available years/seasons — TODO confirm).
# NOTE(review): `archive` is not referenced again in the visible cells.
archive = jikan.season_archive()

In [95]:
# Example of a request for a specific season of a specific year.
# NOTE(review): the variable name says "winter", but the request is for season='fall'.
winter_2018_anime = jikan.season(year=2018, season='fall')

In [98]:
# Tag every entry of the example response with the year/season it was requested for.
for anime in winter_2018_anime['anime']:
    anime.update({'year': "2018", 'season': 'fall'})

In [14]:
# get anime for a single year
# Seasons requested for each year, in calendar order.
seasons = ['winter', 'spring', 'summer', 'fall']
def get_anime_year(year, client=None):
    """Collect the anime listed for every season of a single year.

    Each returned record is the dict found under the 'anime' key of the
    season response, annotated in place with two extra keys:
    'air_year' (the requested year) and 'air_season' (the requested season).

    Parameters
    ----------
    year : int
        Year to request.
    client : object, optional
        Object exposing ``season(year=..., season=...)`` that returns a
        mapping with an 'anime' list. Defaults to the notebook-level
        ``jikan`` client.

    Returns
    -------
    list of dict
        Records of all seasons that could be fetched, in season order.
        A season whose request or annotation fails is reported and skipped.
    """
    if client is None:
        client = jikan
    anime_year = []
    for season in seasons:
        # get all the anime for that season
        try:
            anime_season = client.season(year=year, season=season)['anime']
            for anime in anime_season:
                anime['air_year'] = year
                anime['air_season'] = season
            anime_year.extend(anime_season)
        except Exception as error:
            # Best effort: skip the failing season, but say so instead of
            # hiding it. Catching Exception (not a bare except) lets
            # KeyboardInterrupt/SystemExit stop a long scrape.
            print(f"Skipping {season} {year}: {error!r}")
            continue
    return anime_year

In [26]:
# Accumulator for every anime record.
# NOTE(review): the original comment said "since 1920", but the loop in the next cell starts at 1917.
all_anime = []

In [27]:
# Walk the years 1917 through 2021 (the range end is exclusive).
for year in range(1917, 2022):
    anime_year = get_anime_year(year)
    # Pause 5 seconds after each year's requests before storing the result.
    sleep(5)
    all_anime += anime_year

In [28]:
# Total number of records collected across all years.
len(all_anime)

22474

In [29]:
# Save the collected records to disk as JSON.
with open('mal_scrape.json', 'w') as outfile:
    outfile.write(json.dumps(all_anime))

In [30]:
# Read the saved records back from the JSON file.
with open('mal_scrape.json') as json_file:
    data = json.loads(json_file.read())

# Data formatting

In [31]:
# Build a DataFrame from the records loaded from mal_scrape.json.
df = pd.DataFrame.from_dict(data)

In [32]:
# List the columns available in the loaded frame.
df.columns

Index(['mal_id', 'url', 'title', 'image_url', 'synopsis', 'type',
       'airing_start', 'episodes', 'members', 'genres', 'source', 'producers',
       'score', 'licensors', 'r18', 'kids', 'continuing', 'air_year',
       'air_season'],
      dtype='object')

In [33]:
# Reduce each genre entry (a dict) to its 'name', giving a plain list of genre names per anime.
df['genres_clean'] = df['genres'].map(
    lambda entries: [entry['name'] for entry in entries]
)
# Same reduction for the producers column.
df['producers_clean'] = df['producers'].map(
    lambda entries: [entry['name'] for entry in entries]
)
# Discard rows that have no start date (only about 100 of them, per the original note).
df = df.dropna(subset=['airing_start'])

In [34]:
# Keep only the text before the "T" of each start timestamp (the date part).
date_strings = df['airing_start'].map(lambda stamp: stamp.split("T")[0])
df['airing_start_clean'] = date_strings
# Parse those date strings into datetime values.
df['airing_start_clean'] = pd.to_datetime(df['airing_start_clean'])
# Calendar year taken from the parsed start date.
df['year'] = df['airing_start_clean'].dt.year

In [35]:
# Preview the first rows of the formatted frame.
df.head()

Unnamed: 0,mal_id,url,title,image_url,synopsis,type,airing_start,episodes,members,genres,...,licensors,r18,kids,continuing,air_year,air_season,genres_clean,producers_clean,airing_start_clean,year
1,6654,https://myanimelist.net/anime/6654/Namakura_Ga...,Namakura Gatana,https://cdn.myanimelist.net/images/anime/4/152...,"Namakura Gatana, meaning ""dull-edged sword,"" i...",Movie,1917-06-29T15:00:00+00:00,1.0,5505,"[{'mal_id': 4, 'type': 'anime', 'name': 'Comed...",...,[],False,False,False,1917,spring,"[Comedy, Samurai]",[],1917-06-29,1917
2,10742,https://myanimelist.net/anime/10742/Saru_to_Ka...,Saru to Kani no Gassen,https://cdn.myanimelist.net/images/anime/4/837...,A monkey tricks a crab and steals his food. Mo...,Movie,1917-05-19T15:00:00+00:00,1.0,604,"[{'mal_id': 8, 'type': 'anime', 'name': 'Drama...",...,[],False,False,False,1917,spring,[Drama],[],1917-05-19,1917
6,23187,https://myanimelist.net/anime/23187/Chamebou_S...,Chamebou Shin Gachou: Nomi Fuufu Shikaeshi no ...,https://cdn.myanimelist.net/images/qm_50.gif,(No synopsis yet.),Movie,1917-04-27T15:00:00+00:00,1.0,276,"[{'mal_id': 4, 'type': 'anime', 'name': 'Comed...",...,[],False,False,False,1917,spring,[Comedy],[],1917-04-27,1917
7,18457,https://myanimelist.net/anime/18457/Hanasaka_J...,Hanasaka Jijii,https://cdn.myanimelist.net/images/anime/5/494...,Short movie produced by the first generation a...,Movie,1917-08-25T15:00:00+00:00,1.0,760,"[{'mal_id': 4, 'type': 'anime', 'name': 'Comed...",...,[],False,False,False,1917,summer,"[Comedy, Drama]",[],1917-08-25,1917
8,23183,https://myanimelist.net/anime/23183/Itazura_Post,Itazura Post,https://cdn.myanimelist.net/images/qm_50.gif,(No synopsis yet.),Movie,1917-07-27T15:00:00+00:00,1.0,282,"[{'mal_id': 4, 'type': 'anime', 'name': 'Comed...",...,[],False,False,False,1917,summer,[Comedy],[],1917-07-27,1917


In [36]:
# Export the formatted table to CSV.
# NOTE(review): `index` is left at its pandas default, so the (non-reset) row index is written as the first column.
df.to_csv("mal_scrape.csv")