### Importing Libraries

In [1]:
import requests
import pandas as pd
import time
import os
import json

### Collect data from myanimelist.net API

#### Test with one anime first to see what features are available

In [2]:
# query for anime
response_query = requests.get("https://api.myanimelist.net/v2/anime?q=one&limit=4", 
                        headers={'X-MAL-CLIENT-ID': '34e749b42846abadb1be2d886253fb7d'})
print("Response:", response_query.status_code)

Response: 200


In [3]:
response_query.json()

{'data': [{'node': {'id': 21,
    'title': 'One Piece',
    'main_picture': {'medium': 'https://cdn.myanimelist.net/images/anime/1244/138851.webp',
     'large': 'https://cdn.myanimelist.net/images/anime/1244/138851l.webp'}}},
  {'node': {'id': 459,
    'title': 'One Piece Movie 01',
    'main_picture': {'medium': 'https://cdn.myanimelist.net/images/anime/1770/97704.jpg',
     'large': 'https://cdn.myanimelist.net/images/anime/1770/97704l.jpg'}}},
  {'node': {'id': 12859,
    'title': 'One Piece Film: Z',
    'main_picture': {'medium': 'https://cdn.myanimelist.net/images/anime/6/44297.jpg',
     'large': 'https://cdn.myanimelist.net/images/anime/6/44297l.jpg'}}},
  {'node': {'id': 31772,
    'title': 'One Punch Man Specials',
    'main_picture': {'medium': 'https://cdn.myanimelist.net/images/anime/1452/97840.jpg',
     'large': 'https://cdn.myanimelist.net/images/anime/1452/97840l.jpg'}}}],
 'paging': {'next': 'https://api.myanimelist.net/v2/anime?offset=4&q=one&limit=4'}}

In [4]:
# extract anime id and title, did use the anime picture
anime = response_query.json()['data'][0]['node']
anime_id = anime['id']
anime

{'id': 21,
 'title': 'One Piece',
 'main_picture': {'medium': 'https://cdn.myanimelist.net/images/anime/1244/138851.webp',
  'large': 'https://cdn.myanimelist.net/images/anime/1244/138851l.webp'}}

In [5]:
# anime details
response_details = requests.get(f'https://api.myanimelist.net/v2/anime/30230?fields=id,title,main_picture,alternative_titles,start_date,end_date,synopsis,mean,rank,popularity,num_list_users,num_scoring_users,nsfw,created_at,updated_at,media_type,status,genres,my_list_status,num_episodes,start_season,broadcast,source,average_episode_duration,rating,pictures,background,related_anime,related_manga,recommendations,studios,statistics',
                        headers={'X-MAL-CLIENT-ID': '34e749b42846abadb1be2d886253fb7d'})
print("Response:", response_details.status_code)

Response: 200


In [6]:
print("Features Avaiable:")
print("-----")
for feature in response_details.json().keys():
    print(feature)

Features Avaiable:
-----
id
title
main_picture
alternative_titles
start_date
end_date
synopsis
mean
rank
popularity
num_list_users
num_scoring_users
nsfw
created_at
updated_at
media_type
status
genres
num_episodes
start_season
broadcast
source
average_episode_duration
rating
pictures
background
related_anime
related_manga
recommendations
studios
statistics


**From the above list of Features, we will be selecting the following features**:

- id
- title
- start_date
- end_date
- synopsis
- mean
- rank
- popularity
- num_list_users
- num_scoring_users
- nsfw
- created_at
- updated_at
- media_type
- status
- genres
- num_episodes
- start_season
- broadcast
- source
- average_episode_duration
- rating
- background
- studios
- statistics

### Get top ranked animes in various categories

In [7]:
ranking_categories = [
    'all',
    'airing',
    'tv',
    'ova',
    'movie',
    'special',
    'bypopularity',
    'favorite'
]

In [8]:
def create_ranking_csv(ranking_type):
    # Create the directory if it doesn't exist
    os.makedirs('dataset/rankings', exist_ok=True)
    try:
        # getting the top 500 for each category ranking
        response_ranking = requests.get(f'https://api.myanimelist.net/v2/anime/ranking?ranking_type={ranking_type}&limit=500',
                                       headers={'X-MAL-CLIENT-ID': '34e749b42846abadb1be2d886253fb7d'})
        
        # Print full response for debugging
        print(f'Response status ({ranking_type}): {response_ranking.status_code}')
        print('Response content:', response_ranking.text)
        
        # Check if response is successful
        response_ranking.raise_for_status()
        
        # Try parsing JSON with error handling
        try:
            ranking_data = response_ranking.json()
        except json.JSONDecodeError as e:
            print(f"JSON Decode Error: {e}")
            print("Response content:", response_ranking.text)
            return

        # querying anime details for each anime in the ranking list
        anime_ranking_list = []
        rank = 0

        for anime in ranking_data['data']:
            rank += 1
            anime_id = anime['node']['id']

            try:
                # query for anime details
                response_details = requests.get(
                    f'https://api.myanimelist.net/v2/anime/{anime_id}?fields=id,title,start_date,end_date,synopsis,mean,rank,popularity,num_list_users,num_scoring_users,nsfw,created_at,updated_at,media_type,status,genres,my_list_status,num_episodes,start_season,broadcast,source,average_episode_duration,rating,background,studios,statistics',
                    headers={'X-MAL-CLIENT-ID': '34e749b42846abadb1be2d886253fb7d'}
                )
                
                # Check if details response is successful
                response_details.raise_for_status()
                
                anime_details = response_details.json()
                anime_details[f'{ranking_type}_ranking'] = rank    # adding {ranking_type}_ranking feature
                anime_ranking_list.append(anime_details)
                
            except requests.RequestException as e:
                print(f"Error fetching details for anime {anime_id}: {e}")
                continue

        # convert into a dataframe and store as csv file
        if anime_ranking_list:
            df = pd.DataFrame(anime_ranking_list)
            df.to_csv(f'dataset/rankings/ranking_{ranking_type}.csv', index=False)
            print('csv created!')
        else:
            print(f"No anime details collected for {ranking_type}")

    except requests.RequestException as e:
        print(f"Request Error for {ranking_type}: {e}")

In [9]:
for ranking_type in ranking_categories:
    create_ranking_csv(ranking_type)
    
    # add delay for fetching the next api
    time.sleep(200)

Response status (all): 200
Response content: {"data":[{"node":{"id":52991,"title":"Sousou no Frieren","main_picture":{"medium":"https:\/\/cdn.myanimelist.net\/images\/anime\/1015\/138006.webp","large":"https:\/\/cdn.myanimelist.net\/images\/anime\/1015\/138006l.webp"}},"ranking":{"rank":1}},{"node":{"id":5114,"title":"Fullmetal Alchemist: Brotherhood","main_picture":{"medium":"https:\/\/cdn.myanimelist.net\/images\/anime\/1208\/94745.jpg","large":"https:\/\/cdn.myanimelist.net\/images\/anime\/1208\/94745l.jpg"}},"ranking":{"rank":2}},{"node":{"id":60022,"title":"One Piece Fan Letter","main_picture":{"medium":"https:\/\/cdn.myanimelist.net\/images\/anime\/1455\/146229.jpg","large":"https:\/\/cdn.myanimelist.net\/images\/anime\/1455\/146229l.jpg"}},"ranking":{"rank":3}},{"node":{"id":9253,"title":"Steins;Gate","main_picture":{"medium":"https:\/\/cdn.myanimelist.net\/images\/anime\/1935\/127974.jpg","large":"https:\/\/cdn.myanimelist.net\/images\/anime\/1935\/127974l.jpg"}},"ranking":{"ra

### Get animes from 2000-2024

In [10]:
response = requests.get('https://api.myanimelist.net/v2/anime/season/2017/summer?limit=4',
                            headers={'X-MAL-CLIENT-ID': '34e749b42846abadb1be2d886253fb7d'})

In [11]:
response.json()['data'][0]

{'node': {'id': 35247,
  'title': 'Owarimonogatari 2nd Season',
  'main_picture': {'medium': 'https://cdn.myanimelist.net/images/anime/6/87322.webp',
   'large': 'https://cdn.myanimelist.net/images/anime/6/87322l.webp'}}}

In [12]:
'''
Season name     Months
winter:         January, February, March
spring:         April, May, June
summer          July, August, September
fall            October, November, December
'''
seasons = [
    "winter",
    "spring",
    "summer",
    "fall"
]

In [13]:
def get_anime_season(year, season, anime_list):
    try:
        # Fetch animes from a particular season of a particular year
        response = requests.get(
            f'https://api.myanimelist.net/v2/anime/season/{year}/{season}?limit=100',
            headers={'X-MAL-CLIENT-ID': '34e749b42846abadb1be2d886253fb7d'}
        )
        
        # Check if the request was successful
        response.raise_for_status()
        
        # Parse JSON response
        try:
            season_data = response.json()
        except json.JSONDecodeError as e:
            print(f"JSON Decode Error for {year}/{season}: {e}")
            print("Response content:", response.text)
            return anime_list

        # Iterate through anime in the season
        for anime in season_data.get('data', []):
            try:
                anime_id = anime['node']['id']

                # Query for detailed anime information
                response_details = requests.get(
                    f'https://api.myanimelist.net/v2/anime/{anime_id}?fields=id,title,start_date,end_date,synopsis,mean,rank,popularity,num_list_users,num_scoring_users,nsfw,created_at,updated_at,media_type,status,genres,my_list_status,num_episodes,start_season,broadcast,source,average_episode_duration,rating,background,studios,statistics',
                    headers={'X-MAL-CLIENT-ID': '34e749b42846abadb1be2d886253fb7d'}
                )
                
                # Check if details request was successful
                response_details.raise_for_status()
                
                # Parse anime details
                anime_details = response_details.json()
                
                # Add season and year information
                anime_details['season'] = season
                anime_details['year'] = year
                
                # Add to list
                anime_list.append(anime_details)
                
            except requests.RequestException as detail_err:
                print(f"Error fetching details for anime {anime_id} in {year}/{season}: {detail_err}")
                continue

        print(f'Successfully processed {year}/{season}')
        
    except requests.RequestException as e:
        print(f"Request Error for {year}/{season}: {e}")
    
    return anime_list

In [14]:
anime_list = []

In [15]:
for year in range(2000, 2024):
    for season in seasons:
        # fetch api for {year} & {season}
        anime_list = get_anime_season(year, season, anime_list)

        # add delay for fetching the next api
        time.sleep(60)

Successfully processed 2000/winter
Error fetching details for anime 189 in 2000/spring: 504 Server Error: Gateway Timeout for url: https://api.myanimelist.net/v2/anime/189?fields=id,title,start_date,end_date,synopsis,mean,rank,popularity,num_list_users,num_scoring_users,nsfw,created_at,updated_at,media_type,status,genres,my_list_status,num_episodes,start_season,broadcast,source,average_episode_duration,rating,background,studios,statistics
Error fetching details for anime 2617 in 2000/spring: 504 Server Error: Gateway Timeout for url: https://api.myanimelist.net/v2/anime/2617?fields=id,title,start_date,end_date,synopsis,mean,rank,popularity,num_list_users,num_scoring_users,nsfw,created_at,updated_at,media_type,status,genres,my_list_status,num_episodes,start_season,broadcast,source,average_episode_duration,rating,background,studios,statistics
Successfully processed 2000/spring
Successfully processed 2000/summer
Error fetching details for anime 4459 in 2000/fall: 504 Server Error: Gateway

In [16]:
anime_df = pd.DataFrame(anime_list)
anime_df.to_csv('dataset/anime.csv', index=False)

In [17]:
print(f"Total anime collected: {len(anime_df)}")
print(anime_df.head())

Total anime collected: 7819
     id                                             title  \
0    21                                         One Piece   
1  3665                Ginga Eiyuu Densetsu Gaiden (1999)   
2  3545  Kochira Katsushikaku Kameari Kouenmae Hashutsujo   
3   136                                   Hunter x Hunter   
4   245                             Great Teacher Onizuka   

                                        main_picture  start_date  \
0  {'medium': 'https://cdn.myanimelist.net/images...  1999-10-20   
1  {'medium': 'https://cdn.myanimelist.net/images...  1999-12-24   
2  {'medium': 'https://cdn.myanimelist.net/images...  1996-06-16   
3  {'medium': 'https://cdn.myanimelist.net/images...  1999-10-16   
4  {'medium': 'https://cdn.myanimelist.net/images...  1999-06-30   

                                            synopsis  mean    rank  \
0  Barely surviving in a barrel after passing thr...  8.72    54.0   
1  Ginga Eiyuu Densetsu Gaiden (1999) is the seco...  8.

In [18]:
pd.read_csv('dataset/anime.csv').nunique()

id                          5246
title                       5245
main_picture                5348
start_date                  3136
synopsis                    5097
mean                         400
rank                        4417
popularity                  5204
num_list_users              4587
num_scoring_users           3905
nsfw                           1
created_at                  5246
updated_at                  5243
media_type                     9
status                         2
genres                      2186
num_episodes                 152
start_season                 115
broadcast                    401
source                        16
average_episode_duration    1347
rating                         5
background                   939
studios                      664
statistics                  5267
season                         4
year                          24
end_date                    3165
dtype: int64

In [19]:
pd.read_csv('dataset/anime.csv').head()

Unnamed: 0,id,title,main_picture,start_date,synopsis,mean,rank,popularity,num_list_users,num_scoring_users,...,broadcast,source,average_episode_duration,rating,background,studios,statistics,season,year,end_date
0,21,One Piece,{'medium': 'https://cdn.myanimelist.net/images...,1999-10-20,Barely surviving in a barrel after passing thr...,8.72,54.0,17,2463199,1398717,...,"{'day_of_the_week': 'sunday', 'start_time': '0...",manga,1440,pg_13,"The anime had a hiatus from October 13, 2024 t...","[{'id': 18, 'name': 'Toei Animation'}]","{'status': {'watching': '1746533', 'completed'...",winter,2000,
1,3665,Ginga Eiyuu Densetsu Gaiden (1999),{'medium': 'https://cdn.myanimelist.net/images...,1999-12-24,Ginga Eiyuu Densetsu Gaiden (1999) is the seco...,8.06,569.0,4962,21077,7777,...,,novel,1560,r,,"[{'id': 1269, 'name': 'K-Factory'}]","{'status': {'watching': '983', 'completed': '9...",winter,2000,2000-07-21
2,3545,Kochira Katsushikaku Kameari Kouenmae Hashutsujo,{'medium': 'https://cdn.myanimelist.net/images...,1996-06-16,"Ryoutsu, being an underpaid policeman, is alwa...",7.78,1074.0,5063,20010,7396,...,"{'day_of_the_week': 'sunday', 'start_time': '1...",manga,1500,pg_13,,"[{'id': 36, 'name': 'Gallop'}]","{'status': {'watching': '1591', 'completed': '...",winter,2000,2004-12-19
3,136,Hunter x Hunter,{'medium': 'https://cdn.myanimelist.net/images...,1999-10-16,Hunters are specialized in a wide variety of f...,8.42,188.0,373,610441,297113,...,"{'day_of_the_week': 'saturday', 'start_time': ...",manga,1380,pg_13,,"[{'id': 22, 'name': 'Nippon Animation'}]","{'status': {'watching': '44154', 'completed': ...",winter,2000,2001-03-31
4,245,Great Teacher Onizuka,{'medium': 'https://cdn.myanimelist.net/images...,1999-06-30,Twenty-two-year-old Eikichi Onizuka—ex-biker g...,8.68,68.0,218,857316,421043,...,"{'day_of_the_week': 'wednesday', 'start_time':...",manga,1550,r,"First episode has a duration of 48 minutes, st...","[{'id': 1, 'name': 'Pierrot'}]","{'status': {'watching': '71426', 'completed': ...",winter,2000,2000-09-17


In [20]:
df = pd.read_csv('dataset/rankings/ranking_airing.csv')
df.head()

Unnamed: 0,id,title,main_picture,start_date,synopsis,mean,rank,popularity,num_list_users,num_scoring_users,...,start_season,broadcast,source,average_episode_duration,rating,background,studios,statistics,airing_ranking,end_date
0,56784,Bleach: Sennen Kessen-hen - Soukoku-tan,{'medium': 'https://cdn.myanimelist.net/images...,2024-10-05,After an awe-inspiring battle with Ichibei Hyo...,8.98,14.0,1437,172693,36381,...,"{'year': 2024, 'season': 'fall'}","{'day_of_the_week': 'saturday', 'start_time': ...",manga,1478,r,,"[{'id': 2951, 'name': 'Pierrot Films'}]","{'status': {'watching': '104509', 'completed':...",1,
1,54857,Re:Zero kara Hajimeru Isekai Seikatsu 3rd Season,{'medium': 'https://cdn.myanimelist.net/images...,2024-10-02,"One year after the events at the Sanctuary, Su...",8.78,36.0,803,316202,48683,...,"{'year': 2024, 'season': 'fall'}","{'day_of_the_week': 'wednesday', 'start_time':...",light_novel,1420,r,Re:Zero kara Hajimeru Isekai Seikatsu 3rd Seas...,"[{'id': 314, 'name': 'White Fox'}]","{'status': {'watching': '150997', 'completed':...",2,
2,21,One Piece,{'medium': 'https://cdn.myanimelist.net/images...,1999-10-20,Barely surviving in a barrel after passing thr...,8.73,54.0,17,2463166,1398702,...,"{'year': 1999, 'season': 'fall'}","{'day_of_the_week': 'sunday', 'start_time': '0...",manga,1440,pg_13,"The anime had a hiatus from October 13, 2024 t...","[{'id': 18, 'name': 'Toei Animation'}]","{'status': {'watching': '1746523', 'completed'...",3,
3,57334,Dandadan,{'medium': 'https://cdn.myanimelist.net/images...,2024-10-04,"Reeling from her recent breakup, Momo Ayase, a...",8.7,64.0,497,474981,120385,...,"{'year': 2024, 'season': 'fall'}","{'day_of_the_week': 'friday', 'start_time': '0...",manga,1437,r,Dandadan aired on MBS and TBS' Super Animeism ...,"[{'id': 1591, 'name': 'Science SARU'}]","{'status': {'watching': '358412', 'completed':...",4,2024-12-20
4,55823,Natsume Yuujinchou Shichi,{'medium': 'https://cdn.myanimelist.net/images...,2024-10-08,"As a young child, Takashi Natsume believed his...",8.67,70.0,3831,37211,3565,...,"{'year': 2024, 'season': 'fall'}","{'day_of_the_week': 'tuesday', 'start_time': '...",manga,1429,pg_13,,"[{'id': 1119, 'name': 'Shuka'}]","{'status': {'watching': '12633', 'completed': ...",5,2024-12-24
