# Data Analysis
The purpose of this notebook is to query MyAnimeList's (MAL) API to gather anime data. The data will then be saved to a `.csv` file for use on the DSC106 Final Project website.<br>
By: Jonathan Lo<br>
Date: 5/2/23

## Imports and Setup

In [1]:
# Imports
import requests as r
import json

from time import sleep
from tqdm import tqdm
from datetime import timedelta

In [2]:
# Load the API credentials from the local secrets file
with open("secrets.json", 'r') as secrets_file:
    secrets = json.loads(secrets_file.read())

# Client id sent to the MAL API with every request
mal_client_id = secrets['MAL-CID']

## Requests

In [3]:
# Base URL of the MAL v2 anime endpoint; the numeric anime id is appended per request.
endpoint = "https://api.myanimelist.net/v2/anime/"
# Client-id header sent with every request; the value is read from secrets.json.
headers = {"X-MAL-CLIENT-ID": mal_client_id}
# Detail fields requested for every anime.
fields = [
    "start_date",
    "end_date",
    "mean",
    "rank",
    "popularity",
    "num_list_users",
    "num_scoring_users",
    "media_type",
    "status",
    "genres",
    "start_season",
    "source",
    "rating",
    "recommendations",
    "studios"
]
# The field list is sent as one comma-separated string. Joining the names
# directly leaves every name untouched, whereas stripping characters out of
# str(fields) would mangle any name containing a space, quote or bracket.
payload = {"fields": ",".join(fields)}

def _fetch_anime(session, anime_id):
    """Request a single anime by id through ``session``.

    Returns a ``(status_code, record)`` pair. ``record`` is the decoded JSON
    body on HTTP 200 and ``None`` otherwise. ``status_code`` is ``None`` when
    the request raised a ``requests`` exception (for example a dropped
    connection), so callers can treat it like any other retryable failure.
    """
    try:
        res = session.get(
            endpoint + str(anime_id),
            params=payload,
            headers=headers
        )
        if res.status_code == 200:
            return 200, res.json()
    except r.RequestException:
        return None, None
    return res.status_code, None


def queryMALRange(top_range, timeout=0.75):
    """ Queries MyAnimeList's database for a certain range.

    Requests every anime id in ``range(1, top_range)`` (``top_range`` itself
    is excluded) and collects the JSON body of each successful response.

    Args:
        top_range: Exclusive upper bound of the anime ids to request.
        timeout: Seconds to sleep after each request that was not a 404,
            used to throttle the crawl.

    Returns:
        A list of decoded JSON records, first-pass successes in id order
        followed by the records recovered during the retry pass.

    Ids answering 404 are skipped (without the throttle sleep). Any other
    failure, including a network exception, pauses the crawl for 20 seconds
    and queues the id for a single retry after the main pass. Ids that still
    fail on retry are reported on stdout instead of being dropped silently.
    """
    # Metadata
    # 0.125 s is the per-request overhead baked into the original estimate
    # (0.875 s per id at the default 0.75 s sleep) -- presumably the average
    # round-trip time; the estimate now follows the timeout argument.
    request_overhead = 0.125
    print(f"The expected time to pull all data is: {timedelta(seconds=top_range * (timeout + request_overhead))}")

    # Init
    error_timeout = 20
    anime_records = []
    requery_ids = []
    failed_ids = []

    # One session for the whole crawl so connections are reused and closed.
    with r.Session() as session:
        # First Loop
        for anime_id in tqdm(range(1, top_range)):
            status, record = _fetch_anime(session, anime_id)
            if status == 200:
                anime_records.append(record)
            elif status == 404:
                continue
            else:
                requery_ids.append(anime_id)
                sleep(error_timeout)
            sleep(timeout)

        # Requery if any traffic errors
        for anime_id in tqdm(requery_ids):
            status, record = _fetch_anime(session, anime_id)
            if status == 200:
                anime_records.append(record)
            elif status != 404:
                failed_ids.append(anime_id)
            sleep(timeout)

    if failed_ids:
        print(f"Gave up on {len(failed_ids)} ids after one retry: {failed_ids}")

    return anime_records

In [4]:
# Mass request
# The full crawl takes roughly 12 hours, so reuse the JSON dump on disk when
# one exists and only hit the API when nothing has been saved yet. Delete
# "myanimelist-data.json" to force a fresh crawl.
try:
    with open("myanimelist-data.json", "r") as fh:
        data = json.load(fh)
except FileNotFoundError:
    data = queryMALRange(50000)

The expected time to pull all data is: 12:09:10


100%|█████████████████████████████████████████████████████████████████████████| 49999/49999 [11:13:26<00:00,  1.24it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 768/768 [11:56<00:00,  1.07it/s]


In [5]:
# Persist the collected records to disk as JSON
with open("myanimelist-data.json", "w") as out_file:
    json.dump(obj=data, fp=out_file)

In [6]:
# Preview only the first few records; displaying the whole list would embed
# every crawled record in the notebook output.
data[:3]

[{'id': 1,
  'title': 'Cowboy Bebop',
  'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/4/19644.jpg',
   'large': 'https://api-cdn.myanimelist.net/images/anime/4/19644l.jpg'},
  'start_date': '1998-04-03',
  'end_date': '1999-04-24',
  'mean': 8.75,
  'rank': 40,
  'popularity': 43,
  'num_list_users': 1757312,
  'num_scoring_users': 906456,
  'media_type': 'tv',
  'status': 'finished_airing',
  'genres': [{'id': 1, 'name': 'Action'},
   {'id': 50, 'name': 'Adult Cast'},
   {'id': 46, 'name': 'Award Winning'},
   {'id': 24, 'name': 'Sci-Fi'},
   {'id': 29, 'name': 'Space'}],
  'start_season': {'year': 1998, 'season': 'spring'},
  'source': 'original',
  'rating': 'r',
  'recommendations': [{'node': {'id': 205,
     'title': 'Samurai Champloo',
     'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1375/121599.jpg',
      'large': 'https://api-cdn.myanimelist.net/images/anime/1375/121599l.jpg'}},
    'num_recommendations': 119},
   {'node'