### General API call flow

#### Steps

1. Producer = Prepare API request
   1. Get template from folder
   2. Get ANIME seed from seed folder
   3. One media per producer task
      1. Producer loops through the pages
      2. Wait for the result to arrive
      3. Check `hasNextPage=True`
      4. Repeat
   4. Render variables
      1. perPage = 25 (max allowed)
      2. `current_page` parameterized
   5. Manager
      1. Singleton implementation
      2. Class attribute `api_call_count` keeps track of all running jobs and is updated from the `X-RATE-LIMIT-REMAINING-API` header
      3. Decide to start new producer task
      4. Requires a 60-second timer to know how long to wait when the limit is hit
2. Consumer = Response parser and storage
   1. Parse `data` key
   2. Set storage destination: JSON -> 1 JSON file per API call

In [11]:

from pathlib import Path
import aiohttp
from queue import Queue
import os
import asyncio
import json
import duckdb 
import time

In [12]:
async def make_api_call(url, query, variables):
    """POST a GraphQL query and return the decoded body with the response.

    Args:
        url: Endpoint the request is posted to.
        query: GraphQL document sent under the ``query`` key.
        variables: Mapping sent under the ``variables`` key.

    Returns:
        A ``(result, resp)`` tuple: the JSON-decoded response body and the
        response object itself. The response is returned after its context
        has exited, so callers should rely only on already-loaded
        attributes such as ``resp.status`` and ``resp.headers``.
    """
    payload = {'query': query, 'variables': variables}
    # A fresh session is opened (and closed) for every single call.
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload) as resp:
            result = await resp.json()
            return result, resp
        
async def producer(queue, ids:list[int], url, query, max_per_page):
    """Fetch every result page for each media id and enqueue the responses.

    For each id the pages are requested one after another until the
    response reports ``hasNextPage`` as falsy. Every successful response is
    put on ``queue`` as a ``(result, media_id, page)`` tuple.

    Args:
        queue: Queue (awaitable ``put``) receiving the response tuples.
        ids: Media ids to request.
        url: GraphQL endpoint passed to ``make_api_call``.
        query: GraphQL document passed to ``make_api_call``.
        max_per_page: Value sent as the ``perPage`` variable.

    Raises:
        aiohttp.ClientResponseError: The response status is 4xx/5xx.
        RuntimeError: The response status is neither 200 nor an error
            status (``raise_for_status()`` does not raise for those).
    """
    print("Producer: Started")
    remaining_request = 90

    for media_id in ids:

        has_next_page = True
        # NOTE(review): pagination starts at 0 here; confirm whether the
        # API counts its first page as 0 or 1.
        current_page = 0

        while has_next_page:

            variables = {
                'page': current_page,
                'perPage': max_per_page,
                'media_id': media_id,
            }

            # Back off before the next request once the remaining quota
            # reported by the previous response runs low.
            if remaining_request < 10:
                print("Pause to recover limit")
                await asyncio.sleep(30)

            print(f"Requesting page {current_page} for {media_id}")
            result, resp = await make_api_call(url, query, variables)

            if resp.status != 200:
                print(f"Request fails at {media_id}")
                # raise_for_status() raises on its own for 4xx/5xx and
                # returns None otherwise, so `raise resp.raise_for_status()`
                # would end up as `raise None` (a TypeError) for any other
                # non-200 status. Raise an explicit error for that case.
                resp.raise_for_status()
                raise RuntimeError(
                    f"Unexpected HTTP status {resp.status} "
                    f"for media {media_id}, page {current_page}"
                )

            await queue.put((result, media_id, current_page))

            # Keep the previous estimate when the header is missing instead
            # of crashing on int(None).
            remaining_header = resp.headers.get('x-ratelimit-remaining')
            if remaining_header is not None:
                remaining_request = int(remaining_header)
            print(f'Remaining API call per min: {remaining_request}')

            has_next_page = result.get('data').get('Media').get('characters').get('pageInfo').get('hasNextPage')

            if has_next_page:
                current_page += 1
            else:
                # The while condition ends the loop; no explicit break needed.
                print(f'{media_id} has completed all pagination')

    print('Producer: Done')

In [13]:
async def save_api_to_local(file_path:str, data: dict):
    """Serialize ``data`` as JSON into the file at ``file_path``.

    The file is opened in write mode, so existing content is replaced.
    The write itself is a regular blocking call inside this coroutine.

    Args:
        file_path: Destination path of the JSON file.
        data: JSON-serializable object to store.
    """
    with open(file_path, mode='w') as sink:
        json.dump(obj=data, fp=sink)

async def consumer(queue:asyncio.Queue, storage_path:Path):
    """Persist queued API responses, one JSON file per response.

    Runs forever: each ``(response_body, media_id, page)`` tuple taken from
    ``queue`` is written to ``storage_path / '<media_id>-<page>.json'``.
    The coroutine never returns on its own; the owner is expected to
    cancel the task once the queue has been drained.

    Args:
        queue: Queue yielding ``(response_body, media_id, page)`` tuples.
        storage_path: Directory the JSON files are written to.
    """
    print("Consumer: Started")
    while True:

        response_body, media_id, current_page = await queue.get()
        try:
            file_path = storage_path / f'{media_id}-{current_page}.json'
            print(f'Saving {media_id} at {file_path.as_posix()}')
            await save_api_to_local(file_path=str(file_path), data=response_body)
        except (OSError, TypeError, ValueError) as exc:
            # A failed write must not kill this background task: nobody
            # awaits it, so the error would go unnoticed and the remaining
            # items would never be consumed.
            print(f'Saving {media_id} failed: {exc!r}')
        else:
            print(f'Saving {media_id} completed')
        finally:
            # Always acknowledge the item, otherwise queue.join() in the
            # caller blocks forever after a failure.
            queue.task_done()

In [14]:
# API Initialization
MAX_PER_PAGE = 25  # value sent as `perPage`; the notes above give 25 as the max allowed
MAX_API_PER_MIN = 90
SEED_BATCH_SIZE = 50  # number of seed rows handled per producer/consumer round

url = 'https://graphql.anilist.co'

# Storage: all data folders live next to the current working directory's parent.
base_path = Path.cwd().parent
storage_path = base_path / 'raw' / 'entity' / 'anime-character'
seed_path = base_path / 'raw' / 'seed' / 'top-anime.csv'

# Seed list read from CSV; later cells select `media_id` from it.
seeds = duckdb.read_csv(str(seed_path))
total_rows = seeds.shape[0]

# Build the template path from components instead of a hard-coded backslash,
# which only resolves to a sub-directory on Windows. The path is relative to
# the current working directory.
template_path = Path('graphql-template') / 'get-anime-character.graphql'
template = template_path.read_text()


In [None]:
for batches in range(500, seeds.shape[0], SEED_BATCH_SIZE):

    seeds_batch = duckdb.sql(f"""
        SELECT media_id 
        FROM seeds
        LIMIT {SEED_BATCH_SIZE}
        OFFSET {batches}
    """).fetchall()
    ids = [_[0] for _ in seeds_batch]

    print(f"Batch: {batches}")

    queue = asyncio.Queue(SEED_BATCH_SIZE)
    # start the consumer
    _ = asyncio.create_task(consumer(queue, storage_path))
    # start the producer and wait for it to finish
    await asyncio.create_task(producer(queue, ids, url, template, MAX_PER_PAGE))
    # wait for all items to be processed
    await queue.join()

In [15]:
# Text before the first '-' of every saved JSON file name in storage_path.
filenames = [json_file.name.partition('-')[0] for json_file in storage_path.glob('*.json')]

In [None]:
# Number of collected entries first, then the number of distinct ones.
print(len(filenames), len(set(filenames)), sep='\n')