In [4]:
# Switch the working directory so the relative data file path used by the
# fetch cell resolves inside /tf/notebooks.
%cd /tf/notebooks/

/tf/notebooks


In [13]:
import requests
import json
import time

# AniList GraphQL endpoint and the headers sent with every request.
url = 'https://graphql.anilist.co'
headers = {'Content-Type': 'application/json'}

# Query template for one page (10 entries) of anime together with their main
# characters (full name and medium-sized image URL). The single `%d`
# placeholder is filled with the page number via `query % page`.
#
# startDate_greater takes a FuzzyDateInt, an 8-digit YYYYMMDD integer
# (e.g. 2016 -> 20160000), not a bare year. The previous value `2015` was
# smaller than any real date and therefore filtered out almost nothing;
# 20150000 restricts the result to anime that started in 2015 or later.
# NOTE(review): use 20151231 instead if "strictly after 2015" was intended.
# A data file collected with the old filter value is not limited to that
# range and its page numbering no longer matches; delete it to re-collect.
query = '''
{
  Page(page: %d, perPage: 10) {
    pageInfo {
      total
      perPage
      currentPage
      hasNextPage
    }
    media(type: ANIME, startDate_greater: 20150000) {
      title {
        romaji
        english
      }
      startDate {
        year
        month
        day
      }
      characters(role: MAIN) {
        edges {
          node {
            name {
              full
            }
            image {
              medium
            }
          }
        }
      }
    }
  }
}
'''

data_file = 'anime_data_and_progress.json'  # File to store data and progress

# Resume from the progress stored in the data file when it exists; otherwise
# create the file with an empty data set and start at the first page.
try:
    with open(data_file, 'r') as file:
        data = json.load(file)
    # currentPage is the last page that was fetched and saved, so continue
    # with the one after it. A file without any stored entries has not
    # completed a page yet (older files were initialised with currentPage = 1,
    # which would skip page 1), so start from page 1 in that case.
    page = data['progress']['currentPage'] + 1 if data['animeData'] else 1
    print(f"Resuming from page {page}")
except FileNotFoundError:
    print("No data file found. Starting from the beginning.")
    # currentPage is 0 because no page has been fetched yet.
    data = {'animeData': [], 'progress': {'total': 0, 'currentPage': 0}}
    # Use a context manager so the handle is flushed and closed immediately.
    with open(data_file, 'w') as file:
        json.dump(data, file)
    page = 1

# Fetch pages one after another until the API returns an empty page. The
# accumulated data and the last completed page are written to disk after
# every successful fetch so an interrupted run can resume where it stopped.
MIN_REQUEST_INTERVAL = 60 / 90   # seconds; ensures no more than 90 requests per minute
REQUEST_TIMEOUT = 30             # seconds; without it a stalled connection hangs forever
MAX_RATE_LIMIT_RETRIES = 5       # consecutive HTTP 429 responses tolerated per page
DEFAULT_RETRY_AFTER = 60         # seconds to wait when a 429 carries no usable Retry-After

rate_limit_retries = 0

while True:
    start_time = time.time()
    payload = {'query': query % page}
    try:
        response = requests.post(url, headers=headers, json=payload,
                                 timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as exc:
        # Connection errors and timeouts: report and stop; progress up to the
        # previous page is already on disk.
        print(f'Failed to fetch data: request for page {page} raised {exc!r}')
        break

    if response.status_code == 429 and rate_limit_retries < MAX_RATE_LIMIT_RETRIES:
        # Rate limited: wait as long as the server asks, then retry the same page.
        rate_limit_retries += 1
        retry_after = response.headers.get('Retry-After', '')
        wait_seconds = int(retry_after) if retry_after.isdigit() else DEFAULT_RETRY_AFTER
        print(f'Rate limited on page {page}; retrying in {wait_seconds} s')
        time.sleep(wait_seconds)
        continue

    if response.status_code != 200:
        print(f'Failed to fetch data: HTTP {response.status_code} on page {page}')
        break

    rate_limit_retries = 0

    try:
        result = response.json()
    except ValueError:
        print(f'Failed to fetch data: page {page} returned a non-JSON body')
        break

    # A GraphQL response can carry "data": null together with an "errors"
    # list; guard against that instead of failing on a None subscript.
    page_result = (result.get('data') or {}).get('Page')
    if page_result is None:
        print(f"Failed to fetch data: no data for page {page}: {result.get('errors')}")
        break

    if not page_result['media']:
        break  # No more data available, exit the loop

    data['animeData'].extend(page_result['media'])
    data['progress']['currentPage'] = page
    data['progress']['total'] = page_result['pageInfo']['total']
    page += 1

    # Save data and progress after each page fetch
    with open(data_file, 'w') as file:
        json.dump(data, file)

    # Sleep for whatever remains of the minimum interval between requests.
    elapsed_time = time.time() - start_time
    if elapsed_time < MIN_REQUEST_INTERVAL:
        time.sleep(MIN_REQUEST_INTERVAL - elapsed_time)

print('Data and progress saved to anime_data_and_progress.json')


Resuming from page 369
Failed to fetch data
Data and progress saved to anime_data_and_progress.json
