In [None]:
# Install the third-party packages this notebook relies on into the kernel's
# environment. Versions are not pinned, so a re-run may pick up newer releases.
# NOTE(review): aiolimiter is installed here but is not imported by any of the
# cells visible in this notebook — confirm whether it is still needed.
%pip install tqdm httpx aiolimiter

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
import time
import json
from tqdm.notebook import tqdm
import asyncio

from pathlib import Path
import httpx

In [None]:
# Default query-string arguments for the leaderboard API. `main` hands this
# mapping to httpx.AsyncClient(params=...), so it accompanies every request
# made through that client.
# NOTE(review): division=2 presumably selects the women's division (the output
# files are labelled 'female') — confirm against the API.
params = dict(view=0, division=2, region=0, scaled=0, sort=0)


In [None]:
async def get_results(client, url, page, year) -> list:
  """Fetch one leaderboard page and tag each of its rows with the season year.

  Args:
    client: async HTTP client exposing ``await client.get(url, params=...)``
      (an ``httpx.AsyncClient`` in this notebook).
    url: leaderboard endpoint to query.
    page: page number, sent as the ``page`` query parameter.
    year: value stored under the ``'year'`` key of every returned row.

  Returns:
    A list with one dict per entry of the response's ``leaderboardRows``:
    the original row plus a ``'year'`` key. If a row already carries its own
    ``'year'`` field, the row's value wins over the argument.

  Raises:
    httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
    KeyError: if the JSON payload has no ``leaderboardRows`` key.
  """
  response = await client.get(url, params={'page': page})
  # Fail loudly on an error status instead of trying to parse an error body,
  # which would otherwise surface as an unrelated JSON/KeyError failure.
  response.raise_for_status()
  payload = response.json()
  return [{'year': year, **row} for row in payload['leaderboardRows']]

async def get_all_pages(client: httpx.AsyncClient, url, year):
  """Download every page of one leaderboard and return all rows in one list.

  The first page is requested up front to learn how many pages exist; its
  rows are kept, and the remaining pages are then fetched concurrently.

  Args:
    client: shared async HTTP client used for every request.
    url: leaderboard endpoint to query.
    year: value stored under the ``'year'`` key of every returned row.

  Returns:
    A list of row dicts (each with an added ``'year'`` key) covering pages
    1 through ``totalPages`` inclusive. Rows of page 1 come first; the other
    pages follow in the order their downloads complete, not in page order.

  Raises:
    httpx.HTTPStatusError: if the first-page request returns a 4xx/5xx status.
    KeyError: if the payload lacks ``pagination``/``totalPages`` or
      ``leaderboardRows``.
  """
  response = await client.get(url, params={'page': 1})
  response.raise_for_status()
  data = response.json()
  total_pages = data['pagination']['totalPages']

  # Page 1 is already in hand: reuse its rows instead of requesting it again.
  all_results = [{'year': year, **row} for row in data['leaderboardRows']]

  # Pages are numbered from 1, so the last one is `total_pages` itself; the
  # range's upper bound must therefore be total_pages + 1 to include it.
  todo = [get_results(client, url, p, year) for p in range(2, total_pages + 1)]
  # as_completed is a plain iterator without a length, so pass `total`
  # explicitly to let tqdm show real progress.
  for task in tqdm(asyncio.as_completed(todo), total=len(todo)):
    all_results += await task
  return all_results

def save_results(all_results, year, gender='female', data_dir='/content/drive/MyDrive/data'):
  """Split leaderboard rows into three tables and write each one as parquet.

  Args:
    all_results: list of row dicts; every row must provide ``'year'``,
      ``'entrant'`` (a dict containing ``'competitorId'``),
      ``'overallRank'``, ``'overallScore'`` and ``'scores'`` (a list of
      dicts, one per event).
    year: season label used in the output file names.
    gender: label used in the output file names. Defaults to ``'female'``,
      the value that was previously hard-coded.
    data_dir: directory that receives the files. Defaults to the Google Drive
      folder that was previously hard-coded. It is not created here, so it
      must already exist.

  Writes ``{year}-competitors-{gender}.parquet``,
  ``{year}-overall-scores-{gender}.parquet`` and
  ``{year}-event-scores-{gender}.parquet`` into ``data_dir``.

  Returns:
    None.
  """
  # One record per athlete: the entrant's own fields plus the season.
  competitors = [{'year': row['year'], **row['entrant']} for row in all_results]
  # One record per athlete with the overall placement.
  overall_scores = [{
      'competitorId': row['entrant']['competitorId'],
      'overallRank': row['overallRank'],
      'overallScore': row['overallScore'],
      'year': row['year']
  } for row in all_results]
  # One record per (athlete, event): the per-event score dict plus identifiers.
  event_scores = [
      {'competitorId': row['entrant']['competitorId'], 'year': row['year'], **scores}
      for row in all_results
      for scores in row['scores']
  ]

  tables = {
      'competitors': pd.DataFrame(competitors),
      'overall-scores': pd.DataFrame(overall_scores),
      'event-scores': pd.DataFrame(event_scores),
  }
  out_dir = Path(data_dir)
  for file_name, frame in tables.items():
    frame.to_parquet(out_dir / f'{year}-{file_name}-{gender}.parquet')

async def main(year):
  """Scrape one season's leaderboard, save it to disk and return the rows.

  Opens a dedicated httpx.AsyncClient (carrying the module-level `params` as
  default query arguments and a 600-second timeout), downloads the pages via
  `get_all_pages`, closes the client, then passes the rows to `save_results`.

  Args:
    year: season to download; interpolated into the leaderboard URL.

  Returns:
    The list of row dicts that was written to disk.
  """
  leaderboard_url = f'https://c3po.crossfit.com/api/competitions/v2/competitions/open/{year}/leaderboards'
  async with httpx.AsyncClient(params=params, timeout=600) as client:
    rows = list(await get_all_pages(client, leaderboard_url, year))

  # The client is closed at this point; saving only needs the collected rows.
  save_results(rows, year)
  return rows


In [None]:
# Schedule one background scrape per season (2020, 2021 and 2022). The tasks
# are created but not awaited here, so this cell returns immediately.
# asyncio.create_task needs an already-running event loop.
todo = [asyncio.create_task(main(season)) for season in (2020, 2021, 2022)]

In [None]:
# Fraction of the scheduled scrapes that have finished (1.0 means all of them).
# Task.done() is also True for a task that raised or was cancelled, so 1.0
# alone does not prove that every season was saved successfully.
sum(1 for task in todo if task.done()) / len(todo)

1.0