In [None]:
# https://data.cityofchicago.org/Transportation/CTA-Ridership-Daily-Boarding-Totals/6iiy-9s97/about_data

import requests
import pandas as pd
import json
import time
import os
from datetime import datetime, timedelta

# --- Remote source -------------------------------------------------------
# Socrata JSON resource queried by fetch_data().
BASE_URL = "https://data.cityofchicago.org/resource/6iiy-9s97.json"

# Page size placed in the LIMIT clause of each SoQL query.
LIMIT = 1_000_000
# Not referenced by the code visible in this cell; kept for other cells.
WINDOW_SIZE = 5

# --- Local output --------------------------------------------------------
# Directory (relative to the working directory) that receives the raw CSV.
SAVE_DIR = "../../data/raw data"
os.makedirs(SAVE_DIR, exist_ok=True)  # create it up front; no-op if present

# Module-level accumulator for the downloaded records.
DATA = []

def fetch_data():
    """Download all rows from BASE_URL, paging with LIMIT/OFFSET.

    Pages of up to LIMIT rows are requested through a SoQL ``$query`` until
    the API returns a page shorter than LIMIT (including an empty one).

    Returns:
        list: The decoded JSON records collected. If a request fails
        (network error, timeout, or a non-200 status) the error is printed
        and whatever was fetched before the failure is returned, so the
        result may be partial or empty.
    """
    offset = 0
    all_records = []

    while True:
        # Paging is only reliable with an explicit ordering: without
        # ORDER BY the row order may differ between requests, which can
        # duplicate or skip rows across pages. `:id` is Socrata's internal
        # row identifier.
        query = f"SELECT * ORDER BY :id LIMIT {LIMIT} OFFSET {offset}"
        params = {"$query": query}  # Using SODA 2.1 SQL query

        try:
            # A timeout keeps a stalled connection from hanging forever.
            response = requests.get(BASE_URL, params=params, timeout=60)
        except requests.RequestException as exc:
            # Treated like an HTTP error: report and keep what we have.
            print(f"❌ Error: {exc}")
            break

        if response.status_code != 200:
            print(f"❌ Error: {response.status_code} - {response.text}")
            break

        records = response.json()

        if records:
            all_records.extend(records)
            print(f"✅ Fetched {len(records)} records, total: {len(all_records)} records.")

        # A short (or empty) page means the dataset is exhausted, so the
        # extra round-trip that would only return [] is skipped.
        if len(records) < LIMIT:
            print("✅ No more data available. Fetching completed.")
            break

        offset += LIMIT  # Advance to the next page
        time.sleep(1)  # Prevent rate limiting

    return all_records

# Download the dataset and append it to the module-level accumulator.
daily_data = fetch_data()
if daily_data:
    DATA.extend(daily_data)

# **Convert and save as CSV**
df = pd.DataFrame(DATA)
csv_file = os.path.join(SAVE_DIR, "chicago_public_transportation.csv")
if df.empty:
    # fetch_data() returns an empty list when the download fails; writing
    # that out would replace any previously saved CSV with an empty file
    # while still reporting success.
    print(f"⚠️ No records fetched; {csv_file} was not written.")
else:
    df.to_csv(csv_file, index=False)
    print(f"📁 Data saved to {csv_file}")

✅ Fetched 8828 records, total: 8828 records.
✅ No more data available. Fetching completed.
📁 Data saved to ../../data/raw data/chicago_bus_2020.csv
