In [6]:
pip install tqdm



In [8]:
SELECT_COLS = [
    "unique_key",
    "created_date",
    "closed_date",
    "status",
    "agency",
    "agency_name",
    "complaint_type",
    "descriptor",
    "descriptor_2",
    "borough",
    "city",
    "incident_zip",
    "street_name",
    "incident_address",
    "latitude",
    "longitude",
    "community_board",
    "council_district",
    "police_precinct",
    "resolution_description"
]

SELECT_CLAUSE = ", ".join(SELECT_COLS)

In [9]:
import requests
import pandas as pd
from tqdm import tqdm

URL = "https://data.cityofnewyork.us/api/v3/views/erm2-nwe9/query.json"

APP_TOKEN = "Z6cufGGN2BN79mZ6xsZWx6QUW"
HEADERS = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "X-App-Token": APP_TOKEN,
}

PAGE_SIZE = 50000
page_number = 1
chunks = []
total_rows = 0

pbar = tqdm(desc="Downloading NYC 311 rows", unit="rows")

while True:
    payload = {
        "query": f"SELECT {SELECT_CLAUSE}",
        "page": {"pageNumber": page_number, "pageSize": PAGE_SIZE},
        "includeSynthetic": False
    }

    r = requests.post(URL, json=payload, headers=HEADERS)
    if r.status_code != 200:
        print("Status:", r.status_code)
        print(r.text[:2000])
        r.raise_for_status()

    data = r.json()

    # handle both response shapes
    if isinstance(data, list):
        rows = data
    else:
        rows = data.get("results", [])

    if not rows:
        break

    df_chunk = pd.DataFrame(rows)
    chunks.append(df_chunk)

    print(f"Page {page_number} -> {len(df_chunk):,} rows")
    page_number += 1


df = pd.concat(chunks, ignore_index=True)
print("Final shape:", df.shape)
df.head()


Downloading NYC 311 rows: 0rows [03:29, ?rows/s]


Page 1 -> 50,000 rows
Page 2 -> 50,000 rows
Page 3 -> 50,000 rows
Page 4 -> 50,000 rows
Page 5 -> 50,000 rows
Page 6 -> 50,000 rows
Page 7 -> 50,000 rows
Page 8 -> 50,000 rows
Page 9 -> 50,000 rows
Page 10 -> 50,000 rows
Page 11 -> 50,000 rows
Page 12 -> 50,000 rows
Page 13 -> 50,000 rows
Page 14 -> 50,000 rows
Page 15 -> 50,000 rows
Page 16 -> 50,000 rows
Page 17 -> 50,000 rows
Page 18 -> 50,000 rows
Page 19 -> 50,000 rows
Page 20 -> 50,000 rows
Page 21 -> 50,000 rows
Page 22 -> 50,000 rows
Page 23 -> 50,000 rows
Page 24 -> 50,000 rows
Page 25 -> 50,000 rows
Page 26 -> 50,000 rows
Page 27 -> 50,000 rows
Page 28 -> 50,000 rows
Page 29 -> 50,000 rows
Page 30 -> 50,000 rows
Page 31 -> 50,000 rows
Page 32 -> 50,000 rows
Page 33 -> 50,000 rows
Page 34 -> 50,000 rows
Page 35 -> 50,000 rows
Page 36 -> 50,000 rows
Page 37 -> 50,000 rows
Page 38 -> 50,000 rows
Page 39 -> 50,000 rows
Page 40 -> 50,000 rows
Page 41 -> 50,000 rows
Page 42 -> 50,000 rows
Page 43 -> 50,000 rows
Page 44 -> 50,000 ro

KeyboardInterrupt: 

In [10]:
df = pd.concat(chunks, ignore_index=True)
print("Final shape:", df.shape)
df.head()

Final shape: (3850000, 20)


Unnamed: 0,unique_key,created_date,status,agency,agency_name,complaint_type,descriptor,borough,city,incident_zip,street_name,incident_address,latitude,longitude,community_board,council_district,police_precinct,descriptor_2,resolution_description,closed_date
0,67513805,2026-01-17T02:05:59.000,In Progress,NYPD,New York City Police Department,Illegal Parking,Blocked Hydrant,BROOKLYN,BROOKLYN,11209,78 STREET,511 78 STREET,40.626933545572655,-74.02302720428978,10 BROOKLYN,47,Precinct 68,,,
1,67512397,2026-01-17T02:05:27.000,In Progress,NYPD,New York City Police Department,Noise - Commercial,Loud Music/Party,BRONX,BRONX,10458,BRIGGS AVENUE,2645 BRIGGS AVENUE,40.86535764249122,-73.892495473103,07 BRONX,15,Precinct 52,,,
2,67509505,2026-01-17T02:05:26.000,In Progress,NYPD,New York City Police Department,Noise - Street/Sidewalk,Loud Talking,MANHATTAN,NEW YORK,10002,ELDRIDGE STREET,107 ELDRIDGE STREET,40.71807035293164,-73.992020365249,03 MANHATTAN,1,Precinct 5,,,
3,67515109,2026-01-17T02:05:13.000,In Progress,NYPD,New York City Police Department,Blocked Driveway,No Access,QUEENS,CORONA,11368,109 STREET,36-12 109 STREET,40.75460458314396,-73.85879988501665,03 QUEENS,21,Precinct 115,,,
4,67516547,2026-01-17T02:05:11.000,In Progress,NYPD,New York City Police Department,Noise - Commercial,Loud Talking,MANHATTAN,NEW YORK,10003,EAST 7 STREET,79 EAST 7 STREET,40.72729771351166,-73.98648830466516,03 MANHATTAN,2,Precinct 9,,,


In [11]:
df.to_csv("nyc_311_clean.csv", index=False)