In [1]:
import os
import time
import requests
import pandas as pd

In [2]:
# Root project directoty (where this notebook lives)
PROJECT_ROOT = os.getcwd()

DATA_RAW_DIR = os.path.join(PROJECT_ROOT, "data_raw")
DATA_CLEAN_DIR = os.path.join(PROJECT_ROOT, "data_clean")
DATA_FINAL_DIR = os.path.join(PROJECT_ROOT, "data_final")

# Create directories if they don't exit
os.makedirs(DATA_RAW_DIR, exist_ok=True)
os.makedirs(DATA_CLEAN_DIR, exist_ok=True)
os.makedirs(DATA_FINAL_DIR, exist_ok=True)

PROJECT_ROOT, DATA_RAW_DIR

('c:\\Users\\Christopher\\Documents\\Python Projects\\New_York_City_Eviction',
 'c:\\Users\\Christopher\\Documents\\Python Projects\\New_York_City_Eviction\\data_raw')

In [3]:
BASE_URL = "https://data.cityofnewyork.us/resource/6z8x-wfk4.json"

# SODA2 pageination settings
LIMIT = 50000 # max rows per request (SODA2 usally caps at 50,00)
SLEEP_SECONDS = 0.25  # pause between calls to be polite / avoid throttling

In [4]:
def fetch_all_evictions(base_url: str, limit: int = 50000, sleep_seconds: float = 0.25):
    all_rows = []
    offset = 0
    batch_num =1

    while True:
        params = {
            "$limit": limit,
            "$offset": offset
        }

        print(f"Requesting batch {batch_num} | offset={offset} ...", end=" ")

        response = requests.get(base_url, params=params)
        response.raise_for_status()
        batch = response.json()

        batch_size = len(batch)
        print(f"received {batch_size} rows")

        if batch_size == 0:
            # No more data
            break

        all_rows.extend(batch)

        # Prepare next loop
        offset += limit
        batch_num += 1

        # Be gentle on the API
        time.sleep(sleep_seconds)

    print(f"\nDone. Total rows fetched: {len(all_rows)}")
    return all_rows

In [5]:
all_rows = fetch_all_evictions(BASE_URL, limit=LIMIT, sleep_seconds=SLEEP_SECONDS)

# Convert to DataFrame
evictions_raw = pd.DataFrame(all_rows)
print("DataFrame shape:", evictions_raw.shape)

# Save snapshot
raw_csv_path = os.path.join(DATA_RAW_DIR, "evictions_raw.csv")
evictions_raw.to_csv(raw_csv_path, index=False)

raw_csv_path

Requesting batch 1 | offset=0 ... received 50000 rows
Requesting batch 2 | offset=50000 ... received 50000 rows
Requesting batch 3 | offset=100000 ... received 20084 rows
Requesting batch 4 | offset=150000 ... received 0 rows

Done. Total rows fetched: 120084
DataFrame shape: (120084, 20)


'c:\\Users\\Christopher\\Documents\\Python Projects\\New_York_City_Eviction\\data_raw\\evictions_raw.csv'

In [6]:
evictions_raw.head(20)

Unnamed: 0,court_index_number,docket_number,eviction_address,eviction_apt_num,executed_date,marshal_first_name,marshal_last_name,residential_commercial_ind,borough,eviction_zip,ejectment,eviction_possession,latitude,longitude,community_board,council_district,census_tract,bin,bbl,nta
0,B051541/15,360641,120 WEST 183 STREET,15,2017-01-05T00:00:00.000,Richard,McCoy,Residential,BRONX,10453,Not an Ejectment,Possession,40.859892,-73.91015,7.0,14.0,255.0,2014902.0,2032230034.0,Kingsbridge Heights
1,2141/19,351881,1716 NEREID AVENUE,2,2019-04-26T00:00:00.000,Thomas,Bia,Residential,BRONX,10466,Not an Ejectment,Possession,40.897898,-73.852183,12.0,11.0,434.0,2069945.0,2050480062.0,Woodlawn-Wakefield
2,309099/23,17684,728 E. 136TH STREET 2ND FLOOR,,2023-12-15T00:00:00.000,Robert,Renzulli,Commercial,BRONX,10454,Not an Ejectment,Possession,40.803276,-73.913123,1.0,17.0,19.0,2003768.0,2025640047.0,Mott Haven-Port Morris
3,11149/16,5212,1490 BRYANT AVE 3RD FL RIGHT SIDE,,2017-11-22T00:00:00.000,Robert,Renzulli,Residential,BRONX,10460,Ejectment,Possession,,,,,,,,
4,7463/19,489650,767 EAST 229TH STREE T,*,2019-06-13T00:00:00.000,Danny,Weinheim,Residential,BRONX,10466,Not an Ejectment,Possession,40.890068,-73.857116,12.0,12.0,422.0,2063589.0,2048430008.0,Williamsbridge-Olinville
5,315671/23,18777,224 WEST 35TH STREET SUITE 1405,,2024-01-03T00:00:00.000,Robert,Renzulli,Commercial,MANHATTAN,10001,Not an Ejectment,Possession,40.752034,-73.991053,5.0,3.0,109.0,1014414.0,1007840060.0,Midtown-Midtown South
6,K58735/19,102435,79 MILFORD STREET,3,2022-06-01T00:00:00.000,Ileana,Rivera,Residential,BROOKLYN,11208,Not an Ejectment,Possession,40.677955,-73.877926,5.0,37.0,1192.0,3093108.0,3041530005.0,East New York
7,092011/16,68058,2025 NOSTRAND AVENUE UNIT: GROUND FLOOR STORE ...,,2017-05-02T00:00:00.000,Henry,Daley,Commercial,BROOKLYN,11210,Not an Ejectment,Possession,,,,,,,,
8,079536/16,109572,89-17 AUBREY AVE,,2017-08-01T00:00:00.000,Maxine,Chevlowe,Residential,QUEENS,11385,Not an Ejectment,Possession,40.711121,-73.862445,5.0,30.0,637.0,4094204.0,4038510145.0,Glendale
9,318666/23,10447,330 EAST 22ND STREET,4A,2025-08-01T00:00:00.000,David,Smith,Residential,BROOKLYN,11226,Not an Ejectment,Possession,40.643188,-73.956482,14.0,40.0,792.0,3118875.0,3051650030.0,Erasmus


In [7]:
evictions_raw.dtypes

court_index_number            object
docket_number                 object
eviction_address              object
eviction_apt_num              object
executed_date                 object
marshal_first_name            object
marshal_last_name             object
residential_commercial_ind    object
borough                       object
eviction_zip                  object
ejectment                     object
eviction_possession           object
latitude                      object
longitude                     object
community_board               object
council_district              object
census_tract                  object
bin                           object
bbl                           object
nta                           object
dtype: object