In [None]:
import pandas as pd
import plotly.express as px
import requests

In [9]:
# get data
def load_all_nyc_data(base_url, batch_size=2000, timeout=30, raise_on_error=False):
    """Download all rows of a Socrata-style JSON resource into a DataFrame.

    The endpoint is queried page by page with ``$limit`` / ``$offset`` until
    it returns an empty page. Progress is printed after every page.

    Parameters
    ----------
    base_url : str
        URL of the JSON resource to page through.
    batch_size : int, default 2000
        Number of rows requested per page (sent as ``$limit``).
    timeout : float or tuple, default 30
        Passed to ``requests.get`` so a stalled connection cannot hang the
        notebook forever.
    raise_on_error : bool, default False
        When False (the historical behaviour) a request failure is printed
        and the rows collected so far are returned, so the result may be
        PARTIAL. When True the exception is re-raised after being printed.

    Returns
    -------
    pandas.DataFrame
        One row per downloaded record; empty if nothing was downloaded.

    Raises
    ------
    requests.exceptions.RequestException
        Only when ``raise_on_error`` is True and a request fails.
    """
    all_records = []
    offset = 0

    print(f"Start downloading: {base_url}")

    while True:
        # An explicit $order keeps row order stable between requests; without
        # it, offset-based paging may repeat or skip rows.
        params = {"$limit": batch_size, "$offset": offset, "$order": ":id"}

        try:
            response = requests.get(base_url, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"A request error occurred: {e}")
            if raise_on_error:
                raise
            # Best effort: stop paging and return what was collected so far.
            break

        # An empty page means the previous page was the last one.
        if not data:
            break

        all_records.extend(data)

        print(
            f"Acquired: {len(data)} lines (Total: {len(all_records)} lines) - Offset: {offset}"
        )

        # Advance by the rows actually received, not the rows requested, so a
        # server that returns a shorter page than asked for skips nothing.
        offset += len(data)

    print(f"Finished! Total {len(all_records)} lines.")
    return pd.DataFrame(all_records)


# JSON resource on the NYC Open Data portal that this notebook analyses.
url = "https://data.cityofnewyork.us/resource/7479-ugqb.json"
# Page through the whole resource with the default batch size; the result
# is the frame the later cells work on.
df = load_all_nyc_data(base_url=url)

Start downloading: https://data.cityofnewyork.us/resource/7479-ugqb.json
Acquired: 2000 lines (Total: 2000 lines) - Offset: 0
Acquired: 2000 lines (Total: 4000 lines) - Offset: 2000
Acquired: 2000 lines (Total: 6000 lines) - Offset: 4000
Acquired: 794 lines (Total: 6794 lines) - Offset: 6000
Finished! Total 6794 lines.


In [10]:
# Inspect: preview the first rows, then summarise the columns.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare; wrapping it in print() would append a stray "None" line.
df.info()

   inmateid              admitted_dt custody_level bradh race gender age  \
0  10011390  2025-12-27T20:16:05.000           MED     Y    B      M  44   
1  20218749  2025-09-22T12:47:00.000           MED     Y    O      F  42   
2  20231403  2024-05-16T16:55:00.000           MED     Y    B      M  24   
3  20160043  2025-08-29T10:58:28.000           MIN     N    O      M  26   
4  10010169  2024-11-16T12:47:39.000           MAX     Y    B      M  34   

  inmate_status_code sealed srg_flg top_charge infraction  
0                 CS      N       N     155.25          N  
1                DEP      N       N     155.30          Y  
2                 DE      N       Y     125.25          Y  
3                 DE      N       N     140.25          N  
4                 DE      N       N     220.16          Y  
<class 'pandas.DataFrame'>
RangeIndex: 6794 entries, 0 to 6793
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype
---  ------              -------------- 

In [None]:
# Replace missing values in the two plotted columns with an explicit
# "Unknown" label (only for columns that are actually present).
for column in ("race", "custody_level"):
    if column in df.columns:
        df[column] = df[column].fillna("Unknown")

# Stacked histogram: one bar per race, split by custody level, with the
# title centred horizontally.
fig = px.histogram(
    data_frame=df,
    x="race",
    color="custody_level",
    barmode="stack",
    text_auto=True,
    title="NYC Inmates: Race vs Custody Level Distribution",
    labels={"race": "Race", "custody_level": "Custody Level"},
).update_layout(title_x=0.5)

fig.show()