In [5]:
from pathlib import Path

# Project folders, written relative to the notebook's working directory.
RAW = Path("../data/raw")
CLEAN = Path("../data/clean")
OUT = Path("../outputs")

# Create any folder that is missing; existing folders are left untouched.
for p in (RAW, CLEAN, OUT):
    p.mkdir(parents=True, exist_ok=True)

# Both raw extracts must already sit in data/raw before the later cells run.
for required_name in ("311_noise.csv", "sla_active.csv"):
    assert (RAW / required_name).exists(), f"Missing {required_name} in data/raw"
print("Found raw files.")


Found raw files.


In [4]:
import re, numpy as np

# Three-character ZIP prefixes that is_target_zip() accepts as in-scope.
TARGET_PREFIXES = ("103", "112")  # 103xx = Staten Island, 112xx = Brooklyn

def fix_zip(val):
    """Extract a 5-digit ZIP code from a raw cell value.

    The value is passed through ``str()`` first, so numeric cells such as
    ``11212.0`` and ZIP+4 strings such as ``"11212-1234"`` are handled.

    Parameters
    ----------
    val : any
        Raw ZIP value (string, int, float, NaN, None, ...).

    Returns
    -------
    str or float
        The first standalone 5-digit ZIP (or the 5-digit part of a ZIP+4,
        hyphenated or not) as a string; ``np.nan`` when none is found.
    """
    s = str(val)
    # Digit boundaries on both sides stop the pattern from slicing five digits
    # out of a longer number (e.g. a 7-digit ID or a mistyped 6-digit ZIP).
    # The optional group still accepts a 4-digit ZIP+4 suffix.
    m = re.search(r"(?<!\d)(\d{5})(?:-?\d{4})?(?!\d)", s)
    return m.group(1) if m else np.nan

def is_target_zip(z):
    """Return True when ``z`` is an in-scope ZIP.

    ``z`` is converted with ``str()``; it qualifies when the text is exactly
    five characters long and its first three characters appear in
    ``TARGET_PREFIXES``.
    """
    code = str(z)
    if len(code) != 5:
        return False
    return code[:3] in TARGET_PREFIXES

def borough_from_zip(z):
    """Map a ZIP value to a borough label using its first three characters.

    ``z`` is converted with ``str()``. A value starting with ``103`` maps to
    "Staten Island", one starting with ``112`` maps to "Brooklyn", and
    anything else maps to "Other".
    """
    borough_by_prefix = {
        "103": "Staten Island",
        "112": "Brooklyn",
    }
    return borough_by_prefix.get(str(z)[:3], "Other")


In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests


In [8]:
# Read the raw 311 export; noise_raw stays untouched for reference.
noise_raw = pd.read_csv(RAW / "311_noise.csv", low_memory=False)

# Working copy with normalized headers: trimmed, lower-cased, spaces -> underscores.
noise = noise_raw.rename(columns=lambda name: name.strip().lower().replace(" ", "_"))

# Header names this cell recognizes (extend these tuples if your export differs).
zip_names = ("incident_zip", "zip", "incident_address_zip", "zipcode")
type_names = ("complaint_type", "complaint")

zip_col_candidates = [col for col in noise.columns if col in zip_names]
type_col_candidates = [col for col in noise.columns if col in type_names]
# Any header mentioning both "created" and "date", e.g. created_date.
date_col_candidates = [col for col in noise.columns if "created" in col and "date" in col]

# A ZIP column is mandatory; the complaint-type and date columns are optional.
if len(zip_col_candidates) == 0:
    print("311 headers I see (first 30):", list(noise.columns)[:30])
    raise ValueError("Couldn't find a ZIP column in 311 CSV. Add the correct name to zip_col_candidates.")

zip_col = zip_col_candidates[0]
type_col = next(iter(type_col_candidates), None)
date_col = next(iter(date_col_candidates), None)

# Safety filter: keep rows whose complaint type starts with "Noise"
# (rows with a missing type are dropped by na=False).
if type_col is not None:
    noise_is_noise = noise[type_col].str.startswith("Noise", na=False)
    noise = noise[noise_is_noise].copy()

# Normalize ZIPs, then keep only rows accepted by is_target_zip (BK/SI prefixes).
noise["zip"] = noise[zip_col].apply(fix_zip)
noise_in_target = noise["zip"].apply(is_target_zip)
noise = noise[noise_in_target].copy()

# Parsed timestamp and hour-of-day (for the heatmap); unparseable dates become NaT.
if date_col is not None:
    noise_created = pd.to_datetime(noise[date_col], errors="coerce")
    noise["created_dt"] = noise_created
    noise["hour"] = noise_created.dt.hour

print("Rows after filtering to BK + SI Noise:", len(noise))
noise.head()


Rows after filtering to BK + SI Noise: 217982


Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,location_type,incident_zip,incident_address,...,bridge_highway_name,bridge_highway_direction,road_ramp,bridge_highway_segment,latitude,longitude,location,zip,created_dt,hour
1,66465967,10/14/2025 01:51:14 AM,,NYPD,New York City Police Department,Noise - Residential,Loud Music/Party,Residential Building/House,11212.0,181 ROCKAWAY PARKWAY,...,,,,,40.662317,-73.921442,"(40.66231674694474, -73.92144177201318)",11212,2025-10-14 01:51:14,1
4,66471764,10/14/2025 01:50:02 AM,,NYPD,New York City Police Department,Noise - Residential,Banging/Pounding,Residential Building/House,11214.0,1869 83 STREET,...,,,,,40.608101,-73.999384,"(40.60810108364675, -73.99938414563323)",11214,2025-10-14 01:50:02,1
11,66467429,10/14/2025 01:46:20 AM,,NYPD,New York City Police Department,Noise - Residential,Banging/Pounding,Residential Building/House,11226.0,101 WOODRUFF AVENUE,...,,,,,40.653708,-73.96271,"(40.65370793721925, -73.96270993293973)",11226,2025-10-14 01:46:20,1
13,66464509,10/14/2025 01:45:52 AM,,NYPD,New York City Police Department,Noise - Residential,Banging/Pounding,Residential Building/House,11212.0,270 MOTHER GASTON BOULEVARD,...,,,,,40.67277,-73.907673,"(40.672769616675254, -73.9076728513286)",11212,2025-10-14 01:45:52,1
29,66470371,10/14/2025 01:37:34 AM,,NYPD,New York City Police Department,Noise - Residential,Loud Talking,Residential Building/House,10309.0,168 MACGREGOR STREET,...,,,,,40.523635,-74.199475,"(40.52363478309076, -74.19947548745674)",10309,2025-10-14 01:37:34,1


In [10]:
# === Cell 4: Load & clean SLA (BK + SI) ===
# Read the raw SLA export; sla_raw stays untouched for reference.
sla_raw = pd.read_csv(RAW / "sla_active.csv", low_memory=False)

# Working copy with normalized headers: trimmed, lower-cased, spaces -> underscores.
sla = sla_raw.rename(columns=lambda name: name.strip().lower().replace(" ", "_"))

# 1) Locate the ZIP column among the header names this cell recognizes
#    (includes 'zip_code').
sla_zip_names = (
    "premise_zip", "premises_zip", "zip", "zipcode", "zip_code", "premises_zip_code"
)
sla_zip_candidates = [col for col in sla.columns if col in sla_zip_names]
if len(sla_zip_candidates) == 0:
    print("SLA headers I see (first 30):", list(sla.columns)[:30])
    raise ValueError("Couldn't find a ZIP column in SLA CSV. Add the correct name to sla_zip_candidates.")

zip_col_sla = sla_zip_candidates[0]

# 2) Normalize ZIPs to 5-digit strings, even when the CSV stored them as numbers.
sla["zip"] = sla[zip_col_sla].apply(fix_zip)

# 3) Extra safety: when a county column exists, keep only Kings/Richmond rows
#    (compared case-insensitively).
if "premises_county" in sla.columns:
    sla_county_upper = sla["premises_county"].str.upper()
    sla = sla[sla_county_upper.isin(["KINGS", "RICHMOND"])]

# 4) Keep only Staten Island (103xx) and Brooklyn (112xx) ZIPs.
sla_in_target = sla["zip"].apply(is_target_zip)
sla = sla[sla_in_target].copy()

print("ZIP column used:", zip_col_sla)
print("Active licenses in BK + SI:", len(sla))
sla.head()


ZIP column used: zip_code
Active licenses in BK + SI: 134


Unnamed: 0,license_permit_id,premises_county,type,class,description,legalname,dba,actual_address_of_premises,additional_address_information,city,...,zip_code,original_issue_date,last_issue_date,effective_date,expiration_date,parent_license_id,legacy_serial_number,aka_address,georeference,zip
0,0340-22-106991,Richmond,1,340,Restaurant,MCMADD INC,GRIFF'S PLACE,702 NEW DORP LANE,NEW DROP LANE & HETT AVENUE,STATEN ISLAND,...,10306,04/01/2025,04/01/2025,09/01/2024,08/31/2026,,1226590,,POINT (-74.10047 40.56586),10306
1,0240-23-142229,Richmond,1,240,Restaurant,AVANTI PIZZA 1 INC,AVANTI PIZZA,170 NEW DORP LANE,,STATEN ISLAND,...,10306,04/30/2025,04/30/2025,05/01/2025,04/30/2027,,1334685,,POINT (-74.11514 40.57335),10306
2,0340-21-116837,Richmond,1,340,Restaurant,SAKAI RAMEN INC,SAKAI RAMEN & IZAKAYA,"965 RICHMOND AVE, STORE F",,STATEN ISLAND,...,10314,02/04/2025,02/04/2025,02/01/2024,01/31/2026,,1320779,,POINT (-74.14787 40.62349),10314
3,0240-23-139773,Richmond,1,240,Restaurant,SANDWICH AND PICKLE LLC,,1949 RICHMOND AVE,STORE #3,STATEN ISLAND,...,10314,07/30/2025,07/30/2025,08/01/2025,07/31/2027,,1353750,,POINT (-74.16259 40.6004),10314
4,0240-23-142528,Richmond,1,240,Restaurant,AVOCADO SUSHI I INC,,4906 ARTHUR KILL RD,STE B,STATEN ISLAND,...,10309,06/18/2025,06/18/2025,06/18/2025,01/31/2027,,1347142,,POINT (-74.23879 40.52253),10309
