In [2]:
# Import required packages
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from sklearn.preprocessing import MinMaxScaler

# Read the pre-cleaned NYPD complaint records from disk.
complaints = pd.read_csv("nypd_complaints_cleaned.csv")

# Keep only the listed columns and discard any record that is missing a
# latitude or longitude, since such a record cannot be placed on the map.
# NOTE: no borough filter is applied here; restriction to the tract file's
# coverage happens later through the inner spatial join.
complaint_columns = ["Date", "Crime_Type", "Severity", "Latitude", "Longitude", "severity_weight"]
manhattan_complaints = complaints.loc[:, complaint_columns].dropna(
    subset=["Latitude", "Longitude"]
)

# Turn each (Longitude, Latitude) pair into a point geometry (x = longitude,
# y = latitude) and wrap the table as a GeoDataFrame tagged as EPSG:4326.
complaint_points = gpd.points_from_xy(
    manhattan_complaints["Longitude"],
    manhattan_complaints["Latitude"],
)
gdf = gpd.GeoDataFrame(manhattan_complaints, geometry=complaint_points, crs="EPSG:4326")

# Read the census tract polygons and reproject them to EPSG:4326 so they share
# a coordinate reference system with the complaint points.
# The file must contain every GEOID that should appear in the final output.
tracts = gpd.read_file("../census tract geofiles/manhattan_census_tracts.geojson")
tracts = tracts.to_crs(epsg=4326)

# Attach a GEOID to every complaint point that lies within a tract polygon.
# The inner join drops points that fall within no tract.
tract_shapes = tracts[["GEOID", "geometry"]]
joined = gpd.sjoin(gdf, tract_shapes, how="inner", predicate="within")

# Total the severity weights of the complaints matched to each tract.
severity_per_tract = joined.groupby("GEOID")["severity_weight"]
crime_by_tract = severity_per_tract.sum().reset_index(name="weighted_crime")

# Start from the full tract list so that tracts with no matched complaints take
# part in the scaling with a weighted crime of 0. Fitting the scaler only on
# tracts that had complaints would give the lowest-crime tract a score of 10,
# making it indistinguishable from a tract with no complaints at all.
all_tracts = tracts[["GEOID"]].drop_duplicates()
final_scores = all_tracts.merge(
    crime_by_tract[["GEOID", "weighted_crime"]], on="GEOID", how="left"
)
final_scores["weighted_crime"] = final_scores["weighted_crime"].fillna(0)

# Normalize weighted crimes to a 1–10 scale, then invert it so that
# 10 = safest (lowest weighted crime) and 1 = least safe (highest).
scaler = MinMaxScaler(feature_range=(1, 10))
# fit_transform returns an (n, 1) array; flatten it to assign as one column.
scaled = scaler.fit_transform(final_scores[["weighted_crime"]]).ravel()
final_scores["crime_score"] = (11 - scaled).round(1)

# Mirror the score onto the per-tract aggregate so crime_by_tract still carries
# a "crime_score" column for any code that reads it from there.
crime_by_tract["crime_score"] = crime_by_tract["GEOID"].map(
    final_scores.set_index("GEOID")["crime_score"]
)

# The exported table holds exactly two columns: GEOID and crime_score.
final_scores = final_scores[["GEOID", "crime_score"]]

# Write the per-tract scores to disk without the DataFrame index, then report
# the (rows, columns) shape of what was written.
final_scores.to_csv(
    "crime_scores_by_tract.csv",
    index=False,
)
saved_shape = final_scores.shape
print("✅ Saved: crime_scores_by_tract.csv with shape:", saved_shape)


✅ Saved: crime_scores_by_tract.csv with shape: (310, 2)
