In [None]:
# Install geopandas package in the notebook
!pip install geopandas

Collecting geopandas
  Using cached geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)
Collecting pyogrio>=0.7.2 (from geopandas)
  Using cached pyogrio-0.11.0-cp312-cp312-win_amd64.whl.metadata (5.4 kB)
Collecting pyproj>=3.3.0 (from geopandas)
  Using cached pyproj-3.7.1-cp312-cp312-win_amd64.whl.metadata (31 kB)
Collecting shapely>=2.0.0 (from geopandas)
  Using cached shapely-2.1.1-cp312-cp312-win_amd64.whl.metadata (7.0 kB)
Using cached geopandas-1.0.1-py3-none-any.whl (323 kB)
Using cached pyogrio-0.11.0-cp312-cp312-win_amd64.whl (19.2 MB)
Using cached pyproj-3.7.1-cp312-cp312-win_amd64.whl (6.3 MB)
Using cached shapely-2.1.1-cp312-cp312-win_amd64.whl (1.7 MB)
Installing collected packages: shapely, pyproj, pyogrio, geopandas
Successfully installed geopandas-1.0.1 pyogrio-0.11.0 pyproj-3.7.1 shapely-2.1.1


In [18]:
# Import the required packages
import pandas as pd
import geopandas as gpd
from shapely import wkt
import json


In [19]:
# Load the BID table from the CSV file that sits next to this notebook.
# skipinitialspace=True discards blanks that directly follow a comma separator.
df = pd.read_csv(
    'NYC_BIDS.csv',
    sep=',',
    keep_default_na=True,
    skipinitialspace=True,
)

In [21]:
# Keep only the rows whose borough column (F_ALL_BI_1) reads "Manhattan".
borough_col = df['F_ALL_BI_1']
print("Available boroughs:", borough_col.unique())

# Upper-case the column before comparing so the match ignores capitalisation,
# then take a copy so later edits do not touch the full table.
is_manhattan = borough_col.str.upper().eq('MANHATTAN')
manhattan_bids = df.loc[is_manhattan].copy()
print(f"Manhattan BIDs found: {len(manhattan_bids)}")

Available boroughs: ['Queens' 'Brooklyn' 'Manhattan' 'Bronx' 'Staten Island']
Manhattan BIDs found: 26


In [25]:
# Columns worth keeping, as (name in the CSV, cleaner name) pairs.
# Pair order is preserved by the dict and fixes the column order of any
# selection built from its keys.
essential_columns = dict([
    ('the_geom', 'geometry'),     # geometry used for the spatial join with crime points
    ('Id', 'bid'),                # identifier used to join the final crime scores
    ('F_ALL_BI_2', 'name'),       # BID name shown in displays
    ('F_ALL_BI_1', 'boro_name'),  # borough, kept as a verification column
])

print("Essential columns defined:", essential_columns)

Essential columns defined: {'the_geom': 'geometry', 'Id': 'bid', 'F_ALL_BI_2': 'name', 'F_ALL_BI_1': 'boro_name'}


In [29]:
# Narrow the rename mapping to the source columns that this table actually contains.
available_columns = {
    source_name: essential_columns[source_name]
    for source_name in essential_columns
    if source_name in manhattan_bids.columns
}
print(f"Available columns to keep: {list(available_columns.keys())}")

# Take an independent copy restricted to those columns and give them the
# cleaner names in a single chain.
manhattan_bids_trimmed = (
    manhattan_bids[list(available_columns)]
    .copy()
    .rename(columns=available_columns)
)
print(f"Final columns after renaming: {list(manhattan_bids_trimmed.columns)}")
print(f"Dataset shape: {manhattan_bids_trimmed.shape}")

Available columns to keep: ['the_geom', 'Id', 'F_ALL_BI_2', 'F_ALL_BI_1']
Final columns after renaming: ['geometry', 'bid', 'name', 'boro_name']
Dataset shape: (26, 4)


In [None]:
# Convert the trimmed table into a GeoDataFrame and export it as GeoJSON.
# The progress messages below are the ones shown in this cell's recorded output,
# so a fresh run reproduces what the saved notebook displays.
print("Step 5: Converting to GeoDataFrame and saving...")

# wkt.loads expects a WKT string; stop early with a message that names the problem
# if any row has no geometry, rather than failing inside .apply() with a parser error.
missing_wkt = manhattan_bids_trimmed['geometry'].isna()
if missing_wkt.any():
    raise ValueError(
        f"{int(missing_wkt.sum())} row(s) have no WKT geometry and cannot be converted"
    )

# Parse WKT geometry into shapely objects, stored alongside the raw text column
manhattan_bids_trimmed['geometry_parsed'] = manhattan_bids_trimmed['geometry'].apply(wkt.loads)

# Create GeoDataFrame: the raw WKT text column is dropped and the parsed column
# becomes the active geometry.
# NOTE(review): EPSG:4326 is declared, not verified — confirm the CSV coordinates are lon/lat.
gdf = gpd.GeoDataFrame(
    manhattan_bids_trimmed.drop(columns='geometry'),
    geometry='geometry_parsed',
    crs='EPSG:4326',
)

# Save as GeoJSON
gdf.to_file("manhattan_bids.geojson", driver="GeoJSON")
print("Saved: manhattan_bids.geojson")
print(f"Ready for crime analysis with {len(gdf)} Manhattan BIDs")

Step 5: Converting to GeoDataFrame and saving...
Saved: manhattan_bids.geojson
Ready for crime analysis with 26 Manhattan BIDs
