## Relevant POI data

Dataset from Geofabrik (http://download.geofabrik.de/north-america/us.html)

In [87]:
#import necessary packages
import pandas as pd

# Import package numpy for numeric computing
import numpy as np

# Import package matplotlib for visualisation/plotting
import matplotlib.pyplot as plt

import geopandas as gpd

from shapely.geometry import Point


In [88]:
# Lift pandas' display truncation: None means "no limit" for rows and columns.
# NOTE(review): with ~119k POI rows, always preview via .head()/.sample() --
# a bare frame as the last expression would render every row.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [89]:
# OSM points-of-interest layer (point geometries), read from the local shapefile
POI_SHAPEFILE = 'poi_shapefiles/gis_osm_pois_free_1.shp'
poi_data = gpd.read_file(POI_SHAPEFILE)

In [90]:
# Preview the first five POI records
poi_data.head(n=5)

Unnamed: 0,osm_id,code,fclass,name,geometry
0,42105926,2907,camera_surveillance,,POINT (-73.72159 43.05355)
1,42503788,2907,camera_surveillance,,POINT (-73.89108 40.65626)
2,42503792,2907,camera_surveillance,,POINT (-73.88998 40.65478)
3,42538083,2902,bench,,POINT (-73.9707 40.67335)
4,43058007,2204,park,Kibler Park,POINT (-78.67233 43.16559)


In [91]:
# (row count, column count) of the POI layer
poi_data.shape

(119510, 5)

In [92]:
# Missing values per column
poi_data.isna().sum()

osm_id          0
code            0
fclass          0
name        51148
geometry        0
dtype: int64

In [93]:
# POIs that have no `name`, kept aside to inspect which feature classes they belong to
missing_poi = poi_data[poi_data['name'].isna()]

# End the cell on a bare expression so the notebook renders the frame as a rich
# table; print() would fall back to plain, column-misaligned text.
missing_poi.head()

      osm_id  code               fclass  name                    geometry
0   42105926  2907  camera_surveillance  None  POINT (-73.72159 43.05355)
1   42503788  2907  camera_surveillance  None  POINT (-73.89108 40.65626)
2   42503792  2907  camera_surveillance  None  POINT (-73.88998 40.65478)
3   42538083  2902                bench  None   POINT (-73.9707 40.67335)
9  111266407  2742            viewpoint  None  POINT (-78.90175 42.89644)


In [94]:
# Distinct feature classes among the unnamed POIs
unnamed_fclass = missing_poi['fclass']
unnamed_fclass.unique()

array(['camera_surveillance', 'bench', 'viewpoint', 'tourist_info',
       'fire_station', 'memorial', 'tower', 'ruins', 'toilet',
       'lighthouse', 'water_tower', 'drinking_water', 'convenience',
       'bank', 'post_office', 'school', 'comms_tower', 'post_box',
       'artwork', 'fountain', 'playground', 'prison', 'theatre',
       'restaurant', 'museum', 'waste_basket', 'recycling_glass', 'atm',
       'telephone', 'picnic_site', 'hotel', 'shelter', 'recycling',
       'supermarket', 'police', 'pub', 'water_mill', 'laundry',
       'gift_shop', 'optician', 'hostel', 'garden_centre', 'pharmacy',
       'bar', 'fast_food', 'cafe', 'hairdresser', 'courthouse', 'bakery',
       'vending_machine', 'monument', 'guesthouse', 'graveyard',
       'vending_any', 'dentist', 'car_sharing', 'florist', 'water_well',
       'car_dealership', 'beverages', 'clothes', 'market_place',
       'camp_site', 'pitch', 'car_wash', 'library', 'doityourself',
       'caravan_site', 'observation_tower', 'ki

In [95]:
# Preview the POIs that do carry a name
has_name = poi_data['name'].notna()
poi_data[has_name].head()


Unnamed: 0,osm_id,code,fclass,name,geometry
4,43058007,2204,park,Kibler Park,POINT (-78.67233 43.16559)
5,60700639,2404,guesthouse,Inn By The Park,POINT (-75.17913 44.8968)
6,75427609,2723,monument,Tri-State Marker,POINT (-73.48734 42.04954)
7,75427609,2721,attraction,Tri-State Marker,POINT (-73.48734 42.04954)
8,103786325,2082,school,Northwood Elementary School,POINT (-78.74288 42.86341)


In [96]:
# Where a POI has no name, fall back to its feature class (e.g. "bench") as the label.
# Re-running this cell is harmless: once filled, there is nothing left to replace.
name_or_fclass = poi_data['name'].fillna(poi_data['fclass'])
poi_data['name'] = name_or_fclass

In [97]:
# Re-count missing values per column after back-filling `name`
poi_data.isna().sum()

osm_id      0
code        0
fclass      0
name        0
geometry    0
dtype: int64

In [98]:
# Manhattan census-tract polygons
TRACTS_GEOJSON = '../census tract geofiles/manhattan_census_tracts.geojson'
tracts = gpd.read_file(TRACTS_GEOJSON)

In [99]:
# Preview the first five census tracts
tracts.head(n=5)

Unnamed: 0,GEOID,CTLabel,NTAName,NTA2020,CDTA2020,CDTANAME,BoroName,CT2020,BoroCT2020,geometry
0,36061000100,1.0,The Battery-Governors Island-Ellis Island-Libe...,MN0191,MN01,MN01 Financial District-Tribeca (CD 1 Equivalent),Manhattan,100,1000100,"MULTIPOLYGON (((-74.04388 40.69019, -74.04351 ..."
1,36061001401,14.01,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),Manhattan,1401,1001401,"MULTIPOLYGON (((-73.98837 40.71645, -73.98754 ..."
2,36061001402,14.02,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),Manhattan,1402,1001402,"MULTIPOLYGON (((-73.98507 40.71908, -73.98423 ..."
3,36061001800,18.0,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),Manhattan,1800,1001800,"MULTIPOLYGON (((-73.98985 40.72052, -73.98972 ..."
4,36061002201,22.01,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),Manhattan,2201,1002201,"MULTIPOLYGON (((-73.97875 40.71993, -73.97879 ..."


In [100]:
# Print the CRS of each layer (tracts first, then POIs) so they can be compared by eye
for layer in (tracts, poi_data):
    print(layer.crs)

EPSG:4326
EPSG:4326


In [101]:
# Inner spatial join of tracts (left) with POIs (right): produces one row per
# (tract, intersecting POI) pair, so tracts with no POI do not appear in the result.
joined = gpd.sjoin(
    left_df=tracts,
    right_df=poi_data,
    how='inner',
    predicate='intersects',
)

In [102]:
# Preview the first five (tract, POI) matches
joined.head(n=5)

Unnamed: 0,GEOID,CTLabel,NTAName,NTA2020,CDTA2020,CDTANAME,BoroName,CT2020,BoroCT2020,geometry,index_right,osm_id,code,fclass,name
1,36061001401,14.01,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),Manhattan,1401,1001401,"MULTIPOLYGON (((-73.98837 40.71645, -73.98754 ...",88712,11038072613,2906,waste_basket,waste_basket
1,36061001401,14.01,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),Manhattan,1401,1001401,"MULTIPOLYGON (((-73.98837 40.71645, -73.98754 ...",88705,11038072593,2902,bench,bench
1,36061001401,14.01,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),Manhattan,1401,1001401,"MULTIPOLYGON (((-73.98837 40.71645, -73.98754 ...",88706,11038072594,2902,bench,bench
1,36061001401,14.01,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),Manhattan,1401,1001401,"MULTIPOLYGON (((-73.98837 40.71645, -73.98754 ...",88710,11038072598,2902,bench,bench
1,36061001401,14.01,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),Manhattan,1401,1001401,"MULTIPOLYGON (((-73.98837 40.71645, -73.98754 ...",88711,11038072612,2906,waste_basket,waste_basket


In [103]:
# Count POIs per tract: `joined` has one row per (tract, POI) match, so the
# group size is the number of matched POI rows.
# NOTE(review): the POI layer can list the same osm_id under several fclass
# values; such a feature is counted once per row here -- confirm that is intended.
tract_poi_counts = joined.groupby("GEOID").size().reset_index(name="poi_count")

# Left-merge back onto the full tract list so tracts without any POI are kept,
# then turn their missing counts into 0.
tracts_with_poi = tracts.merge(tract_poi_counts, on="GEOID", how="left")
tracts_with_poi["poi_count"] = tracts_with_poi["poi_count"].fillna(0).astype(int)

# Min-max scale the counts to a 0-10 score.
max_poi = tracts_with_poi["poi_count"].max()
min_poi = tracts_with_poi["poi_count"].min()
poi_range = max_poi - min_poi
if poi_range > 0:
    tracts_with_poi["poi_score"] = (
        (tracts_with_poi["poi_count"] - min_poi) / poi_range * 10
    ).round(2)
else:
    # Every tract has the same count (or there are no tracts): the scaling would
    # divide by zero and write NaN scores to the CSV. Nothing distinguishes the
    # tracts in that case, so give them all the minimum score.
    tracts_with_poi["poi_score"] = 0.0

# Export only the identifier, raw count and score.
tracts_with_poi[["GEOID", "poi_count", "poi_score"]].to_csv("tract_level_poi.csv", index=False)
