In [58]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import csv

# Input locations: the NTA boundary shapefile and the CSV of cooling tower detections.
# NOTE(review): the CSV is named "bronx_detections", but the rows displayed after
# the spatial join are Brooklyn addresses — confirm this is the intended input.
nta_shapefile_path = 'data/NTA map/geo_export_49ef428f-9550-420f-9953-bdb0fb30adec.shp'
tower_detections_path = 'data/bronx_detections.csv'

# Read the NTA polygons (GeoDataFrame) and the cooling tower points (plain DataFrame)
shapefile = gpd.read_file(nta_shapefile_path)
towers = pd.read_csv(tower_detections_path)

In [59]:
# Build point geometries from the longitude/latitude columns in one vectorised
# call (x = longitude, y = latitude) instead of constructing a shapely Point per
# row with DataFrame.apply. Passing the geometry array straight to GeoDataFrame
# also avoids adding a 'geometry' column to `towers` as a side effect.
tower_geometry = gpd.points_from_xy(towers['longitude (deg)'], towers['latitude (deg)'])

# GeoDataFrame holding every detection column plus the point geometry.
# No CRS is attached here; it is assigned in the reprojection step.
points_gdf = gpd.GeoDataFrame(towers, geometry=tower_geometry)

Ensure that both the shapefile and the points are in the same coordinate reference system (CRS). If they are not, reproject one of them.

In [60]:
# Show which CRS the shapefile polygons use
print(shapefile.crs)

# The points carry no CRS yet: label them as EPSG:4326 (assumed WGS84
# latitude/longitude), then transform them into the shapefile's CRS.
points_gdf = points_gdf.set_crs("EPSG:4326").to_crs(shapefile.crs)

EPSG:4326


In [61]:
# Left spatial join: every point is kept and picks up the attributes of the
# polygon it lies within; points inside no polygon get NaN in those columns.
labeled = points_gdf.sjoin(shapefile, how="left", predicate="within")

# Display the joined points together with their shapefile labels
labeled

Unnamed: 0,id,selected,inside_boundary,meets threshold,latitude (deg),longitude (deg),distance from center (m),address,confidence,geometry,index_right,borocode,boroname,countyfips,ntacode,ntaname,shape_area,shape_leng
0,0,False,True,True,40.702250,-73.979703,7675.3,"1 1st Ave, Brooklyn, NY 11251, United States",0.59,POINT (-73.9797 40.70225),187.0,3.0,Brooklyn,047,BK99,park-cemetery-etc-Brooklyn,2.287909e+08,394313.013412
1,1,True,True,True,40.691670,-73.988769,7330.5,"1 Boerum Pl, Brooklyn, NY 11201, USA",0.64,POINT (-73.98877 40.69167),184.0,3.0,Brooklyn,047,BK38,DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill,2.846257e+07,32363.990305
2,2,True,True,True,40.691692,-73.988756,7331.3,"1 Boerum Pl, Brooklyn, NY 11201, USA",0.62,POINT (-73.98876 40.69169),184.0,3.0,Brooklyn,047,BK38,DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill,2.846257e+07,32363.990305
3,3,True,True,True,40.655290,-73.912532,1304.8,"1 Brookdale Plaza, Brooklyn, NY 11212, USA",0.89,POINT (-73.91253 40.65529),4.0,3.0,Brooklyn,047,BK96,Rugby-Remsen Village,3.270695e+07,30957.853395
4,4,True,True,True,40.655400,-73.912612,1307.6,"1 Brookdale Plaza, Brooklyn, NY 11212, USA",0.86,POINT (-73.91261 40.6554),4.0,3.0,Brooklyn,047,BK96,Rugby-Remsen Village,3.270695e+07,30957.853395
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1152,1152,False,True,True,40.684728,-74.007031,8120.6,"Hugh L. Carey Tunnel, Brooklyn, NY 11231, Unit...",0.32,POINT (-74.00703 40.68473),177.0,3.0,Brooklyn,047,BK33,Carroll Gardens-Columbia Street-Red Hook,4.460027e+07,60795.911417
1153,1153,True,True,True,40.656292,-73.945334,2027.0,"Kings county hopital, 441 Clarkson Ave, Brookl...",0.69,POINT (-73.94533 40.65629),128.0,3.0,Brooklyn,047,BK60,Prospect Lefferts Gardens-Wingate,3.164212e+07,29580.537705
1154,1154,True,True,True,40.656294,-73.945311,2025.4,"Kings county hopital, 441 Clarkson Ave, Brookl...",0.67,POINT (-73.94531 40.65629),128.0,3.0,Brooklyn,047,BK60,Prospect Lefferts Gardens-Wingate,3.164212e+07,29580.537705
1155,1155,False,True,True,40.612022,-73.845034,7776.3,"New York, NY 11234, United States",0.60,POINT (-73.84503 40.61202),187.0,3.0,Brooklyn,047,BK99,park-cemetery-etc-Brooklyn,2.287909e+08,394313.013412


In [62]:
# Print each point's coordinates next to the NTA code it was assigned
preview_columns = ['latitude (deg)', 'longitude (deg)', 'ntacode']
print(labeled[preview_columns])

      latitude (deg)  longitude (deg) ntacode
0          40.702250       -73.979703    BK99
1          40.691670       -73.988769    BK38
2          40.691692       -73.988756    BK38
3          40.655290       -73.912532    BK96
4          40.655400       -73.912612    BK96
...              ...              ...     ...
1152       40.684728       -74.007031    BK33
1153       40.656292       -73.945334    BK60
1154       40.656294       -73.945311    BK60
1155       40.612022       -73.845034    BK99
1156       40.585051       -73.901241     NaN

[1157 rows x 3 columns]


In [63]:
# Keep only detections that matched an NTA polygon. Restricting dropna to
# 'ntacode' avoids discarding matched towers merely because some unrelated
# column (e.g. 'address') is missing, which would silently undercount.
labeled = labeled.dropna(subset=['ntacode'])

# Keep only detections flagged as selected; .eq(True) treats a missing flag
# as "not selected" instead of raising on a NaN-containing mask.
labeled = labeled.loc[labeled['selected'].eq(True)]

# Count the selected towers per NTA code. The result is a one-column frame:
# index named 'NTA code', column 'Towers' holding the number of towers.
counts = (
    labeled.groupby('ntacode')
    .size()
    .rename('Towers')
    .rename_axis('NTA code')
    .to_frame()
)

Cleaning up data. Kept columns: `NTA code` (the index) and `Towers` (the number of selected towers in each NTA).

In [64]:
# Write the per-NTA tower counts to CSV; the index is written as the first column.
output_csv_path = 'data/cleaned_data/nta_tower_count.csv'
with open(output_csv_path, mode='w', newline='') as csv_handle:
    counts.to_csv(csv_handle)