In [1]:
import sys

# Put the project's ../src directory at the front of the import path so the
# notebook can import the local modules that live there.
# The membership check keeps this cell idempotent: re-running it does not
# pile up duplicate "../src" entries in sys.path.
SRC_DIR = "../src"
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)

In [2]:
from dotenv import load_dotenv
import os
import warnings

# Load variables from a local .env file (if one is found) into the environment.
load_dotenv()

# Token read from the environment; it is handed to OpenDataDownloader later.
nyc_app_token = os.getenv("NYC_OPENDATA_APPTOKEN")

# os.getenv returns None when the variable is unset. Surface that here instead
# of letting a missing token travel silently into the downloader.
# NOTE(review): how OpenDataDownloader treats a None token is not visible in
# this notebook — confirm whether it should be a hard error instead.
if not nyc_app_token:
    warnings.warn(
        "NYC_OPENDATA_APPTOKEN is not set; continuing without an app token.",
        stacklevel=2,
    )

In [3]:
from data_downloader import (
    GeometryFormatter,
    OpenDataDownloader,
)

# Single downloader instance, reused by every load_data call in this notebook.
# It is constructed with the app token read from the environment above.
loader = OpenDataDownloader(nyc_app_token)

In [4]:
# Row cap passed to the crash download; the street download passes no limit.
crash_row_limit = 10000

# Fetch the raw "crashes" and "centerline" datasets through the shared loader.
crashes = loader.load_data("crashes", limit=crash_row_limit)
streets = loader.load_data("centerline")

In [5]:
# Wrap each raw dataset in a GeometryFormatter and convert it.
# Crashes go through from_lat_long(); streets go through the "the_geom" column.
# NOTE(review): presumably both calls return GeoDataFrames in a common CRS —
# confirm in data_downloader.
crash_formatter = GeometryFormatter(crashes)
crashes_gdf = crash_formatter.from_lat_long()

street_formatter = GeometryFormatter(streets)
streets_gdf = street_formatter.from_geometry_column("the_geom")

In [6]:
from data_helpers import RoadFeaturesCalculator

# Pair the crash records with the street segments, then compute the road
# feature named "collision_rate", identifying crashes by "collision_id".
# NOTE(review): how date_column is used to produce a rate is defined in
# data_helpers and is not visible here.
crash_calculator = RoadFeaturesCalculator(
    features_df=crashes_gdf,
    streets_df=streets_gdf,
)
collision_rates = crash_calculator.calculate_road_features(
    id_column="collision_id",
    feature_name="collision_rate",
    date_column="crash_date",
)

In [7]:
# Preview the first five rows of the computed collision-rate table.
collision_rates.head(n=5)

Unnamed: 0,physicalid,collision_rate
0,-1,0.0
1,10000,0.0
2,100000,0.0
3,100001,0.0
4,100003,0.0


In [8]:
# Fetch the raw "trees" dataset through the shared loader (no limit argument).
tree_dataset_key = "trees"
trees = loader.load_data(tree_dataset_key)

In [9]:
# Convert the raw tree records using their "the_geom" column, the same way
# the street centerlines are converted earlier in the notebook.
tree_formatter = GeometryFormatter(trees)
trees_gdf = tree_formatter.from_geometry_column("the_geom")

In [10]:
# Preview the first five formatted tree records.
trees_gdf.head(n=5)

Unnamed: 0,created_at,tree_id,block_id,geometry,tree_dbh,stump_diam,curb_loc,status,health,spc_latin,...,st_assem,st_senate,nta,nta_name,boro_ct,state,latitude,longitude,x_sp,y_sp
0,08/27/2015,180683,348711,POINT (1027431.148 202756.769),3,0,OnCurb,Alive,Fair,Acer rubrum,...,28,16,QN17,Forest Hills,4073900,New York,40.72309177,-73.84421522,1027431.14821,202756.768749
1,09/03/2015,200540,315986,POINT (1034455.701 228644.837),21,0,OnCurb,Alive,Fair,Quercus palustris,...,27,11,QN49,Whitestone,4097300,New York,40.79411067,-73.81867946,1034455.70109,228644.837379
2,09/05/2015,204026,218365,POINT (1001822.831 200716.891),3,0,OnCurb,Alive,Good,Gleditsia triacanthos var. inermis,...,50,18,BK90,East Williamsburg,3044900,New York,40.71758074,-73.9366077,1001822.83131,200716.891267
3,09/05/2015,204337,217969,POINT (1002420.358 199244.253),10,0,OnCurb,Alive,Good,Gleditsia triacanthos var. inermis,...,53,18,BK90,East Williamsburg,3044900,New York,40.71353749,-73.93445616,1002420.35833,199244.253136
4,08/30/2015,189565,223043,POINT (990913.775 182202.426),21,0,OnCurb,Alive,Good,Tilia americana,...,44,21,BK37,Park Slope-Gowanus,3016500,New York,40.66677776,-73.97597938,990913.775046,182202.425999


In [11]:
# Pair the tree records with the street segments and compute the road feature
# named "number_of_trees", identifying trees by "tree_id".
# NOTE(review): the units of buffer=40 are not visible here; presumably they
# follow the CRS of the geometries — confirm in data_helpers.
tree_calculator = RoadFeaturesCalculator(
    features_df=trees_gdf,
    streets_df=streets_gdf,
)
trees_per_road = tree_calculator.calculate_road_features(
    id_column="tree_id",
    feature_name="number_of_trees",
    buffer=40,
)

In [12]:
# Distribution of tree counts across road segments.
# NOTE(review): the recorded output shows 121,617 segments with 0 trees and
# only 101 with at least one. Confirm the tree download is not truncated by a
# default row limit and that the buffer/CRS used for matching are as intended.
trees_per_road["number_of_trees"].value_counts()

number_of_trees
0.0    121617
1.0        92
2.0         6
4.0         2
3.0         1
Name: count, dtype: int64