In [1]:
#Importing needed libraries
import requests
import pandas as pd
from datetime import datetime
import geopandas as gpd
import time
from json import JSONDecodeError
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from statsmodels.discrete.count_model import ZeroInflatedPoisson

### # NYC BUILDING ENERGY DATA 

In [2]:
## There are mulitple sources of yearly data. Compiling the source URLs here as well as the API urls to grab
## privately owned buildings over 25,000 ft2 and in City-owned buildings over 10,000 ft2
building_energy_LL84_sources={"2022+":{
    "api":"https://data.cityofnewyork.us/resource/5zyy-y8am.json",
    "info":"https://data.cityofnewyork.us/Environment/NYC-Building-Energy-and-Water-Data-Disclosure-for-/5zyy-y8am/about_data"
                              },
                              "2021":{
    "api":"https://data.cityofnewyork.us/resource/7x5e-2fxh.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/7x5e-2fxh/about_data"
                              },
                              "2020":{
    "api":"https://data.cityofnewyork.us/resource/usc3-8zwd.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/usc3-8zwd/about_data"
                                  },
                              "2019":{
    "api":"https://data.cityofnewyork.us/resource/wcm8-aq5w.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/wcm8-aq5w/about_data"
                                  },
                              "2018":{
    "api":"https://data.cityofnewyork.us/resource/4tys-3tzj.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/4tys-3tzj/about_data"
                                  },
                              "2017":{
    "api":"https://data.cityofnewyork.us/resource/4t62-jm4m.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/4t62-jm4m/about_data"
                                  },
                              "2016":{
    "api":"https://data.cityofnewyork.us/resource/utpj-74fz.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/utpj-74fz/about_data"
                                  },
                              "2015":{
    "api":"https://data.cityofnewyork.us/resource/77q4-nkfh.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/77q4-nkfh/about_data"
                                  },
                              "2014":{
    "api":"https://data.cityofnewyork.us/resource/nbun-wekj.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/nbun-wekj/about_data"
                                  },
                              "2013":{
    "api":"https://data.cityofnewyork.us/resource/yr5p-wjer.json",
    "info":"http://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/yr5p-wjer"
                                  },
                              "2012":{
    "api":"https://data.cityofnewyork.us/resource/r6ub-zhff.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/r6ub-zhff/about_data"
                                  },
                              "2011":{
    "api":"https://data.cityofnewyork.us/resource/k7nh-aufb.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/k7nh-aufb/about_data"
                                  },
                              "2010":{
    "api":"https://data.cityofnewyork.us/resource/kswi-37bp.json",
    "info":"https://data.cityofnewyork.us/Environment/Energy-and-Water-Data-Disclosure-for-Local-Law-84-/kswi-37bp/about_data"
                                  },
#Monthly Data? 2018-2023
                              "Monthly":{
    "api":"https://data.cityofnewyork.us/resource/fvp3-gcb2.json",
    "info":"https://data.cityofnewyork.us/Environment/Local-Law-84-Monthly-Data/fvp3-gcb2/about_data"
    }}


In [48]:
PAGE = 1000               # <-- enforce 1,000 rows per page
TIMEOUT = 30
MAX_RETRIES = 5
BACKOFF_BASE = 1.5

session = requests.Session()
headers = {}

def fetch_all_rows_1k(api_url: str, source_years: str, source_info_url: str) -> pd.DataFrame:
    offset = 0
    frames = []

    while True:
        params = {"$limit": PAGE, "$offset": offset}
        # (Optional but recommended) add a stable order to avoid inconsistent paging
        # params["$order"] = ":id"  # uncomment if needed

        for attempt in range(1, MAX_RETRIES + 1):
            try:
                resp = session.get(api_url, params=params, headers=headers, timeout=TIMEOUT)

                # Retry on transient errors & rate limits
                if resp.status_code in (429, 500, 502, 503, 504):
                    time.sleep((BACKOFF_BASE ** attempt) + (0.1 * attempt))
                    continue

                resp.raise_for_status()

                if "json" not in resp.headers.get("Content-Type", "").lower():
                    preview = resp.text[:200]
                    raise ValueError(f"Non-JSON response (status {resp.status_code}): {preview}")

                data_chunk = resp.json()  # will be ≤ 1000 rows
                if not data_chunk:
                    # no more data
                    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

                df = pd.DataFrame(data_chunk)
                df["source_years"] = source_years
                df["source_api_url"] = api_url
                df["source_info_url"] = source_info_url
                frames.append(df)

                # Stop if final page has < PAGE rows; otherwise advance by PAGE=1000
                if len(data_chunk) < PAGE:
                    return pd.concat(frames, ignore_index=True)
                offset += PAGE
                break  # success; next page

            except (requests.RequestException, JSONDecodeError, ValueError) as e:
                if attempt == MAX_RETRIES:
                    raise RuntimeError(f"Failed fetching {api_url} at offset {offset}: {e}") from e
                time.sleep((BACKOFF_BASE ** attempt) + (0.1 * attempt))

# --- your existing mapping dict (unchanged) ---
# building_energy_LL84_sources = { ... }  # keep your dict as-is

In [None]:
agg_running_list = []
for k, v in building_energy_LL84_sources.items():
    print(f"Fetching {k} -> {v['api']}")
    df = fetch_all_rows_1k(v["api"], k, v["info"])
    if not df.empty:
        agg_running_list.append(df)
    else:
        print(f"Warning: no rows returned for {k} ({v['api']}).")

nyc_building_energy = pd.concat(agg_running_list, ignore_index=True) if agg_running_list else pd.DataFrame()
print(f"Total rows: {len(nyc_building_energy)}")

Fetching 2022+ -> https://data.cityofnewyork.us/resource/5zyy-y8am.json
Fetching 2021 -> https://data.cityofnewyork.us/resource/7x5e-2fxh.json
Fetching 2020 -> https://data.cityofnewyork.us/resource/usc3-8zwd.json
Fetching 2019 -> https://data.cityofnewyork.us/resource/wcm8-aq5w.json
Fetching 2018 -> https://data.cityofnewyork.us/resource/4tys-3tzj.json
Fetching 2017 -> https://data.cityofnewyork.us/resource/4t62-jm4m.json
Fetching 2016 -> https://data.cityofnewyork.us/resource/utpj-74fz.json
Fetching 2015 -> https://data.cityofnewyork.us/resource/77q4-nkfh.json
Fetching 2014 -> https://data.cityofnewyork.us/resource/nbun-wekj.json
Fetching 2013 -> https://data.cityofnewyork.us/resource/yr5p-wjer.json
Fetching 2012 -> https://data.cityofnewyork.us/resource/r6ub-zhff.json
Fetching 2011 -> https://data.cityofnewyork.us/resource/k7nh-aufb.json
Fetching 2010 -> https://data.cityofnewyork.us/resource/kswi-37bp.json
Fetching Monthly -> https://data.cityofnewyork.us/resource/fvp3-gcb2.json
To

In [None]:
## Formatting into proper df
print(nyc_building_energy.shape)
## Saving to File
nyc_building_energy.to_csv("nyc_dob_energy_2010_2024.csv",index=False)

(2044010, 852)


In [3]:
nyc_building_energy=  pd.read_csv("nyc_dob_energy_2010_2024.csv")

  nyc_building_energy=  pd.read_csv("nyc_dob_energy_2010_2024.csv")


## Cleaning Start

In [36]:
### Columns to Drop because potentially irrelevant or redundant for 
nyc_building_energy_processing = nyc_building_energy.copy()
nyc_building_energy_processing = nyc_building_energy_processing.drop(columns=['data_center_ups_output_meter',
 'data_center_pdu_input_meter',
 'data_center_pdu_output_meter',
 'data_center_it_equipment',
 'data_center_it_site_energy',
 'data_center_it_source_energy',
 'data_center_pue',
 'data_center_national_median',
 'data_center_gross_floor_area',
 'data_center_ups_system',
 'data_center_it_energy',
 'data_center_cooling_equipment',
'supermarkets_grocery_gross',
 'supermarkets_grocery_number',
 'supermarkets_grocery_number_1',
 'supermarkets_grocery_number_2',
 'supermarkets_grocery_percent',
 'supermarkets_grocery_presence',
 'supermarkets_grocery_walk',
 'supermarkets_grocery_weekly',
 'supermarkets_grocery_workers',
'house_of_worship_gross_floor',
 'house_of_worship_pc_density',
 'house_of_worship_weekly',
 'house_of_worship_presence',
'residence_halls_dormitories',
'medical_office_percent_cooled',
 'residence_halls_dormitories_1',
 'residence_halls_dormitories_2',
 'residence_halls_dormitories_3',
 'residence_halls_dormitories_4',
'hotel_onsite_laundry_short',
 'warehouse_unrefrigerated',
 'warehouse_unrefrigerated_1',
 'warehouse_unrefrigerated_2',
 'warehouse_unrefrigerated_3',
 'warehouse_unrefrigerated_4',
 'warehouse_unrefrigerated_5',
 'warehouse_unrefrigerated_6',
 'hospital_gross_floor_area',
 'hospital_laboratory_y_1_n',
 'hospital_laundry_facility',
 'hospital_maximum_number_of',
 'hospital_number_of_buildings',
 'warehouse_refrigerated_gross',
 'warehouse_refrigerated_weekly',
 'warehouse_refrigerated_workers',
 'hospital_number_of_licensed',
 'multifamily_home_dishwashers'])
nyc_building_energy_processing["primary_property_type"] = nyc_building_energy_processing["primary_property_type"].combine_first(nyc_building_energy_processing["primary_property_type_epa"])
nyc_building_energy_processing = nyc_building_energy_processing.drop(columns=["primary_property_type_epa"])
nyc_building_energy_processing["primary_property_type_self"] = nyc_building_energy_processing["primary_property_type_self"].combine_first(nyc_building_energy_processing["primary_property_type_self_selected"])
nyc_building_energy_processing = nyc_building_energy_processing.drop(columns=["primary_property_type_self_selected"])
residential = nyc_building_energy_processing[(
    (nyc_building_energy_processing['primary_property_type'].isin(['Multifamily Housing','Single-Family Home']))
    |(nyc_building_energy_processing['primary_property_type_self'].isin(['Multifamily Housing','Single-Family Home'])))]
residential = residential.dropna(how='all',axis=1)


In [None]:
"Not Applicable: Standalone Property"

In [44]:
sorted(list(residential.columns))
#.groupby(["parent_property_id"]).agg({"report_year":"count"})

['_2nd_largest_property_use',
 '_2nd_largest_property_use_1',
 '_2nd_largest_property_use_gross_floor_area_ft',
 '_2nd_largest_property_use_type',
 '_3rd_largest_property_use',
 '_3rd_largest_property_use_1',
 '_3rd_largest_property_use_type',
 '_3rd_largest_property_use_type_gross_floor_area_ft',
 'address_1',
 'address_2',
 'adult_education_gross_floor',
 'adult_education_gross_floor_area_ft',
 'aggregate_meter_s_district_steam_number_of_individual_meters_included',
 'aggregate_meter_s_electric_number_of_individual_meters_included',
 'aggregate_meter_s_natural_gas_number_of_individual_meters_included',
 'alert_data_center_issue_with',
 'alert_data_center_issue_with_estimates_it_configuration_or_it_meter',
 'alert_energy_meter_has',
 'alert_energy_meter_has_gaps',
 'alert_energy_meter_has_less',
 'alert_energy_meter_has_less_than_12_full_calendar_months_of_data',
 'alert_energy_meter_has_overlaps',
 'alert_energy_meter_has_single',
 'alert_energy_meter_has_single_entry_more_than_65_da

In [29]:
## All rows have one of these two values.
nyc_building_energy_processing[((nyc_building_energy_processing['primary_property_type'].isnull())
                                &(nyc_building_energy_processing['primary_property_type_self'].isnull()))]

Unnamed: 0,report_year,property_id,property_name,parent_property_id,parent_property_name,year_ending,nyc_borough_block_and_lot,nyc_building_identification,address_1,city,...,total_on_site_electric,calendar_year,month,district_steam_use_kbtu_,electricity_use_grid_kbtu_,electricity_use_onsite_renewables_kbtu_,fuel_oil_1_use_monthly_kbtu_,fuel_oil_2_use_monthly_kbtu_,fuel_oil_4_use_monthly_kbtu_,fuel_oil_5_6_use_monthly_kbtu_


Unnamed: 0,report_year,property_id,property_name,parent_property_id,parent_property_name,year_ending,nyc_borough_block_and_lot,nyc_building_identification,address_1,city,...,total_on_site_electric,calendar_year,month,district_steam_use_kbtu_,electricity_use_grid_kbtu_,electricity_use_onsite_renewables_kbtu_,fuel_oil_1_use_monthly_kbtu_,fuel_oil_2_use_monthly_kbtu_,fuel_oil_4_use_monthly_kbtu_,fuel_oil_5_6_use_monthly_kbtu_
1,2022.0,9793770,1870 Pelham Parkway South,Not Applicable: Standalone Property,Not Applicable: Standalone Property,2022-12-31T00:00:00.000,2042500026,2047795,1870 Pelham Parkway South,Bronx,...,,,,,,,,,,
3,2022.0,13511507,215 East 99th Street,Not Applicable: Standalone Property,Not Applicable: Standalone Property,2022-12-31T00:00:00.000,1016490009,1052383,215 East 99th Street,New York,...,,,,,,,,,,
5,2022.0,14377690,1680 Ocean Ave,Not Applicable: Standalone Property,Not Applicable: Standalone Property,2022-12-31T00:00:00.000,3067300001,3180535,1680 Ocean Ave,Brooklyn,...,,,,,,,,,,
8,2022.0,15176247,88-24 Merrick Blvd,Not Applicable: Standalone Property,Not Applicable: Standalone Property,2022-12-31T00:00:00.000,4098150067,4210063,88-24 Merrick Blvd,Jamaica,...,,,,,,,,,,
9,2022.0,15176327,90-11 149th Street,Not Applicable: Standalone Property,Not Applicable: Standalone Property,2022-12-31T00:00:00.000,4096790052,4206819,90-11 149 str,Jamaica,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128578,,3111724,DEPM 2820 - 408 East 79th St,Not Applicable: Standalone Property,Not Applicable: Standalone Property,2019-12-31T00:00:00.000,1-01473-7504,1046037,408 East 79th Street,New York,...,,,,,,,,,,
128579,,3111728,DEPM 2730 - 860 Park Ave,Not Applicable: Standalone Property,Not Applicable: Standalone Property,2019-12-31T00:00:00.000,1-01392-0036,1041828,860 Park Avenue,New York,...,,,,,,,,,,
128580,,3111748,DEPM 2680 - 825 Fifth Ave,Not Applicable: Standalone Property,Not Applicable: Standalone Property,2019-12-31T00:00:00.000,1-01378-0070,1041056,825 Fifth Avenue,New York,...,,,,,,,,,,
128581,,3111749,DEPM 2676 - 300 East 23rd St,Not Applicable: Standalone Property,Not Applicable: Standalone Property,2019-12-31T00:00:00.000,1-00928-7504,1088559,300 East 23rd Street,New York,...,,,,,,,,,,


In [32]:
# nyc_building_energy_processing['primary_property_type'].unique() #'Multifamily Housing','Single-Family Home'
nyc_building_energy_processing['primary_property_type_self'].unique() 

array(['Non-Refrigerated Warehouse', 'Multifamily Housing',
       'Mixed Use Property', 'Medical Office', 'Office', 'Enclosed Mall',
       'Worship Facility', 'Hotel', 'Manufacturing/Industrial Plant',
       'Residence Hall/Dormitory', 'Retail Store',
       'Self-Storage Facility', 'Performing Arts',
       'Refrigerated Warehouse', 'Supermarket/Grocery Store',
       'K-12 School', 'Distribution Center',
       'Repair Services (Vehicle, Shoe, Locksmith, etc.)', 'Laboratory',
       'Parking', 'Outpatient Rehabilitation/Physical Therapy',
       'Hospital (General Medical & Surgical)',
       'Residential Care Facility', 'Transportation Terminal/Station',
       'Financial Office', 'Bank Branch', 'Other - Lodging/Residential',
       'Bowling Alley', 'Senior Living Community', 'Strip Mall',
       'Adult Education', 'College/University',
       'Fitness Center/Health Club/Gym', 'Automobile Dealership',
       'Pre-school/Daycare', 'Other - Recreation', 'Food Service',
       'Movi

In [27]:
to_drop = ['data_center_ups_output_meter',
 'data_center_pdu_input_meter',
 'data_center_pdu_output_meter',
 'data_center_it_equipment',
 'data_center_it_site_energy',
 'data_center_it_source_energy',
 'data_center_pue',
 'data_center_national_median',
 'data_center_gross_floor_area',
 'data_center_ups_system',
 'data_center_it_energy',
 'data_center_cooling_equipment',
'supermarkets_grocery_gross',
 'supermarkets_grocery_number',
 'supermarkets_grocery_number_1',
 'supermarkets_grocery_number_2',
 'supermarkets_grocery_percent',
 'supermarkets_grocery_presence',
 'supermarkets_grocery_walk',
 'supermarkets_grocery_weekly',
 'supermarkets_grocery_workers',
'house_of_worship_gross_floor',
 'house_of_worship_pc_density',
 'house_of_worship_weekly',
 'house_of_worship_presence',
'residence_halls_dormitories',
'medical_office_percent_cooled',
 'residence_halls_dormitories_1',
 'residence_halls_dormitories_2',
 'residence_halls_dormitories_3',
 'residence_halls_dormitories_4',
'hotel_onsite_laundry_short',
 'warehouse_unrefrigerated',
 'warehouse_unrefrigerated_1',
 'warehouse_unrefrigerated_2',
 'warehouse_unrefrigerated_3',
 'warehouse_unrefrigerated_4',
 'warehouse_unrefrigerated_5',
 'warehouse_unrefrigerated_6',
 'hospital_gross_floor_area',
 'hospital_laboratory_y_1_n',
 'hospital_laundry_facility',
 'hospital_maximum_number_of',
 'hospital_number_of_buildings',
 'warehouse_refrigerated_gross',
 'warehouse_refrigerated_weekly',
 'warehouse_refrigerated_workers',
 'hospital_number_of_licensed',
 'multifamily_home_dishwashers',
 'data_center_energy_estimates',
 'adult_education_gross_floor',
 'ambulatory_surgical_center',
 'automobile_dealership_gross',
 'bank_branch_gross_floor_area',
 'barracks_gross_floor_area',
 'college_university_gross',
 'convention_center_gross_floor',
 'courthouse_gross_floor_area',
 'distribution_center_gross',
 'enclosed_mall_gross_floor',
 'energy_power_station_gross',
 'financial_office_gross_floor',
 'financial_office_number_of',
 'financial_office_weekly',
 'financial_office_number_of_1',
 'fitness_center_health_club',
 'food_sales_gross_floor_area',
 'food_service_gross_floor',
 'hospital_general_medical',
 'hospital_general_medical_1',
 'hospital_general_medical_2',
 'hospital_general_medical_3',
 'hospital_general_medical_4',
 'hospital_general_medical_5',
 'hospital_general_medical_6',
 'hospital_general_medical_7',
 'hospital_general_medical_8',
 'hospital_general_medical_9',
 'hospital_general_medical_10',
 'medical_office_gross_floor',
 'medical_office_number_of',
 'medical_office_number_of_1',
 'medical_office_number_of_2',
 'medical_office_percent_that',
 'medical_office_percent_that_1',
 'medical_office_weekly',
 'outpatient_rehabilitation',
 'urgent_care_clinic_other',
 'hotel_gross_floor_area_ft',
 'hotel_gym_fitness_center',
 'k_12_school_gross_floor_area',
 'pre_school_daycare_gross',
 'laboratory_gross_floor_area',
 'mailing_center_post_office',
 'movie_theater_gross_floor',
 'manufacturing_industrial',
 'multifamily_housing_government',
 'multifamily_housing_gross',
 'multifamily_housing_number',
 'multifamily_housing_number_1',
 'multifamily_housing_number_2',
 'multifamily_housing_number_3',
 'multifamily_housing_number_4',
 'multifamily_housing_percent',
 'multifamily_housing_percent_1',
 'multifamily_housing_total',
 'multifamily_housing_total_1',
 'multifamily_housing_number_5',
 'multifamily_housing_number_6',
 'multifamily_housing_number_7',
 'multifamily_housing_resident',
 'residence_hall_dormitory',
 'refrigerated_warehouse_gross',
 'non_refrigerated_warehouse',
 'office_gross_floor_area_ft',
 'office_number_of_computers',
 'office_number_of_workers',
 'office_percent_that_can_be',
 'office_percent_that_can_be_1',
 'office_weekly_operating_hours',
 'office_worker_density_number',
 'other_gross_floor_area_ft',
 'museum_gross_floor_area_ft',
 'performing_arts_gross_floor',
 'parking_gross_floor_area',
 'parking_open_parking_lot',
 'parking_completely_enclosed',
 'parking_partially_enclosed',
 'restaurant_gross_floor_area',
 'restaurant_weekly_operating',
 'restaurant_worker_density',
 'retail_store_gross_floor',
 'retail_store_number_of_walk',
 'retail_store_number_of_open',
 'self_storage_facility_gross',
 'senior_living_community_gross',
 'senior_living_community_living',
 'social_meeting_hall_gross',
 'supermarket_grocery_gross',
 'supermarket_grocery_number',
 'supermarket_grocery_number_1',
 'wholesale_club_supercenter',
 'wholesale_club_supercenter_1',
 'wastewater_treatment_plant',
 'worship_facility_gross_floor',
 'strip_mall_gross_floor_area',
          'senior_care_facility_average',
 'senior_care_facility_gross',
 'senior_care_facility_number',
 'senior_care_facility_number_1',
 'senior_care_facility_number_2',
 'senior_care_facility_number_3',
 'senior_care_facility_number_4',
 'senior_care_facility_percent',
 'senior_care_facility_total',
 'senior_care_facility_total_1',
 'senior_care_facility_workers',
            'hotel_floor_area_of_full',
 'hotel_floor_area_of_gym',
 'hotel_average_occupancy',
            'bank_financial_institution',
 'bank_financial_institution_1',
 'bank_financial_institution_2',
 'bank_financial_institution_3',
 'bank_financial_institution_4',
 'bank_financial_institution_5',
 'k_12_school_high_school_y',
 'k_12_school_open_weekends',
 'k_12_school_pc_density',
 'k_12_school_percent_cooled',
 'k_12_school_walk_in_refrig',
 'k_12_school_presence_of',
 'swimming_pool_size',
 'swimming_pool_indoor_outdoor',
           'adult_education_gross_floor_area_ft',
 'ambulatory_surgical_center_gross_floor_area_ft',
 'automobile_dealership_gross_floor_area_ft',
 'bank_branch_gross_floor_area_ft',
 'barracks_gross_floor_area_ft',
 'college_university_gross_floor_area_ft',
 'convention_center_gross_floor_area_ft',
 'courthouse_gross_floor_area_ft',
 'data_center_gross_floor_area_ft',
 'data_center_ups_system_redundancy',
 'data_center_cooling_equipment_redundancy',
 'data_center_it_energy_configuration',
 'data_center_energy_estimates_applied',
 'data_center_ups_output_meter_kwh',
 'data_center_pdu_input_meter_kwh',
 'data_center_pdu_output_meter_kwh',
 'data_center_it_equipment_input_meter_kwh',
 'data_center_it_site_energy_kwh',
 'data_center_it_source_energy_kbtu',
 'data_center_national_median_pue',
 'distribution_center_gross_floor_area_ft',
 'enclosed_mall_gross_floor_area_ft',
 'energy_power_station_gross_floor_area_ft',
 'financial_office_gross_floor_area_ft',
 'financial_office_number_of_computers',
 'financial_office_weekly_operating_hours',
 'financial_office_number_of_workers_on_main_shift',
 'fitness_center_health_club_gym_gross_floor_area_ft',
 'food_sales_gross_floor_area_ft',
 'food_service_gross_floor_area_ft',
 'hospital_general_medical_surgical_full_time_equivalent_fte_workers',
 'hospital_general_medical_surgical_gross_floor_area_ft',
 'hospital_general_medical_surgical_laboratory',
 'hospital_general_medical_surgical_licensed_bed_capacity',
 'hospital_general_medical_surgical_number_of_mri_machines',
 'hospital_general_medical_surgical_mri_density_number_per_1_000_sq_ft',
 'hospital_general_medical_surgical_number_of_staffed_beds',
 'hospital_general_medical_surgical_staffed_bed_density_number_per_1_000_sq_ft',
 'hospital_general_medical_surgical_number_of_workers_on_main_shift',
 'hospital_general_medical_surgical_number_of_workers_on_main_shift_density_number_per_1_000_sq_ft',
 'hospital_general_medical_surgical_percent_that_can_be_cooled',
 'hospital_general_medical_surgical_percent_that_can_be_heated',
 'medical_office_gross_floor_area_ft',
 'medical_office_number_of_computers',
 'medical_office_number_of_mri_machines',
 'medical_office_number_of_workers_on_main_shift',
 'medical_office_percent_that_can_be_cooled',
 'medical_office_percent_that_can_be_heated',
 'medical_office_weekly_operating_hours',
 'outpatient_rehabilitation_physical_therapy_gross_floor_area_ft',
 'urgent_care_clinic_other_outpatient_gross_floor_area_ft',
 'hotel_gym_fitness_center_floor_area_ft',
 'k_12_school_gross_floor_area_ft',
 'pre_school_daycare_gross_floor_area_ft',
 'laboratory_gross_floor_area_ft',
 'mailing_center_post_office_gross_floor_area_ft',
 'movie_theater_gross_floor_area_ft',
 'manufacturing_industrial_plant_gross_floor_area_ft',
 'multifamily_housing_government_subsidized_housing',
 'multifamily_housing_gross_floor_area_ft',
 'multifamily_housing_number_of_bedrooms',
 'multifamily_housing_number_of_bedrooms_density_number_per_1_000_sq_ft',
 'multifamily_housing_number_of_laundry_hookups_in_all_units',
 'multifamily_housing_number_of_laundry_hookups_in_common_area_s',
 'multifamily_housing_number_of_residential_living_units_in_a_high_rise_building_10_or_more_stories',
 'multifamily_housing_total_number_of_residential_living_units',
 'multifamily_housing_total_number_of_residential_living_units_density_number_per_1_000_sq_ft',
 'multifamily_housing_number_of_residential_living_units_in_a_high_rise_building_density_number_per_1_000_sq_ft',
 'multifamily_housing_number_of_residential_living_units_in_a_mid_rise_building_5_9_stories',
 'multifamily_housing_number_of_residential_living_units_in_a_mid_rise_building_density_number_per_1_000_sq_ft',
 'multifamily_housing_percent_that_can_be_cooled',
 'multifamily_housing_percent_that_can_be_heated',
 'multifamily_housing_resident_population_type',
 'residence_hall_dormitory_gross_floor_area_ft',
 'non_refrigerated_warehouse_gross_floor_area_ft',
 'office_number_of_workers_on_main_shift',
 'office_percent_that_can_be_cooled',
 'office_percent_that_can_be_heated',
 'office_worker_density_number_per_1_000_sq_ft',
 'performing_arts_gross_floor_area_ft',
 'parking_gross_floor_area_ft',
 'parking_completely_enclosed_parking_garage_size_ft',
 'parking_open_parking_lot_size_ft',
 'parking_partially_enclosed_parking_garage_size_ft',
 'refrigerated_warehouse_gross_floor_area_ft',
 'restaurant_gross_floor_area_ft',
 'restaurant_weekly_operating_hours',
 'restaurant_worker_density_number_per_1_000_sq_ft',
 'retail_store_gross_floor_area_ft',
 'retail_store_number_of_walk_in_refrigeration_freezer_units',
 'retail_store_number_of_open_or_closed_refrigeration_freezer_units',
 'self_storage_facility_gross_floor_area_ft',
 'senior_living_community_gross_floor_area_ft',
 'senior_living_community_living_unit_density_number_per_1_000_sq_ft',
 'social_meeting_hall_gross_floor_area_ft',
 'supermarket_grocery_gross_floor_area_ft',
 'supermarket_grocery_number_of_open_or_closed_refrigeration_freezer_units',
 'supermarket_grocery_number_of_walk_in_refrigeration_freezer_units',
 'wholesale_club_supercenter_gross_floor_area_ft',
 'wholesale_club_supercenter_exterior_entrance_to_the_public',
 'wastewater_treatment_plant_gross_floor_area_ft',
 'worship_facility_gross_floor_area_ft',
           'hotel_quantity_of_laundry',
           'third_party_certification',
 'third_party_certification_1',
 'third_party_certification_2',
            'hospital_general_medical_11',
 'hospital_general_medical_12',
 'retail_store_percent_that',
 'senior_care_community_gross',
 'senior_care_community_living',
           'other_weekly_operating_hours',
 'other_number_of_computers',
 'other_number_of_workers_on',
            'other_computer_density_number',
 'repair_services_vehicle_shoe',
 'retail_store_walk_in',
 'retail_store_open_or_closed',
 'hotel_room_density_number',
 'hotel_worker_density_number',
 'hotel_percent_that_can_be',
           'supermarket_grocery_open',
 'convenience_store_with_gas',
 'convenience_store_with_gas_1',
           'fast_food_restaurant_gross',
 'hospital_general_medical_13',
 'hospital_general_medical_14',
 'hospital_general_medical_15',
 'hospital_general_medical_16',
 'hospital_general_medical_17',
 'medical_office_mri_machine',
 'library_gross_floor_area',
           'convenience_store_with_gas_2',
           'office_computer_density_number',
           'bar_nightclub_gross_floor',
 'convenience_store_without',
 'personal_services_health',
 'race_track_gross_floor_area',
 'transportation_terminal',
 'veterinary_office_gross_floor',
 'vocational_school_gross_floor',
            'retail_store_cash_register',
 'retail_store_computer_density',
           'aquarium_gross_floor_area',
 'bowling_alley_gross_floor',
 'casino_gross_floor_area_ft',
 'data_center_pdu_input_site',
 'data_center_pdu_output_site',
 'ice_curling_rink_gross_floor',
 'indoor_arena_gross_floor',
 'laboratory_number_of_computers',
 'lifestyle_center_gross_floor',
 'multifamily_housing_maximum',
 'multifamily_housing_percent_2',
 'multifamily_housing_primary',
           'roller_rink_gross_floor_area',
 'stadium_closed_gross_floor',
 'stadium_open_gross_floor',
 'zoo_gross_floor_area_ft',
           'retail_store_worker_density',
 'retail_store_weekly_operating',
           'bank_branch_percent_that',
 'bank_branch_computer_density',
 'bank_branch_weekly_operating',
 'bank_branch_worker_density',
 'bank_branch_number_of_workers',
 'hotel_full_service_spa_floor',
 'hotel_number_of_rooms',
 'hotel_amount_of_laundry',
 'hotel_type_of_laundry_facility',
 'hotel_cooking_facilities',
 'worship_facility_computer',
 'worship_facility_weekly',
 'worship_facility_cooking',
 'k_12_school_high_school',
 'k_12_school_weekend_operation',
 'k_12_school_computer_density',
 'k_12_school_percent_that',
 'k_12_school_refrigeration',
 'k_12_school_cooking_facilities',
 'residence_hall_dormitory_1',
 'residence_hall_dormitory_2',
 'residence_hall_dormitory_3',
 'residence_hall_dormitory_4',
 'retail_store_exterior_entrance',
 'senior_care_community_average',
 'senior_care_community_number',
 'senior_care_community_number_1',
 'senior_care_community_number_2',
 'senior_care_community_number_3',
 'senior_care_community_number_4',
 'senior_care_community_percent',
 'senior_care_community_number_5',
 'senior_care_community_maximum',
 'senior_care_community_number_6',
 'supermarket_grocery_number_2',
 'supermarket_grocery_percent',
 'supermarket_grocery_cooking',
 'supermarket_grocery_walk',
 'supermarket_grocery_weekly',
 'supermarket_grocery_worker',
 'swimming_pool_approximate',
 'swimming_pool_location_of',
 'swimming_pool_months_in_use',
 'refrigerated_warehouse_weekly',
 'refrigerated_warehouse_worker',
 'non_refrigerated_warehouse_1',
 'non_refrigerated_warehouse_2',
 'non_refrigerated_warehouse_3',
 'non_refrigerated_warehouse_4',
 'non_refrigerated_warehouse_5',
 'data_center_ups_output_site',
            'office_gross_floor_area_sq',
 'office_office_air_conditioned',
 'office_pc_density',
 'office_workers_density',
 'parking_enclosed_floor_area',
 'parking_non_enclosed_floor',
 'parking_open_floor_area_w',
 'parking_weekly_hours_of_access',
 'retail_gross_floor_area_sq',
 'retail_cash_register_density',
 'retail_exterior_entrance',
 'retail_workers_density',
 'retail_weekly_operating_hours',
 'retail_walk_in_refrig_density',
 'retail_percent_cooled',
 'retail_pc_density',
 'retail_open_closed_refrig',
 'retail_number_of_walk_in',
 'retail_number_of_open_or',
            'other_number_of_pcs',
 'other_workers_on_main_shift',
 'hotel_gross_floor_area_sq',
 'hotel_room_density',
 'hotel_percent_cooled',
 'hotel_presence_of_cooking',
 'hotel_workers_density',
            'multifamily_home_gross_floor',
 'multifamily_home_dishwasher',
 'multifamily_home_laundry',
 'multifamily_home_laundry_1',
 'multifamily_home_government',
 'multifamily_home_number_of',
 'multifamily_home_number_of_1',
 'multifamily_home_percent',
 'multifamily_home_percent_1',
 'multifamily_home_total_number',
 'multifamily_home_primary',
 'multifamily_home_resident',
 'other_space_type_name',
 'other_gross_floor_area_sq',
           'data_center_annual_it_energy',
            'number_of_mri_machines',
 'number_of_staffed_beds',
          ]

In [28]:
sorted([i for i in list(nyc_building_energy_processing.columns) if i not in to_drop])

['_2nd_largest_property_use',
 '_2nd_largest_property_use_1',
 '_2nd_largest_property_use_gross_floor_area_ft',
 '_2nd_largest_property_use_type',
 '_3rd_largest_property_use',
 '_3rd_largest_property_use_1',
 '_3rd_largest_property_use_type',
 '_3rd_largest_property_use_type_gross_floor_area_ft',
 'account_id',
 'address_1',
 'address_2',
 'aggregate_meter_s_district_steam_number_of_individual_meters_included',
 'aggregate_meter_s_electric_number_of_individual_meters_included',
 'aggregate_meter_s_natural_gas_number_of_individual_meters_included',
 'alert_data_center_does_not',
 'alert_data_center_issue_with',
 'alert_data_center_issue_with_estimates_it_configuration_or_it_meter',
 'alert_energy_meter_has',
 'alert_energy_meter_has_gaps',
 'alert_energy_meter_has_less',
 'alert_energy_meter_has_less_than_12_full_calendar_months_of_data',
 'alert_energy_meter_has_overlaps',
 'alert_energy_meter_has_single',
 'alert_energy_meter_has_single_entry_more_than_65_days',
 'alert_energy_no_met

In [10]:
 nyc_building_energy[['outpatient_rehabilitation',
 'urgent_care_clinic_other',
 'hotel_gross_floor_area_ft',
 'hotel_gym_fitness_center']].drop_duplicates()

Unnamed: 0,outpatient_rehabilitation,urgent_care_clinic_other,hotel_gross_floor_area_ft,hotel_gym_fitness_center
0,Not Available,Not Available,Not Available,Not Available
39,Not Available,Not Available,28148,Not Available
65,Not Available,Not Available,119685,747
83,Not Available,Not Available,28059,0
129,Not Available,Not Available,71286,410
...,...,...,...,...
126522,Not Available,Not Available,99301,Not Available
126597,Not Available,2985,Not Available,Not Available
126693,Not Available,Not Available,512000,Not Available
126771,Not Available,Not Available,60201,0


In [90]:
nyc_building_energy[["primary_property_type_portfolio_manager_calculated",'primary_property_type_self','primary_property_type_self_selected']][
(~nyc_building_energy["primary_property_type_self_selected"].isnull())
].drop_duplicates()
#]
# .drop_duplicates()

Unnamed: 0,primary_property_type_portfolio_manager_calculated,primary_property_type_self,primary_property_type_self_selected
94011,Office,,Office
94013,Financial Office,,Financial Office
94016,Hospital (General Medical & Surgical),,Hospital (General Medical & Surgical)
94023,Retail Store,,Retail Store
94029,Laboratory,,Laboratory
...,...,...,...
121370,Office,,Parking
121637,Parking,,Stadium (Open)
121659,Not Available,,Multifamily Housing
121732,Not Available,,Mixed Use Property


In [89]:
nyc_building_energy[["primary_property_type_portfolio_manager_calculated",'primary_property_type_self','primary_property_type_self_selected']][
    # ((nyc_building_energy["primary_property_type_self"].isnull())
(nyc_building_energy["primary_property_type_self_selected"].isnull()))].drop_duplicates()
#]
# .drop_duplicates()

SyntaxError: closing parenthesis ')' does not match opening parenthesis '[' on line 1 (67698167.py, line 3)

In [88]:
nyc_building_energy["primary_property_type_portfolio_manager_calculated"].unique()
{"primary_property_type_portfolio_manager_calculated":[]}

array([nan, 'Office', 'Financial Office',
       'Hospital (General Medical & Surgical)', 'Retail Store',
       'Laboratory', 'Mixed Use Property', 'Multifamily Housing',
       'Other - Specialty Hospital', 'Senior Living Community', 'Hotel',
       'K-12 School', 'Other - Education', 'College/University',
       'Stadium (Open)', 'Museum', 'Urgent Care/Clinic/Other Outpatient',
       'Vocational School', 'Non-Refrigerated Warehouse',
       'Medical Office', 'Residence Hall/Dormitory',
       'Other - Entertainment/Public Assembly', 'Data Center',
       'Not Available', 'Manufacturing/Industrial Plant',
       'Senior Care Community', 'Self-Storage Facility', 'Indoor Arena',
       'Distribution Center', 'Enclosed Mall', 'Parking', 'Other',
       'Library', 'Residential Care Facility', 'Courthouse',
       'Movie Theater', 'Strip Mall', 'Refrigerated Warehouse',
       'Repair Services (Vehicle, Shoe, Locksmith, etc.)',
       'Social/Meeting Hall', 'Ambulatory Surgical Center',


In [None]:
nyc_building_energy["primary_property_type_portfolio_manager_calculated"].unique()

In [94]:
nyc_building_energy[[#'national_median_reference_property_type',
    "largest_property_use_type",
 'primary_property_type',
 'primary_property_type_epa',
 #'primary_property_type_portfolio_manager_calculated',
 'primary_property_type_self',
 'primary_property_type_self_selected']].drop_duplicates()

Unnamed: 0,largest_property_use_type,primary_property_type,primary_property_type_epa,primary_property_type_self,primary_property_type_self_selected
0,Non-Refrigerated Warehouse,Non-Refrigerated Warehouse,,Non-Refrigerated Warehouse,
1,Multifamily Housing,Multifamily Housing,,Multifamily Housing,
2,Office,Mixed Use Property,,Mixed Use Property,
4,Medical Office,Medical Office,,Medical Office,
6,Office,Office,,Office,
...,...,...,...,...,...
281744,,,,Other - Education,
283029,,,,Retail Store,
283152,,,,Senior Care Community,
283176,,,,Worship Facility,


In [97]:
nyc_building_energy["largest_property_use_type"].unique()
# nyc_building_energy[['largest_property_use_type','largest_property_use_type_1']].drop_duplicates()

array(['Non-Refrigerated Warehouse', 'Multifamily Housing', 'Office',
       'Medical Office', 'Enclosed Mall', 'Worship Facility', 'Hotel',
       'Manufacturing/Industrial Plant', 'Residence Hall/Dormitory',
       'Retail Store', 'Self-Storage Facility', 'Performing Arts',
       'Refrigerated Warehouse', 'Supermarket/Grocery Store',
       'K-12 School', 'Distribution Center',
       'Repair Services (Vehicle, Shoe, Locksmith, etc.)', 'Laboratory',
       'Parking', 'Outpatient Rehabilitation/Physical Therapy',
       'Hospital (General Medical & Surgical)',
       'Residential Care Facility', 'Other',
       'Transportation Terminal/Station', 'Financial Office',
       'Bank Branch', 'Other - Lodging/Residential', 'Bowling Alley',
       'Senior Living Community', 'Strip Mall', 'Adult Education',
       'College/University', 'Fitness Center/Health Club/Gym',
       'Automobile Dealership', 'Pre-school/Daycare',
       'Other - Recreation', 'Restaurant', 'Food Service',
       'Mov

In [58]:
nyc_building_energy["nyc_building_identification_number_bin"][~nyc_building_energy["nyc_building_identification_number_bin"].isnull()].head()

94011    1022631
94012    1037545
94013    1022667
94014    1037596
94015    1085682
Name: nyc_building_identification_number_bin, dtype: object

In [101]:
[i for i in nyc_building_energy.columns if (('number' in i))]#|(('property_use' in i)))]

['number_of_buildings',
 'financial_office_number_of',
 'financial_office_number_of_1',
 'medical_office_number_of',
 'medical_office_number_of_1',
 'medical_office_number_of_2',
 'multifamily_housing_number',
 'multifamily_housing_number_1',
 'multifamily_housing_number_2',
 'multifamily_housing_number_3',
 'multifamily_housing_number_4',
 'multifamily_housing_number_5',
 'multifamily_housing_number_6',
 'multifamily_housing_number_7',
 'office_number_of_computers',
 'office_number_of_workers',
 'office_worker_density_number',
 'retail_store_number_of_walk',
 'retail_store_number_of_open',
 'supermarket_grocery_number',
 'supermarket_grocery_number_1',
 'number_of_active_energy_meters',
 'number_of_active_energy_meters_1',
 'number_of_active_energy_meters_2',
 'number_of_active_it_meters',
 'aggregate_meter_s_electric_number_of_individual_meters_included',
 'aggregate_meter_s_natural_gas_number_of_individual_meters_included',
 'aggregate_meter_s_district_steam_number_of_individual_met

In [11]:
list(nyc_building_energy_processing.columns)

['report_year',
 'property_id',
 'property_name',
 'parent_property_id',
 'parent_property_name',
 'year_ending',
 'nyc_borough_block_and_lot',
 'nyc_building_identification',
 'address_1',
 'city',
 'postal_code',
 'primary_property_type_self',
 'primary_property_type',
 'national_median_reference',
 'list_of_all_property_use',
 'largest_property_use_type',
 'largest_property_use_type_1',
 '_2nd_largest_property_use',
 '_2nd_largest_property_use_1',
 '_3rd_largest_property_use',
 '_3rd_largest_property_use_1',
 'year_built',
 'construction_status',
 'number_of_buildings',
 'occupancy',
 'metered_areas_energy',
 'metered_areas_water',
 'energy_star_score',
 'national_median_energy_star',
 'target_energy_star_score',
 'reason_s_for_no_score',
 'energy_star_certification',
 'energy_star_certification_1',
 'site_eui_kbtu_ft',
 'weather_normalized_site_eui',
 'national_median_site_eui',
 'site_energy_use_kbtu',
 'weather_normalized_site_energy',
 'electricity_weather_normalized',
 'electri

In [None]:
## May be interesting 
multifamily_home_market_rate

In [None]:
### Columns to Drop because potentially irrelevant or redundant for 
nyc_building_energy.drop(columns=['data_center_ups_output_meter',
 'data_center_pdu_input_meter',
 'data_center_pdu_output_meter',
 'data_center_it_equipment',
 'data_center_it_site_energy',
 'data_center_it_source_energy',
 'data_center_pue',
 'data_center_national_median',
 'data_center_gross_floor_area',
 'data_center_ups_system',
 'data_center_it_energy',
 'data_center_cooling_equipment',
'supermarkets_grocery_gross',
 'supermarkets_grocery_number',
 'supermarkets_grocery_number_1',
 'supermarkets_grocery_number_2',
 'supermarkets_grocery_percent',
 'supermarkets_grocery_presence',
 'supermarkets_grocery_walk',
 'supermarkets_grocery_weekly',
 'supermarkets_grocery_workers'
'house_of_worship_gross_floor',
 'house_of_worship_pc_density',
 'house_of_worship_weekly',
 'house_of_worship_presence'
'residence_halls_dormitories',
'medical_office_percent_cooled'
 'residence_halls_dormitories_1',
 'residence_halls_dormitories_2',
 'residence_halls_dormitories_3',
 'residence_halls_dormitories_4'
'hotel_onsite_laundry_short'
 'warehouse_unrefrigerated',
 'warehouse_unrefrigerated_1',
 'warehouse_unrefrigerated_2',
 'warehouse_unrefrigerated_3',
 'warehouse_unrefrigerated_4',
 'warehouse_unrefrigerated_5',
 'warehouse_unrefrigerated_6',
 'hospital_gross_floor_area',
 'hospital_laboratory_y_1_n',
 'hospital_laundry_facility',
 'hospital_maximum_number_of',
 'hospital_number_of_buildings',
 'warehouse_refrigerated_gross',
 'warehouse_refrigerated_weekly',
 'warehouse_refrigerated_workers',
 'hospital_number_of_licensed',
 'multifamily_home_dishwashers'])

In [103]:
nyc_building_energy[['supermarkets_grocery_gross']].drop_duplicates()

Unnamed: 0,supermarkets_grocery_gross
0,
283192,5000
283199,6665
283204,975
283205,2400
...,...
305166,52094
305167,49188
305168,42064
305169,36600


### Foresty Tree Points 

In [None]:

forestry = {
    "ALL":{
        "api":"https://data.cityofnewyork.us/resource/hn5i-inap.json",
        "info":"https://data.cityofnewyork.us/Environment/Forestry-Tree-Points/hn5i-inap/about_data"
    }
}

### NYC Tax Zoning 

In [52]:
tax_zoning = {
    "ALL":{
        "api":"https://data.cityofnewyork.us/resource/fdkv-4t4z.json",
        "info":"https://data.cityofnewyork.us/City-Government/NYC-Zoning-Tax-Lot-Database/fdkv-4t4z/about_data"
}

SyntaxError: incomplete input (940578819.py, line 5)

In [None]:
PAGE = 1000               # <-- enforce 1,000 rows per page
TIMEOUT = 30
MAX_RETRIES = 5
BACKOFF_BASE = 1.5

session = requests.Session()
headers = {}

agg_running_list = []
for k, v in tax_zoning.items():
    print(f"Fetching {k} -> {v['api']}")
    df = fetch_all_rows_1k(v["api"], k, v["info"])
    if not df.empty:
        agg_running_list.append(df)
    else:
        print(f"Warning: no rows returned for {k} ({v['api']}).")

nyc_tax_zoning = pd.concat(agg_running_list, ignore_index=True) if agg_running_list else pd.DataFrame()
print(f"Total rows: {len(nyc_tax_zoning)}")


In [None]:
nyc_tax_zoning.to_csv("nyc_tax_zoning.csv", index=False )

### Tree Census

In [46]:
### Pulling Tree Census Data Data
tree_census ={
    "1995":{
        "api":"https://data.cityofnewyork.us/resource/kyad-zm4j.json",
        "info":"https://data.cityofnewyork.us/Environment/1995-Street-Tree-Census/kyad-zm4j/about_data"
    },
    "2005":{
        "api":"https://data.cityofnewyork.us/resource/29bw-z7pj.json",
        "info":"https://data.cityofnewyork.us/Environment/2005-Street-Tree-Census/29bw-z7pj/about_data"
    },
    "2015":{
        "api":"https://data.cityofnewyork.us/resource/uvpi-gqnh.json",
        "info":"https://data.cityofnewyork.us/Environment/2015-Street-Tree-Census-Tree-Data/uvpi-gqnh/about_data"
    }
}
### Need to add 2025 When Available. 

In [49]:
PAGE = 1000               # <-- enforce 1,000 rows per page
TIMEOUT = 30
MAX_RETRIES = 5
BACKOFF_BASE = 1.5

session = requests.Session()
headers = {}

agg_running_list = []
for k, v in tree_census.items():
    print(f"Fetching {k} -> {v['api']}")
    df = fetch_all_rows_1k(v["api"], k, v["info"])
    if not df.empty:
        agg_running_list.append(df)
    else:
        print(f"Warning: no rows returned for {k} ({v['api']}).")

nyc_tree_census = pd.concat(agg_running_list, ignore_index=True) if agg_running_list else pd.DataFrame()
print(f"Total rows: {len(nyc_tree_census)}")


Fetching 1995 -> https://data.cityofnewyork.us/resource/kyad-zm4j.json
Fetching 2005 -> https://data.cityofnewyork.us/resource/29bw-z7pj.json
Fetching 2015 -> https://data.cityofnewyork.us/resource/uvpi-gqnh.json
Total rows: 1793149


In [51]:
nyc_tree_census.to_csv("NYC_tree_census.csv", index=False )