# Start here
This notebook contains scripts to fetch data from the NYC Open Data API. Set up the basic workflow in this section, and then go to the relevant section to pull an individual dataset (on homeless shelters, evictions, or 311 complaints). 

This code is based in part on Mark Bauer's sodapy Tutorial for NYC Open Data (https://github.com/mebauer/sodapy-tutorial-nyc-opendata/tree/main). Thanks also to Darcy Krasne, who provided some template code for pulling from the NYC Open Data API. 

In [None]:
import os
import sys
import time

import arcpy
import pandas
import geopandas
from arcgis.features import GeoAccessor, GeoSeriesAccessor
from sodapy import Socrata
from shapely.geometry import Point

In [None]:
#set up for Socrata API
data_url = 'data.cityofnewyork.us'
# Prefer a token supplied through the NYC_OPEN_DATA_APP_TOKEN environment variable so the
# secret does not have to be saved inside the notebook; otherwise fall back to the value typed here.
# to get a token, create an NYC Open Data account then follow these steps: https://support.socrata.com/hc/en-us/articles/210138558-Generating-App-Tokens-and-API-Keys
app_token = os.environ.get('NYC_OPEN_DATA_APP_TOKEN', 'Your API token')
# Shared client used by every query cell below; timeout is passed through to Socrata unchanged.
client = Socrata(data_url, app_token, timeout=1000)

In [None]:
# Locate the project's default geodatabase and list the feature classes arcpy can see
default_gdb = arcpy.mp.ArcGISProject("CURRENT").defaultGeodatabase
print(default_gdb)

datasets = arcpy.ListDatasets(feature_type='feature')
if datasets is not None:
    # The leading '' entry makes the loop below also visit feature classes outside any feature dataset
    datasets = [''] + datasets
else:
    datasets = []

for ds in datasets:
    for fc in arcpy.ListFeatureClasses(feature_dataset=ds):
        # Print each feature class prefixed by its feature dataset (if any)
        path = os.path.join(ds, fc)
        print(path)

# Shelter data
This pulls data on the count of homeless shelters from the NYC Dept of Homeless Services. It pulls monthly shelter counts by borough for the most recent year. Data from https://data.cityofnewyork.us/Social-Services/Buildings-by-Borough-and-Community-District/3qem-6v3v/about_data

### For shelter counts by Community District in the most recent year

In [None]:
#define and pull data from API -- use this for count of shelters by CD for most recent year
data_set = "3qem-6v3v"
# Without an explicit limit the API returns only its default page of 1000 rows,
# which would silently drop the oldest records (results are ordered newest first).
# Request a much larger page so the whole year comes back in one call.
results = client.get(data_set,
                     where="report_date between '2024-01-01T22:43:28' and '2024-12-31T22:43:28' and community_district is not null",
                     order="report_date DESC",
                     limit=50000)

In [None]:
# Build a DataFrame from the API records and display it
data_shelters = pandas.DataFrame.from_records(data=results)
data_shelters

In [None]:
# Shelter-count columns that must be numeric before they can be aggregated
field_list = [
    'adult_shelter',
    'fwc_comm_hotel',
    'adult_shelter_comm_hotel',
    'fwc_shelter',
    'adult_family_shelter',
    'adult_family_comm_hotel',
]
# errors='coerce' turns values that cannot be parsed into NaN instead of raising
numeric_values = data_shelters[field_list].apply(pandas.to_numeric, errors='coerce')
data_shelters[field_list] = numeric_values

In [None]:
# summarize shelter counts by community district
# For every community district keep the maximum value reported for each shelter field.
data_shelters_byCD = pandas.pivot_table(
    data_shelters,
    values=field_list,
    index='community_district',
    aggfunc={field: ['max'] for field in field_list}
)

#clean up the table
data_shelters_byCD = data_shelters_byCD.reset_index()
# The pivot produces two-level column labels such as ('adult_shelter', 'max'); join them
# into 'adult_shelter_max'. The former index column has an empty second level, so strip
# the dangling underscore to get 'community_district' rather than 'community_district_'.
data_shelters_byCD.columns = ['_'.join(col).strip('_') for col in data_shelters_byCD.columns.values]

print(data_shelters_byCD)

In [None]:
#summarize shelter counts by month
# Parse report_date and derive the calendar month each record belongs to
data_shelters['report_date'] = pandas.to_datetime(data_shelters['report_date'])
data_shelters['report_month'] = data_shelters['report_date'].dt.to_period('M')

# Add up every shelter field within each month
monthly_totals = data_shelters.groupby('report_month')[field_list].sum()
data_shelters_forlinechart = monthly_totals.reset_index()

# Label the columns: the month first, then each field with a _sum suffix
data_shelters_forlinechart.columns = ['month'] + [col + '_sum' for col in field_list]

print(data_shelters_forlinechart)

In [None]:
# Write the most recent year's summary to a CSV in the project's home folder
aprx = arcpy.mp.ArcGISProject("CURRENT")
default_folder = aprx.homeFolder

# shelter summary by community district
file_path = os.path.join(default_folder, 'data_shelters_byCD.csv')
data_shelters_byCD.to_csv(path_or_buf=file_path, index=False)

### For shelter counts by month for entire range of dataset

In [None]:
# Start with an empty accumulator for the paged API results
all_results = list()

In [None]:
# Paging settings: page size, starting offset, and attempts allowed per page
limit, offset, max_attempts = 1000, 0, 3

In [None]:
# Function to fetch data 
def fetch_data(offset):
    """Fetch one page of shelter records from dataset 3qem-6v3v.

    Reads the module-level ``client``, ``limit`` and ``max_attempts``.

    Args:
        offset: Number of matching records to skip before this page.

    Returns:
        Whatever ``client.get`` returns for the requested page.

    Raises:
        Exception: The error from the final attempt, re-raised once
            ``max_attempts`` attempts have failed.
    """
    for attempt in range(max_attempts):
        try:
            return client.get("3qem-6v3v",
                              where="report_date between '2018-07-01T22:43:28' and '2024-12-31T22:43:28' and community_district is not null",
                              # report_date alone is not a unique sort key; the :id tie-breaker
                              # keeps the order stable so pages do not overlap or skip rows.
                              order="report_date DESC, :id",
                              limit=limit,
                              offset=offset)
        except Exception as e:
            print(f"Error on attempt {attempt + 1}: {str(e)}")
            if attempt < max_attempts - 1:
                print("Retrying in 5 seconds...")
                time.sleep(5)
            else:
                print("Max retries reached. Exiting.")
                # Re-raise so the notebook shows the real error instead of a bare SystemExit.
                raise

# Loop through the data to avoid 1000 record limit on API
# Wall-clock start of the download; nothing in this cell reads it back.
start_time = time.time()
try:
    while True:
        results = fetch_data(offset)

        # An empty page means everything has already been collected.
        if not results:
            print("No more records to fetch.")
            break

        all_results.extend(results)
        # Advance to the next page.
        offset += limit

        print(f"Fetched {len(all_results)} records so far...")

except Exception as e:
    print(f"An error occurred: {str(e)}")
    # Re-raise rather than sys.exit(1): the notebook then stops with the real
    # traceback instead of a SystemExit that hides where the failure happened.
    raise

In [None]:
# Build a DataFrame from all of the paged records and display it
data_shelters = pandas.DataFrame.from_records(data=all_results)
data_shelters

In [None]:
# Shelter-count columns that must be numeric before they can be aggregated
field_list = [
    'adult_shelter',
    'fwc_comm_hotel',
    'adult_shelter_comm_hotel',
    'fwc_shelter',
    'adult_family_shelter',
    'adult_family_comm_hotel',
]
# errors='coerce' turns values that cannot be parsed into NaN instead of raising
numeric_values = data_shelters[field_list].apply(pandas.to_numeric, errors='coerce')
data_shelters[field_list] = numeric_values

In [None]:
# Parse report_date and derive the calendar month each record belongs to
data_shelters['report_date'] = pandas.to_datetime(data_shelters['report_date'])
data_shelters['report_month'] = data_shelters['report_date'].dt.to_period('M')

# Add up every shelter field within each month
monthly_totals = data_shelters.groupby('report_month')[field_list].sum()
data_shelters_forlinechart = monthly_totals.reset_index()

# Label the columns: the month first, then each field with a _sum suffix
data_shelters_forlinechart.columns = ['month'] + [col + '_sum' for col in field_list]

print(data_shelters_forlinechart)

In [None]:
#export timeline data to CSV
# Look up the project home folder here so this cell also works when the
# single-year export cell has not been run in the current kernel session.
aprx = arcpy.mp.ArcGISProject("CURRENT")
default_folder = aprx.homeFolder

#shelter count by month
file_path = os.path.join(default_folder, 'data_shelters_forlinechart.csv')
data_shelters_forlinechart.to_csv(file_path, index=False)

# Evictions data
This pulls data on evictions from the NYC Dept of Investigation. It pulls evictions executed in the most recent year. Data from https://data.cityofnewyork.us/City-Government/Evictions/6z8x-wfk4

In [None]:
# Start with an empty accumulator for the paged API results
all_results = list()

In [None]:
# Paging settings: page size, starting offset, and attempts allowed per page
limit, offset, max_attempts = 1000, 0, 3

In [None]:
# Function to fetch data 
def fetch_data(offset):
    """Fetch one page of eviction records from dataset 6z8x-wfk4.

    Reads the module-level ``client``, ``limit`` and ``max_attempts``.

    Args:
        offset: Number of matching records to skip before this page.

    Returns:
        Whatever ``client.get`` returns for the requested page.

    Raises:
        Exception: The error from the final attempt, re-raised once
            ``max_attempts`` attempts have failed.
    """
    for attempt in range(max_attempts):
        try:
            return client.get("6z8x-wfk4",
                              where="executed_date between '2024-01-01T00:00:00' and '2024-12-31T23:59:59'",
                              # executed_date alone is not a unique sort key; the :id tie-breaker
                              # keeps the order stable so pages do not overlap or skip rows.
                              order="executed_date DESC, :id",
                              limit=limit,
                              offset=offset)
        except Exception as e:
            print(f"Error on attempt {attempt + 1}: {str(e)}")
            if attempt < max_attempts - 1:
                print("Retrying in 5 seconds...")
                time.sleep(5)
            else:
                print("Max retries reached. Exiting.")
                # Re-raise so the notebook shows the real error instead of a bare SystemExit.
                raise

# Loop through the data to avoid 1000 record limit on API
# Wall-clock start of the download; nothing in this cell reads it back.
start_time = time.time()
try:
    while True:
        results = fetch_data(offset)

        # An empty page means everything has already been collected.
        if not results:
            print("No more records to fetch.")
            break

        all_results.extend(results)
        # Advance to the next page.
        offset += limit

        print(f"Fetched {len(all_results)} records so far...")

except Exception as e:
    print(f"An error occurred: {str(e)}")
    # Re-raise rather than sys.exit(1): the notebook then stops with the real
    # traceback instead of a SystemExit that hides where the failure happened.
    raise

In [None]:
# Build a DataFrame from all of the paged records and display it
df = pandas.DataFrame.from_records(data=all_results)
df

In [None]:
# Build a spatially enabled DataFrame from the longitude/latitude columns (WKID 4326)
sdf = GeoAccessor.from_xy(df=df, x_column='longitude', y_column='latitude', sr=4326)

sdf.head()

In [None]:
# Write the spatial DataFrame into the default geodatabase as data_evictions
evictions_fc = default_gdb + "/data_evictions"
sdf.spatial.to_featureclass(location=evictions_fc)

In [None]:
#export to CSV
aprx = arcpy.mp.ArcGISProject("CURRENT")
default_folder = aprx.homeFolder
file_path = os.path.join(default_folder, 'data_evictions.csv')
# Write the evictions table built in this section; the previous code exported
# data_shelters here, so data_evictions.csv ended up holding shelter records.
# NOTE(review): if from_xy added a SHAPE column to df it is written as well — confirm that is wanted.
df.to_csv(file_path, index=False)

# 311 data
This pulls data on 311 service requests from the NYC Office of Technology and Innovation. It pulls 311 requests that reference "Homeless" from the most recent year. Data from https://data.cityofnewyork.us/Social-Services/311-Service-Requests-from-2010-to-Present/erm2-nwe9/about_data

In [None]:
# Start with an empty accumulator for the paged API results
all_results = list()

In [None]:
# Paging settings: page size, starting offset, and attempts allowed per page
limit, offset, max_attempts = 1000, 0, 3

In [None]:
# Function to fetch data 
def fetch_data(offset):
    """Fetch one page of 311 service requests from dataset erm2-nwe9.

    Reads the module-level ``client``, ``limit`` and ``max_attempts``.

    Args:
        offset: Number of matching records to skip before this page.

    Returns:
        Whatever ``client.get`` returns for the requested page.

    Raises:
        Exception: The error from the final attempt, re-raised once
            ``max_attempts`` attempts have failed.
    """
    for attempt in range(max_attempts):
        try:
            return client.get("erm2-nwe9",
                              where="created_date between '2024-01-01T00:00:00' and '2024-12-31T23:59:59' AND complaint_type like '%Homeless%'",
                              # created_date alone is not a unique sort key; the :id tie-breaker
                              # keeps the order stable so pages do not overlap or skip rows.
                              order="created_date DESC, :id",
                              limit=limit,
                              offset=offset)
        except Exception as e:
            print(f"Error on attempt {attempt + 1}: {str(e)}")
            if attempt < max_attempts - 1:
                print("Retrying in 5 seconds...")
                time.sleep(5)
            else:
                print("Max retries reached. Exiting.")
                # Re-raise so the notebook shows the real error instead of a bare SystemExit.
                raise

# Loop through the data to avoid 1000 record limit on API
# Wall-clock start of the download; nothing in this cell reads it back.
start_time = time.time()
try:
    while True:
        results = fetch_data(offset)

        # An empty page means everything has already been collected.
        if not results:
            print("No more records to fetch.")
            break

        all_results.extend(results)
        # Advance to the next page.
        offset += limit

        print(f"Fetched {len(all_results)} records so far...")

except Exception as e:
    print(f"An error occurred: {str(e)}")
    # Re-raise rather than sys.exit(1): the notebook then stops with the real
    # traceback instead of a SystemExit that hides where the failure happened.
    raise

In [None]:
# Build a DataFrame from all of the paged records and display it
df = pandas.DataFrame.from_records(data=all_results)
df

In [None]:
# Build a spatially enabled DataFrame from the longitude/latitude columns (WKID 4326)
sdf = GeoAccessor.from_xy(df=df, x_column='longitude', y_column='latitude', sr=4326)

sdf.head()

In [None]:
# Write the spatial DataFrame into the default geodatabase as data_311
requests_fc = default_gdb + "/data_311"
sdf.spatial.to_featureclass(location=requests_fc)

In [None]:
#export to CSV
aprx = arcpy.mp.ArcGISProject("CURRENT")
default_folder = aprx.homeFolder
file_path = os.path.join(default_folder, 'data_311.csv')
# Write the 311 table built in this section; the previous code exported
# data_shelters here, so data_311.csv ended up holding shelter records.
# NOTE(review): if from_xy added a SHAPE column to df it is written as well — confirm that is wanted.
df.to_csv(file_path, index=False)