# Start here
This notebook contains scripts to fetch data from the NYC Open Data API. This section sets up the basic workflow, and the section after it works through one example using 311 data. You can adapt the example to any tabular dataset on NYC Open Data by changing the dataset ID and query passed to client.get().

This code is based in part on Mark Bauer's sodapy Tutorial for NYC Open Data (https://github.com/mebauer/sodapy-tutorial-nyc-opendata/tree/main). Thanks also to Darcy Krasne, who provided some template code for pulling from the NYC Open Data API. 

In [None]:
import os
import sys
import time

import arcpy
import pandas
import geopandas
from arcgis.features import GeoAccessor, GeoSeriesAccessor
from sodapy import Socrata
from shapely.geometry import Point

In [None]:
#set up for Socrata API
# To get an app token, create an NYC Open Data account and then follow these steps:
# https://support.socrata.com/hc/en-us/articles/210138558-Generating-App-Tokens-and-API-Keys
data_url = 'data.cityofnewyork.us'  # NYC Open Data domain
app_token = 'your token'  # placeholder: replace with your own app token

# One client is shared by every request in this notebook.
# NOTE(review): timeout=1000 is presumably in seconds - confirm in the sodapy docs.
client = Socrata(
    data_url,
    app_token,
    timeout=1000,
)

In [None]:
#set up workspaces and check database
# Path of the default geodatabase of the ArcGIS Pro project this notebook runs in.
default_gdb = arcpy.mp.ArcGISProject("CURRENT").defaultGeodatabase
print(default_gdb)

# '' stands for the workspace root, so feature classes that are not inside a
# feature dataset get listed too. ListDatasets can return None, so fall back to
# an empty list *before* concatenating; otherwise the root entry is lost as well.
# NOTE(review): the List* functions read arcpy's current workspace, which this
# cell does not set - confirm that it points at default_gdb.
datasets = [''] + (arcpy.ListDatasets(feature_type='feature') or [])

for ds in datasets:
    # Treat a None result as "nothing to list" instead of failing the loop.
    for fc in arcpy.ListFeatureClasses(feature_dataset=ds) or []:
        path = os.path.join(ds, fc)
        print(path)

# 311 data
This pulls data on 311 service requests from the NYC Office of Technology and Innovation. It pulls 311 requests created in 2024 whose complaint type contains "Homeless"; edit the dates in the query to pull a different period. Data from https://data.cityofnewyork.us/Social-Services/311-Service-Requests-from-2010-to-Present/erm2-nwe9/about_data

In [None]:
# Accumulator for the records of every fetched page; the paging loop extends it.
all_results: list = []

In [None]:
# Paging and retry settings
limit: int = 1000       # records requested per API call; also the paging step
offset: int = 0         # index of the first record of the next page
max_attempts: int = 3   # tries per page before giving up

In [None]:
# Function to fetch data
def fetch_data(offset,
               dataset_id="erm2-nwe9",
               where=("created_date between '2024-01-01T00:00:00' and '2024-12-31T23:59:59' "
                      "AND complaint_type like '%Homeless%'"),
               order="created_date DESC, :id",
               retry_delay=5):
    """Fetch one page of records from NYC Open Data, retrying on errors.

    The defaults reproduce the 311 example (requests created in 2024 whose
    complaint type contains "Homeless"); pass other values to query a
    different dataset or filter.

    Args:
        offset: Index of the first record of the page.
        dataset_id: Socrata dataset identifier.
        where: SoQL filter expression.
        order: SoQL ordering. The default adds ``:id`` as a tie-breaker
            because ``created_date`` is not unique, and offset paging needs a
            stable order to avoid repeating or skipping rows between pages.
        retry_delay: Seconds to wait before retrying a failed request.

    Returns:
        Whatever ``client.get`` returns for the page. The caller treats an
        empty result as "no more records". Returns None if ``max_attempts``
        is not positive, because no request is made in that case.

    Raises:
        SystemExit: After ``max_attempts`` consecutive failures.

    Uses the notebook-level ``client``, ``limit`` and ``max_attempts``.
    """
    for attempt in range(max_attempts):
        try:
            return client.get(dataset_id,
                              where=where,
                              order=order,
                              limit=limit,
                              offset=offset)
        except Exception as e:
            # Any failure (network, HTTP, timeout) is treated as retryable.
            print(f"Error on attempt {attempt + 1}: {e}")
            if attempt < max_attempts - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                print("Max retries reached. Exiting.")
                sys.exit(1)

# Loop through the data to avoid 1000 record limit on API
# offset and all_results are initialized in earlier cells; re-run those cells
# first if you want to start a fresh download instead of continuing this one.
start_time = time.time()
try:
    while True:
        results = fetch_data(offset)

        # An empty page means the previous page was the last one.
        if not results:
            print("No more records to fetch.")
            break

        all_results.extend(results)
        offset += limit

        print(f"Fetched {len(all_results)} records so far...")

except Exception as e:
    print(f"An error occurred: {e}")
    sys.exit(1)

# Report how long the download took.
print(f"Fetched {len(all_results)} records in {time.time() - start_time:.1f} seconds.")

In [None]:
# Build a table from the accumulated records.
df = pandas.DataFrame.from_records(data=all_results)

# A bare name on the last line of a cell displays the table in the notebook.
df

In [None]:
# Convert to spatial data: build point geometry from the coordinate columns,
# passing 4326 as the spatial reference.
# NOTE(review): the API values may arrive as strings and some records may have
# no coordinates - confirm from_xy handles the 'longitude'/'latitude' columns as-is.
sdf = pandas.DataFrame.spatial.from_xy(
    df=df,
    x_column='longitude',
    y_column='latitude',
    sr=4326,
)

# Preview the first rows.
sdf.head()

In [None]:
# Write the spatial DataFrame to the default geodatabase as feature class "data_311".
sdf.spatial.to_featureclass(location=f"{default_gdb}/data_311")

In [None]:
#export to CSV
# The CSV is written to the home folder of the current ArcGIS Pro project.
aprx = arcpy.mp.ArcGISProject("CURRENT")
default_folder = aprx.homeFolder
file_path = os.path.join(default_folder, 'data_311.csv')

# Export the 311 DataFrame. The previous name, data_shelters, is not defined
# anywhere in this notebook and raised a NameError.
df.to_csv(file_path, index=False)