In [26]:
import pandas as pd
import requests
import json
import datetime
import csv
import time
import matplotlib.pyplot as plt
import ast
from dotenv import load_dotenv
import os 
from sqlalchemy import create_engine

In [27]:
load_dotenv()

True

In [28]:
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")

In [29]:
TOKEN = os.getenv("NYC_open_data_token")

base_url = "https://data.cityofnewyork.us/resource/enfh-gkve.json"

headers = {"X-App-Token": TOKEN}

limit = 1000    
offset = 0
all_records = []

while True:
    url = f"{base_url}?$limit={limit}&$offset={offset}"
    try:
        response = requests.get(url, headers=headers, timeout=20)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("Request failed, retrying in 2 seconds...", e)
        time.sleep(2)
        continue

    batch = response.json()

    if not batch:
        print("No more data returned. Stopping.")
        break

    all_records.extend(batch)
    print(f"Fetched {len(batch)} rows (offset={offset})")

    # Stop if fewer than the limit means end of dataset
    if len(batch) < limit:
        break

    offset += limit
    time.sleep(0.2)  # polite rate-limit protection


# dataframe:

parks = pd.DataFrame(all_records)

print("Done! Total rows:", len(parks))
print(parks.head())

Fetched 1000 rows (offset=0)
Fetched 1000 rows (offset=1000)
Fetched 56 rows (offset=2000)
Done! Total rows: 2056
           acquisitiondate    acres                address borough class  \
0  1972-09-22T00:00:00.000     83.3  1000 RICHMOND TERRACE       R  PARK   
1  1934-02-28T00:00:00.000    0.403     532 EAST 12 STREET       M  PARK   
2  1934-06-08T00:00:00.000    0.763             2 2 AVENUE       M  PARK   
3  1926-04-29T00:00:00.000  286.557    298 SATERLEE STREET       R  PARK   
4  1948-01-29T00:00:00.000    1.534   255 PARKINSON AVENUE       R  PARK   

  communityboard councildistrict department   gisobjid gispropnum  ...  \
0            501              49       R-01  100003741       R116  ...   
1            103               2       M-03  100004130       M113  ...   
2            103               2       M-03  100004795       M124  ...   
3            503              51       R-03  100004319       R006  ...   
4            502              50      R-02B  100004659     

In [30]:
parks.head()

Unnamed: 0,acquisitiondate,acres,address,borough,class,communityboard,councildistrict,department,gisobjid,gispropnum,...,pip_ratable,precinct,retired,signname,subcategory,typecategory,us_congress,waterfront,zipcode,multipolygon
0,1972-09-22T00:00:00.000,83.3,1000 RICHMOND TERRACE,R,PARK,501,49,R-01,100003741,R116,...,False,120,False,Snug Harbor Cultural Center,Large Park,Historic House Park,11,True,"10301, 10310","{'type': 'MultiPolygon', 'coordinates': [[[[-7..."
1,1934-02-28T00:00:00.000,0.403,532 EAST 12 STREET,M,PARK,103,2,M-03,100004130,M113,...,True,9,False,Joseph C. Sauer Park,Neighborhood Plgd,Neighborhood Park,10,False,10009,"{'type': 'MultiPolygon', 'coordinates': [[[[-7..."
2,1934-06-08T00:00:00.000,0.763,2 2 AVENUE,M,PARK,103,2,M-03,100004795,M124,...,True,9,False,First Park,Neighborhood Plgd,Neighborhood Park,10,False,10003,"{'type': 'MultiPolygon', 'coordinates': [[[[-7..."
3,1926-04-29T00:00:00.000,286.557,298 SATERLEE STREET,R,PARK,503,51,R-03,100004319,R006,...,False,123,False,Conference House Park,Large Park,Nature Area,11,True,"10307, 10309","{'type': 'MultiPolygon', 'coordinates': [[[[-7..."
4,1948-01-29T00:00:00.000,1.534,255 PARKINSON AVENUE,R,PARK,502,50,R-02B,100004659,R063,...,True,122,False,Old Town Playground,JOP,Playground,11,False,10305,"{'type': 'MultiPolygon', 'coordinates': [[[[-7..."


In [34]:
print(parks.columns.tolist())

['acquisitiondate', 'acres', 'address', 'borough', 'class', 'communityboard', 'councildistrict', 'department', 'gisobjid', 'gispropnum', 'globalid', 'jurisdiction', 'location', 'mapped', 'name311', 'nys_assembly', 'nys_senate', 'objectid', 'omppropid', 'parentid', 'permit', 'permitdistrict', 'permitparent', 'pip_ratable', 'precinct', 'retired', 'signname', 'subcategory', 'typecategory', 'us_congress', 'waterfront', 'zipcode', 'multipolygon']


In [36]:
columns_to_keep = ["typecategory", "name311", "multipolygon"]

In [37]:
parks_final = parks[columns_to_keep]

In [38]:
parks_final.head()

Unnamed: 0,typecategory,name311,multipolygon
0,Historic House Park,Snug Harbor Cultural Center,"{'type': 'MultiPolygon', 'coordinates': [[[[-7..."
1,Neighborhood Park,Joseph C. Sauer Park,"{'type': 'MultiPolygon', 'coordinates': [[[[-7..."
2,Neighborhood Park,First Park,"{'type': 'MultiPolygon', 'coordinates': [[[[-7..."
3,Nature Area,Conference House Park,"{'type': 'MultiPolygon', 'coordinates': [[[[-7..."
4,Playground,Old Town Playground,"{'type': 'MultiPolygon', 'coordinates': [[[[-7..."


In [39]:
import pandas as pd

def compute_centroid_safe(multipolygon):
    # If it's a string, convert to dict
    if isinstance(multipolygon, str):
        import ast
        multipolygon = ast.literal_eval(multipolygon)
    
    coords = multipolygon['coordinates']
    
    # Flatten all coordinate points (handles multiple polygons and rings)
    all_points = []
    for polygon in coords:
        for ring in polygon:
            all_points.extend(ring)  # ring is a list of [lon, lat] pairs
    
    # Compute average longitude and latitude
    avg_lon = sum(pt[0] for pt in all_points) / len(all_points)
    avg_lat = sum(pt[1] for pt in all_points) / len(all_points)
    
    return avg_lon, avg_lat

# Apply to DataFrame
parks_final['centroid'] = parks_final['multipolygon'].apply(compute_centroid_safe)

# Optionally split into lon/lat columns
parks_final[['centroid_lon', 'centroid_lat']] = pd.DataFrame(parks_final['centroid'].tolist(), index=parks_final.index)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  parks_final['centroid'] = parks_final['multipolygon'].apply(compute_centroid_safe)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  parks_final[['centroid_lon', 'centroid_lat']] = pd.DataFrame(parks_final['centroid'].tolist(), index=parks_final.index)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pa

In [40]:
parks_final.head()

Unnamed: 0,typecategory,name311,multipolygon,centroid,centroid_lon,centroid_lat
0,Historic House Park,Snug Harbor Cultural Center,"{'type': 'MultiPolygon', 'coordinates': [[[[-7...","(-74.10312983893252, 40.644863105489236)",-74.10313,40.644863
1,Neighborhood Park,Joseph C. Sauer Park,"{'type': 'MultiPolygon', 'coordinates': [[[[-7...","(-73.97970336952203, 40.7280951932555)",-73.979703,40.728095
2,Neighborhood Park,First Park,"{'type': 'MultiPolygon', 'coordinates': [[[[-7...","(-73.9901170550191, 40.723688106952146)",-73.990117,40.723688
3,Nature Area,Conference House Park,"{'type': 'MultiPolygon', 'coordinates': [[[[-7...","(-74.23980570077656, 40.50108512752425)",-74.239806,40.501085
4,Playground,Old Town Playground,"{'type': 'MultiPolygon', 'coordinates': [[[[-7...","(-74.08118377477349, 40.59531115962469)",-74.081184,40.595311


In [41]:
parks_final = parks_final.drop(['multipolygon', 'centroid'], axis=1)

In [42]:
parks_final.head()

Unnamed: 0,typecategory,name311,centroid_lon,centroid_lat
0,Historic House Park,Snug Harbor Cultural Center,-74.10313,40.644863
1,Neighborhood Park,Joseph C. Sauer Park,-73.979703,40.728095
2,Neighborhood Park,First Park,-73.990117,40.723688
3,Nature Area,Conference House Park,-74.239806,40.501085
4,Playground,Old Town Playground,-74.081184,40.595311


In [43]:
parks_final = parks_final.drop_duplicates(subset=['name311'], keep='first')

In [44]:
print(len(parks_final))

1597


In [45]:
parks_final = parks_final.rename(columns={'name311': 'name', 'centroid_lon': 'longitude', 'centroid_lat': 'latitude'})

In [46]:
parks_final.head()

Unnamed: 0,typecategory,name,longitude,latitude
0,Historic House Park,Snug Harbor Cultural Center,-74.10313,40.644863
1,Neighborhood Park,Joseph C. Sauer Park,-73.979703,40.728095
2,Neighborhood Park,First Park,-73.990117,40.723688
3,Nature Area,Conference House Park,-74.239806,40.501085
4,Playground,Old Town Playground,-74.081184,40.595311


In [48]:
# Send to db:
engine = create_engine(f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}")

In [50]:
# Send df to PostgreSQL
parks_final.to_sql('parks', engine, if_exists='replace', index=False)


597