In [2]:
import ast
import csv
import datetime
import json
import os
import time
import urllib.parse

import matplotlib.pyplot as plt
import pandas as pd
import requests
from dotenv import load_dotenv
from sqlalchemy import create_engine

In [3]:
# Load environment variables through python-dotenv so the os.getenv() lookups
# in the following cells can see them. The call's return value is shown as the
# cell output.
load_dotenv()

True

In [4]:
# Database connection settings, read from the environment.
# Each name is None when its variable is not set.
DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME = (
    os.getenv(var_name)
    for var_name in ("DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT", "DB_NAME")
)

In [5]:
# Download every row of the NYC Open Data resource below through its JSON API,
# one page at a time ($limit/$offset), and collect the rows into a DataFrame.
TOKEN = os.getenv("NYC_open_data_token")

base_url = "https://data.cityofnewyork.us/resource/wg9x-4ke6.json"

headers = {"X-App-Token": TOKEN}

limit = 1000
offset = 0
all_records = []

max_retries = 5   # consecutive failed requests tolerated for one page
retry_delay = 2   # seconds to wait before retrying a failed request
failed_attempts = 0

while True:
    # Let requests build and encode the query string. The explicit $order keeps
    # the row order stable across requests so offset paging does not skip or
    # repeat rows.
    params = {"$limit": limit, "$offset": offset, "$order": ":id"}
    try:
        response = requests.get(base_url, headers=headers, params=params, timeout=20)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        failed_attempts += 1
        if failed_attempts >= max_retries:
            # A permanent error (bad token, wrong URL, outage) would otherwise
            # keep this loop retrying forever.
            raise RuntimeError(
                f"Giving up at offset={offset} after {max_retries} failed requests"
            ) from e
        print(f"Request failed, retrying in {retry_delay} seconds...", e)
        time.sleep(retry_delay)
        continue

    # The page was fetched successfully, so the next page starts with a clean count.
    failed_attempts = 0
    batch = response.json()

    if not batch:
        print("No more data returned. Stopping.")
        break

    all_records.extend(batch)
    print(f"Fetched {len(batch)} rows (offset={offset})")

    # Stop if fewer than the limit means end of dataset
    if len(batch) < limit:
        break

    offset += limit
    time.sleep(0.2)  # polite rate-limit protection


# dataframe:

schools = pd.DataFrame(all_records)

print("Done! Total rows:", len(schools))
schools.head()

Fetched 1000 rows (offset=0)
Fetched 1000 rows (offset=1000)
Fetched 190 rows (offset=2000)
Done! Total rows: 2190


Unnamed: 0,fiscal_year,system_code,location_code,location_name,beds,managed_by_name,location_type_description,location_category_description,grades_text,grades_final_text,...,administrative_district_name,community_school_sup_name,tier_3_support_location_name,tier_3_support_leader_name,tier_2_support_location_name,highschool_network_location,highschool_network_name,highschool_network,community_district_1,police_precinct
0,2020,15K001,K001,P.S. 001 The Bergen,331500010001,DOE,General Academic,Elementary,"PK,0K,01,02,03,04,05,SE","PK,0K,01,02,03,04,05",...,COMMUNITY SCHOOL DISTRICT 15,"SKOP, ANITA",NYCDOE Borough Office - Brooklyn North,,School Support Team 5- Brooklyn North,,,,307,72
1,2020,17K002,K002,Parkside Preparatory Academy,331700010002,DOE,General Academic,Junior High-Intermediate-Middle,"06,07,08,SE",060708,...,COMMUNITY SCHOOL DISTRICT 17,"ELLIS, CLARENCE",NYCDOE Borough Office - Brooklyn South,Mauriciere de Govia,School Support Team 2- Brooklyn South,,,,309,71
2,2020,13K003,K003,P.S. 003 The Bedford Village,331300010003,DOE,General Academic,Elementary,"PK,0K,01,02,03,04,05,SE","PK,0K,01,02,03,04,05",...,COMMUNITY SCHOOL DISTRICT 13,"SAMUELS, KAMAR",NYCDOE Borough Office - Brooklyn North,,School Support Team 3- Brooklyn North,,,,303,79
3,2020,75K004,K004,P.S. K004,307500013004,DOE,Special Education,Elementary,"PK,0K,01,02,03,04,05,SE","PK,0K,01,02,03,04,05,06,07,SE",...,CITYWIDE SPECIAL EDUCATION,"LOUISSAINT, KETLER",D75 CITYWIDE BCO,Tillman Roberto,Children First Network 752,,,,305,75
4,2020,16K005,K005,P.S. 005 Dr. Ronald McNair,331600010005,DOE,General Academic,Elementary,"PK,0K,01,02,03,04,05,SE","PK,0K,01,02,03,04,05",...,COMMUNITY SCHOOL DISTRICT 16,"MARTIN, YOLANDA",NYCDOE Borough Office - Brooklyn North,,School Support Team 4- Brooklyn North,,,,303,81


In [6]:
# Display the column labels of `schools` (inspection only; nothing is assigned).
schools.columns

Index(['fiscal_year', 'system_code', 'location_code', 'location_name', 'beds',
       'managed_by_name', 'location_type_description',
       'location_category_description', 'grades_text', 'grades_final_text',
       'open_date', 'status_descriptions', 'primary_building_code',
       'primary_address_line_1', 'state_code', 'x_coordinate', 'y_coordinate',
       'longitude', 'latitude', 'community_district', 'council_district',
       'census_tract', 'borough_block_lot', 'nta', 'nta_name',
       'principal_name', 'principal_title', 'principal_phone_number',
       'fax_number', 'geographical_district_code',
       'administrative_district_code', 'administrative_district_name',
       'community_school_sup_name', 'tier_3_support_location_name',
       'tier_3_support_leader_name', 'tier_2_support_location_name',
       'highschool_network_location', 'highschool_network_name',
       'highschool_network', 'community_district_1', 'police_precinct'],
      dtype='object')

In [6]:
# Columns to retain: school name, school category, and the two coordinates.
cols_to_keep = [
    'location_name',
    'location_category_description',
    'longitude',
    'latitude',
]

In [7]:
# Narrow the frame to the columns listed in `cols_to_keep`; all rows are kept.
schools = schools.loc[:, cols_to_keep]

In [8]:
# Preview the first five rows of the trimmed frame.
schools.head(n=5)

Unnamed: 0,location_name,location_category_description,longitude,latitude
0,P.S. 001 The Bergen,Elementary,-74.01142,40.648959
1,Parkside Preparatory Academy,Junior High-Intermediate-Middle,-73.951575,40.656423
2,P.S. 003 The Bedford Village,Elementary,-73.955219,40.682311
3,P.S. K004,Elementary,-73.879276,40.6585
4,P.S. 005 Dr. Ronald McNair,Elementary,-73.92197,40.685241


In [9]:
# Drop rows that exactly repeat an earlier row (the first occurrence is kept).
schools = schools[~schools.duplicated()]

In [10]:
# Keep only the rows where both coordinates are present.
schools = schools[schools[['longitude', 'latitude']].notna().all(axis=1)]

In [11]:
# Give every remaining row a sequential id starting at 1.
# assign() returns a new DataFrame instead of writing a column into `schools`
# in place, which avoids SettingWithCopyWarning when `schools` is a filtered
# slice of an earlier frame.
schools = schools.assign(school_id=range(1, len(schools) + 1))

In [12]:
# Send to db:
# Fail early with a readable message when a connection setting is unset;
# otherwise the literal text "None" would be formatted into the URL.
db_settings = {
    "DB_USER": DB_USER,
    "DB_PASSWORD": DB_PASSWORD,
    "DB_HOST": DB_HOST,
    "DB_PORT": DB_PORT,
    "DB_NAME": DB_NAME,
}
missing_settings = [name for name, value in db_settings.items() if value is None]
if missing_settings:
    raise RuntimeError("Missing database settings: " + ", ".join(missing_settings))

# Percent-encode the credentials so characters such as '@', ':' or '/' are not
# read as URL delimiters.
# NOTE(review): assumes the environment holds the raw (not already
# percent-encoded) user name and password - confirm.
engine = create_engine(
    "postgresql+psycopg2://"
    f"{urllib.parse.quote(DB_USER, safe='')}:{urllib.parse.quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

In [13]:
# Write the frame to the `schools` table through `engine`, replacing any
# existing table of that name; the DataFrame index is not written.
# NOTE(review): longitude/latitude are stored with whatever dtype they have at
# this point; if the API delivered them as strings they will be text columns -
# confirm.
schools.to_sql(name='schools', con=engine, if_exists='replace', index=False)


131