In [1]:
import ast
import csv
import datetime
import json
import os
import time

import matplotlib.pyplot as plt
import pandas as pd
import requests
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

In [2]:
# Load key/value pairs from a .env file into the process environment so the
# os.getenv(...) lookups in the following cells can see them.
load_dotenv()

True

In [3]:
# PostgreSQL connection settings, read from the environment in one pass.
# A variable that is not set comes back as None.
DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME = (
    os.getenv(setting)
    for setting in ("DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT", "DB_NAME")
)

In [4]:
# Download every row of NYC Open Data resource swpk-hqdp through its JSON
# endpoint, one page at a time, and collect the rows into `population`.
TOKEN = os.getenv("NYC_open_data_token")

base_url = "https://data.cityofnewyork.us/resource/swpk-hqdp.json"

headers = {"X-App-Token": TOKEN}

limit = 1000        # rows requested per page
offset = 0          # position of the first row of the next page
max_retries = 5     # consecutive failed attempts tolerated for a single page
failed_attempts = 0
all_records = []

while True:
    # An explicit $order keeps the row order stable from one page to the next
    # (`:id` is the API's internal row identifier). Passing the query through
    # `params` lets requests take care of the URL encoding.
    params = {"$limit": limit, "$offset": offset, "$order": ":id"}
    try:
        response = requests.get(base_url, headers=headers, params=params, timeout=20)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        failed_attempts += 1
        if failed_attempts >= max_retries:
            # A permanent failure (bad token, wrong URL, outage) would
            # otherwise keep this loop retrying forever.
            print(f"Giving up after {failed_attempts} failed attempts (offset={offset}).")
            raise
        print("Request failed, retrying in 2 seconds...", e)
        time.sleep(2)
        continue

    failed_attempts = 0  # the page came back fine; reset the failure counter
    batch = response.json()

    if not batch:
        print("No more data returned. Stopping.")
        break

    all_records.extend(batch)
    print(f"Fetched {len(batch)} rows (offset={offset})")

    # A short page means the end of the dataset has been reached
    if len(batch) < limit:
        break

    offset += limit
    time.sleep(0.2)  # brief pause between pages to stay polite to the API


# Build the dataframe from the collected records
population = pd.DataFrame(all_records)

print("Done! Total rows:", len(population))
population.head()

Fetched 390 rows (offset=0)
Done! Total rows: 390


Unnamed: 0,borough,year,fips_county_code,nta_code,nta_name,population
0,Bronx,2000,5,BX01,Claremont-Bathgate,28149
1,Bronx,2000,5,BX03,Eastchester-Edenwald-Baychester,35422
2,Bronx,2000,5,BX05,Bedford Park-Fordham North,55329
3,Bronx,2000,5,BX06,Belmont,25967
4,Bronx,2000,5,BX07,Bronxdale,34309


In [13]:
# Number of rows available for each census year
population["year"].value_counts()

year
2000    195
2010    195
Name: count, dtype: int64

In [15]:
# Keep only the 2010 rows; `year` is matched against the string '2010'.
population = population[population['year'].eq('2010')]

In [17]:
# Distinct-value count per column, as a quick sanity check on the filtered frame
print(population.nunique())

borough               5
year                  1
fips_county_code      5
nta_code            195
nta_name            195
population          193
dtype: int64


In [19]:
# Columns retained for the table that is written to the database
cols_to_keep = ["nta_code", "nta_name", "population"]

In [20]:
# Reduce the frame to the columns listed in `cols_to_keep` and make sure the
# head count is numeric before it is written out.
# NOTE(review): the API payload appears to deliver numbers as strings (the year
# filter in this notebook matches against '2010'), which would otherwise store
# `population` as a text column in the database. pd.to_numeric leaves values
# that are already numeric alone and raises on anything it cannot parse.
population = population[cols_to_keep].assign(
    population=lambda frame: pd.to_numeric(frame["population"])
)

In [21]:
# Preview the first five rows of the trimmed frame
population.head(n=5)

Unnamed: 0,nta_code,nta_name,population
195,BX01,Claremont-Bathgate,31078
196,BX03,Eastchester-Edenwald-Baychester,34517
197,BX05,Bedford Park-Fordham North,54415
198,BX06,Belmont,27378
199,BX07,Bronxdale,35538


In [22]:
# Send to db:
# The connection URL is assembled from its parts instead of by string
# formatting, so special characters in the credentials (e.g. "@", ":", "/",
# "%") are escaped rather than corrupting the URL.
engine = create_engine(
    URL.create(
        drivername="postgresql+psycopg2",
        username=DB_USER,
        password=DB_PASSWORD,
        host=DB_HOST,
        # Environment values are strings; an unset port falls back to the
        # driver's default.
        port=int(DB_PORT) if DB_PORT else None,
        database=DB_NAME,
    )
)

In [23]:
# Write the frame to the PostgreSQL table 'population', replacing any existing
# table of that name; the dataframe index is not stored.
population.to_sql(
    name='population',
    con=engine,
    if_exists='replace',
    index=False,
)


195