In [1]:
import ast
import csv
import datetime
import json
import os
import time
from urllib.parse import quote

import matplotlib.pyplot as plt
import pandas as pd
import requests
from dotenv import load_dotenv
from sqlalchemy import create_engine

In [2]:
# Load key=value pairs from a local .env file into the process environment
# (python-dotenv), so the database credentials and API token read with
# os.getenv in later cells never have to be written into the notebook itself.
load_dotenv()

True

In [3]:
# PostgreSQL connection settings, read from the environment.
# Any variable that is not set comes back as None.
DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME = (
    os.environ.get(key)
    for key in ("DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT", "DB_NAME")
)

In [4]:
# Download the complete dataset from NYC Open Data (Socrata), one page at a time,
# and collect every row into the `restaurants` DataFrame.
TOKEN = os.getenv("NYC_open_data_token")

base_url = "https://data.cityofnewyork.us/resource/4dx7-axux.json"

# Only send the app-token header when a token is actually configured.
headers = {"X-App-Token": TOKEN} if TOKEN else {}

limit = 1000        # rows requested per page
offset = 0          # index of the first row of the next page
max_retries = 5     # consecutive failures tolerated before giving up
failures = 0
all_records = []

while True:
    # `$order` gives the rows a stable order across requests; without it,
    # offset-based paging may return overlapping pages or skip rows.
    # `:id` is Socrata's internal row identifier.
    params = {"$limit": limit, "$offset": offset, "$order": ":id"}
    try:
        response = requests.get(base_url, headers=headers, params=params, timeout=20)
        response.raise_for_status()
        batch = response.json()
    except requests.exceptions.RequestException as e:
        failures += 1
        if failures > max_retries:
            # A permanent problem (bad token, wrong dataset id, API down) would
            # otherwise keep this loop retrying forever.
            raise RuntimeError(
                f"Giving up at offset={offset} after {max_retries} consecutive failed retries"
            ) from e
        print("Request failed, retrying in 2 seconds...", e)
        time.sleep(2)
        continue

    failures = 0  # the page was fetched, so the retry budget starts over

    if not batch:
        print("No more data returned. Stopping.")
        break

    all_records.extend(batch)
    print(f"Fetched {len(batch)} rows (offset={offset})")

    # A short page means the end of the dataset has been reached.
    if len(batch) < limit:
        break

    offset += limit
    time.sleep(0.2)  # polite rate-limit protection


# Build the DataFrame once, from the full list of records.
restaurants = pd.DataFrame(all_records)

print("Done! Total rows:", len(restaurants))
restaurants.head()

Fetched 1000 rows (offset=0)
Fetched 1000 rows (offset=1000)
Fetched 1000 rows (offset=2000)
Fetched 1000 rows (offset=3000)
Fetched 1000 rows (offset=4000)
Fetched 1000 rows (offset=5000)
Fetched 1000 rows (offset=6000)
Fetched 1000 rows (offset=7000)
Fetched 1000 rows (offset=8000)
Fetched 1000 rows (offset=9000)
Fetched 1000 rows (offset=10000)
Fetched 1000 rows (offset=11000)
Fetched 1000 rows (offset=12000)
Fetched 1000 rows (offset=13000)
Fetched 1000 rows (offset=14000)
Fetched 1000 rows (offset=15000)
Fetched 1000 rows (offset=16000)
Fetched 1000 rows (offset=17000)
Fetched 1000 rows (offset=18000)
Fetched 1000 rows (offset=19000)
Fetched 1000 rows (offset=20000)
Fetched 1000 rows (offset=21000)
Fetched 1000 rows (offset=22000)
Fetched 1000 rows (offset=23000)
Fetched 1000 rows (offset=24000)
Fetched 1000 rows (offset=25000)
Fetched 1000 rows (offset=26000)
Fetched 1000 rows (offset=27000)
Fetched 1000 rows (offset=28000)
Fetched 1000 rows (offset=29000)
Fetched 1000 rows (offs

Unnamed: 0,borough,restaurantname,seatingchoice,legalbusinessname,businessaddress,restaurantinspectionid,isroadwaycompliant,inspectedon,agencycode,postcode,latitude,longitude,communityboard,councildistrict,censustract,bin,bbl,nta,skippedreason
0,Manhattan,Oscar Wilde,both,Camelot Castle LLC,45 West 27th st,72891,Non-Compliant,2021-12-20T16:06:58.000,DOT,10001,40.744876,-73.989657,5,3,58,1015677,1008290011,Hudson Yards-Chelsea-Flatiron-Union Square,
1,Manhattan,LA RUBIA RESTAURANT,both,LA RUBIA RESTAURANT INC,3517 BROADWAY,72892,For HIQA Review,2021-12-20T16:18:42.000,DOT,10031,40.825863,-73.950874,9,7,229,1062369,1020910032,Hamilton Heights,
2,Manhattan,Thai Sliders,sidewalk,Silom Thai Inc,150 8th Ave,72893,Non-Compliant,2021-12-20T16:35:41.000,DOT,10011,40.741906,-74.000945,4,3,81,1013845,1007670002,Hudson Yards-Chelsea-Flatiron-Union Square,
3,Brooklyn,Otway,both,St james 930 LLC,930 Fulton street,72894,Cease and Desist,2021-12-20T16:38:45.000,DOT,11238,40.682833,-73.963833,2,35,201,3335112,3020130038,Clinton Hill,
4,Brooklyn,WILLIAMSBURG THAI CUISINE,both,WILLIAMSBURG THAI CUISINE NY INC,212 BEDFORD AVENUE,72896,Compliant,2021-12-20T16:52:41.000,DOT,11249,40.716913,-73.958728,1,33,553,3062192,3023350026,North Side-South Side,


In [8]:
# Columns used by the rest of the notebook: the restaurant name and its coordinates.
cols_to_keep = [
    "restaurantname",
    "latitude",
    "longitude",
]

In [9]:
# Keep every row but only the columns listed in cols_to_keep.
restaurants = restaurants.loc[:, cols_to_keep]

In [10]:
# Preview the first five rows to confirm only the selected columns remain.
restaurants.head(n=5)

Unnamed: 0,restaurantname,latitude,longitude
0,Oscar Wilde,40.744876,-73.989657
1,LA RUBIA RESTAURANT,40.825863,-73.950874
2,Thai Sliders,40.741906,-74.000945
3,Otway,40.682833,-73.963833
4,WILLIAMSBURG THAI CUISINE,40.716913,-73.958728


In [11]:
# Remove repeated (name, latitude, longitude) rows, keeping the first occurrence of each.
restaurants = restaurants[
    ~restaurants.duplicated(subset=['restaurantname', 'latitude', 'longitude'], keep='first')
]

In [12]:
# Keep only rows where both coordinates are present.
restaurants = restaurants[restaurants[['longitude', 'latitude']].notna().all(axis=1)]

In [13]:
# Sanity check: (rows, columns) left after removing duplicates and rows without coordinates.
restaurants.shape

(9666, 3)

In [14]:
# Add a sequential identifier (1..N, in current row order) as the `rest_id` column.
restaurants = restaurants.assign(rest_id=range(1, len(restaurants) + 1))

In [15]:
# Preview the first five rows to confirm the rest_id column was added.
restaurants.head(n=5)

Unnamed: 0,restaurantname,latitude,longitude,rest_id
0,Oscar Wilde,40.744876,-73.989657,1
1,LA RUBIA RESTAURANT,40.825863,-73.950874,2
2,Thai Sliders,40.741906,-74.000945,3
3,Otway,40.682833,-73.963833,4
4,WILLIAMSBURG THAI CUISINE,40.716913,-73.958728,5


In [16]:
# Send to db:
# Fail fast with a readable message if a connection setting is missing; otherwise
# the literal text "None" would be formatted into the URL and the failure would
# only surface later as a confusing connection error.
_db_settings = {
    "DB_USER": DB_USER,
    "DB_PASSWORD": DB_PASSWORD,
    "DB_HOST": DB_HOST,
    "DB_PORT": DB_PORT,
    "DB_NAME": DB_NAME,
}
_missing = [name for name, value in _db_settings.items() if value is None]
if _missing:
    raise RuntimeError(
        "Missing database settings (check the environment / .env file): "
        + ", ".join(_missing)
    )

# Percent-encode the credentials so characters such as '@', ':', '/' or '%' in the
# user name or password cannot be mistaken for URL delimiters. The values in the
# environment are expected to be the raw (unencoded) credentials.
engine = create_engine(
    f"postgresql+psycopg2://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

In [17]:
# Write the cleaned frame to the PostgreSQL table `restaurants`.
# An existing table with that name is replaced, and the DataFrame index is not stored.
restaurants.to_sql(
    name="restaurants",
    con=engine,
    if_exists="replace",
    index=False,
)


666