In [1]:
import ast
import csv
import datetime
import json
import os
import time
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import pandas as pd
import requests
from dotenv import load_dotenv
from sqlalchemy import create_engine

In [2]:
# Load variables from a .env file into the process environment so the
# os.getenv(...) lookups in later cells can see them. The return value is
# left as the cell's last expression so the notebook displays it.
load_dotenv()

True

In [3]:
# Database connection settings, looked up by name in the process
# environment. os.getenv returns None for any variable that is not set.
(DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME) = map(
    os.getenv,
    ("DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT", "DB_NAME"),
)

In [4]:
# Download every row of the dataset behind `base_url`, one page at a time,
# and collect the rows into the `crime` DataFrame.
TOKEN = os.getenv("NYC_open_data_token")

base_url = "https://data.cityofnewyork.us/resource/uip8-fykc.json"

# App token is sent as a request header on every page request.
headers = {"X-App-Token": TOKEN}

limit = 10000        # rows requested per page
offset = 0           # index of the first row of the page being requested
all_records = []     # accumulated rows from every page fetched so far

max_retries = 5      # consecutive failures tolerated for one page before giving up
failed_attempts = 0  # consecutive failures for the page currently being requested

while True:
    # `$order=:id` pins a stable row order: without an explicit order the API
    # does not guarantee that successive pages line up, so offset-based paging
    # could skip or repeat rows.
    url = f"{base_url}?$limit={limit}&$offset={offset}&$order=:id"
    try:
        response = requests.get(url, headers=headers, timeout=20)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        failed_attempts += 1
        # A permanent error (bad token, bad query, dataset removed) would
        # otherwise make this loop retry forever.
        if failed_attempts >= max_retries:
            raise RuntimeError(
                f"Giving up at offset={offset} after {failed_attempts} consecutive failed requests"
            ) from e
        print("Request failed, retrying in 2 seconds...", e)
        time.sleep(2)
        continue

    # The page was fetched successfully; the next page gets a fresh retry budget.
    failed_attempts = 0

    batch = response.json()

    if not batch:
        print("No more data returned. Stopping.")
        break

    all_records.extend(batch)
    print(f"Fetched {len(batch)} rows (offset={offset})")

    # Stop if fewer than the limit means end of dataset
    if len(batch) < limit:
        break

    offset += limit
    time.sleep(0.2)  # polite rate-limit protection


# dataframe:

crime = pd.DataFrame(all_records)

print("Done! Total rows:", len(crime))
crime.head()

Fetched 10000 rows (offset=0)
Fetched 10000 rows (offset=10000)
Fetched 10000 rows (offset=20000)
Fetched 10000 rows (offset=30000)
Fetched 10000 rows (offset=40000)
Fetched 10000 rows (offset=50000)
Fetched 10000 rows (offset=60000)
Fetched 10000 rows (offset=70000)
Fetched 10000 rows (offset=80000)
Fetched 10000 rows (offset=90000)
Fetched 10000 rows (offset=100000)
Fetched 10000 rows (offset=110000)
Fetched 10000 rows (offset=120000)
Fetched 10000 rows (offset=130000)
Fetched 10000 rows (offset=140000)
Fetched 10000 rows (offset=150000)
Fetched 10000 rows (offset=160000)
Fetched 10000 rows (offset=170000)
Fetched 10000 rows (offset=180000)
Fetched 10000 rows (offset=190000)
Fetched 10000 rows (offset=200000)
Fetched 10000 rows (offset=210000)
Fetched 10000 rows (offset=220000)
Fetched 10000 rows (offset=230000)
Fetched 10000 rows (offset=240000)
Fetched 10000 rows (offset=250000)
Fetched 10000 rows (offset=260000)
Fetched 8953 rows (offset=270000)
Done! Total rows: 278953


Unnamed: 0,arrest_key,arrest_date,pd_cd,pd_desc,ky_cd,ofns_desc,law_code,law_cat_cd,arrest_boro,arrest_precinct,...,x_coord_cd,y_coord_cd,latitude,longitude,geocoded_column,:@computed_region_f5dn_yrer,:@computed_region_yeji_bk3q,:@computed_region_92fq_4b7q,:@computed_region_sbqj_enih,:@computed_region_efsh_h5xi
0,299201470,2025-01-10T00:00:00.000,105,STRANGULATION 1ST,106,FELONY ASSAULT,PL 1211200,F,Q,110,...,1017105,210917,40.745543,-73.881427,"{'type': 'Point', 'coordinates': [-73.881427, ...",66,3,5,68,14784
1,299351927,2025-01-13T00:00:00.000,105,STRANGULATION 1ST,106,FELONY ASSAULT,PL 1211200,F,S,120,...,962808,174275,40.645005,-74.077265,"{'type': 'Point', 'coordinates': [-74.077265, ...",4,1,13,74,10369
2,299366743,2025-01-13T00:00:00.000,157,RAPE 1,104,RAPE,PL 130351A,F,B,46,...,1011755,250279,40.8535983673823,-73.9005768807295,"{'type': 'Point', 'coordinates': [-73.90057688...",6,5,22,29,10935
3,300990163,2025-02-12T00:00:00.000,153,RAPE 3,104,RAPE,PL 1302504,F,K,77,...,1003509,185018,40.6744956865259,-73.9305713255961,"{'type': 'Point', 'coordinates': [-73.93057132...",16,2,49,49,17615
4,301010124,2025-02-12T00:00:00.000,157,RAPE 1,104,RAPE,PL 130353A,F,K,77,...,1003509,185018,40.6744956865259,-73.9305713255961,"{'type': 'Point', 'coordinates': [-73.93057132...",16,2,49,49,17615


In [5]:
# Names of the columns of `crime` that are retained for the rest of the notebook.
cols_to_keep = [
    'arrest_key',
    'ofns_desc',
    'latitude',
    'longitude',
]

In [6]:
# Narrow the frame to the columns listed in `cols_to_keep` (all rows kept).
crime = crime.loc[:, cols_to_keep]

In [7]:
# Preview the first five rows of the trimmed frame.
crime.head(n=5)

Unnamed: 0,arrest_key,ofns_desc,latitude,longitude
0,299201470,FELONY ASSAULT,40.745543,-73.881427
1,299351927,FELONY ASSAULT,40.645005,-74.077265
2,299366743,RAPE,40.8535983673823,-73.9005768807295
3,300990163,RAPE,40.6744956865259,-73.9305713255961
4,301010124,RAPE,40.6744956865259,-73.9305713255961


In [8]:
# Keep only the first row seen for each arrest_key value.
crime = crime.drop_duplicates(subset=['arrest_key'], keep='first')

In [9]:
# Send to db:
# Build the SQLAlchemy engine for the PostgreSQL database (psycopg2 driver).
# The user name and password are percent-encoded before being placed in the
# URL: left raw, reserved characters in them (e.g. "@", ":", "/", "%") would
# be read as URL delimiters and the connection string would be mis-parsed.
engine = create_engine(
    f"postgresql+psycopg2://{quote_plus(DB_USER or '')}:{quote_plus(DB_PASSWORD or '')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

In [10]:
# Send df to PostgreSQL
# Writes `crime` to the "crime" table through `engine`. An existing table of
# that name is replaced, and the DataFrame index is not written as a column.
crime.to_sql(name='crime', con=engine, if_exists='replace', index=False)


1