In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import os, simplejson, requests
from pandas.io.json import json_normalize
from sqlalchemy import create_engine # database connection
from __future__ import division   # force float division

In [53]:
 # delete file 'NYC_Restaurant_Inspection_API.db' if it exists
# Start from a clean slate: the download loop writes with if_exists='append',
# so a database file left over from a previous run would collect duplicate rows.
db_filename = 'NYC_Restaurant_Inspection_API.db'
if os.path.exists(db_filename):
    # A missing file is simply skipped. A file that exists but cannot be
    # deleted (permissions, locked by another process, ...) now raises here
    # instead of being silently ignored and then appended to.
    os.remove(db_filename)

# Initialize database with filename NYC_Restaurant_Inspection_API.db in current directory
disk_engine = create_engine('sqlite:///' + db_filename)

# JSON endpoint of the NYC open-data resource that the queries below are sent to.
url = 'https://data.cityofnewyork.us/resource/9w7m-hzhe.json'

# The API app token is kept in a local JSON file rather than in the notebook
# itself; the file must contain an "app_token" key.
with open("NYC.json.nogit") as secrets_file:
    secrets = simplejson.load(secrets_file)
app_token = secrets['app_token']

# count number of rows
# The query parameters are handed to requests as a dict so that it takes care
# of URL-encoding them, instead of concatenating the query string by hand.
resp = requests.get(url, params={'$$app_token': app_token,
                                 '$select': 'count(CAMIS)'})
# Stop right here on an HTTP error (bad token, throttling, server error, ...):
# otherwise the error payload would be parsed below and fail with an obscure
# conversion error in astype(int).
resp.raise_for_status()
data = simplejson.loads(resp.text)
d_rows = json_normalize(data)
# The count is read from the first cell of the normalized response.
rows = d_rows.astype(int).iloc[0, 0]


# build the database
# The dataset is downloaded page by page (chunksize rows per request) and each
# page is appended to the 'data' table of disk_engine.
chunksize = 30000
max_offset = rows // chunksize    # index of the last page that can still hold rows

index_start = 0                   # rows written so far == first index label of the next page
start = dt.datetime.now()     # start timing

# Columns that are parsed into datetimes before being stored.
date_columns = ('INSPECTION_DATE', 'GRADE_DATE', 'RECORD_DATE')

for offset in range(max_offset + 1):
    # NOTE(review): (camis, inspection_date) does not look like a unique sort
    # key per row; paging is only reliable if the server-side order is stable
    # between requests -- confirm against the API's paging documentation.
    resp = requests.get(url, params={
        '$$app_token': app_token,
        '$order': 'camis ASC, inspection_date DESC',
        '$limit': chunksize,
        '$offset': offset * chunksize,
    })
    # Fail on HTTP errors instead of trying to store an error payload.
    resp.raise_for_status()
    data = simplejson.loads(resp.text)

    if not data:
        # An empty page is returned when the row count is an exact multiple of
        # chunksize (or the dataset shrank after counting). There is nothing to
        # store, and the column lookups below would raise on an empty frame.
        break

    df = json_normalize(data)

    df = df.rename(columns={c: c.upper().replace(' ', '_') for c in df.columns}) # Replace spaces with _ in columns

    # Convert to datetimes. Plain column assignment replaces the column (and
    # its dtype); a column absent from this page is skipped rather than
    # raising KeyError.
    for col in date_columns:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col])

    # Continue the index where the previous page stopped so that the index
    # labels stored in the table keep increasing across pages.
    df.index += index_start

    df.to_sql('data', disk_engine, if_exists='append')
    index_start += len(df.index)                       #fix the index

    # Report progress only after the page has actually been written.
    print('{} seconds: completed {} rows'.format((dt.datetime.now() - start).seconds, index_start))