In [228]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import numpy as np
import time
import datetime
import os
from pandas.io.json import json_normalize

import warnings
warnings.filterwarnings('ignore')

In [229]:
resultsPerRequest = 200000
saveToDirectory = "data"

In [230]:
def buildURL(startDate, endDate, resultsPerRequest):
    startDate = startDate + "T00:00:00.000"
    endDate = endDate + "T00:00:00.000"
    url = "https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='{}' and trip_start_timestamp < '{}'&$limit={}&$order=trip_start_timestamp".format(startDate, endDate, resultsPerRequest)
    return(url)   

In [231]:
def callAPI(url, offset):
    token = "INSERT_TOKEN_HERE"
    headers = {"X-App-Token": token}
    res = requests.get(url, headers = headers, verify = False)
    print("HTTP response code: ", res.status_code)
    if (res.status_code == 200):
        results = res.json()
        return(results)
    else:
        return(None)

In [232]:
def gatherData(startDate, endDate, resultsPerRequest, saveToDirectory):
    offset = 0
    previousRecordsFound = 999
    
    while(previousRecordsFound > 0):
        url = buildURL(startDate, endDate, resultsPerRequest)
        url = url + "&$offset={}".format(offset)
        
        print("Calling offset {}: {}".format(offset, url))
        results = callAPI(url, offset)
        
        if (results == None):
            previousRecordsFound = 0
        else:            
            previousRecordsFound = len(results)
            print("Results Found:", previousRecordsFound)

            if (previousRecordsFound > 0):
                df = json_normalize(results)
                firstDate = df["trip_start_timestamp"].head(1)
                lastDate = df["trip_start_timestamp"].tail(1)
                print("First Date:", firstDate)
                print("Last Date:", lastDate)
                fileName = "ChiTaxi_" + startDate + "_" + endDate + "_" + str(offset).zfill(20) + ".csv"
                print("Saving {} records to fileName: {}".format(previousRecordsFound, fileName))
                df.to_csv(os.path.join(saveToDirectory, fileName), index = False)
                offset = offset + resultsPerRequest
                time.sleep(30)
                print("--------------------------------------------------------------------------------------------------------")        

In [233]:
startDate = "2016-01-01"
endDate = "2016-02-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-01-01T00:00:00.000' and trip_start_timestamp < '2016-02-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-01-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-01-05T05:45:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-01-01_2016-02-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-01-01T00:00:00.000' and trip_start_timestamp < '2016-02-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-01-05T05:45:00.000
Name: trip_start_times

In [234]:
startDate = "2016-02-01"
endDate = "2016-03-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-02-01T00:00:00.000' and trip_start_timestamp < '2016-03-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-02-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-02-04T17:15:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-02-01_2016-03-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-02-01T00:00:00.000' and trip_start_timestamp < '2016-03-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-02-04T17:15:00.000
Name: trip_start_times

In [235]:
startDate = "2016-03-01"
endDate = "2016-04-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-03-01T00:00:00.000' and trip_start_timestamp < '2016-04-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-03-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-03-04T05:30:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-03-01_2016-04-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-03-01T00:00:00.000' and trip_start_timestamp < '2016-04-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-03-04T05:30:00.000
Name: trip_start_times

In [236]:
startDate = "2016-04-01"
endDate = "2016-05-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-04-01T00:00:00.000' and trip_start_timestamp < '2016-05-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-04-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-04-03T20:15:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-04-01_2016-05-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-04-01T00:00:00.000' and trip_start_timestamp < '2016-05-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-04-03T20:15:00.000
Name: trip_start_times

In [237]:
startDate = "2016-05-01"
endDate = "2016-06-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-05-01T00:00:00.000' and trip_start_timestamp < '2016-06-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-05-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-05-04T16:00:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-05-01_2016-06-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-05-01T00:00:00.000' and trip_start_timestamp < '2016-06-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-05-04T16:00:00.000
Name: trip_start_times

In [238]:
startDate = "2016-06-01"
endDate = "2016-07-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-06-01T00:00:00.000' and trip_start_timestamp < '2016-07-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-06-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-06-03T21:30:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-06-01_2016-07-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-06-01T00:00:00.000' and trip_start_timestamp < '2016-07-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-06-03T21:30:00.000
Name: trip_start_times

In [239]:
startDate = "2016-07-01"
endDate = "2016-08-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-07-01T00:00:00.000' and trip_start_timestamp < '2016-08-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-07-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-07-05T10:30:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-07-01_2016-08-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-07-01T00:00:00.000' and trip_start_timestamp < '2016-08-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-07-05T10:30:00.000
Name: trip_start_times

In [240]:
startDate = "2016-08-01"
endDate = "2016-09-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-08-01T00:00:00.000' and trip_start_timestamp < '2016-09-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-08-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-08-04T21:00:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-08-01_2016-09-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-08-01T00:00:00.000' and trip_start_timestamp < '2016-09-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-08-04T21:00:00.000
Name: trip_start_times

In [241]:
startDate = "2016-09-01"
endDate = "2016-10-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-09-01T00:00:00.000' and trip_start_timestamp < '2016-10-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-09-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-09-05T22:00:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-09-01_2016-10-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-09-01T00:00:00.000' and trip_start_timestamp < '2016-10-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-09-05T22:00:00.000
Name: trip_start_times

In [242]:
startDate = "2016-10-01"
endDate = "2016-11-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-10-01T00:00:00.000' and trip_start_timestamp < '2016-11-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-10-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-10-05T20:30:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-10-01_2016-11-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-10-01T00:00:00.000' and trip_start_timestamp < '2016-11-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-10-05T20:30:00.000
Name: trip_start_times

In [243]:
startDate = "2016-11-01"
endDate = "2016-12-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-11-01T00:00:00.000' and trip_start_timestamp < '2016-12-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-11-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-11-04T20:00:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-11-01_2016-12-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-11-01T00:00:00.000' and trip_start_timestamp < '2016-12-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-11-04T20:00:00.000
Name: trip_start_times

In [244]:
startDate = "2016-12-01"
endDate = "2017-01-01"
gatherData(startDate, endDate, resultsPerRequest, saveToDirectory)

Calling offset 0: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-12-01T00:00:00.000' and trip_start_timestamp < '2017-01-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=0
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-12-01T00:00:00.000
Name: trip_start_timestamp, dtype: object
Last Date: 199999    2016-12-05T09:15:00.000
Name: trip_start_timestamp, dtype: object
Saving 200000 records to fileName: ChiTaxi_2016-12-01_2017-01-01_00000000000000000000.csv
--------------------------------------------------------------------------------------------------------
Calling offset 200000: https://data.cityofchicago.org/resource/wrvz-psew.json?$where=trip_start_timestamp >='2016-12-01T00:00:00.000' and trip_start_timestamp < '2017-01-01T00:00:00.000'&$limit=200000&$order=trip_start_timestamp&$offset=200000
HTTP response code:  200
Results Found: 200000
First Date: 0    2016-12-05T09:15:00.000
Name: trip_start_times