In [1]:
import pandas as pd
import requests
import sys



In [2]:
def download_inspections(outfile, start_date='2016-01-01', end_date='2018-12-31', limit=20):
    """Download restaurant inspection rows from NYC Open Data and save them as CSV.

    Sends a single SoQL query to the ``43nn-pn8j`` resource, keeping rows whose
    ``inspection_date`` falls between ``start_date`` and ``end_date``.

    Parameters
    ----------
    outfile : str or path-like
        Destination handed to ``DataFrame.to_csv``. Must be truthy.
    start_date, end_date : str
        Bounds interpolated verbatim into the SoQL ``BETWEEN`` clause.
    limit : int or float
        Maximum number of rows. Coerced with ``int()`` so values such as
        ``3e5`` are accepted.

    Returns
    -------
    pandas.DataFrame
        The downloaded rows; the same frame is written to ``outfile``
        without the index column.

    Raises
    ------
    ValueError
        If ``outfile`` is empty or ``None``.
    requests.HTTPError
        If the API answers with an error status. Nothing is written in
        that case.
    """
    if not outfile:
        raise ValueError("No outfile specified.")

    DATA_URL = "https://data.cityofnewyork.us/resource/43nn-pn8j.json"

    query = f"""
        SELECT
            camis,
            dba,
            boro,
            zipcode,
            cuisine_description,
            inspection_date,
            action,
            violation_code,
            violation_description,
            critical_flag,
            score,
            grade,
            inspection_type,
            latitude,
            longitude
        WHERE
            inspection_date BETWEEN "{start_date}" AND "{end_date}"
        LIMIT {int(limit)}
    """

    r = requests.get(DATA_URL, params={"$query": query})
    if not r.ok:
        # Check the status before parsing: an error body is not row data and
        # may not even be JSON. Show the server's explanation, then fail
        # instead of writing the error payload to the CSV.
        print(r.text)
        r.raise_for_status()

    data = pd.DataFrame(r.json())
    data.to_csv(outfile, index=False)

    return data

In [3]:
def download_311(outfile, start_date='2016-01-01', end_date='2018-12-31', limit=20):
    """Download 311 service-request rows from NYC Open Data and save them as CSV.

    Sends a single SoQL query to the ``erm2-nwe9`` resource, restricted to
    the agencies ``DEP``, ``DOHMH`` and ``DSNY`` and to rows whose
    ``created_date`` falls between ``start_date`` and ``end_date``.

    Parameters
    ----------
    outfile : str or path-like
        Destination handed to ``DataFrame.to_csv``. Must be truthy.
    start_date, end_date : str
        Bounds interpolated verbatim into the SoQL ``BETWEEN`` clause.
    limit : int or float
        Maximum number of rows. Coerced with ``int()``.

    Returns
    -------
    pandas.DataFrame
        The downloaded rows; the same frame is written to ``outfile``
        without the index column.

    Raises
    ------
    ValueError
        If ``outfile`` is empty or ``None``.
    requests.HTTPError
        If the API answers with an error status. Nothing is written in
        that case.
    """
    if not outfile:
        raise ValueError("No outfile specified.")

    DATA_URL = "https://data.cityofnewyork.us/resource/erm2-nwe9.json"

    query = f"""
        SELECT
            created_date,
            agency,
            complaint_type,
            descriptor,
            location_type,
            incident_zip,
            address_type,
            city,
            facility_type,
            borough,
            latitude,
            longitude
        WHERE
            agency in('DEP', 'DOHMH', 'DSNY')
            AND created_date BETWEEN "{start_date}" AND "{end_date}"
        LIMIT {int(limit)}
    """

    r = requests.get(DATA_URL, params={"$query": query})
    if not r.ok:
        # Check the status before parsing: an error body is not row data and
        # may not even be JSON. Show the server's explanation, then fail
        # instead of writing the error payload to the CSV.
        print(r.text)
        r.raise_for_status()

    data = pd.DataFrame(r.json())
    data.to_csv(outfile, index=False)

    return data

In [4]:
def download_business(outfile, start_date='2016-01-01', end_date='2018-12-31', limit=20):
    """Download business-license rows from NYC Open Data and save them as CSV.

    Sends a single SoQL query to the ``w7w3-xahh`` resource, renaming several
    columns through ``AS`` aliases (for example ``license_nbr`` -> ``lic_id``).

    Parameters
    ----------
    outfile : str or path-like
        Destination handed to ``DataFrame.to_csv``. Must be truthy.
    start_date, end_date : str
        Accepted so the signature matches the other download helpers, but
        NOT used: the query has no date filter.
        NOTE(review): confirm whether a filter on a license date was intended.
    limit : int or float
        Maximum number of rows. Coerced with ``int()``.

    Returns
    -------
    pandas.DataFrame
        The downloaded rows; the same frame is written to ``outfile``
        without the index column.

    Raises
    ------
    ValueError
        If ``outfile`` is empty or ``None``.
    requests.HTTPError
        If the API answers with an error status. Nothing is written in
        that case.
    """
    if not outfile:
        raise ValueError("No outfile specified.")

    DATA_URL = "https://data.cityofnewyork.us/resource/w7w3-xahh.json"

    query = f"""
        SELECT
            license_nbr as lic_id,
            license_status as status,
            lic_expir_dd as lic_expire,
            license_creation_date as lic_create,
            industry,
            business_name as name,
            address_zip,
            address_borough,
            latitude,
            longitude

        LIMIT {int(limit)}
    """

    r = requests.get(DATA_URL, params={"$query": query})
    if not r.ok:
        # Check the status before parsing: an error body is not row data and
        # may not even be JSON. Show the server's explanation, then fail
        # instead of writing the error payload to the CSV.
        print(r.text)
        r.raise_for_status()

    data = pd.DataFrame(r.json())
    data.to_csv(outfile, index=False)

    return data

In [5]:
def download_nypd_historic(outfile, start_date='2016-01-01', end_date='2018-12-31', limit=20):
    """Download historic NYPD complaint rows from NYC Open Data and save them as CSV.

    Sends a single SoQL query to the ``qgea-i56i`` resource, renaming the
    selected columns through ``AS`` aliases and keeping rows whose
    ``cmplnt_fr_dt`` falls between ``start_date`` and ``end_date``.

    Parameters
    ----------
    outfile : str or path-like
        Destination handed to ``DataFrame.to_csv``. Must be truthy.
    start_date, end_date : str
        Bounds interpolated verbatim into the SoQL ``BETWEEN`` clause.
    limit : int or float
        Maximum number of rows. Coerced with ``int()``.

    Returns
    -------
    pandas.DataFrame
        The downloaded rows; the same frame is written to ``outfile``
        without the index column.

    Raises
    ------
    ValueError
        If ``outfile`` is empty or ``None``.
    requests.HTTPError
        If the API answers with an error status. Nothing is written in
        that case.
    """
    if not outfile:
        raise ValueError("No outfile specified.")

    DATA_URL = "https://data.cityofnewyork.us/resource/qgea-i56i.json"

    # The WHERE clause filters on the source column (cmplnt_fr_dt) rather than
    # its SELECT alias: the source name is valid whether or not the API
    # resolves aliases inside WHERE.
    query = f"""
        SELECT
            cmplnt_fr_dt as complaint_date,
            ky_cd as complaint_code,
            law_cat_cd as offense_lvl,
            boro_nm as boro,
            prem_typ_desc as premisis_type,
            latitude,
            longitude
        WHERE
            cmplnt_fr_dt BETWEEN "{start_date}" AND "{end_date}"
        LIMIT {int(limit)}
    """

    r = requests.get(DATA_URL, params={"$query": query})
    if not r.ok:
        # Check the status before parsing: an error body is not row data and
        # may not even be JSON. Show the server's explanation, then fail
        # instead of writing the error payload to the CSV.
        print(r.text)
        r.raise_for_status()

    data = pd.DataFrame(r.json())
    data.to_csv(outfile, index=False)

    return data

In [6]:
# Pull up to 300,000 inspection rows and write them to ./data/inspections.csv.
df = download_inspections('./data/inspections.csv', limit=300_000)
# The remaining datasets are fetched the same way; enable the call you need:
# df = download_311('./data/311.csv', limit=3e5)
# df = download_business('./data/business.csv', limit=3e5)
# df = download_nypd_historic('./data/nypd.csv', limit=3e5)

In [8]:
import pandas as pd
import requests
import time
import sys
from bs4 import BeautifulSoup

In [9]:

# Scrape one temperature figure per (year, month) from timeanddate.com's
# historic-weather pages and collect the results in a DataFrame.
# NOTE(review): the URL targets Chicago, while the download helpers in this
# notebook query NYC Open Data — confirm the intended city.
URL = "https://www.timeanddate.com/weather/usa/chicago/historic?month={}&year={}"


data = []
for year in range(2010, 2019):
    for month in range(1, 13):
        url = URL.format(month, year)
        response = requests.get(url)
        # Fail on an HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        # Browser-console equivalent of the lookup below:
        # const avgTemp = document.querySelectorAll(".sep-t")[0].children[1].textContent;
        soup = BeautifulSoup(response.text, "html.parser")
        summary_rows = soup.select(".sep-t")
        if not summary_rows:
            # Without this check a changed or blocked page surfaces as a bare
            # IndexError that does not say which URL failed.
            raise ValueError("No '.sep-t' element found at {}".format(url))
        # First <td> of the first ".sep-t" element; its leading token is parsed
        # as an integer (presumably the monthly average temperature).
        temp_str = summary_rows[0].find_all("td")[0].getText()
        avg_temp = int(temp_str.split()[0])
        record = {"month": month, "year": year, "temperature": avg_temp}
        data.append(record)
        # Pause between requests to stay polite to the remote site.
        time.sleep(1)
        # NOTE(review): this unconditional break ends the month loop after its
        # first iteration, so only month 1 of each year is collected (9 records
        # in total). It looks like a leftover from a quick test run — remove it
        # to collect all twelve months.
        break
df = pd.DataFrame(data)
# NOTE(review): the CSV write below is disabled and `outfile` is not defined in
# this cell, so nothing is persisted even though the message says "Saved".
# df.to_csv(outfile, index=False)
print("Saved {} records of historical weather data.".format(len(df)))

Saved 9 records of historical weather data.
