In [1]:
import numpy as np
import pandas as pd
import urllib
import json

In [2]:
"""Helper function to make Socrata API request."""
def makeAPIRequest(api_endpoint, params, read_function = pd.read_json, limit=100000000, debug=False):
    query = "?"
    if len(params) > 0:
        query += "&".join(params) + "&"
    query += "$limit=" + str(limit)
    url = api_endpoint + urllib.parse.quote(query, safe="&?$=,!()")
    if debug:
        print("Request URL:", url)
    return read_function(url)

In [3]:
# Fetch the distinct `town_nbhd` values (aliased to `nbhd_code`) and keep them
# as an immutable tuple of strings for use in later queries.
nbhd_codes_frame = makeAPIRequest(
    "https://datacatalog.cookcountyil.gov/resource/pcdw-pxtg.json",
    ["$select=distinct town_nbhd as nbhd_code"],
    debug=True,
)
nbhd_list = tuple(nbhd_codes_frame["nbhd_code"].astype("str"))

Request URL: https://datacatalog.cookcountyil.gov/resource/pcdw-pxtg.json?$select=distinct%20town_nbhd%20as%20nbhd_code&$limit=100000000


In [4]:
# Render the neighborhood codes as an explicit SoQL list of quoted string
# literals. Relying on Python's tuple repr (`str(nbhd_list)`) breaks for a
# single code (trailing comma: "('10011',)") and for values containing a
# single quote (repr switches to double quotes).
if not nbhd_list:
    raise ValueError("nbhd_list is empty; cannot build the nbhd_code filter")
nbhd_in_clause = "(" + ", ".join(
    "'" + str(code).replace("'", "''") + "'" for code in nbhd_list
) + ")"

# Pull year, neighborhood and sale price for 2012-2022 sales whose `class`
# starts with '2', restricted to the neighborhood codes fetched above.
sales = makeAPIRequest(
    api_endpoint = "https://datacatalog.cookcountyil.gov/resource/wvhk-k5uv.json",
    params = [
        "$select=year, nbhd_code, sale_price",
        "$where=year between 2012 and 2022 AND starts_with(class, '2') AND nbhd_code in " + nbhd_in_clause,
    ],
    debug = True
)

Request URL: https://datacatalog.cookcountyil.gov/resource/wvhk-k5uv.json?$select=year,%20nbhd_code,%20sale_price&$where=year%20between%202012%20and%202022%20AND%20starts_with(class,%20%272%27)%20AND%20nbhd_code%20in%20(%2710011%27,%20%2710012%27,%20%2710014%27,%20%2710021%27,%20%2710022%27,%20%2710023%27,%20%2710024%27,%20%2710025%27,%20%2710030%27,%20%2710031%27,%20%2710040%27,%20%2711010%27,%20%2711020%27,%20%2711030%27,%20%2711040%27,%20%2711050%27,%20%2711060%27,%20%2711070%27,%20%2712010%27,%20%2712021%27,%20%2712022%27,%20%2712031%27,%20%2712032%27,%20%2712033%27,%20%2712041%27,%20%2712042%27,%20%2712043%27,%20%2712044%27,%20%2712046%27,%20%2712051%27,%20%2712052%27,%20%2712053%27,%20%2712054%27,%20%2712061%27,%20%2712063%27,%20%2712064%27,%20%2712065%27,%20%2712071%27,%20%2712072%27,%20%2712073%27,%20%2712081%27,%20%2712082%27,%20%2712084%27,%20%2712085%27,%20%2712086%27,%20%2712087%27,%20%2712088%27,%20%2712091%27,%20%2712093%27,%20%2712101%27,%20%2712111%27,%20%2712112%27,%20

In [14]:
# Summary statistics of sale price for every (neighborhood, year) pair.
# `describe` is asked for the 50th percentile only, which is then exposed
# under the friendlier column name "median"; all figures are rounded to
# whole numbers.
sale_price_by_nbhd_year = sales.groupby(['nbhd_code', 'year'])['sale_price']
data = (
    sale_price_by_nbhd_year
    .describe(percentiles=[0.5])
    .rename(columns={'50%': 'median'})
    .round(0)
)

In [15]:
# Reshape the (nbhd_code, year)-indexed summary into a nested mapping:
#   {nbhd_code: {year: {statistic: value, ...} or None}}
# Every neighborhood gets an entry for every year that appears in the index;
# combinations with no row in `data` are recorded as None.
nbhd_levels, year_levels = data.index.levels
data_as_dict = {}
for nbhd in nbhd_levels:
    stats_by_year = {}
    for year in year_levels:
        if (nbhd, year) in data.index:
            stats_by_year[year] = data.loc[nbhd, year].to_dict()
        else:
            stats_by_year[year] = None
    data_as_dict[nbhd] = stats_by_year

In [16]:
# Same summary statistics as the per-neighborhood table, but aggregated over
# all neighborhoods so there is one row per year.
sale_price_by_year = sales.groupby(['year'])['sale_price']
totals = (
    sale_price_by_year
    .describe(percentiles=[0.5])
    .rename(columns={'50%': 'median'})
    .round(0)
)

In [17]:
# Serialize both payloads before opening the file: opening with 'w' truncates
# sales.js immediately, so a serialization error after that point would leave
# a partial file behind.
# NOTE(review): if any statistic is NaN (e.g. std of a group with a single
# sale), json.dumps writes a bare NaN while DataFrame.to_json writes null.
# Both are valid in a .js file, but confirm the consumer handles both.
sales_by_nbhd_js = json.dumps(data_as_dict)
sales_totals_js = totals.to_json(orient='index')

# Emit a small JavaScript file declaring two constants.
with open('sales.js', 'w', encoding='utf-8') as f:
    f.write('const sales_by_nbhd = ' + sales_by_nbhd_js + ';')
    f.write('\nconst sales_totals = ' + sales_totals_js + ';')