In [1]:
import json
import urllib
import urllib.parse

import numpy as np
import pandas as pd

In [2]:
"""Helper function to make Socrata API request."""
def makeAPIRequest(api_endpoint, params, read_function = pd.read_json, limit=100000000, debug=False):
    """Helper function to make Socrata API request.

    Builds ``api_endpoint?<params joined by "&">&$limit=<limit>``,
    percent-encodes the query string and passes the resulting URL to
    ``read_function``.

    Parameters
    ----------
    api_endpoint : str
        Resource URL without a query string.
    params : sequence of str
        Ready-made query fragments such as ``"$select=..."``; may be empty.
    read_function : callable, default ``pd.read_json``
        Called with the final URL; its result is returned unchanged.
    limit : int, default 100000000
        Value sent as the ``$limit`` query parameter.
    debug : bool, default False
        When true, print the request URL before calling ``read_function``.

    Returns
    -------
    object
        Whatever ``read_function(url)`` returns.
    """
    # $limit always goes last, after any caller-supplied fragments.
    query_parts = list(params) if params else []
    query_parts.append("$limit=" + str(limit))
    query = "?" + "&".join(query_parts)

    # Only the query string is quoted; the characters in `safe` are left
    # literal, everything else outside quote()'s default safe set (spaces,
    # apostrophes, ...) is percent-encoded.
    url = api_endpoint + urllib.parse.quote(query, safe="&?$=,!()")
    if debug:
        print("Request URL:", url)
    return read_function(url)

In [3]:
# Distinct `town_nbhd` values (aliased to `nbhd_code`) from the pcdw-pxtg
# dataset, converted to strings and frozen into a tuple. The tuple is reused
# as the `nbhd_code in (...)` filter when the sales rows are requested.
nbhd_list = tuple(
    makeAPIRequest(
        "https://datacatalog.cookcountyil.gov/resource/pcdw-pxtg.json",
        ["$select=distinct town_nbhd as nbhd_code"],
        debug=False,
    )["nbhd_code"].astype("str")
)

In [4]:
# Leading characters of the `class` codes to keep; each entry becomes a
# starts_with(class, ...) condition in the sales query and is later stored
# as the `property_class` column.
propertyClasses = ["2", "3", "9"]

In [5]:
# Fetch sales rows for 2012-2022 whose `class` starts with one of
# `propertyClasses` and whose `nbhd_code` is one of `nbhd_list`.
sales = makeAPIRequest(
    api_endpoint = "https://datacatalog.cookcountyil.gov/resource/wvhk-k5uv.json",
    params = [
        "$select=year, class, nbhd_code, sale_price",
        "$where=year between 2012 and 2022 AND ("
        + " OR ".join(f"starts_with(class, '{c}')" for c in propertyClasses)
        # Spell the IN list out instead of relying on str(tuple): the text is
        # the same for two or more codes, but a one-element tuple would
        # otherwise render as ('x',) with a stray trailing comma.
        + ") AND nbhd_code in ("
        + ", ".join("'" + str(code) + "'" for code in nbhd_list)
        + ")",
    ],
    debug = False
)
# Collapse the full class code to its first character, then discard the
# original column (reassigned rather than mutated in place).
sales['property_class'] = sales['class'].astype('str').str.slice(0, 1)
sales = sales.drop(columns=['class'])

In [6]:
# Sale-price summary per (property_class, nbhd_code, year): the describe()
# output with only the 50th percentile requested, minus `std`, with the
# `50%` column renamed to `median`, rounded to whole numbers.
data = (
    sales
    .groupby(['property_class', 'nbhd_code', 'year'])['sale_price']
    .describe(percentiles=[0.5])
    .drop(columns='std')
    .rename(columns={'50%': 'median'})
    .round(0)
)

In [7]:
# Nested lookup {property_class: {nbhd: {year: stats}}} spanning every
# combination of the three index levels of `data`. Combinations that have no
# row in `data` are kept as explicit None entries.
data_as_dict = {
    cls: {
        hood: {
            yr: (
                None
                if (cls, hood, yr) not in data.index
                else data.loc[cls, hood, yr].to_dict()
            )
            for yr in data.index.levels[2]
        }
        for hood in data.index.levels[1]
    }
    for cls in data.index.levels[0]
}

In [8]:
# Same sale-price summary as the per-neighbourhood table, but grouped only by
# (property_class, year): describe() with the 50th percentile, `std` dropped,
# `50%` renamed to `median`, rounded to whole numbers.
totals = (
    sales
    .groupby(['property_class', 'year'])['sale_price']
    .describe(percentiles=[0.5])
    .drop(columns='std')
    .rename(columns={'50%': 'median'})
    .round(0)
)

In [9]:
# Nested lookup {property_class: {year: stats}} spanning every combination of
# the two index levels of `totals`; combinations with no row map to None.
totals_as_dict = {
    cls: {
        yr: (
            None
            if (cls, yr) not in totals.index
            else totals.loc[cls, yr].to_dict()
        )
        for yr in totals.index.levels[1]
    }
    for cls in totals.index.levels[0]
}

In [10]:
# Distinct values of the `year` column of `sales`, in ascending order.
years = (
    sales['year']
    .sort_values()
    .unique()
)

In [11]:
# Write the three datasets to sales.js as JavaScript `const` declarations.
with open('sales.js', 'w') as f:
    # Convert to plain Python values and serialise with json.dumps rather than
    # str(list(years)): under NumPy >= 2 the repr of a NumPy scalar is e.g.
    # `np.int64(2012)`, which is not a valid JavaScript array element.
    f.write('const sales_years = ')
    f.write(json.dumps(np.asarray(years).tolist()))
    f.write(';')
    f.write('const sales_by_nbhd = ')
    f.write(json.dumps(data_as_dict))
    f.write(';')
    f.write('\nconst sales_totals = ')
    f.write(json.dumps(totals_as_dict))
    f.write(';')