In [1]:
# Author: Claire Wagner
# Date: 15 June 2022
# Purpose: To generate data about city-owned PINs.

In [2]:
import pandas as pd
import urllib.parse
import datetime

In [3]:
def makeAPIRequest(api_endpoint, params, limit, read_function):
    """Build a Socrata query URL and pass it to ``read_function``.

    Args:
        api_endpoint: Base URL of the dataset resource.
        params: Iterable of SoQL clauses (e.g. ``"$select=..."``) that are
            joined with ``&``.
        limit: Value appended as the ``$limit`` clause.
        read_function: Callable invoked with the final URL string.

    Returns:
        Whatever ``read_function`` returns for the assembled URL.
    """
    joined_clauses = "&".join(params)
    raw_query = f"?{joined_clauses}&$limit={limit!s}"
    # Percent-encode everything except the characters SoQL needs verbatim.
    encoded_query = urllib.parse.quote(raw_query, safe="&?$=,!()")
    request_url = api_endpoint + encoded_query
    return read_function(request_url)

def getLastUpdated(api_endpoint, read_function):
    """Return the latest ``:updated_at`` value for the dataset at ``api_endpoint``.

    Args:
        api_endpoint: Base URL of the Socrata dataset resource to query.
        read_function: Callable that takes the request URL and returns an
            object supporting ``.loc[0, "last_updated"]`` (e.g. the DataFrame
            returned by ``pd.read_json``).

    Returns:
        The ``last_updated`` value from the first row of the response.
    """
    # Use the caller's endpoint and reader; previously both arguments were
    # ignored and the city-owned dataset was always queried.
    return makeAPIRequest(
        api_endpoint = api_endpoint,
        params = [ "$select=max(:updated_at) AS last_updated" ],
        limit = 1,
        read_function = read_function,
    ).loc[0, "last_updated"]

In [4]:
# maximum number of rows requested from each dataset ($limit clause)
limit = 3_000_000

# Socrata resource URLs for the three datasets used in this notebook
city_owned_api_endpoint = "https://data.cityofchicago.org/resource/aksk-kvfp.json"
properties_api_endpoint = "https://datacatalog.cookcountyil.gov/resource/c49d-89sn.json"
assessments_api_endpoint = "https://datacatalog.cookcountyil.gov/resource/tnes-dgyi.json"

In [5]:
# City-Owned Land Inventory: rows whose status is 'owned by city' and whose
# managing organization is 'none' or missing, i.e. properties currently owned
# by the City of Chicago that might be up for sale
# (see http://dev.cityofchicago.org/open%20data/data%20portal/2020/08/11/city-owned-property.html)
city_owned_query = [
    "$select=pin, managing_organization, lower(property_status) AS property_status, date_of_acquisition, date_of_disposition, sq_ft, last_update, :created_at, :updated_at",
    "$where=(lower(property_status)='owned by city') AND (lower(managing_organization)='none' OR managing_organization IS NULL)",
]
city_owned = makeAPIRequest(city_owned_api_endpoint, city_owned_query, limit, pd.read_json)

# last-updated timestamp reported by getLastUpdated for this request
city_owned_last_updated = getLastUpdated(city_owned_api_endpoint, pd.read_json)

In [6]:
# Property Locations dataset: address, ward, coordinates and census-tract
# attributes for every PIN matched by the ward filter below.
# NOTE(review): the intent is Wards 1-50, but the filter's bounds are the quoted
# values '0' and '50' -- confirm the ward column type so the range behaves as intended.
properties_query = [
    "$select=pin, property_address, property_zip, ward, longitude, latitude, tract_geoid, tract_pop, tract_midincome, tract_white_perc, tract_black_perc, tract_asian_perc, tract_his_perc, tract_other_perc",
    "$where=ward between '0' and '50'",
]
properties = makeAPIRequest(properties_api_endpoint, properties_query, limit, pd.read_json)

# last-updated timestamp reported by getLastUpdated for this request
properties_last_updated = getLastUpdated(properties_api_endpoint, pd.read_json)

In [7]:
# Cook County Property Assessments dataset: distinct (pin, bor_result) pairs
# for year 2020, with bor_result exposed as the 2020 Board of Review certified
# total assessed value
assessments_query = [
    "$select=distinct pin, bor_result AS total_assessed_value_2020_board_of_review_certified",
    "$where=year=2020",
]
assessments = makeAPIRequest(assessments_api_endpoint, assessments_query, limit, pd.read_json)

# last-updated timestamp reported by getLastUpdated for this request
assessments_last_updated = getLastUpdated(assessments_api_endpoint, pd.read_json)

In [8]:
# Sunshine Gospel Ministries address (source: https://www.sunshinegospel.org/),
# lower-cased to match the lower(property_address) comparison in the query
sgmAddress = "500 E 61st St".lower()

# look up Sunshine Gospel Ministries in the Property Locations dataset
# (same endpoint as `properties`, so reuse the shared constant)
sgm_matches = makeAPIRequest(
    api_endpoint = properties_api_endpoint,
    params = [
        "$select=pin, property_address, property_zip, ward, longitude, latitude, :created_at, :updated_at",
        f"$where=lower(property_address)='{sgmAddress}'",
    ],
    limit = 1,
    read_function = pd.read_json,
)

# fail with a descriptive message instead of an opaque KeyError from .loc[0]
if sgm_matches.empty:
    raise ValueError(f"No property found with address '{sgmAddress}'")

# first (and only, because limit = 1) matching row as a Series
sgm = sgm_matches.loc[0]

In [9]:
# Record the approximate access time (UTC) for use in the attribution string
# and in the exported data file.
now_utc = datetime.datetime.now(tz=datetime.timezone.utc)
fetchtime = now_utc.strftime("%d %B %Y at roughly %H:%M UTC")

In [10]:
# Attribution string (a JavaScript block comment) giving the source URL and
# last-updated value for each dataset, plus the access date and time.
# Each dataset is paired with its own *_last_updated value; the assessments
# entry previously reused properties_last_updated.
attribution = (
    "/* Data sources: "
    "https://data.cityofchicago.org/Community-Economic-Development/City-Owned-Land-Inventory/aksk-kvfp "
    f"(last updated at {city_owned_last_updated}), "
    "https://datacatalog.cookcountyil.gov/Property-Taxation/Assessor-Archived-05-11-2022-Property-Locations/c49d-89sn "
    f"(last updated at {properties_last_updated}), and "
    "https://datacatalog.cookcountyil.gov/Finance-Administration/Assessor-Archived-05-11-2022-Cook-County-Property-/tnes-dgyi "
    f"(last updated at {assessments_last_updated}), "
    f"accessed on {fetchtime}. */"
)

In [11]:
# Standardize the PIN format across all three datasets before joining:
# strip the dashes from the city-owned and assessments PINs, and convert the
# property PINs to strings.
# NOTE(review): if the property PINs were parsed as integers, any leading zeros
# are not restored by the string conversion -- confirm against the data.
for dashed_frame in (city_owned, assessments):
    dashed_frame["pin"] = dashed_frame["pin"].str.replace("-", "")
properties["pin"] = properties["pin"].map(str)

In [12]:
# each dataset must contain every PIN at most once, otherwise the joins
# below would multiply rows
for df in (city_owned, properties, assessments):
    assert df["pin"].is_unique

In [14]:
# inner-join on "pin": city-owned parcels with property locations first
# (overlapping column names get the dataset id as a suffix), then with the
# 2020 assessments
partial_join = city_owned.merge(
    properties,
    on="pin",
    how="inner",
    suffixes=("_aksk-kvfp", "_c49d-89sn"),
)
final_join = partial_join.merge(assessments, on="pin", how="inner")

In [20]:
# Share of joined PINs whose 2020 Board of Review certified value is 0,
# computed from the data rather than from a count copied out of a previous run.
assessed_col = "total_assessed_value_2020_board_of_review_certified"
print((final_join[assessed_col] == 0).mean())
# distribution of assessed values, including missing ones
final_join[assessed_col].value_counts(dropna=False)

0.9818879692935109


0        8186
2275        5
3701        3
5615        2
5500        2
         ... 
12817       1
30636       1
1950        1
2424        1
1577        1
Name: total_assessed_value_2020_board_of_review_certified, Length: 137, dtype: int64

In [None]:
# Tag the city dataset's "last_update" column with its dataset id and key the
# joined table by PIN (the exports below rely on the PIN index).
final_join = (
    final_join
    .rename(columns={"last_update": "last_update_aksk-kvfp"})
    .set_index("pin")
)

In [13]:
# Write "data.js": the attribution comment followed by three JavaScript
# constants (access time, Sunshine Gospel location, joined data) so that
# browser scripts can read the data directly.
with open("data.js", "w", encoding="utf-8") as f:
    f.writelines([
        attribution,  # attribution comment comes first
        f'\n\nconst accessDateTime = "{fetchtime}";',
        "\n\nconst sunshineGospel = ",
        sgm.to_json(orient="index") + ";",  # sgm as JSON
        "\n\nconst data = ",
        final_join.to_json(orient="index") + ";",  # joined data as JSON, keyed by index
    ])

In [14]:
# Export a subset of the joined columns to Excel. The PIN is moved back from
# the index into a column, and the suffixed city "last_update" column gets its
# original name back.
export_columns = [
    "pin",
    "property_address",
    "ward",
    "property_status",
    "managing_organization",
    "date_of_acquisition",
    "date_of_disposition",
    "last_update_aksk-kvfp",
    "tract_geoid",
]
pins = final_join.reset_index().filter(items=export_columns)
pins = pins.rename(columns={"last_update_aksk-kvfp": "last_update"})
pins.to_excel("city_owned_pins.xlsx", index=False)