In [1]:
# Author: Claire Wagner
# Date: 15 June 2022
# Purpose: To generate data about city-owned PINs.

In [2]:
import pandas as pd
import urllib.parse
import datetime

In [3]:
def makeAPIRequest(api_endpoint, params, limit, read_function):
    """Build a Socrata query URL and hand it to ``read_function``.

    Args:
        api_endpoint: URL of the resource to query.
        params: list of ready-made query parameters (e.g. ``"$select=pin"``).
        limit: row cap, appended to the query as ``$limit``.
        read_function: callable invoked with the final URL.

    Returns:
        Whatever ``read_function`` returns for the assembled URL.
    """
    joined_params = "&".join(params)
    raw_query = f"?{joined_params}&$limit={limit!s}"
    # Percent-encode everything except the characters that carry query syntax.
    encoded_query = urllib.parse.quote(raw_query, safe="&?$=,!()")
    return read_function(api_endpoint + encoded_query)

def getLastUpdated(api_endpoint, read_function):
    """Return the latest ``:updated_at`` value reported by a Socrata dataset.

    Args:
        api_endpoint: URL of the resource to query.
        read_function: callable that takes the request URL and returns an
            object supporting ``.loc[0, 'last_updated']`` (for example
            ``pd.read_json``).

    Returns:
        The ``last_updated`` value from the first row of the response.
    """
    response = makeAPIRequest(
        api_endpoint = api_endpoint,
        params = [ "$select=max(:updated_at) AS last_updated" ],
        limit = 1,
        # Use the caller-supplied reader rather than hard-coding pd.read_json.
        read_function = read_function,
    )
    return response.loc[0, 'last_updated']

In [4]:
# Row cap sent as $limit with every bulk dataset request.
limit = 3_000_000

# Socrata resource URLs for the datasets queried in this notebook.
business_licenses_api_endpoint = "https://data.cityofchicago.org/resource/uupf-x98q.json"
city_owned_api_endpoint = "https://data.cityofchicago.org/resource/aksk-kvfp.json"
properties_api_endpoint = "https://datacatalog.cookcountyil.gov/resource/c49d-89sn.json"

In [5]:
# Query the City-Owned Land Inventory dataset for parcels whose status is
# 'owned by city' and that have no managing organization, i.e. properties that
# might be up for sale (see
# http://dev.cityofchicago.org/open%20data/data%20portal/2020/08/11/city-owned-property.html)
city_owned_select = "$select=" + ", ".join([
    "pin",
    "managing_organization",
    "lower(property_status) AS property_status",
    "date_of_acquisition",
    "date_of_disposition",
    "sq_ft",
    "last_update",
])
city_owned_where = (
    "$where=(lower(property_status)='owned by city')"
    " AND (lower(managing_organization)='none' OR managing_organization IS NULL)"
)
city_owned = makeAPIRequest(
    city_owned_api_endpoint,
    [city_owned_select, city_owned_where],
    limit,
    pd.read_json,
)

In [6]:
#datetime.datetime.fromisoformat(city_owned_last_updated.split('T')[0]).strftime("%d %B %Y")

In [7]:
# Fetch PIN, address and location columns from the Property Locations dataset.
# No $where filter is applied, so up to `limit` rows are returned.
properties_select = "$select=" + ", ".join([
    "pin",
    "property_address",
    "property_zip",
    "ward",
    "longitude",
    "latitude",
    "tract_geoid",
])
properties = makeAPIRequest(
    properties_api_endpoint,
    [properties_select],
    limit,
    pd.read_json,
)

In [8]:
# Sunshine Gospel Ministries address (source: https://www.sunshinegospel.org/),
# lower-cased to match the lower(property_address) comparison in the query.
sgmAddress = "500 E 61st St".lower()

# Look up the PIN and coordinates for that address in the Property Locations
# dataset (the same endpoint that `properties` is fetched from).
sgm_matches = makeAPIRequest(
    api_endpoint = properties_api_endpoint,
    params = [
        "$select=pin, longitude, latitude",
        f"$where=lower(property_address)='{sgmAddress}'",
    ],
    limit = 1,
    read_function = pd.read_json,
)
# Fail with a readable message instead of an opaque KeyError from .loc[0].
assert not sgm_matches.empty, f"no Property Locations record found for address '{sgmAddress}'"
sgm = sgm_matches.loc[0]

In [9]:
# Fetch license records with city='CHICAGO' from the Business Licenses -
# Current Active dataset.
business_licenses_select = "$select=" + ", ".join([
    "license_number",
    "license_id AS license_record_id",
    "doing_business_as_name",
    "license_description",
    "business_activity",
    "address",
    "ward",
    "zip_code",
    "longitude",
    "latitude",
    "license_start_date",
])
business_licenses = makeAPIRequest(
    business_licenses_api_endpoint,
    [business_licenses_select, "$where=city='CHICAGO'"],
    limit,
    pd.read_json,
)

In [10]:
# Keep one row per license number: after sorting by (license_number,
# license_start_date), the last row of each license is the one with the
# greatest start date. The start date column is dropped afterwards.
licenses_sorted = business_licenses.sort_values(by=['license_number', 'license_start_date'])
licenses_latest = licenses_sorted.drop_duplicates(subset=['license_number'], keep='last')
business_licenses_filtered = licenses_latest.drop(columns=['license_start_date'])

# sanity check: no license number appears more than once
assert business_licenses_filtered['license_number'].is_unique

In [11]:
# Record when each dataset was last updated; these values are embedded in the
# attribution strings written alongside the exported data.
city_owned_last_updated = getLastUpdated(city_owned_api_endpoint, pd.read_json)
properties_last_updated = getLastUpdated(properties_api_endpoint, pd.read_json)
# doesn't work (outdated api endpoint?)
#business_licenses_last_updated = getLastUpdated(
#    api_endpoint = business_licenses_api_endpoint,
#    read_function = pd.read_json,
#)

In [12]:
# Human-readable UTC timestamp of (roughly) when the data was accessed; it is
# interpolated into the attribution strings for the data sources.
utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
fetchtime = utc_now.strftime("%d %B %Y at roughly %H:%M UTC")

In [13]:
# Put both PIN columns into a joinable form: strip the dashes from the
# city-owned PINs; cast the property PINs to str and left-pad them with zeros
# to 14 characters.
city_owned["pin"] = city_owned["pin"].str.replace("-", "")
properties["pin"] = properties["pin"].map(str).str.pad(width=14, side="left", fillchar="0")

In [14]:
# Inner join on PIN: keeps only city-owned rows that have a match in
# `properties`; clashing column names get the dataset id as a suffix.
city_owned_join = city_owned.merge(
    properties,
    how="inner",
    on="pin",
    suffixes=["_aksk-kvfp", "_c49d-89sn"],
)

In [15]:
# Row-count gap between city_owned and the join, i.e. the number of city-owned
# properties without location data (assuming PINs in `properties` are unique -
# TODO confirm).
len(city_owned) - len(city_owned_join)

6

In [16]:
# Columns written to the spreadsheet, in output order.
city_owned_export_columns = [
    "pin",
    "managing_organization",
    "property_status",
    "last_update",
    "date_of_acquisition",
    "date_of_disposition",
    "property_address",
    "property_zip",
    "ward",
    "tract_geoid",
]
city_owned_export = city_owned_join.filter(items=city_owned_export_columns)
city_owned_export.to_excel("city_owned_pins_possibly_for_sale.xlsx", index=False)

In [17]:
# Index the join by PIN (keeping the column) so the JSON export with
# orient="index" is keyed by PIN. That export requires unique index labels, so
# check up front, as the scavenger and tax-sale pipelines do.
assert city_owned_join["pin"].is_unique, "duplicate PINs in city_owned_join"
city_owned_join = city_owned_join.set_index("pin", drop=False)

In [18]:
# Attribution text naming the data sources, their last-updated times and the access time.
city_owned_attribution = (
    "Data sources: "
    "https://data.cityofchicago.org/Community-Economic-Development/City-Owned-Land-Inventory/aksk-kvfp"
    f" (last updated at {city_owned_last_updated}) and "
    "https://datacatalog.cookcountyil.gov/Property-Taxation/Assessor-Archived-05-11-2022-Property-Locations/c49d-89sn"
    f" (last updated at {properties_last_updated}), accessed on {fetchtime}."
)
sunshine_gospel_attribution = (
    "Data source: "
    "https://datacatalog.cookcountyil.gov/Property-Taxation/Assessor-Archived-05-11-2022-Property-Locations/c49d-89sn"
    f" (last updated at {properties_last_updated}), accessed on {fetchtime}."
)

# Emit the data as JavaScript `const` declarations so browser scripts can load
# the file directly; statements are separated by a blank line.
with open("../data/city-owned/city_owned_data.js", "w", encoding="utf-8") as js_file:
    js_statements = [
        f'const sunshine_gospel_attribution = "{sunshine_gospel_attribution}";',
        f'const sunshineGospel = {sgm.to_json(orient="index")};',
        f'const city_owned_attribution = "{city_owned_attribution}";',
        f'const city_owned_data = {city_owned_join.to_json(orient="index")};',
    ]
    js_file.write("\n\n".join(js_statements))

In [19]:
# Attribution text for the business license export. The parenthesis opened
# before "accessed on" is closed so the sentence is well-formed.
business_license_attribution = f"Current active business licenses in the City of Chicago. Data sources: https://data.cityofchicago.org/Community-Economic-Development/Business-Licenses-Current-Active/uupf-x98q (accessed on {fetchtime})."

# Write the attribution and the de-duplicated license records as JavaScript
# `const` declarations for easier access by scripts running in the browser.
with open("../data/business-licenses/business_license_data.js", "w", encoding="utf-8") as f:
    f.write(f"const business_license_attribution = \"{business_license_attribution}\";")
    f.write("\n\nconst business_license_data = ") # assign JSON data to a variable
    f.write(business_licenses_filtered.to_json(orient="records") + ";") # one JSON object per license

In [20]:
# Scavenger sale list dated 28 January 2022
# (source: https://web.archive.org/web/20220128184252/https://www.cookcountytreasurer.com/pdfs/scavsale/2022cookcountyscavengertaxsalelist.xlsx,
# accessed 3 June 2022)
scavenger_xlsx_path = "../data/scavenger-sale/2022cookcountyscavengertaxsalelist_1-28-22.xlsx"
scavenger = pd.read_excel(scavenger_xlsx_path)

In [21]:
# standardize columns and PIN and classification formats
def standardize_columns(df):
    """Rename df's columns in place to lowercase snake_case.

    Each name is lower-cased and split on whitespace, and the pieces are
    re-joined with underscores. Nothing is returned.
    """
    lowered = df.columns.str.lower()
    words = lowered.str.split()
    df.columns = words.str.join('_')

In [22]:
# Normalise the header names in place, then tidy the two key columns:
# dashes are removed from PINs and surrounding whitespace from classifications.
standardize_columns(scavenger)
scavenger_pin_digits = scavenger["pin"].str.replace("-", "")
scavenger_class_codes = scavenger["classification"].str.strip()
scavenger["pin"] = scavenger_pin_digits
scavenger["classification"] = scavenger_class_codes

In [23]:
# Property class lookup table
# (source: https://www.cookcountyassessor.com/form-document/codes-classification-property,
# accessed 21 June 2022)
property_classes_xlsx_path = "../data/scavenger-sale/property_classes.xlsx"
property_classes = pd.read_excel(property_classes_xlsx_path)

In [24]:
# Attach property class details to every scavenger sale row (left join on the
# classification code), then drop the now-redundant `classification` column.
scavenger_with_property_classes = scavenger.merge(
    property_classes,
    how="left",
    left_on="classification",
    right_on="property_code",
).drop(columns=['classification'])

In [25]:
# every scavenger sale row must have found a matching property class
assert scavenger_with_property_classes['property_code'].notna().all()

In [26]:
# Add location columns from `properties` to the scavenger sale rows (inner
# join on PIN); clashing column names are suffixed by their origin.
scavenger_final_join = scavenger_with_property_classes.merge(
    properties,
    how="inner",
    on="pin",
    suffixes=["_scavenger_sale", "_c49d-89sn"],
)

In [27]:
# the inner join must not have changed the row count, i.e. every PIN on the
# scavenger sale list found location data
assert len(scavenger_final_join) == len(scavenger_with_property_classes)

In [28]:
# Columns kept for the export, in output order.
scavenger_output_columns = [
    'pin',
    'property_address_scavenger_sale',
    'ward',
    'delinquent_tax_year_range',
    'delinquent_tax',
    'delinquent_interest',
    'total_delinquency',
    '2020_taxes_billed',
    'property_code',
    'property_code_meaning',
    'property_code_class',
    'property_zip',
    'longitude',
    'latitude',
    'tract_geoid',
]
# Restrict to rows whose property_city is CHICAGO, keep the export columns,
# use the scavenger sale list's address as `property_address`, and index by
# PIN while keeping the column.
in_chicago = scavenger_final_join['property_city'] == 'CHICAGO'
scavenger_final_join = (
    scavenger_final_join[in_chicago]
    .filter(items=scavenger_output_columns)
    .rename(columns={'property_address_scavenger_sale': 'property_address'})
    .set_index('pin', drop=False)
)

In [29]:
# each PIN may appear only once in the index
assert scavenger_final_join.index.is_unique

In [30]:
# Attribution text for the scavenger sale export.
# NOTE(review): the comment on the cell that loads property_classes.xlsx gives
# 21 June 2022 as the access date, while this string says 22 June 2022 - confirm.
scavenger_sale_attribution = (
    "PINs on the scavenger tax sale list where 'property_city' == 'CHICAGO'. Data sources: "
    "https://web.archive.org/web/20220128184252/https://www.cookcountytreasurer.com/pdfs/scavsale/2022cookcountyscavengertaxsalelist.xlsx"
    " (archived on 28 January 2022, accessed on 3 June 2022), "
    "https://datacatalog.cookcountyil.gov/Property-Taxation/Assessor-Archived-05-11-2022-Property-Locations/c49d-89sn"
    f" (last updated at {properties_last_updated}, accessed on {fetchtime}), and "
    "https://www.cookcountyassessor.com/form-document/codes-classification-property"
    " (accessed 22 June 2022)."
)

# Emit the attribution and the PIN-keyed records as JavaScript `const`
# declarations so browser scripts can load the file directly.
with open("../data/scavenger-sale/scavenger_data.js", "w", encoding="utf-8") as js_file:
    js_statements = [
        f'const scavenger_sale_attribution = "{scavenger_sale_attribution}";',
        f'const scavenger_sale_data = {scavenger_final_join.to_json(orient="index")};',
    ]
    js_file.write("\n\n".join(js_statements))

In [31]:
# Ward 20 properties on the tax sale list for tax year 2019
# (accessed on 17 May 2022 around 11 am ET)
taxsale_xlsx_path = "../data/tax-sale/tax_sale_tax_year_2019_ward20_accessed_17_May_2022.xlsx"
taxsale = pd.read_excel(taxsale_xlsx_path)

In [32]:
# Normalise the header names in place and strip the dashes from the PINs.
standardize_columns(taxsale)
taxsale['pin'] = taxsale['pin'].str.replace('-', '')

# Keep only the columns of interest, then only the rows for tax year 2019.
taxsale_columns = [
    'pin',
    'property_address',
    'current_mailing_address',
    'taxpayer_name',
    'tax_type',
    'tax_year',
    'total_tax_due',
    'total_due_(including_interest)',
    'classification',
    'prior_tax_years_may_also_be_unpaid',
]
taxsale = taxsale.filter(items=taxsale_columns)
taxsale = taxsale.loc[taxsale['tax_year'] == 2019]

In [33]:
# each PIN may appear only once on the filtered tax sale list
assert taxsale['pin'].is_unique

In [34]:
# Add location columns from `properties` to the tax sale rows (inner join on
# PIN); clashing column names are suffixed by their origin.
taxsale_join = taxsale.merge(
    properties,
    how="inner",
    on="pin",
    suffixes=["_tax_sale", "_c49d-89sn"],
)

In [35]:
# the join must keep every tax sale row (all PINs found location data) ...
assert len(taxsale_join) == len(taxsale)
# ... and every joined row must lie in Ward 20
assert (taxsale_join['ward'] == 20).all()

In [36]:
# Keep the tax sale list's address as `property_address` (dropping the one
# from `properties`), give the total-due column a plainer name, and index by
# PIN while keeping the column.
taxsale_renames = {
    'property_address_tax_sale' : 'property_address',
    'total_due_(including_interest)' : 'total_due_including_interest',
}
taxsale_join = (
    taxsale_join
    .drop(columns=['property_address_c49d-89sn'])
    .rename(columns=taxsale_renames)
    .set_index('pin', drop=False)
)

In [37]:
# Attribution text for the Ward 20 tax sale export.
taxsale_ward20_attribution = (
    "PINs in Ward 20 on the tax sale list with tax owed for tax year 2019. Data sources: "
    "https://www.cookcountytreasurer.com/delinquenttaxes.aspx"
    " (accessed on 17 May 2022 around 11 am ET) and "
    "https://datacatalog.cookcountyil.gov/Property-Taxation/Assessor-Archived-05-11-2022-Property-Locations/c49d-89sn"
    f" (last updated at {properties_last_updated}, accessed on {fetchtime})."
)

# Emit the attribution and the PIN-keyed records as JavaScript `const`
# declarations so browser scripts can load the file directly.
with open("../data/tax-sale/tax_sale_ward20_tax_year_2019.js", "w", encoding="utf-8") as js_file:
    js_statements = [
        f'const taxsale_ward20_attribution = "{taxsale_ward20_attribution}";',
        f'const taxsale_ward20_data = {taxsale_join.to_json(orient="index")};',
    ]
    js_file.write("\n\n".join(js_statements))