In [1]:
import pandas as pd
from nvi_etl import make_engine_for
from sqlalchemy import text

In [15]:
# Engine returned by the project helper for the "ipds" target.
db = make_engine_for("ipds")

# Read every column of raw.regrid_tax_foreclosures_20230308 in one query.
q = text("""
SELECT *
FROM raw.regrid_tax_foreclosures_20230308;
""")

fcs = pd.read_sql(q, db)

In [18]:
# Column-name prefixes that repeat with a "_<year>" suffix in the wide table.
stubnames = ["status", "due", "tax", "fees"]

# Detroit rows only, minus the un-suffixed "status"/"due" columns
# (presumably dropped because they share a name with two of the stubs).
detroit_wide = fcs.query("city=='detroit'").drop(columns=["status", "due"])

# Reshape to one row per (parcelnumb, year) and keep only the stub columns.
long = pd.wide_to_long(
    detroit_wide,
    stubnames=stubnames,
    i="parcelnumb",
    j="year",
    sep="_",
)[stubnames]

In [30]:
# Flatten the (parcelnumb, year) index once, then count rows per year that have any status.
long_flat = long.reset_index()
long_flat[long_flat["status"].notna()][["year"]].value_counts().sort_index()

year
2013       441
2014       668
2015      1424
2016      2674
2017      3275
2018      4244
2019      8112
2020     24334
2021     48677
2022    127977
Name: count, dtype: int64

In [33]:
# Per-year count of rows whose status is present and is not 'UNCONFIRMED'.
has_confirmed_status = long.status.notna() & (long.status != 'UNCONFIRMED')
long[has_confirmed_status].reset_index()["year"].value_counts().sort_index()

year
2013      441
2014      668
2015     1424
2016     2674
2017     3270
2018     4244
2019     8112
2020    24332
2021    48677
Name: count, dtype: int64

In [None]:
from pathlib import Path
# Root of the V: drive. The trailing separator matters on Windows: a bare
# "V:" is drive-relative (joined paths resolve against the current directory
# on V:), whereas "V:\\" anchors joined paths at the drive root.
vault = Path("V:\\")



In [165]:
# Shared folder that holds one sub-folder per auction year.
_AUCTION_ROOT = "V:\\IPDS\\Wayne County Tax Auction\\Data"

# Column renames applied to a file before the row-level clean-up functions run.
_BIDDER_ONLY = {"BIDDER_NBR": "BIDDER_ID"}
_BIDDER_AND_ZIP = {"BIDDER_NBR": "BIDDER_ID", "PROP_CITY_ZIP": "PROP_CITY_STATE_ZIP"}

# (auction year, sub-folder under the year, file name, column renames)
_AUCTION_FILES = [
    (2018, "Prepped", "wcto_foreclosed__sep_10012018.csv", _BIDDER_ONLY),
    (2019, "Prepped", "wcto_foreclosed__oct_11202019.csv", _BIDDER_ONLY),
    (2019, "Prepped", "wcto_foreclosed__sep_11202019.csv", _BIDDER_ONLY),
    (2021, "Prepped", "wcto_auction__oct_01102022.csv", _BIDDER_ONLY),
    (2021, "Prepped", "wcto_auction__sep_01102022.csv", {}),
    (2022, "Prepped", "wcto_auction__oct_10122022.csv", _BIDDER_AND_ZIP),
    (2022, "Prepped", "wcto_auction__sep_10122022.csv", _BIDDER_AND_ZIP),
    (2023, "Prepped", "wcto_auction__dec_01042024.csv", _BIDDER_AND_ZIP),
    (2023, "Prepped", "wcto_auction__oct_01042024.csv", _BIDDER_AND_ZIP),
    (2023, "Prepped", "wcto_auction__sep_10102023.csv", _BIDDER_AND_ZIP),
    # This workbook's rename map uses "BIDDERNBR" (no underscore) as the source column.
    (2024, "Processed", "wcto-parcels-sold-in-2024-september-auction.xlsx",
     {"BIDDERNBR": "BIDDER_ID", "PROP_CITY_ZIP": "PROP_CITY_STATE_ZIP"}),
    (2024, "Processed", "wcto-properties-sold-in-2024-october-auction-111924_updated.xlsx",
     _BIDDER_AND_ZIP),
]

# Final list of (year, full path, rename map); each entry gets its own dict copy.
foreclosures_sales_files = [
    (year, f"{_AUCTION_ROOT}\\{year}\\{stage}\\{filename}", dict(rename))
    for year, stage, filename, rename in _AUCTION_FILES
]

In [198]:
def fix_errored_ids(row):
    """
    Repair rows whose PARCEL_ID cell also contains the street address.

    When PARCEL_ID holds several whitespace-separated tokens, the first
    token is kept as the parcel id and the remaining tokens, joined with
    single spaces, overwrite PROP_ADDRESS.

    The row is returned unchanged when PARCEL_ID is missing, is not a
    string (e.g. NaN), is blank, or is a single token. Any other error is
    allowed to propagate instead of being silently swallowed.

    Returns the same row object that was passed in.
    """
    try:
        raw_id = row["PARCEL_ID"]
    except KeyError:
        return row

    # Non-string cells (NaN, numbers) have nothing to split.
    if not isinstance(raw_id, str):
        return row

    tokens = raw_id.split()

    # Zero tokens (blank cell) or one token (already a clean id).
    if len(tokens) < 2:
        return row

    pid, *address = tokens
    row["PARCEL_ID"] = pid
    row["PROP_ADDRESS"] = " ".join(address)

    return row


def split_city_state_zip(row):
    """
    Populate PROP_CITY and PROP_ZIP on a row.

    This needs to handle two cases, ' MI ' in the middle
    or not.

    Combined column present: PROP_CITY_STATE_ZIP is split on whitespace,
    the last token becomes PROP_ZIP, a trailing 'MI' token is discarded,
    and the remaining tokens (upper-cased) become PROP_CITY. A blank value
    yields empty strings for both; a single token is treated as the zip
    with an empty city.

    Combined column missing or not a string (e.g. NaN): the existing
    PROP_CITY / PROP_ZIP values are stripped. Each field is handled on its
    own, so a missing or non-string value blanks only that field.

    Returns the same row object that was passed in.
    """
    try:
        parts = row["PROP_CITY_STATE_ZIP"].split()
    except (AttributeError, KeyError):
        # No usable combined value: tidy the separate columns instead.
        for field in ("PROP_CITY", "PROP_ZIP"):
            try:
                row[field] = row[field].strip()
            except (AttributeError, KeyError):
                row[field] = ""

        return row

    # Blank cell: nothing to pop a zip code from.
    if not parts:
        row["PROP_CITY"] = ""
        row["PROP_ZIP"] = ""
        return row

    zip_code = parts.pop()

    # Guard against a lone zip token, which leaves no tokens to inspect.
    if parts and parts[-1] == "MI":
        parts.pop() # Throw out MI

    row["PROP_CITY"] = " ".join(parts).upper().strip()
    row["PROP_ZIP"] = zip_code.strip()

    return row
        

def fix_bidder_nbr(row):
    """
    Separate a bidder name that was appended to the bidder number.

    BIDDER_ID is converted to text and split on whitespace. When more than
    one token is present, the first token replaces BIDDER_ID and the rest,
    joined with single spaces and upper-cased, overwrite BIDDER_NAME.

    A blank or single-token BIDDER_ID leaves the row unchanged. A missing
    BIDDER_ID key still raises KeyError.

    Returns the same row object that was passed in.
    """
    tokens = str(row["BIDDER_ID"]).split()

    # Zero tokens (blank cell) or one token (already a clean id): nothing to
    # separate. Checking the length first avoids unpacking an empty list.
    if len(tokens) < 2:
        return row

    number, *extra = tokens

    row["BIDDER_ID"] = number
    row["BIDDER_NAME"] = " ".join(extra).upper()

    return row

def split_bdr_city_state_zip(row):
    """
    Populate BIDDER_CITY, BIDDER_STATE and BIDDER_ZIP on a row.

    Always keep the state on this one.

    Combined column usable: BIDDER_CITY_STATE_ZIP is split on whitespace;
    the last token is the zip and the one before it is the state when it
    is exactly two characters long (otherwise it is put back into the city
    and the state is left empty). BIDDER_ADDRESS is upper-cased and
    stripped. Values with fewer than two tokens are printed and the row is
    returned untouched.

    Combined column missing or not a string (e.g. NaN): BIDDER_ADDRESS is
    upper-cased and the separate BIDDER_CITY / BIDDER_STATE / BIDDER_ZIP
    values are kept when all three exist; otherwise all three are set to
    empty strings.

    A missing BIDDER_ADDRESS raises KeyError. Returns the same row object
    that was passed in.
    """
    try:
        parts = row["BIDDER_CITY_STATE_ZIP"].split()

        if len(parts) < 2:
            # Too short to contain both a city and a zip; surface it for review.
            print(parts)
            return row

        # At least two tokens are present from here on.
        zip_code = parts.pop()
        state = parts.pop()

        if len(state) != 2: # Sloppy but OK
            # Not a two-letter state code: treat the token as part of the city.
            parts.append(state)
            state = ""

        row["BIDDER_ADDRESS"] = str(row["BIDDER_ADDRESS"]).upper().strip()
        row["BIDDER_CITY"] = " ".join(parts).upper().strip()
        row["BIDDER_STATE"] = str(state).upper().strip()
        row["BIDDER_ZIP"] = str(zip_code).strip()

        return row

    # KeyError: a column is absent; AttributeError: the cell is not a string.
    except (AttributeError, KeyError):
        try:
            row["BIDDER_ADDRESS"] = str(row["BIDDER_ADDRESS"]).upper()
            # Self-assignments act as existence checks for the separate columns.
            row["BIDDER_CITY"] = row["BIDDER_CITY"]
            row["BIDDER_STATE"] = row["BIDDER_STATE"]
            row["BIDDER_ZIP"] = row["BIDDER_ZIP"]
        except KeyError:
            row["BIDDER_ADDRESS"] = str(row["BIDDER_ADDRESS"]).upper()
            row["BIDDER_CITY"] = ""
            row["BIDDER_STATE"] = ""
            row["BIDDER_ZIP"] = ""


    return row

In [199]:
# Column set each file is compared against before it is processed.
expected_columns = {
    'PARCEL_ID', 'PROP_ADDRESS', 'PROP_CITY', 'PROP_ZIP',
    'SALE_AMOUNT', 'BIDDER_NBR', 'BIDDER_NAME', 'BIDDER_ADDRESS',
    'BIDDER_CITY', 'BIDDER_STATE', 'BIDDER_ZIP'
}

# One cleaned frame per input file that was processed successfully.
result = []
for year, path, rename in foreclosures_sales_files:
    print(path)

    # Try CSV first; content that is not decodable/parsable as CSV text
    # (e.g. the .xlsx workbooks in the list) falls back to the Excel reader.
    # Missing-file and permission errors are left to surface directly.
    try:
        frame = pd.read_csv(path)
    except (UnicodeDecodeError, pd.errors.ParserError):
        frame = pd.read_excel(path)

    # A column literally named '1' means the real header sits in the first
    # data row; promote it. The promoted row itself is filtered out below.
    if '1' in frame.columns:
        frame.columns = frame.iloc[0]

    non_matches = set(frame.columns).symmetric_difference(expected_columns)

    # NOTE(review): a file whose columns equal the expected set exactly is
    # skipped here and never appended to `result` -- confirm this is intended.
    if not non_matches:
        continue

    # Drop rows that merely repeat the header text.
    removed_headers = frame[frame["PARCEL_ID"] != "PARCEL_ID"]

    try:
        result.append(
            removed_headers
            .rename(columns=rename)
            .apply(fix_errored_ids, axis=1)
            .apply(split_city_state_zip, axis=1)
            .apply(fix_bidder_nbr, axis=1)
            .apply(split_bdr_city_state_zip, axis=1)
            .assign(auction_year=year)
        )
    except KeyError as e:
        # A column the clean-up functions need is absent: report what was
        # found and move on to the next file.
        print(non_matches)
        print(removed_headers.columns)
        print(e)
        continue

V:\IPDS\Wayne County Tax Auction\Data\2018\Prepped\wcto_foreclosed__sep_10012018.csv
V:\IPDS\Wayne County Tax Auction\Data\2019\Prepped\wcto_foreclosed__oct_11202019.csv
V:\IPDS\Wayne County Tax Auction\Data\2019\Prepped\wcto_foreclosed__sep_11202019.csv
V:\IPDS\Wayne County Tax Auction\Data\2021\Prepped\wcto_auction__oct_01102022.csv
V:\IPDS\Wayne County Tax Auction\Data\2021\Prepped\wcto_auction__sep_01102022.csv
V:\IPDS\Wayne County Tax Auction\Data\2022\Prepped\wcto_auction__oct_10122022.csv
V:\IPDS\Wayne County Tax Auction\Data\2022\Prepped\wcto_auction__sep_10122022.csv
V:\IPDS\Wayne County Tax Auction\Data\2023\Prepped\wcto_auction__dec_01042024.csv
V:\IPDS\Wayne County Tax Auction\Data\2023\Prepped\wcto_auction__oct_01042024.csv
V:\IPDS\Wayne County Tax Auction\Data\2023\Prepped\wcto_auction__sep_10102023.csv
V:\IPDS\Wayne County Tax Auction\Data\2024\Processed\wcto-parcels-sold-in-2024-september-auction.xlsx
V:\IPDS\Wayne County Tax Auction\Data\2024\Processed\wcto-properties-

In [200]:
# The sale amount appears under three different column names in `result`.
sale_amount_columns = ["SALE_AMT", "SALE_AMOUNT", "SALE PRICE"]

# Columns carried forward from the combined frames, in output order.
kept_columns = [
    "PARCEL_ID", "PROP_ADDRESS", "PROP_CITY",
    *sale_amount_columns,
    "BIDDER_ID", "BIDDER_NAME", "BIDDER_ADDRESS",
    "BIDDER_CITY", "BIDDER_STATE", "BIDDER_ZIP",
    "auction_year",
]
full_set = pd.concat(result)[kept_columns]

# Back-fill across the three columns and take the first: the left-most non-null value wins.
full_set["SALE_AMOUNT"] = full_set[sale_amount_columns].bfill(axis=1).iloc[:, 0]
full_set = full_set.drop(columns=["SALE_AMT", "SALE PRICE"])

  full_set["SALE_AMOUNT"] = full_set[["SALE_AMT", "SALE_AMOUNT", "SALE PRICE",]].bfill(axis=1).iloc[:, 0]


In [201]:
# 2020 no foreclosures
# 2021 no data
# 2022 no data
# 2023 

In [206]:
# Number of rows in full_set for each auction year, ordered by year.
rows_per_auction_year = full_set["auction_year"].value_counts()
rows_per_auction_year.sort_index()

auction_year
2018    2407
2019    2031
2022    2183
2023    1217
2024    1537
Name: count, dtype: int64