In [54]:
import sys
import pandas as pd
import geopandas as gpd
import os
from datetime import datetime

sys.path.append('..')  # Add parent directory to path

# Import from local utility modules
from lvt_utils import ensure_geodataframe
from cloud_utils import get_feature_data_with_geometry

# Set pandas to display all columns
pd.set_option('display.max_columns', None)

# Define Baltimore Realproperty_OB layer endpoint components
dataset_name = "12/query"
base_url = "https://maps.co.ramsey.mn.us/arcgis/rest/services/OpenData/OpenData/FeatureServer"
layer_id = 12

# Set scrape variable as needed
data_scrape = 1  # set to 0 or 1 as required

save_dir = os.path.join("data", "st_paul")
os.makedirs(save_dir, exist_ok=True)

if data_scrape == 1:
    # Download data with geometry (paginate=True to pull all records)
    ramsey_county_gdf = get_feature_data_with_geometry(dataset_name, base_url, layer_id, paginate=True)
    # Make sure it's a GeoDataFrame
    ramsey_county_gdf = ensure_geodataframe(ramsey_county_gdf)
    # Compose filename with date
    today_str = datetime.now().strftime("%Y%m%d")
    fname = f"ramsey_county_{today_str}.gpq"
    fpath = os.path.join(save_dir, fname)
    ramsey_county_gdf.to_parquet(fpath)
    display(ramsey_county_gdf.head())
else:
    # Find most recent geoparquet in data/baltimore
    try:
        files = [f for f in os.listdir(save_dir) if f.lower().endswith(".gpq")]
        if not files:
            raise FileNotFoundError("No ramsey_county geoparquet scrapes found in data/ramsey_county/")

        # Get the latest by date in filename
        files_with_dates = []
        for fname in files:
            try:
                date_str = fname.split("_")[1].split(".")[0]
                dt = datetime.strptime(date_str, "%Y%m%d")
                files_with_dates.append((dt, fname))
            except Exception:
                continue
        if not files_with_dates:
            raise FileNotFoundError("No valid ramsey_county geoparquet scrapes found in data/ramsey_county/")
        latest_fname = max(files_with_dates, key=lambda x: x[0])[1]
        fpath = os.path.join(save_dir, latest_fname)
        ramsey_county_gdf = gpd.read_parquet(fpath)
        display(ramsey_county_gdf.head())
    except Exception as e:
        raise RuntimeError(f"Failed to find any previous scraped data: {e}")


Layer metadata CRS WKID: None
⚠️ Could not detect layer CRS from metadata. Defaulting to EPSG:3857 (risky).
Total records in 12/query: 167662
Query response spatialReference: {'wkid': 4326, 'latestWkid': 4326}
Fetched records 0 to 1000
Fetched records 1000 to 2000
Fetched records 2000 to 3000
Fetched records 3000 to 4000
Fetched records 4000 to 5000
Fetched records 5000 to 6000
Fetched records 6000 to 7000
Fetched records 7000 to 8000
Fetched records 8000 to 9000
Fetched records 9000 to 10000
Fetched records 10000 to 11000
Fetched records 11000 to 12000
Fetched records 12000 to 13000
Fetched records 13000 to 14000
Fetched records 14000 to 15000
Fetched records 15000 to 16000
Fetched records 16000 to 17000
Fetched records 17000 to 18000
Fetched records 18000 to 19000
Fetched records 19000 to 20000
Fetched records 20000 to 21000
Fetched records 21000 to 22000
Fetched records 22000 to 23000
Fetched records 23000 to 24000
Fetched records 24000 to 25000
Fetched records 25000 to 26000
Fetche

Unnamed: 0,OBJECTID,CountyID,ParcelID,FIPsCodeParcelID,RollType,BuildingNumber,BuildingNumberSuffix,UnitType,UnitNumber,Unit,StreetPrefixDirection,StreetPrefixType,StreetName,StreetSuffixType,StreetSuffixDirection,StreetNameAll,SiteAddress,SiteCityNameUSPS,SiteCityNameCode,SiteCityName,SiteZIP5,SiteZIP4,SiteZIP,SiteCityStateZIP,OwnershipCategory,OwnerLastName,OwnerName,OwnerName1,OwnerName2,OwnerAddress1,OwnerAddress2,OwnerCityStateZIP,TaxName1,TaxName2,TaxAddress1,TaxAddress2,TaxCityStateZIP,HomesteadName1,HomesteadName2,HomesteadAddress1,HomesteadAddress2,HomesteadCityStateZIP,NeighborhoodCode,MunicipalCode,TIFDistrict,SchoolDistrictNumber,SchoolDistrictName,WatershedIDTax,WatershedDistrictNameTax,WatershedDistNamePoly,PlatID,PlatName,TaxDescription,Block,Lot,ParcelAcresDeed,ParcelSquareFeet,ParcelAcresPolygon,ParcelFrontage,TaxYear,EMVYear,EMVLand,EMVBuilding,EMVTotal,TotalTax,SpecialAssessmentDue,TaxCapacity,CostLandValue,TaxYear1,EMVYear1,EMVLand1,EMVBuilding1,EMVTotal1,TotalTax1,SpecialAssessmentDue1,TaxYear2,EMVYear2,EMVLand2,EMVBuilding2,EMVTotal2,TotalTax2,SpecialAssessmentDue2,LandmarkBusinessName,LandUseCode,LandUseCodeDescription,MultipleUseYN,UseTypeCode1,UseType1,UseTypeCode2,UseType2,UseTypeCode3,UseType3,UseTypeCode4,UseType4,TaxExemptYN,ExemptUse1,ExemptUse2,ExemptUse3,ExemptUse4,GreenAcresYN,OpenSpaceYN,AgriculturalPreserveYN,AgPreserveEnrolled,AgPreserveExpire,HomesteadYN,HomesteadDescription,StructureCode,StructureDescription,DwellingType,LivingUnit,HomeStyleCode,HomeStyleDescription,ExteriorWallCode,ExteriorWallDescription,Stories,RoomTotal,BedRoom,FamilyRoom,BasementYN,HeatSystemCode,HeatSystemType,HeatCode,HeatType,LivingAreaSquareFeet,BusinessSquareFeet,GarageYN,GarageSquareFeet,YearBuilt,EffectiveYearBuilt,TopologyCode,TopologyDescription,UtilityCode,UtilityDescription,LastSaleDate,SalePrice,InspectionYear,InspectionStatus,X,Y,Latitude,Longitude,Section,Township,Range,QuarterQuarter,PolygonPointRelationship,PropertyDataJoinDate,InspectionDataJoinDate,EditDate,geometry
0,30461582,27123,122922340241,27123-122922340241,RP,2480,,,,,,,7TH,AVE,E,7TH AVE E,2480 7TH AVE E,NORTH ST PAUL,2395261,NORTH SAINT PAUL,55109,2906,55109-2906,NORTH SAINT PAUL MN 55109-2906,Unknown,FLEX HOLDING LLC,FLEX HOLDING LLC,FLEX HOLDING LLC,,2480 7TH AVE E,,NORTH ST PAUL MN 55109-2906,FLEX HOLDING LLC,,2480 7TH AVE E,,NORTH ST PAUL MN 55109-2906,,,,,,CS414000,69,,622,North St. Paul/Maplewood/Oakdale School District,34.0,METRO WATERSHED,Ramsey-Washington Metro WSD,3889,"FIRST ADDITION TO NORTH ST.,PA","FIRST ADDITION TO NORTH ST.,PA LOTS 9 THRU LO...",4.0,9,0.41,17859.6,0.4,,2026.0,2025.0,144000.0,1934800.0,2078800.0,,,40826.0,144000.0,2025.0,2024.0,144000.0,2760600.0,2904600.0,96082.0,,2024.0,2023.0,144000.0,2773100.0,2917100.0,93424.0,,Refelx Medical Moding,340,MANUFACTURING AND ASSEMBLY LIGHT,N,234,3A INDUSTRIAL LAND AND BUILDING,,,,,,,N,,,,,N,N,N,,,N,,405.0,RESEARCH & DEVELOPMENT,,0.0,,,,,,,,,,,,,,,28650.0,N,,2010.0,2019.0,1,LEVEL,1,ALL PUBLIC,1255478000000.0,190000.0,2023,Property reviewed 3/12/2025,600102.11,179893.42,45.009573,-92.996316,12,29,22,SESW,1,1768522000000.0,1767312000000.0,1395531174000,"POLYGON ((-92.99633 45.00936, -92.99641 45.009..."
1,30461583,27123,283022130010,27123-283022130010,RP,3860,,,,,,,LABORE,RD,,LABORE RD,3860 LABORE RD,VADNAIS HEIGHTS,2397106,VADNAIS HEIGHTS,55110,4128,55110-4128,VADNAIS HEIGHTS MN 55110-4128,Unknown,SLP CENTER LLC,SLP CENTER LLC,SLP CENTER LLC,,3860 LABORE RD,,SAINT PAUL MN 55110-9787,SLP CENTER LLC,,3860 LABORE RD,,SAINT PAUL MN 55110-9787,,,,,,C0214000,89,,624,White Bear Lake School District,,,Vadnais Lake Area WMO,4724,REGISTERED LAND SURVEY 69,REGISTERED LAND SURVEY 69 TRACT A,,A,0.95,41382.0,0.97,,2026.0,2025.0,165500.0,248000.0,413500.0,,,7520.0,165500.0,2025.0,2024.0,165500.0,280900.0,446400.0,12527.78,314.22,2024.0,2023.0,165600.0,441400.0,607000.0,12347.44,64.56,Bald Eagle Siding,480,IND WAREHOUSE,N,234,3A INDUSTRIAL LAND AND BUILDING,,,,,,,N,,,,,N,N,N,,,N,,398.0,WAREHOUSE,,0.0,,,,,,,,,,,,,,,6844.0,N,,1989.0,1999.0,1,LEVEL,1,ALL PUBLIC,1435622000000.0,355000.0,2024,Property reviewed 9/8/2023,584801.71,197699.36,45.058591,-93.055191,28,30,22,SWNE,1,1768522000000.0,1767312000000.0,1395531174000,"POLYGON ((-93.05433 45.05898, -93.05568 45.058..."
2,30461584,27123,13022120066,27123-013022120066,RP,5929,,,,,,,NORWAY PINE,CT,,NORWAY PINE CT,5929 NORWAY PINE CT,WHITE BEAR TOWN,665981,WHITE BEAR TOWNSHIP,55110,2396,55110-2396,WHITE BEAR TOWNSHIP MN 55110-2396,Unknown,KEMP,JEFFREY L KEMP PEGGY L KEMP,JEFFREY L KEMP,PEGGY L KEMP,5929 NORWAY PINE CT,,WHITE BEAR LAKE MN 55110-2396,JEFFREY L KEMP,,5929 NORWAY PINE CT,,WHITE BEAR LAKE MN 55110-2396,PEGGY L KEMP,PEGGY L KEMP,5929 NORWAY PINE COURT,,WHITE BEAR LAKE MN 55110-2396,00036000,97,,624,White Bear Lake School District,38.0,RICE CREEK W/S,Rice Creek WSD,5810,"WHITE BEAR PONDS,PLAT TWO","WHITE BEAR PONDS,PLAT TWO LOT 19 BLK 1",1.0,19,0.32,13939.2,0.33,106.0,2026.0,2025.0,106300.0,354200.0,460500.0,,,4554.0,106300.0,2025.0,2024.0,106300.0,347100.0,453400.0,6078.0,,2024.0,2023.0,106300.0,327100.0,433400.0,5622.0,,Metlife,510,"SINGLE FAMILY DWELLING, PLATTED LOT",N,201,1A/1B/4BB RESIDENTIAL SINGLE UNIT,,,,,,,N,,,,,N,N,N,,,Y,FULL HOMESTEAD,,,"SINGLE FAMILY DWELLING, PLATTED LOT",1.0,7.0,TWO STORY,6.0,ALUM/VINYL,2.0,8.0,3.0,1.0,Y,1.0,HOT AIR,4.0,CENTW/AIR,1922.0,,Y,748.0,1996.0,2000.0,1,LEVEL,1,ALL PUBLIC,1124323000000.0,320000.0,2024,Property reviewed 8/27/2024,601126.15,220735.08,45.121568,-92.991593,1,30,22,NWNE,1,1768522000000.0,1767312000000.0,1395531355000,"POLYGON ((-92.99134 45.12143, -92.9917 45.1214..."
3,30461585,27123,293023420022,27123-293023420022,RP,441,,,,,,,OLD HIGHWAY 8,,NW,OLD HIGHWAY 8 NW,441 OLD HIGHWAY 8 NW,NEW BRIGHTON,2395187,NEW BRIGHTON,55112,3206,55112-3206,NEW BRIGHTON MN 55112-3206,Unknown,EVEREST PROPERTIES LLC,EVEREST PROPERTIES LLC,EVEREST PROPERTIES LLC,,212 BRIDGE ST,,SHOREVIEW MN 55126-2152,EVEREST PROPERTIES LLC,,212 BRIDGE ST,,SHOREVIEW MN 55126-2152,,,,,,C0117000,63,,621,Mounds View School District,38.0,RICE CREEK W/S,Rice Creek WSD,3446,SECTION 29 TOWN 30 RANGE 23,SECTION 29 TOWN 30 RANGE 23 EX THE S 41 FT OF...,,,0.72,31363.2,0.7,,2026.0,2025.0,157800.0,483800.0,641600.0,,,12082.0,157800.0,2025.0,2024.0,157800.0,529700.0,687500.0,19918.0,,2024.0,2023.0,157800.0,555400.0,713200.0,20388.0,,Acclaimed Printing [445],447,OFFICE BUILDING 1-2 STORIES,N,233,3A COMMERCIAL LAND AND BUILDING,,,,,,,N,,,,,N,N,N,,,N,,374.0,RETAIL MULTI OCCUP,,0.0,,,,,,,,,,,,,,,27610.0,N,,1962.0,1984.0,4,ROLLING,1,ALL PUBLIC,1002154000000.0,460250.0,2025,Property reviewed 8/14/2025,548044.67,196818.62,45.056496,-93.19743,29,30,23,NWSE,1,1768522000000.0,1767312000000.0,1395531355000,"POLYGON ((-93.19754 45.05633, -93.19792 45.056..."
4,30461586,27123,293023440037,27123-293023440037,RP,205,,,,,,,5TH,AVE,NW,5TH AVE NW,205 5TH AVE NW,NEW BRIGHTON,2395187,NEW BRIGHTON,55112,3267,55112-3267,NEW BRIGHTON MN 55112-3267,Unknown,MARS HAMMOND HOLDINGS LLC,MARS HAMMOND HOLDINGS LLC,MARS HAMMOND HOLDINGS LLC,,4319 W 1ST ST,,DULUTH MN 55807-2759,MARS HAMMOND HOLDINGS LLC,,4319 W 1ST ST,,DULUTH MN 55807-2759,,,,,,C0117000,63,,621,Mounds View School District,38.0,RICE CREEK W/S,Rice Creek WSD,5446,BRIGHTON CORPORATE PARK,BRIGHTON CORPORATE PARK LOT 1 BLK 1,1.0,1,2.13,92782.8,2.13,,2026.0,2025.0,464900.0,2758900.0,3223800.0,,,63726.0,464900.0,2025.0,2024.0,464900.0,2568500.0,3033400.0,93828.0,,2024.0,2023.0,464900.0,2571000.0,3035900.0,92424.0,,Accurate Components&Fasteners,480,IND WAREHOUSE,N,234,3A INDUSTRIAL LAND AND BUILDING,,,,,,,N,,,,,N,N,N,,,N,,398.0,WAREHOUSE,,0.0,,,,,,,,,,,,,,,30432.0,N,,1994.0,2005.0,1,LEVEL,1,ALL PUBLIC,1606176000000.0,2375000.0,2023,Property reviewed 11/9/2023,549484.4,195545.64,45.052995,-93.191871,29,30,23,SESE,1,1768522000000.0,1767312000000.0,1395531443000,"POLYGON ((-93.19107 45.05273, -93.19282 45.052..."


In [None]:
# =============================================================================
# STEP 2: Load Official Tax Base Data for Validation
# =============================================================================
# Source: Minnesota Department of Revenue Property Tax Data
# File: data/st_paul/data-portal-excel.xlsx

#Download data here: https://www.revenue.state.mn.us/property-tax-history-data

import pandas as pd

# Load St. Paul 2025 official tax data from Excel
excel_path = os.path.join("data", "st_paul", "data-portal-excel.xlsx")
city_town_df = pd.read_excel(excel_path, sheet_name="CityTown", header=0)

# Find St. Paul City 2025 row
st_paul_official = city_town_df[
    (city_town_df.iloc[:, 5] == 'ST PAUL CITY OF') & 
    (city_town_df.iloc[:, 1] == 2025)
].iloc[0]

# Extract key validation metrics
official_data = {
    'Estimated Market Value Total': st_paul_official['Estimated Market Value Total'],
    'Taxable Market Value Total': st_paul_official['Taxable Market Value Total'],
    'Local NTC Total (Tax Capacity)': st_paul_official['Local NTC by Class Total'],
    'TIF NTC': st_paul_official['TIF NTC'],
    'Fiscal Disparities NTC': st_paul_official['Fiscal Disparities Cont NTC'],
    'Taxable NTC': st_paul_official['Taxable NTC'],
    'City/Town NTC Levy': st_paul_official['City/Town NTC Levy'],
    'City/Town Avg Tax Rate': st_paul_official['City/Town Avg Local NTC Tax Rate'],
    'TIF Levy': st_paul_official['TIF Levy'],
}

print("=" * 60)
print("OFFICIAL ST. PAUL 2025 TAX BASE (MN Dept of Revenue)")
print("=" * 60)
for key, val in official_data.items():
    if isinstance(val, float) and val < 10:
        print(f"{key}: {val:.4f}")
    else:
        print(f"{key}: ${val:,.0f}")

# Store for later comparison
OFFICIAL_CITY_TAX_RATE = official_data['City/Town Avg Tax Rate']
OFFICIAL_CITY_LEVY = official_data['City/Town NTC Levy']
OFFICIAL_TAXABLE_NTC = official_data['Taxable NTC']
OFFICIAL_TIF_NTC = official_data['TIF NTC']
OFFICIAL_LOCAL_NTC = official_data['Local NTC Total (Tax Capacity)']

print("\n" + "=" * 60)
print("KEY INSIGHT: Parcels in TIF districts pay $0 to the city.")
print("TIF captures both county and city portions of taxes.")
print("=" * 60)

OFFICIAL ST. PAUL 2025 TAX BASE (MN Dept of Revenue)
Estimated Market Value Total: $35,832,893,273
Taxable Market Value Total: $34,673,452,267
Local NTC Total (Tax Capacity): $409,200,716
TIF NTC: $28,684,858
Fiscal Disparities NTC: $38,513,418
Taxable NTC: $342,002,440
City/Town NTC Levy: $180,226,074
City/Town Avg Tax Rate: 0.5270
TIF Levy: $41,313,536

KEY INSIGHT: Parcels in TIF districts pay $0 to the city.
TIF captures both county and city portions of taxes.


In [48]:
# Import all required modules and functions
from cloud_utils import get_feature_data, get_feature_data_with_geometry
from lvt_utils import (model_split_rate_tax, calculate_current_tax, model_full_building_abatement, 
                       model_stacking_improvement_exemption, categorize_property_type, extract_date_from_filename)
from census_utils import (get_census_data, get_census_blockgroups_shapefile, get_census_data_with_boundaries, 
                          match_to_census_blockgroups, calculate_median_percentage_by_quintile, 
                          match_parcels_to_demographics, create_demographic_summary)
from viz import (create_scatter_plot, plot_comparison, calculate_correlations, weighted_median, 
                 create_quintile_summary, plot_quintile_analysis, create_property_category_chart, 
                 create_map_visualization, calculate_block_group_summary, filter_data_for_analysis)

scrape_data = 0

## Step 1: Getting the Data


Data comes from Ramsey County ArcGIS FeatureServer: https://maps.co.ramsey.mn.us/arcgis/rest/services/OpenData/OpenData/FeatureServer

Tax statements can be verified at: https://beacon.schneidercorp.com/application.aspx?app=RamseyCountyMN&PageType=Search

In [55]:
# Filter to St. Paul parcels only
st_paul_gdf = ramsey_county_gdf[ramsey_county_gdf["SiteCityName"] == "SAINT PAUL"].copy()

print(f"Total St. Paul parcels: {len(st_paul_gdf):,}")
print(f"\nColumns available: {len(st_paul_gdf.columns)}")
st_paul_gdf.head()
st_paul_gdf

Total St. Paul parcels: 83,395

Columns available: 150


Unnamed: 0,OBJECTID,CountyID,ParcelID,FIPsCodeParcelID,RollType,BuildingNumber,BuildingNumberSuffix,UnitType,UnitNumber,Unit,StreetPrefixDirection,StreetPrefixType,StreetName,StreetSuffixType,StreetSuffixDirection,StreetNameAll,SiteAddress,SiteCityNameUSPS,SiteCityNameCode,SiteCityName,SiteZIP5,SiteZIP4,SiteZIP,SiteCityStateZIP,OwnershipCategory,OwnerLastName,OwnerName,OwnerName1,OwnerName2,OwnerAddress1,OwnerAddress2,OwnerCityStateZIP,TaxName1,TaxName2,TaxAddress1,TaxAddress2,TaxCityStateZIP,HomesteadName1,HomesteadName2,HomesteadAddress1,HomesteadAddress2,HomesteadCityStateZIP,NeighborhoodCode,MunicipalCode,TIFDistrict,SchoolDistrictNumber,SchoolDistrictName,WatershedIDTax,WatershedDistrictNameTax,WatershedDistNamePoly,PlatID,PlatName,TaxDescription,Block,Lot,ParcelAcresDeed,ParcelSquareFeet,ParcelAcresPolygon,ParcelFrontage,TaxYear,EMVYear,EMVLand,EMVBuilding,EMVTotal,TotalTax,SpecialAssessmentDue,TaxCapacity,CostLandValue,TaxYear1,EMVYear1,EMVLand1,EMVBuilding1,EMVTotal1,TotalTax1,SpecialAssessmentDue1,TaxYear2,EMVYear2,EMVLand2,EMVBuilding2,EMVTotal2,TotalTax2,SpecialAssessmentDue2,LandmarkBusinessName,LandUseCode,LandUseCodeDescription,MultipleUseYN,UseTypeCode1,UseType1,UseTypeCode2,UseType2,UseTypeCode3,UseType3,UseTypeCode4,UseType4,TaxExemptYN,ExemptUse1,ExemptUse2,ExemptUse3,ExemptUse4,GreenAcresYN,OpenSpaceYN,AgriculturalPreserveYN,AgPreserveEnrolled,AgPreserveExpire,HomesteadYN,HomesteadDescription,StructureCode,StructureDescription,DwellingType,LivingUnit,HomeStyleCode,HomeStyleDescription,ExteriorWallCode,ExteriorWallDescription,Stories,RoomTotal,BedRoom,FamilyRoom,BasementYN,HeatSystemCode,HeatSystemType,HeatCode,HeatType,LivingAreaSquareFeet,BusinessSquareFeet,GarageYN,GarageSquareFeet,YearBuilt,EffectiveYearBuilt,TopologyCode,TopologyDescription,UtilityCode,UtilityDescription,LastSaleDate,SalePrice,InspectionYear,InspectionStatus,X,Y,Latitude,Longitude,Section,Township,Range,QuarterQuarter,PolygonPointRelationship,PropertyDataJoinDate,InspectionDataJoinDate,EditDate,geometry
21,30461603,27123,262922330217,27123-262922330217,RP,1864,,,,,,,REANEY,AVE,,REANEY AVE,1864 REANEY AVE,ST PAUL,02396511,SAINT PAUL,55119,3430,55119-3430,SAINT PAUL MN 55119-3430,Unknown,VANG,SIA VANG SANG P LEE,SIA VANG,SANG P LEE,1864 REANEY AVE,,SAINT PAUL MN 55119-3430,SIA VANG,,1864 REANEY AVE,,SAINT PAUL MN 55119-3430,SANG P LEE,SIA VANG,1864 REANEY AVE,,SAINT PAUL MN 55119-3430,00011001,02,,0625,St. Paul School District,034,METRO WATERSHED,Ramsey-Washington Metro WSD,00302,AURORA ADDITION,AURORA ADDITION PART LYING E OF THE FOL DESC ...,8,9,0.10,4356.0,0.10,38.0,2026.0,2025.0,30000.0,284800.0,314800.0,,,2966.0,30000.0,2025.0,2024.0,30000.0,287200.0,317200.0,4884.38,215.62,2024.0,2023.0,25000.0,255600.0,280600.0,4062.08,167.92,,510,"SINGLE FAMILY DWELLING, PLATTED LOT",N,201,1A/1B/4BB RESIDENTIAL SINGLE UNIT,,,,,,,N,,,,,N,N,N,,,Y,FULL HOMESTEAD,,,"SINGLE FAMILY DWELLING, PLATTED LOT",1.0,07,TWO STORY,6,ALUM/VINYL,2.0,7.0,3.0,,Y,1,HOT AIR,4,CENTW/AIR,1536.0,,N,,2010.0,2010.0,2,ABOVE STREET,1,ALL PUBLIC,1.403136e+12,190000.0,2021,Property reviewed 7/26/2021,593620.13,163145.63,44.963723,-93.021665,26,29,22,SWSW,1,1.768522e+12,1.767312e+12,1395531084000,"POLYGON ((-93.0216 44.96356, -93.02175 44.9635..."
59,30461641,27123,262922130031,27123-262922130031,RP,2125,,,,,,,LA CROSSE,AVE,,LA CROSSE AVE,2125 LA CROSSE AVE,ST PAUL,02396511,SAINT PAUL,55119,3351,55119-3351,SAINT PAUL MN 55119-3351,Unknown,WILLIAMS,AMIE N WILLIAMS,AMIE N WILLIAMS,,2125 LA CROSSE AVE,,SAINT PAUL MN 55119-3351,AMIE N WILLIAMS,,2125 LA CROSSE AVE,,SAINT PAUL MN 55119-3351,AMIE N WILLIAMS,,2125 LA CROSSE AVE,,SAINT PAUL MN 55119-3351,00010000,02,,0625,St. Paul School District,034,METRO WATERSHED,Ramsey-Washington Metro WSD,00601,NORTON'S LINCOLN PARK,NORTON'S LINCOLN PARK E 5 FT OF LOT 13 AND AL...,6,13,0.21,9147.6,0.21,60.0,2026.0,2025.0,30000.0,235600.0,265600.0,,,2430.0,30000.0,2025.0,2024.0,30000.0,232000.0,262000.0,3917.90,342.10,2024.0,2023.0,25000.0,240500.0,265500.0,3816.64,285.36,,510,"SINGLE FAMILY DWELLING, PLATTED LOT",N,201,1A/1B/4BB RESIDENTIAL SINGLE UNIT,,,,,,,N,,,,,N,N,N,,,Y,FULL HOMESTEAD,,,"SINGLE FAMILY DWELLING, PLATTED LOT",1.0,03,ONE STORY,6,ALUM/VINYL,1.0,6.0,3.0,1.0,Y,1,HOT AIR,3,CENTRAL,950.0,,N,,1951.0,1975.0,1,LEVEL,1,ALL PUBLIC,1.377130e+12,142500.0,2023,Property reviewed 8/17/2023,596405.30,166017.20,44.971564,-93.010855,26,29,22,SWNE,1,1.768522e+12,1.767312e+12,1395531267000,"POLYGON ((-93.01074 44.97136, -93.01095 44.971..."
176,30461758,27123,262922330218,27123-262922330218,RP,1860,,,,,,,REANEY,AVE,E,REANEY AVE E,1860 REANEY AVE E,ST PAUL,02396511,SAINT PAUL,55119,3430,55119-3430,SAINT PAUL MN 55119-3430,Unknown,THAO,GAO LEE THAO,GAO LEE THAO,,1860 REANEY AVE E,,SAINT PAUL MN 55119-6026,GAO LEE THAO,,1860 REANEY AVE E,,SAINT PAUL MN 55119-6026,GAO LEE THAO,,1860 REANEY AVE E,,SAINT PAUL MN 55119-6026,00011001,02,,0625,St. Paul School District,034,METRO WATERSHED,Ramsey-Washington Metro WSD,00302,AURORA ADDITION,AURORA ADDITION PART LYING W OF THE FOL DESC ...,8,9,0.12,5227.2,0.12,42.0,2026.0,2025.0,30000.0,208300.0,238300.0,,,2132.0,30000.0,2025.0,2024.0,30000.0,200700.0,230700.0,3369.90,342.10,2024.0,2023.0,25000.0,210500.0,235500.0,3328.64,285.36,,510,"SINGLE FAMILY DWELLING, PLATTED LOT",N,201,1A/1B/4BB RESIDENTIAL SINGLE UNIT,,,,,,,N,,,,,N,N,N,,,Y,FULL HOMESTEAD,,,"SINGLE FAMILY DWELLING, PLATTED LOT",1.0,06,BUNGALOW,6,ALUM/VINYL,1.0,5.0,3.0,,Y,1,HOT AIR,4,CENTW/AIR,1102.0,,N,,1936.0,1958.0,1,LEVEL,1,ALL PUBLIC,1.500250e+12,155000.0,2021,Property reviewed 7/26/2021,593580.33,163147.40,44.963729,-93.021819,26,29,22,SWSW,1,1.768522e+12,1.767312e+12,1395531965000,"POLYGON ((-93.02174 44.9639, -93.02174 44.9638..."
204,30461786,27123,082822120114,27123-082822120114,RP,227,,,,,,,WINIFRED,ST,E,WINIFRED ST E,227 WINIFRED ST E,ST PAUL,02396511,SAINT PAUL,55107,2329,55107-2329,SAINT PAUL MN 55107-2329,Unknown,MOORE,PATRICK MOORE,PATRICK MOORE,,227 WINIFRED ST E,,ST PAUL MN 55107-2329,PATRICK MOORE,,227 WINIFRED ST E,,ST PAUL MN 55107-2329,PATRICK MOORE,BOBBIE L MOORE SPOUSE,227 WINIFRED ST E,,ST PAUL MN 55107-2329,50015002,03,,0625,St. Paul School District,,,Lower Mississippi River WMO,05829,"PROSPECT PLATEAU,BLOCK 6 7 8 9","PROSPECT PLATEAU,BLOCK 6 7 8 9 EX NWLY 39 FT;...",1,9,0.18,7840.8,0.18,61.0,2026.0,2025.0,32100.0,258400.0,290500.0,,,2701.0,32100.0,2025.0,2024.0,32100.0,251000.0,283100.0,4218.78,431.22,2024.0,2023.0,24800.0,225800.0,250600.0,3512.18,335.82,,520,TWO FAMILY DWELLING - UP/DWN,N,203,1A/1B/4B1 RESIDENTIAL 1-3 UNITS,,,,,,,N,,,,,N,N,N,,,Y,FULL HOMESTEAD,,,TWO FAMILY DWELLING - UP/DWN,2.0,07,TWO STORY,5,STUCCO,2.0,10.0,4.0,,Y,,,3,CENTRAL,2592.0,,N,,1908.0,1945.0,1,LEVEL,1,ALL PUBLIC,1.230595e+12,115900.0,2021,Property reviewed 10/4/2021,579998.97,151150.12,44.930979,-93.074462,08,28,22,NWNE,1,1.768522e+12,1.767312e+12,1395532129000,"POLYGON ((-93.07425 44.9311, -93.07446 44.9307..."
300,30461882,27123,292922420344,27123-292922420344,RP,579,,,,,,,WELLS,ST,,WELLS ST,579 WELLS ST,ST PAUL,02396511,SAINT PAUL,55130,4134,55130-4134,SAINT PAUL MN 55130-4134,Unknown,AMERICAN INDIAN FAMILY CENTER,AMERICAN INDIAN FAMILY CENTER,AMERICAN INDIAN FAMILY CENTER,,579 WELLS ST,,ST PAUL MN 55130-4134,AMERICAN INDIAN FAMILY CENTER,,579 WELLS ST,,ST PAUL MN 55130-4134,,,,,,C0415000,05,,0625,St. Paul School District,070,CAPITOL REGION W/S,Capitol Region WSD,00209,CHAS WEIDE'S SUB OF B35 ARLING,"CHAS WEIDE'S SUB OF B35 ARLING LOTS 16,17 AND...",35,16,0.31,13503.6,0.30,,2026.0,2025.0,81900.0,516300.0,598200.0,,,,81900.0,2025.0,2024.0,81900.0,574900.0,656800.0,,,2024.0,2023.0,81900.0,577100.0,659000.0,,,,687,"EXEMPT, OFFICE BUILDING",N,931,5E CHARITABLE INSTITUTION,,,,,,,Y,5E CHARITABLE INSTITUTION,,,,N,N,N,,,N,,353,OFFICE BLDG L/R 1-4S,,0.0,,,,,,,,,,,,,,,10332.0,N,,1972.0,1983.0,4,ROLLING,1,ALL PUBLIC,1.164845e+12,100000.0,2025,Property reviewed 5/16/2025,579615.03,164568.36,44.967784,-93.075748,29,29,22,NWSE,1,1.768522e+12,1.767312e+12,1395532621000,"POLYGON ((-93.07585 44.96762, -93.07596 44.967..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167656,30629238,27123,172823130281,27123-172823130281,RP,905,C,,,,,,WOODLAWN,AVE,,WOODLAWN AVE,905 C WOODLAWN AVE,ST PAUL,02396511,SAINT PAUL,55116,,55116,SAINT PAUL MN 55116,Unknown,ZHANG,LEI ZHANG JOANNA YUANYUAN GUAN,LEI ZHANG,JOANNA YUANYUAN GUAN,905 WOODLAWN AVE # C,,SAINT PAUL MN 55116-3144,LEI ZHANG,,905 WOODLAWN AVE # C,,SAINT PAUL MN 55116-3144,,,,,,80839005,15,,0625,St. Paul School District,070,CAPITOL REGION W/S,Capitol Region WSD,07856,CIC NO 839 HIGHLAND BRIDGE ROWHOMES 6TH,CIC NO 839 HIGHLAND BRIDGE ROWHOMES 6TH LOT 3...,1,3,0.03,1306.8,0.03,20.0,2026.0,2025.0,175000.0,499000.0,674000.0,,,7175.0,175000.0,,,,,,,,,,,,,,,,570,TOWNHOME-INNER UNIT,N,201,1A/1B/4BB RESIDENTIAL SINGLE UNIT,,,,,,,N,,,,,N,N,N,,,N,,,,TOWNHOME-INNER UNIT,1.0,04,ROW,2,BRICK,3.0,8.0,3.0,1.0,Y,1,HOT AIR,4,CENTW/AIR,2576.0,,Y,160.0,2024.0,2024.0,1,LEVEL,1,ALL PUBLIC,1.717027e+12,689990.0,,,548488.64,144581.27,44.913228,-93.196180,17,28,23,SWNE,1,1.768522e+12,,1768312434000,"POLYGON ((-93.19628 44.91317, -93.19628 44.913..."
167657,30629239,27123,172823130283,27123-172823130283,RP,905,E,,,,,,WOODLAWN,AVE,,WOODLAWN AVE,905 E WOODLAWN AVE,ST PAUL,02396511,SAINT PAUL,55116,,55116,SAINT PAUL MN 55116,Unknown,KELLY,RANDY MATTHEW KELLY KATHRYN LEE KELLY,RANDY MATTHEW KELLY,KATHRYN LEE KELLY,905 WOODLAWN AVE UNIT E,,SAINT PAUL MN 55116-3144,RANDY MATTHEW KELLY,,905 WOODLAWN AVE UNIT E,,SAINT PAUL MN 55116-3144,,,,,,80839005,15,,0625,St. Paul School District,070,CAPITOL REGION W/S,Capitol Region WSD,07856,CIC NO 839 HIGHLAND BRIDGE ROWHOMES 6TH,CIC NO 839 HIGHLAND BRIDGE ROWHOMES 6TH LOT 5...,1,5,0.03,1306.8,0.03,20.0,2026.0,2025.0,175000.0,520000.0,695000.0,,,7438.0,175000.0,,,,,,,,,,,,,,,,570,TOWNHOME-INNER UNIT,N,201,1A/1B/4BB RESIDENTIAL SINGLE UNIT,,,,,,,N,,,,,N,N,N,,,N,,,,TOWNHOME-INNER UNIT,1.0,04,ROW,2,BRICK,3.0,8.0,3.0,1.0,Y,1,HOT AIR,4,CENTW/AIR,2560.0,,Y,160.0,2024.0,2024.0,1,LEVEL,1,ALL PUBLIC,,,,,548502.30,144543.69,44.913125,-93.196127,17,28,23,SWNE,1,1.768522e+12,,1768312875000,"POLYGON ((-93.19626 44.91312, -93.19608 44.913..."
167658,30629240,27123,172823130282,27123-172823130282,RP,905,D,,,,,,WOODLAWN,AVE,,WOODLAWN AVE,905 D WOODLAWN AVE,ST PAUL,02396511,SAINT PAUL,55116,,55116,SAINT PAUL MN 55116,Unknown,MCGRAW,JOHN MCGRAW ANDREA MCGRAW,JOHN MCGRAW,ANDREA MCGRAW,905 WOODLAWN AVE UNIT D,,SAINT PAUL MN 55116-3144,JOHN MCGRAW,,905 WOODLAWN AVE UNIT D,,SAINT PAUL MN 55116-3144,,,,,,80839005,15,,0625,St. Paul School District,070,CAPITOL REGION W/S,Capitol Region WSD,07856,CIC NO 839 HIGHLAND BRIDGE ROWHOMES 6TH,CIC NO 839 HIGHLAND BRIDGE ROWHOMES 6TH LOT 4...,1,4,0.03,1306.8,0.03,20.0,2026.0,2025.0,175000.0,454000.0,629000.0,,,6613.0,175000.0,,,,,,,,,,,,,,,,570,TOWNHOME-INNER UNIT,N,201,1A/1B/4BB RESIDENTIAL SINGLE UNIT,,,,,,,N,,,,,N,N,N,,,N,,,,TOWNHOME-INNER UNIT,1.0,04,ROW,2,BRICK,2.0,7.0,3.0,1.0,Y,1,HOT AIR,4,CENTW/AIR,2000.0,,Y,160.0,2024.0,2024.0,1,LEVEL,1,ALL PUBLIC,1.743120e+12,636000.0,,,548495.48,144562.49,44.913176,-93.196153,17,28,23,SWNE,1,1.768522e+12,,1768312839000,"POLYGON ((-93.19611 44.91322, -93.19605 44.913..."
167659,30629241,27123,172823130284,27123-172823130284,RP,905,F,,,,,,WOODLAWN,AVE,,WOODLAWN AVE,905 F WOODLAWN AVE,ST PAUL,02396511,SAINT PAUL,55116,,55116,SAINT PAUL MN 55116,Unknown,SCHACHTMAN,HEIDI A SCHACHTMAN SCOTT C SCHACHTMAN,HEIDI A SCHACHTMAN,SCOTT C SCHACHTMAN,905F WOODLAWN AVE,,SAINT PAUL MN 55116-3144,HEIDI A SCHACHTMAN,,905F WOODLAWN AVE,,SAINT PAUL MN 55116-3144,,,,,,80839005,15,,0625,St. Paul School District,070,CAPITOL REGION W/S,Capitol Region WSD,07856,CIC NO 839 HIGHLAND BRIDGE ROWHOMES 6TH,CIC NO 839 HIGHLAND BRIDGE ROWHOMES 6TH LOT 6...,1,6,0.03,1306.8,0.03,20.0,2026.0,2025.0,175000.0,645000.0,820000.0,,,9000.0,175000.0,,,,,,,,,,,,,,,,570,TOWNHOME-INNER UNIT,N,201,1A/1B/4BB RESIDENTIAL SINGLE UNIT,,,,,,,N,,,,,N,N,N,,,N,,,,TOWNHOME-INNER UNIT,1.0,04,ROW,2,BRICK,3.0,8.0,3.0,1.0,Y,1,HOT AIR,4,CENTW/AIR,2560.0,,Y,160.0,2024.0,2024.0,1,LEVEL,1,ALL PUBLIC,1.718928e+12,832940.0,,,548509.16,144524.87,44.913073,-93.196101,17,28,23,SWNE,1,1.768522e+12,,1768313130000,"POLYGON ((-93.19623 44.91307, -93.19605 44.913..."


In [None]:

# STEP 3: Identify Parcels That Pay City Taxes


# Flag parcels in TIF districts
st_paul_gdf['in_tif'] = (
    st_paul_gdf['TIFDistrict'].notna() & 
    (st_paul_gdf['TIFDistrict'].str.strip() != '') &
    (st_paul_gdf['TIFDistrict'] != 'None')
)

# Flag fully tax-exempt parcels
st_paul_gdf['fully_exempt'] = (st_paul_gdf['TaxExemptYN'] == 'Y')

# Flag parcels that pay city taxes (not in TIF, not fully exempt)
st_paul_gdf['pays_city_tax'] = (
    ~st_paul_gdf['in_tif'] & 
    ~st_paul_gdf['fully_exempt'] &
    st_paul_gdf['TaxCapacity'].notna() &
    (st_paul_gdf['TaxCapacity'] > 0)
)

# Summary counts
print("=" * 60)
print("PARCEL CLASSIFICATION SUMMARY")
print("=" * 60)
print(f"Total St. Paul parcels:        {len(st_paul_gdf):,}")
print(f"  - In TIF districts:          {st_paul_gdf['in_tif'].sum():,} ({st_paul_gdf['in_tif'].mean()*100:.1f}%)")
print(f"  - Fully tax exempt:          {st_paul_gdf['fully_exempt'].sum():,} ({st_paul_gdf['fully_exempt'].mean()*100:.1f}%)")
print(f"  - Pays city tax:             {st_paul_gdf['pays_city_tax'].sum():,} ({st_paul_gdf['pays_city_tax'].mean()*100:.1f}%)")

# List unique TIF districts
print(f"\nUnique TIF districts: {st_paul_gdf[st_paul_gdf['in_tif']]['TIFDistrict'].nunique()}")
print("\nTIF District breakdown:")
print(st_paul_gdf[st_paul_gdf['in_tif']]['TIFDistrict'].value_counts().head(10))

PARCEL CLASSIFICATION SUMMARY
Total St. Paul parcels:        83,395
  - In TIF districts:          2,706 (3.2%)
  - Fully tax exempt:          3,406 (4.1%)
  - Pays city tax:             77,214 (92.6%)

Unique TIF districts: 61

TIF District breakdown:
TIFDistrict
282:MN EVENTS DIST                728
225:Rvrfrt Ren - Upper Landing    592
0322-0 STP Ford site              224
228:Emerald Pk - Emerald          212
243:Shep-Dav Owner Occu Hsg #1    145
241:No. Quad Exp #3               141
234:Phalen Village - Uncommit     111
236: JJ Hill Redev #1              94
358-0 LANDMARK TOWERS              81
233 NO. QUAD EXP 1 - DAKOTA        79
Name: count, dtype: int64


In [61]:
st_paul_gdf['TIFDistrict'].unique()

array([None, '234:Phalen Village - Uncommit',
       '224: North Quadrant - Essex', '222:Great Northern Bus Ctr',
       '244:Shep-Dav Rental Housing #2', '225:Rvrfrt Ren - Upper Landing',
       '243:Shep-Dav Owner Occu Hsg #1', '271:#4-4 Carleton Place Lofts',
       '282:MN EVENTS DIST', '213:BLOCK #39/LAWSON;OLD-6N',
       '249:Westminster', '241:No. Quad Exp #3', '279:Minnesota Building',
       '240:Bridgecreek Senior Place', '278:Highland Pointe Lofts',
       '260:North Quad - Sibley Apt', '0293-0 STP Chatsworth Redev',
       '0305-0 STP West Side Flats', '264:Riverfront Ren - Llewellyn',
       '245:Shep-Dav Sr Rent Housing #', '263:Riverfront Ren - Uncommitt',
       '261:Riverfront Ren - US Bank', '0304-0 STP Schmidt Brewery; 03',
       '0325-0 STP SCHMIDT', '212:Block #4/Mn Mutual;Old-5N',
       '236: JJ Hill Redev #1', '262:Riverfront Ren - Drake Mar',
       '089:WESTGATE;OLD-89', '254:Great North Bus Ctr Phs 2',
       '257: Payne Phalen', '269:Phalen Village - Rose 

In [62]:
# =============================================================================
# STEP 4: Compare Scraped Data to Official Tax Base
# =============================================================================

# Calculate totals from our scraped data
scraped_total_ntc = st_paul_gdf['TaxCapacity'].sum()
scraped_tif_ntc = st_paul_gdf[st_paul_gdf['in_tif']]['TaxCapacity'].sum()
scraped_exempt_ntc = st_paul_gdf[st_paul_gdf['fully_exempt']]['TaxCapacity'].sum()
scraped_city_taxable_ntc = st_paul_gdf[st_paul_gdf['pays_city_tax']]['TaxCapacity'].sum()

scraped_total_emv = st_paul_gdf['EMVTotal1'].sum()
scraped_land_value = st_paul_gdf['EMVLand1'].sum()
scraped_building_value = st_paul_gdf['EMVBuilding1'].sum()

print("=" * 60)
print("VALIDATION: SCRAPED DATA vs OFFICIAL TAX BASE")
print("=" * 60)
print(f"\n{'Metric':<35} {'Scraped':>18} {'Official':>18} {'Diff %':>10}")
print("-" * 85)

# Total Tax Capacity (NTC)
diff_pct = (scraped_total_ntc - OFFICIAL_LOCAL_NTC) / OFFICIAL_LOCAL_NTC * 100
print(f"{'Total Tax Capacity (NTC)':<35} ${scraped_total_ntc:>15,.0f} ${OFFICIAL_LOCAL_NTC:>15,.0f} {diff_pct:>9.1f}%")

# TIF NTC
diff_pct = (scraped_tif_ntc - OFFICIAL_TIF_NTC) / OFFICIAL_TIF_NTC * 100
print(f"{'TIF Tax Capacity':<35} ${scraped_tif_ntc:>15,.0f} ${OFFICIAL_TIF_NTC:>15,.0f} {diff_pct:>9.1f}%")

# City-taxable NTC (need to account for Fiscal Disparities in official)
official_city_taxable = OFFICIAL_LOCAL_NTC - OFFICIAL_TIF_NTC - official_data['Fiscal Disparities NTC']
diff_pct = (scraped_city_taxable_ntc - OFFICIAL_TAXABLE_NTC) / OFFICIAL_TAXABLE_NTC * 100
print(f"{'City-Taxable NTC':<35} ${scraped_city_taxable_ntc:>15,.0f} ${OFFICIAL_TAXABLE_NTC:>15,.0f} {diff_pct:>9.1f}%")

# Total EMV
diff_pct = (scraped_total_emv - official_data['Estimated Market Value Total']) / official_data['Estimated Market Value Total'] * 100
print(f"{'Total EMV':<35} ${scraped_total_emv:>15,.0f} ${official_data['Estimated Market Value Total']:>15,.0f} {diff_pct:>9.1f}%")

# City Tax Revenue estimate
city_tax_rate = OFFICIAL_CITY_TAX_RATE
scraped_city_levy = scraped_city_taxable_ntc * city_tax_rate
diff_pct = (scraped_city_levy - OFFICIAL_CITY_LEVY) / OFFICIAL_CITY_LEVY * 100
print(f"\n{'Estimated City Levy':<35} ${scraped_city_levy:>15,.0f} ${OFFICIAL_CITY_LEVY:>15,.0f} {diff_pct:>9.1f}%")

print("\n" + "=" * 60)
print("NOTE: Differences are expected due to:")
print("  - Fiscal Disparities (not captured in parcel-level data)")
print("  - Timing differences between data sources")
print("  - Parcels with special classifications")
print("=" * 60)

VALIDATION: SCRAPED DATA vs OFFICIAL TAX BASE

Metric                                         Scraped           Official     Diff %
-------------------------------------------------------------------------------------
Total Tax Capacity (NTC)            $    402,720,000 $    409,200,716      -1.6%
TIF Tax Capacity                    $     29,901,051 $     28,684,858       4.2%
City-Taxable NTC                    $    371,286,055 $    342,002,440       8.6%
Total EMV                           $ 42,835,106,800 $ 35,832,893,273      19.5%

Estimated City Levy                 $    195,667,751 $    180,226,074       8.6%

NOTE: Differences are expected due to:
  - Fiscal Disparities (not captured in parcel-level data)
  - Timing differences between data sources
  - Parcels with special classifications


In [63]:
# Verify EMV components add up correctly
# EMVLand1 + EMVBuilding1 should equal EMVTotal1

valid = (st_paul_gdf['EMVTotal1'] != 0) & st_paul_gdf['EMVTotal1'].notnull()
valid = valid & st_paul_gdf['EMVLand1'].notnull() & st_paul_gdf['EMVBuilding1'].notnull()

sum_components = st_paul_gdf.loc[valid, 'EMVLand1'] + st_paul_gdf.loc[valid, 'EMVBuilding1']
total = st_paul_gdf.loc[valid, 'EMVTotal1']

matches = (sum_components == total)
percent_exact = matches.sum() / valid.sum() * 100

print(f"Data Quality Check: EMVLand1 + EMVBuilding1 = EMVTotal1")
print(f"Match rate: {percent_exact:.2f}%")

# Summary of land vs building values for city-taxable parcels
city_taxable = st_paul_gdf[st_paul_gdf['pays_city_tax']]
print(f"\nCity-Taxable Parcels Value Summary:")
print(f"  Total Land Value (EMVLand1):     ${city_taxable['EMVLand1'].sum():,.0f}")
print(f"  Total Building Value (EMVBuilding1): ${city_taxable['EMVBuilding1'].sum():,.0f}")
print(f"  Total EMV:                       ${city_taxable['EMVTotal1'].sum():,.0f}")
print(f"  Land as % of Total:              {city_taxable['EMVLand1'].sum() / city_taxable['EMVTotal1'].sum() * 100:.1f}%")

Data Quality Check: EMVLand1 + EMVBuilding1 = EMVTotal1
Match rate: 100.00%

City-Taxable Parcels Value Summary:
  Total Land Value (EMVLand1):     $6,518,240,900
  Total Building Value (EMVBuilding1): $25,731,223,300
  Total EMV:                       $32,249,464,200
  Land as % of Total:              20.2%


In [64]:
# Should the ARTAXBAS be replaced with capacity?
# Calculate the division and scaling
st_paul_gdf['citytax_per_arbase'] = (st_paul_gdf['TotalTax1'] / st_paul_gdf['TaxCapacity'])


# Drop inf, -inf for descriptive stats and suppress SettingWithCopyWarning for summary only
stats_series = st_paul_gdf['citytax_per_arbase'].replace([float('inf'), float('-inf')], pd.NA).dropna()

summary = stats_series.describe(percentiles=[0.25, 0.5, 0.75])
print("TotalTax1 / TaxCapacity summary stats:")
print(f"min:    {summary['min']}")
print(f"25%:    {summary['25%']}")
print(f"median: {summary['50%']}")
print(f"75%:    {summary['75%']}")
print(f"max:    {summary['max']}")


TotalTax1 / TaxCapacity summary stats:
min:    0.0
25%:    1.5
median: 1.5661505443085515
75%:    1.6269835950665308
max:    258.1272727272727


## Step 5: Calculate Current City Tax Revenue

Now we calculate the current city tax revenue using only parcels that actually pay city taxes (excluding TIF districts and fully exempt parcels).

**Key Parameters:**
- City Tax Rate: 0.527 (from MN Dept of Revenue)
- Tax Base: Tax Capacity (NTC) of non-TIF, non-exempt parcels

In [65]:
# Show TIF district summary
print("TIF Districts in St. Paul:")
print(f"Total TIF parcels: {st_paul_gdf['in_tif'].sum():,}")
print(f"TIF Tax Capacity: ${st_paul_gdf[st_paul_gdf['in_tif']]['TaxCapacity'].sum():,.0f}")
print(f"\nTIF Districts:")
print(st_paul_gdf[st_paul_gdf['in_tif']].groupby('TIFDistrict')['TaxCapacity'].agg(['count', 'sum']).sort_values('sum', ascending=False).head(15))

TIF Districts in St. Paul:
Total TIF parcels: 2,706
TIF Tax Capacity: $29,901,051

TIF Districts:
                                count        sum
TIFDistrict                                     
282:MN EVENTS DIST                700  5906392.0
0322-0 STP Ford site              212  4498597.0
225:Rvrfrt Ren - Upper Landing    583  2216532.0
249:Westminster                    16  1533414.0
213:BLOCK #39/LAWSON;OLD-6N         3  1498634.0
212:Block #4/Mn Mutual;Old-5N       1  1055250.0
234:Phalen Village - Uncommit     109   869439.0
0317-0 STP Custom House/Post O      3   733259.0
0321-0 STP WABASHA AND 6TH          2   691708.0
210:Riverbend                       5   637586.0
352-0 STP HRA FARWELL YARDS         2   577779.0
222:Great Northern Bus Ctr          5   576222.0
0345-0 ST PAUL FORD SITE HSG #      3   569174.0
228:Emerald Pk - Emerald          211   553987.0
0319-0 STP 2700 University          3   542133.0


In [74]:

# Calculate Current City Tax for Each Parcel

city_tax_rate = OFFICIAL_CITY_TAX_RATE  # 0.527 from official data

# Calculate city tax for all parcels (0 for non-city-taxable)
st_paul_gdf['city_tax'] = 0.0
st_paul_gdf.loc[st_paul_gdf['pays_city_tax'], 'city_tax'] = (
    st_paul_gdf.loc[st_paul_gdf['pays_city_tax'], 'TaxCapacity'] * city_tax_rate
)

# Create working dataset of only city-taxable parcels for LVT modeling
st_paul_city = st_paul_gdf[st_paul_gdf['pays_city_tax']].copy()

# Calculate current city tax revenue
current_city_revenue = st_paul_city['city_tax'].sum()

print("=" * 60)
print("CURRENT CITY TAX CALCULATION")
print("=" * 60)
print(f"City Tax Rate: {city_tax_rate:.4f}")
print(f"Parcels paying city tax: {len(st_paul_city):,}")
print(f"City-Taxable Tax Capacity: ${st_paul_city['TaxCapacity'].sum():,.0f}")
print(f"\nCalculated City Tax Revenue: ${current_city_revenue:,.0f}")
print(f"Official City NTC Levy:      ${OFFICIAL_CITY_LEVY:,.0f}")
print(f"Difference:                  ${current_city_revenue - OFFICIAL_CITY_LEVY:,.0f} ({(current_city_revenue - OFFICIAL_CITY_LEVY) / OFFICIAL_CITY_LEVY * 100:.1f}%)")

# Show top 5 city taxpayers
print("\nTop 5 City Tax Payers:")
top5 = st_paul_city.nlargest(5, 'city_tax')[['ParcelID', 'SiteAddress', 'city_tax', 'TaxCapacity', 'EMVTotal1']]
print(top5.to_string(index=False))

CURRENT CITY TAX CALCULATION
City Tax Rate: 0.5270
Parcels paying city tax: 77,214
City-Taxable Tax Capacity: $371,286,055

Calculated City Tax Revenue: $195,667,751
Official City NTC Levy:      $180,226,074
Difference:                  $15,441,677 (8.6%)

Top 5 City Tax Payers:
    ParcelID      SiteAddress    city_tax  TaxCapacity  EMVTotal1
122823110008 155 RANDOLPH AVE 1050289.920    1992960.0 40141700.0
312922110068  295 PHALEN BLVD  492499.418     934534.0 45100000.0
272923140032 1180 CUSHING CIR  440065.026     835038.0 66500000.0
032823220172   1555 SELBY AVE  429999.326     815938.0 62250000.0
312922431544    101 10TH ST E  417236.967     791721.0 61750000.0


In [67]:
# Verify a TIF parcel has $0 city tax
tif_example = st_paul_gdf[st_paul_gdf['in_tif']].head(1)
if len(tif_example) > 0:
    print("Example TIF parcel (should have $0 city tax):")
    print(f"  ParcelID: {tif_example['ParcelID'].iloc[0]}")
    print(f"  TIF District: {tif_example['TIFDistrict'].iloc[0]}")
    print(f"  TaxCapacity: ${tif_example['TaxCapacity'].iloc[0]:,.0f}")
    print(f"  City Tax: ${tif_example['city_tax'].iloc[0]:,.2f}")
    print(f"  TotalTax1 (all jurisdictions): ${tif_example['TotalTax1'].iloc[0]:,.2f}")

Example TIF parcel (should have $0 city tax):
  ParcelID: 222922430040
  TIF District: 234:Phalen Village - Uncommit
  TaxCapacity: $11,946
  City Tax: $0.00
  TotalTax1 (all jurisdictions): $19,981.92


In [68]:
# Assign property categories based on LandUseCodeDescription
# This will be used for analyzing tax impacts by property type

def categorize_st_paul_property(row):
    """Categorize St. Paul properties based on LandUseCodeDescription"""
    desc = str(row.get('LandUseCodeDescription', '')).upper()
    use_type = str(row.get('UseType1', '')).upper()
    
    # Residential categories
    if 'SINGLE FAMILY' in desc or 'RESIDENTIAL' in desc or '1A RES' in use_type:
        return 'Single Family Residential'
    elif 'DUPLEX' in desc or 'TRIPLEX' in desc or 'FOURPLEX' in desc or '2-4 UNIT' in desc:
        return 'Small Multi-Family (2-4 units)'
    elif 'APARTMENT' in desc or 'CONDO' in desc or 'MULTI' in desc or '5+ UNIT' in desc:
        return 'Large Multi-Family (5+ units)'
    
    # Commercial
    elif any(x in desc for x in ['RETAIL', 'STORE', 'SHOP', 'RESTAURANT', 'HOTEL', 'MOTEL', 'OFFICE']):
        return 'Commercial/Retail'
    
    # Industrial
    elif any(x in desc for x in ['INDUSTRIAL', 'WAREHOUSE', 'MANUFACTURING', 'FACTORY']):
        return 'Industrial'
    
    # Parking
    elif 'PARKING' in desc:
        return 'Parking'
    
    # Vacant
    elif 'VACANT' in desc or row.get('EMVBuilding1', 0) == 0:
        return 'Vacant Land'
    
    # Other
    else:
        return 'Other'

st_paul_city['PROPERTY_CATEGORY'] = st_paul_city.apply(categorize_st_paul_property, axis=1)

print("Property Category Distribution (City-Taxable Parcels):")
print(st_paul_city['PROPERTY_CATEGORY'].value_counts())

Property Category Distribution (City-Taxable Parcels):
PROPERTY_CATEGORY
Single Family Residential        58898
Other                             8761
Large Multi-Family (5+ units)     6939
Commercial/Retail                 1008
Industrial                         811
Vacant Land                        779
Parking                             18
Name: count, dtype: int64


In [69]:
# Summary of current city tax by property category
print("Current City Tax by Property Category:")
category_summary = st_paul_city.groupby('PROPERTY_CATEGORY').agg({
    'city_tax': ['count', 'sum', 'mean', 'median'],
    'TaxCapacity': 'sum',
    'EMVTotal1': 'sum',
    'EMVLand1': 'sum'
}).round(2)

category_summary.columns = ['Parcel_Count', 'Total_City_Tax', 'Mean_City_Tax', 'Median_City_Tax', 
                            'Total_Tax_Capacity', 'Total_EMV', 'Total_Land_Value']
category_summary['Land_Pct'] = (category_summary['Total_Land_Value'] / category_summary['Total_EMV'] * 100).round(1)
category_summary = category_summary.sort_values('Total_City_Tax', ascending=False)

print(category_summary.to_string())

Current City Tax by Property Category:
                               Parcel_Count  Total_City_Tax  Mean_City_Tax  Median_City_Tax  Total_Tax_Capacity     Total_EMV  Total_Land_Value  Land_Pct
PROPERTY_CATEGORY                                                                                                                                        
Single Family Residential             58898    1.038088e+08        1762.52          1416.05         196980636.0  1.920773e+10      3.952233e+09      20.6
Other                                  8761    3.856281e+07        4401.65          1797.60          73174221.0  6.004135e+09      1.063464e+09      17.7
Large Multi-Family (5+ units)          6939    1.958780e+07        2822.86          1026.07          37168503.0  3.776229e+09      5.045338e+08      13.4
Commercial/Retail                      1008    1.633335e+07       16203.72          6263.40          30993072.0  1.644884e+09      4.503243e+08      27.4
Industrial                           

In [70]:
# Mark vacant land based on EMVBuilding1 = 0
st_paul_city.loc[st_paul_city['EMVBuilding1'] == 0, 'PROPERTY_CATEGORY'] = 'Vacant Land'

print("Updated Property Category Distribution:")
print(st_paul_city['PROPERTY_CATEGORY'].value_counts())

Updated Property Category Distribution:
PROPERTY_CATEGORY
Single Family Residential        57818
Other                             8761
Large Multi-Family (5+ units)     6736
Vacant Land                       2362
Commercial/Retail                 1007
Industrial                         512
Parking                             18
Name: count, dtype: int64


In [73]:
calc_levy = st_paul_city["city_tax"].sum()
calc_base = st_paul_city["TaxCapacity"].sum()
print(calc_levy)
print(calc_base)

print("Implied rate from your taxable base:", calc_levy / calc_base)
print("Official rate you used:", city_tax_rate)

195667750.98500004
371286055.0
Implied rate from your taxable base: 0.5270000000000001
Official rate you used: 0.527


## Step 4: Modeling the Split-Rate Land Value Tax

Now for the exciting part - modeling the LVT shift! We'll create a revenue-neutral policy that taxes land at 4 times the rate of buildings.



In [71]:
# Run the split-rate land value tax model at a 2:1 land:improvement ratio (revenue-neutral)

# Set the ratio of land millage to improvement millage
land_improvement_ratio = 2  # Land is taxed at 2x the rate of improvements

# Run the split-rate calculation using model_split_rate from lvt_utils.py
from lvt_utils import model_split_rate_tax

land_millage, improvement_millage, split_rate_revenue, st_paul_gdf = model_split_rate_tax(
    df=st_paul_gdf,
    land_value_col="CURRLAND",
    improvement_value_col="CURRIMPR",
    current_revenue=current_revenue,
    land_improvement_ratio=land_improvement_ratio,
    exemption_col="EXMPFMV",
    exemption_flag_col="full_exmp",
    percentage_cap_col=None  # set to a column name for a cap if desired
)


split_rate_revenue = st_paul_gdf['new_tax'].sum()

print(f"Total split-rate tax revenue (2:1 ratio): ${split_rate_revenue:,.2f}")


NameError: name 'current_revenue' is not defined

In [None]:
# Calculate and report the sum of the absolute difference between current_tax and new_tax,
# and what percent of the sum of current_tax that represents.

# Calculate absolute difference per parcel
st_paul_gdf['abs_tax_diff'] = (st_paul_gdf['current_tax'] - st_paul_gdf['new_tax']).abs()

# Sum absolute differences
total_abs_tax_diff = st_paul_gdf['abs_tax_diff'].sum()

# Calculate what percent of total current tax that represents
percent_of_current = (total_abs_tax_diff / current_revenue) * 100 if current_revenue != 0 else np.nan

print(f"Sum of absolute value of current_tax minus new_tax: ${total_abs_tax_diff:,.2f}")
print(f"That is {percent_of_current:.2f}% of the sum of current_tax.")


In [None]:

# Before summary: set PROPERTY_CATEGORY to 'vacant' where CURRLAND is zero
if "CURRLAND" in st_paul_gdf.columns and "PROPERTY_CATEGORY" in st_paul_gdf.columns:
    st_paul_gdf.loc[st_paul_gdf["CURRIMPR"] == 0, "PROPERTY_CATEGORY"] = "Vacant Land"

# Calculate and print the summary table for total tax impact (using lvt_utils)
from lvt_utils import calculate_category_tax_summary, print_category_tax_summary

# This will use the default 'PROPERTY_CATEGORY' column if present
output_summary = calculate_category_tax_summary(
    st_paul_gdf,
    category_col='PROPERTY_CATEGORY' if 'PROPERTY_CATEGORY' in st_paul_gdf.columns else st_paul_gdf.columns[0],  # fallback to first col if not present
    current_tax_col='current_tax',
    new_tax_col='new_tax'
)
print_category_tax_summary(output_summary, "Total Tax Impact by Property Category (All sp_ Levies)")



In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Only include categories with property_count > 50
filtered = output_summary[output_summary['property_count'] > 50].copy()

categories = filtered['PROPERTY_CATEGORY'].tolist()
counts = filtered['property_count'].tolist()
median_pct_change = filtered['median_tax_change_pct'].tolist()
median_dollar_change = filtered['median_tax_change'].tolist()
total_tax_change = filtered['total_tax_change'].tolist() if 'total_tax_change' in filtered.columns else (filtered['mean_tax_change'] * filtered['property_count']).tolist()

# Sort by median_pct_change ascending
sorted_idx = np.argsort(median_pct_change)
categories = [categories[i] for i in sorted_idx]
counts = [counts[i] for i in sorted_idx]
median_pct_change = [median_pct_change[i] for i in sorted_idx]
median_dollar_change = [median_dollar_change[i] for i in sorted_idx]
total_tax_change = [total_tax_change[i] for i in sorted_idx]

# Custom color: anything above 0 is dark red, below 0 is green
bar_colors = []
for val in median_pct_change:
    if val > 0:
        bar_colors.append("#8B0000")  # dark red
    else:
        bar_colors.append("#228B22")  # professional green

# Bar settings
bar_height = 0.75
fig_height = len(categories) * 0.8 + 1.2
right_col_pad = 120  # more padding for right column
fig, ax = plt.subplots(figsize=(17, fig_height))  # wider for right column

y = np.arange(len(categories))

# Draw bars
ax.barh(
    y, median_pct_change, color=bar_colors, edgecolor='none',
    height=bar_height, alpha=0.92, linewidth=0, zorder=2
)

# Remove all spines and ticks for a clean look
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

# Adjusted vertical spacing
cat_offset = 0.18   # less space between category and median
med_offset = -0.03  # median just below category
count_offset = -0.23  # more space below median for parcels

# For right column: position for total tax change
max_abs = max(abs(min(median_pct_change)), abs(max(median_pct_change)))
right_col_x = max_abs + right_col_pad

# Add Net Change header at the top of the right column
ax.text(
    right_col_x, len(categories) - 0.5, "Net Change", va='bottom', ha='left',
    fontsize=15, fontweight='bold', color='black', fontname='Arial'
)

for i, (cat, val, count, med_dol, tot_change) in enumerate(zip(categories, median_pct_change, counts, median_dollar_change, total_tax_change)):
    # Format median dollar and percent change together
    if med_dol >= 0:
        med_dol_str = f"${med_dol:,.0f}"
    else:
        med_dol_str = f"-${abs(med_dol):,.0f}"
    pct_str = f"{val:+.1f}%"
    median_combo = f"Median: {med_dol_str}, {pct_str}"

    # Position: right of bar for positive, left for negative
    if val < 0:
        xpos = val - 2.5
        ha = 'right'
    else:
        xpos = val + 2.5
        ha = 'left'
    # Category name (bold, bigger)
    ax.text(
        xpos, y[i]+cat_offset, cat, va='center', ha=ha,
        fontsize=14, fontweight='bold', color='#222',
        fontname='Arial'
    )
    # Median (dollar + percent, bold, black, just below category)
    ax.text(
        xpos, y[i]+med_offset, median_combo, va='center', ha=ha,
        fontsize=12, fontweight='bold', color='black',
        fontname='Arial'
    )
    # Count (bold, smaller, below median)
    ax.text(
        xpos, y[i]+count_offset, f"{count:,} parcels", va='center', ha=ha,
        fontsize=11, fontweight='bold', color='#888',
        fontname='Arial'
    )
    # Net change column, always right-aligned in a new column, black text, no "Total:"
    if tot_change >= 0:
        tot_change_str = f"${tot_change:,.0f}"
    else:
        tot_change_str = f"-${abs(tot_change):,.0f}"
    ax.text(
        right_col_x, y[i], tot_change_str, va='center', ha='left',
        fontsize=13, fontweight='bold', color='black',
        fontname='Arial'
    )

# Set x limits for symmetry, make bars longer, and leave space for right column
ax.set_xlim(-right_col_x, right_col_x + 60)

# Remove axis labels/ticks
ax.set_yticks([])
ax.set_xticks([])

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Use output_summary to generate categories and percent increase/decrease, filtering to count > 50

# Filter to property_count > 50
summary_filtered = output_summary[output_summary['property_count'] > 50].copy()

# Sort by pct_increase_gt_threshold ascending (smallest percent increase first)
summary_sorted = summary_filtered.sort_values('pct_increase_gt_threshold', ascending=True)

categories_sorted = summary_sorted['PROPERTY_CATEGORY'].tolist()
pct_increase_sorted = summary_sorted['pct_increase_gt_threshold'].tolist()
pct_decrease_sorted = summary_sorted['pct_decrease_gt_threshold'].tolist()

# Convert to integers for display
pct_increase_int_sorted = [int(round(x)) for x in pct_increase_sorted]
pct_decrease_int_sorted = [int(round(x)) for x in pct_decrease_sorted]

y = np.arange(len(categories_sorted))

fig, ax = plt.subplots(figsize=(8, 6))

# Use specified colors
color_increase = "#8B0000"  # dark red
color_decrease = "#228B22"  # professional green

# Plot left (decrease) bars (green, to the left)
ax.barh(
    y, 
    [-v for v in pct_decrease_sorted], 
    color=color_decrease, 
    edgecolor='none', 
    height=0.7
)

# Plot right (increase) bars (red, to the right)
ax.barh(
    y, 
    pct_increase_sorted, 
    color=color_increase, 
    edgecolor='none', 
    height=0.7
)

# Add percent labels (integer, no decimals), smaller Arial font
for i, (inc, dec) in enumerate(zip(pct_increase_int_sorted, pct_decrease_int_sorted)):
    # Left side (decrease)
    if dec > 0:
        ax.text(
            -dec - 2, y[i], f"{dec}%", 
            va='center', ha='right', 
            fontsize=8, fontweight='normal', color=color_decrease, fontname='Arial'
        )
    # Right side (increase)
    if inc > 0:
        ax.text(
            inc + 2, y[i], f"{inc}%", 
            va='center', ha='left', 
            fontsize=8, fontweight='normal', color=color_increase, fontname='Arial'
        )

# Add category name at end of right bar, bold, smaller Arial, further from percent
for i, (cat, inc) in enumerate(zip(categories_sorted, pct_increase_sorted)):
    xpos = inc + 18 if inc > 0 else 18
    ax.text(
        xpos, y[i], cat, 
        va='center', ha='left', 
        fontsize=9, fontweight='bold', color='#222', fontname='Arial'
    )

# Remove all spines, ticks, and axis lines for minimalist look
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

# Remove grid, axis, and titles
ax.set_yticks([])
ax.set_xticks([])
ax.set_ylabel('')
ax.set_xlabel('')
ax.set_title('')

# Set xlim for symmetry
max_val = max(max(pct_increase_sorted), max(pct_decrease_sorted))
ax.set_xlim(-max_val-20, max_val+48)

# --- Add custom titles above left and right bars ---
# Make the titles a little bit bigger and closer to the center
title_fontsize = 10  # increased from 8
title_color = 'black'
title_fontweight = 'normal'
title_fontname = 'Arial'

# Compute center x for both titles, but offset slightly left/right of center
title_y = len(categories_sorted) - 0.2

# Left title (above left bars), closer to center
left_title_x = -max_val * 0.45
ax.text(
    left_title_x, title_y, 
    "Percent of parcels\ndecreasing >10%", 
    ha='center', va='bottom', fontsize=title_fontsize, fontweight=title_fontweight, 
    color=title_color, fontname=title_fontname, 
    bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.15')
)

# Right title (above right bars), closer to center
right_title_x = max_val * 0.45
ax.text(
    right_title_x, title_y, 
    "Percent of parcels\nincreasing >10%", 
    ha='center', va='bottom', fontsize=title_fontsize, fontweight=title_fontweight, 
    color=title_color, fontname=title_fontname, 
    bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.15')
)

plt.tight_layout()
plt.show()


## Step 6: Adding Geographic Context

To make our analysis spatially-aware, we need to add geographic boundaries to our parcel data. This enables us to:

- **Create maps** showing tax changes across the city
- **Analyze patterns by neighborhood** or district  
- **Combine with demographic data** for equity analysis
- **Present results visually** to stakeholders

We'll fetch the parcel boundary data from the same ArcGIS service that contains the geometric information for each property.


### Merging Tax Analysis with Geographic Data

Here we combine our tax analysis results with the geographic boundaries. This creates a spatially-enabled dataset that allows us to:

1. **Map tax changes** across South Bend
2. **Identify spatial patterns** in tax impacts
3. **Prepare for demographic analysis** by having geographic context

The merge should give us the same number of records as our original analysis, now with geographic coordinates for each parcel.


In [None]:
import matplotlib.pyplot as plt
import geopandas as gpd

# Calculate the total bounds of st_paul_gdf: (minx, miny, maxx, maxy)
minx, miny, maxx, maxy = st_paul_gdf.total_bounds

# Create a rectangular polygon for the bounds for visualization
from shapely.geometry import box
bounds_gdf = gpd.GeoDataFrame(
    {"geometry": [box(minx, miny, maxx, maxy)]},
    crs=st_paul_gdf.crs
)

# Plot the Baltimore boundary on a world map using OpenStreetMap (OSM) as a backdrop.
try:
    import contextily as ctx

    # Project the bounds to Web Mercator for OSM compatibility
    bounds_webm = bounds_gdf.to_crs(epsg=4326)
    # Also plot a world background for extra context (we use a wide extent)
    fig, ax = plt.subplots(figsize=(8, 8))

    # Plot OSM basemap for the world region
    dummy_world_poly = gpd.GeoDataFrame(
        geometry=[box(-20026376, -20048966, 20026376, 20048966)],  # Full Mercator extent
        crs="EPSG:3857"
    )
    dummy_world_poly.boundary.plot(ax=ax, color="lightgray", linewidth=0.3, zorder=1)

    # Add the bounding box of Baltimore on top
    bounds_webm.boundary.plot(ax=ax, color="red", linewidth=2, zorder=10)
    
    # autoscale to include some margin outside the bounding box
    expand = 1.5
    xmid = (minx + maxx) / 2
    ymid = (miny + maxy) / 2
    width = (maxx - minx) * expand
    height = (maxy - miny) * expand

    bounds = box(
        xmid - width / 2,
        ymid - height / 2,
        xmid + width / 2,
        ymid + height / 2
    )
    bounds_webm_margin = gpd.GeoDataFrame({'geometry': [bounds]}, crs=st_paul_gdf.crs).to_crs(epsg=3857)

    # Set plot extent so you can see the region in context
    bl, tr = bounds_webm_margin.total_bounds.reshape(2,2)
    ax.set_xlim(bl[0], tr[0])
    ax.set_ylim(bl[1], tr[1])

    # Add the OSM basemap for real-world context!
    ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, alpha=1)
    ax.set_title("Baltimore Bounding Box on OpenStreetMap (global context!)")
    plt.xlabel("")
    plt.ylabel("")
except ImportError:
    ax = bounds_gdf.plot(
        facecolor='none', edgecolor='red', linewidth=2, figsize=(8, 8)
    )
    ax.set_title("Baltimore Parcel Data: Bounding Box (no OSM)")
plt.show()

# Print the geographic coordinates (bounding box) of st_paul_gdf
print("Baltimore parcel data geographic bounds (minx, miny, maxx, maxy):")
print(st_paul_gdf.total_bounds)

# Print a sample of coordinates from all parcels (e.g., the centroid of each geometry)
centroids = st_paul_gdf.geometry.centroid
print("Sample parcel centroids (longitude, latitude):")
print(centroids.head())


In [None]:
# Get census data for Spokane County (FIPS code: 53063)
census_data, census_boundaries = get_census_data_with_boundaries(
    fips_code='24510',  # Maryland (24) + Baltimore City (510)
    year=2022
)
# Set CRS for census boundaries before merging
census_boundaries = census_boundaries.set_crs(epsg=4326)  # Assuming WGS84 coordinate system
boundary_gdf = st_paul_gdf.to_crs(epsg=4326)
  # Set same CRS for boundary data

# Merge census data with our parcel boundaries
df = match_to_census_blockgroups(
    gdf=boundary_gdf,
    census_gdf=census_boundaries,
    join_type="left"
)

print(f"Number of census blocks: {len(census_boundaries)}")
print(f"Number of census data: {len(census_data)}")
print(f"Number of parcels with census data: {len(df)}")

## Step 7: Demographic and Equity Analysis

One of the most important aspects of LVT analysis is understanding the **equity implications** - how does the tax shift affect different income levels and demographic groups?

### Adding Census Data

We'll match each property to its Census Block Group and pull demographic data including:
- **Median household income** 
- **Racial/ethnic composition**
- **Population characteristics**

### Why This Matters

Policy makers need to understand:
- Does the LVT shift disproportionately burden low-income neighborhoods?
- Are there racial equity implications?  
- Does the policy align with broader equity goals?

**Note**: You'll need a Census API key for this section. Get one free at: https://api.census.gov/data/key_signup.html


In [None]:
print("DataFrame columns:")
print(df.columns.tolist())


### Exploring the Enhanced Dataset

With census data merged in, our dataset now contains both property tax information and demographic context. Let's explore what variables we now have available for analysis.

This enhanced dataset allows us to examine relationships between:
- Property characteristics and demographics
- Tax impacts and neighborhood income levels
- Geographic patterns in tax burden shifts


In [None]:
# Display all columns with maximum width
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
display(df.head())


### Viewing the Complete Dataset

Let's examine our enhanced dataset with all the variables we've created and merged. This gives us a comprehensive view of each property with:

- **Property characteristics** (type, value, location)
- **Current tax calculations** 
- **New LVT calculations**
- **Tax change impacts**
- **Demographic context** (income, race/ethnicity)

This rich dataset forms the foundation for sophisticated equity and impact analysis.


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def filter_data(df):
    """Filter data to remove negative or zero median incomes and create non-vacant subset (also with positive income)"""
    df_filtered = df[df['median_income'] > 0].copy()
    non_vacant_df = df[(df['PROPERTY_CATEGORY'] != 'Vacant Land') & (df['median_income'] > 0)].copy()
    return df_filtered, non_vacant_df

def calculate_block_group_summary(df):
    """Calculate summary statistics for census block groups, excluding negative/zero median incomes"""
    # Only include block groups with positive median income
    df = df[df['median_income'] > 0].copy()
    summary = df.groupby('std_geoid').agg(
        median_income=('median_income', 'first'),
        minority_pct=('minority_pct', 'first'),
        black_pct=('black_pct', 'first'),
        total_current_tax=('current_tax', 'sum'),
        total_new_tax=('new_tax', 'sum'),
        mean_tax_change=('tax_change', 'mean'),
        median_tax_change=('tax_change', 'median'),
        median_tax_change_pct=('tax_change_pct', 'median'),
        parcel_count=('tax_change', 'count'),
        has_vacant_land=('PROPERTY_CATEGORY', lambda x: 'Vacant Land' in x.values)
    ).reset_index()
    # Exclude block groups with non-positive median income (shouldn't be needed, but for safety)
    summary = summary[summary['median_income'] > 0].copy()
    summary['mean_tax_change_pct'] = ((summary['total_new_tax'] - summary['total_current_tax']) / 
                                    summary['total_current_tax']) * 100
    return summary

def create_scatter_plot(data, x_col, y_col, ax, title, xlabel, ylabel):
    """Create a scatter plot with trend line, excluding negative/zero incomes"""
    # Exclude rows with non-positive x_col (e.g., median_income)
    data = data[data[x_col] > 0].copy()
    sns.scatterplot(
        data=data,
        x=x_col,
        y=y_col,
        size='parcel_count',
        sizes=(20, 200),
        alpha=0.7,
        ax=ax
    )
    
    ax.axhline(y=0, color='r', linestyle='--')
    
    x = data[x_col].dropna()
    y = data[y_col].dropna()
    mask = ~np.isnan(x) & ~np.isnan(y)
    
    if len(x[mask]) > 1:
        z = np.polyfit(x[mask], y[mask], 1)
        p = np.poly1d(z)
        ax.plot(x[mask], p(x[mask]), "r--")
    
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

def plot_comparison(data1, data2, x_col, y_col, title_prefix, xlabel):
    """Create side-by-side comparison plots, excluding negative/zero incomes"""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))
    
    create_scatter_plot(data1, x_col, y_col, ax1, 
                       f'{title_prefix} - All Properties', xlabel, 'Mean Tax Change (%)')
    create_scatter_plot(data2, x_col, y_col, ax2,
                       f'{title_prefix} - Excluding Vacant Land', xlabel, 'Mean Tax Change (%)')
    
    plt.tight_layout()
    plt.show()

def calculate_correlations(data1, data2):
    """Calculate correlations between variables, excluding negative/zero incomes"""
    correlations = {}
    for df, suffix in [(data1, 'all'), (data2, 'non_vacant')]:
        # Exclude rows with non-positive median_income for correlation
        df_corr = df[df['median_income'] > 0].copy()
        correlations[f'income_mean_{suffix}'] = df_corr[['median_income', 'mean_tax_change_pct']].corr().iloc[0, 1]
        correlations[f'income_median_{suffix}'] = df_corr[['median_income', 'median_tax_change_pct']].corr().iloc[0, 1]
        correlations[f'minority_mean_{suffix}'] = df_corr[['minority_pct', 'mean_tax_change_pct']].corr().iloc[0, 1]
        correlations[f'black_mean_{suffix}'] = df_corr[['black_pct', 'mean_tax_change_pct']].corr().iloc[0, 1]
    return correlations

def weighted_median(values, weights):
    """Compute the weighted median of values with corresponding weights."""
    # Remove NaNs
    mask = (~np.isnan(values)) & (~np.isnan(weights))
    values = np.array(values)[mask]
    weights = np.array(weights)[mask]
    if len(values) == 0:
        return np.nan
    sorter = np.argsort(values)
    values = values[sorter]
    weights = weights[sorter]
    cumsum = np.cumsum(weights)
    cutoff = weights.sum() / 2.0
    return values[np.searchsorted(cumsum, cutoff)]

def create_quintile_summary(df, group_col, value_col):
    """Create summary statistics by quintiles, using mean/weighted-median tax change percent, excluding negative/zero incomes for income-based quintiles"""
    # If grouping by income, exclude non-positive values
    if group_col == 'median_income':
        df = df[df['median_income'] > 0].copy()
    df[f'{group_col}_quintile'] = pd.qcut(df[group_col], 5, 
                                         labels=["Q1 (Lowest)", "Q2", "Q3", "Q4", "Q5 (Highest)"])
    
    def weighted_median_tax_change_pct(subdf):
        # Use parcel_count as weights if available, else weight each row equally
        if 'parcel_count' in subdf.columns:
            weights = subdf['parcel_count']
        else:
            weights = np.ones(len(subdf))
        return weighted_median(subdf['tax_change_pct'], weights)
    
    # For this context, each row is a parcel, so weight by 1 (or by parcel_count if already aggregated)
    summary = df.groupby(f'{group_col}_quintile').apply(
        lambda g: pd.Series({
            'count': g['tax_change'].count(),
            'mean_tax_change_pct': g['tax_change_pct'].mean(),
            'median_tax_change_pct': weighted_median(g['tax_change_pct'], np.ones(len(g))),
            'mean_value': g[value_col].mean()
        })
    ).reset_index()
    
    return summary

# Main execution
gdf_filtered, non_vacant_gdf = filter_data(df)
print(f"Number of rows in gdf_filtered: {len(gdf_filtered)}")
print(f"Number of rows in non_vacant_gdf: {len(non_vacant_gdf)}")

# Calculate block group summaries (all with positive median_income only)
census_block_groups = calculate_block_group_summary(gdf_filtered)
non_vacant_block_summary = calculate_block_group_summary(non_vacant_gdf)

# Create comparison plots (all with positive median_income only)
plot_comparison(census_block_groups, non_vacant_block_summary, 
               'median_income', 'mean_tax_change_pct', 
               'Mean Tax Change vs. Median Income', 
               'Median Income by Census Block Group ($)')

plot_comparison(census_block_groups, non_vacant_block_summary,
               'minority_pct', 'mean_tax_change_pct',
               'Mean Tax Change vs. Minority Percentage',
               'Minority Population Percentage by Census Block Group')

plot_comparison(census_block_groups, non_vacant_block_summary,
               'black_pct', 'mean_tax_change_pct',
               'Mean Tax Change vs. Black Percentage',
               'Black Population Percentage by Census Block Group')

# Calculate and print correlations (all with positive median_income only)
correlations = calculate_correlations(census_block_groups, non_vacant_block_summary)
for key, value in correlations.items():
    print(f"Correlation {key}: {value:.4f}")

# Create and display quintile summaries (income quintiles exclude negative/zero incomes)
income_quintile_summary = create_quintile_summary(gdf_filtered, 'median_income', 'median_income')
non_vacant_income_quintile_summary = create_quintile_summary(non_vacant_gdf, 'median_income', 'median_income')
minority_quintile_summary = create_quintile_summary(gdf_filtered, 'minority_pct', 'minority_pct')
non_vacant_minority_quintile_summary = create_quintile_summary(non_vacant_gdf, 'minority_pct', 'minority_pct')

print("\nTax impact by income quintile (all properties):")
display(income_quintile_summary)
print("\nTax impact by income quintile (excluding vacant land):")
display(non_vacant_income_quintile_summary)
print("\nTax impact by minority percentage quintile (all properties):")
display(minority_quintile_summary)
print("\nTax impact by minority percentage quintile (excluding vacant land):")
display(non_vacant_minority_quintile_summary)


In [None]:
# Plot 1: Median Income Quintiles vs. Mean Tax Change Percent (Census Block Groups)

plt.figure(figsize=(10, 6))
plt.plot(
    income_quintile_summary['median_income_quintile'],
    income_quintile_summary['mean_tax_change_pct'],
    marker='o',
    label='All Properties'
)
plt.plot(
    non_vacant_income_quintile_summary['median_income_quintile'],
    non_vacant_income_quintile_summary['mean_tax_change_pct'],
    marker='o',
    label='Excluding Vacant Land'
)
plt.xlabel('Median Income Quintile')
plt.ylabel('Mean Tax Change ($)')
plt.title('Mean Tax Change by Median Income Quintile (Census Block Groups)')
plt.legend()
# Remove grid
# Ensure x-axis at y=0 if negative values present
ymin = min(
    income_quintile_summary['mean_tax_change_pct'].min(),
    non_vacant_income_quintile_summary['mean_tax_change_pct'].min()
)
ymax = max(
    income_quintile_summary['mean_tax_change_pct'].max(),
    non_vacant_income_quintile_summary['mean_tax_change_pct'].max()
)
if ymin < 0 < ymax:
    plt.axhline(0, color='black', linewidth=1, linestyle='dotted')
plt.tight_layout()
plt.show()

# Plot 2: Minority Percentage Quintiles vs. Mean Tax Change Percent (Census Block Groups)

plt.figure(figsize=(10, 6))
plt.plot(
    minority_quintile_summary['minority_pct_quintile'],
    minority_quintile_summary['mean_tax_change_pct'],
    marker='o',
    label='All Properties'
)
plt.plot(
    non_vacant_minority_quintile_summary['minority_pct_quintile'],
    non_vacant_minority_quintile_summary['mean_tax_change_pct'],
    marker='o',
    label='Excluding Vacant Land'
)
plt.xlabel('Minority Percentage Quintile')
plt.ylabel('Mean Tax Change ($)')
plt.title('Mean Tax Change by Minority Percentage Quintile (Census Block Groups)')
plt.legend()
# Remove grid
# Ensure x-axis at y=0 if negative values present
ymin2 = min(
    minority_quintile_summary['mean_tax_change_pct'].min(),
    non_vacant_minority_quintile_summary['mean_tax_change_pct'].min()
)
ymax2 = max(
    minority_quintile_summary['mean_tax_change_pct'].max(),
    non_vacant_minority_quintile_summary['mean_tax_change_pct'].max()
)
if ymin2 < 0 < ymax2:
    plt.axhline(0, color='black', linewidth=1, linestyle='dotted')
plt.tight_layout()
plt.show()


In [None]:
# Plot: Median Tax Change by Neighborhood Median Income Excluding Vacant Land

plt.figure(figsize=(10, 6))
plt.plot(
    non_vacant_income_quintile_summary['median_income_quintile'],
    non_vacant_income_quintile_summary['median_tax_change_pct'],
    marker='o',
    label='Excluding Vacant Land'
)
plt.xlabel('Median Income Quintile')
plt.ylabel('Median Tax Change ($)')
plt.title('Median Tax Change by Neighborhood Median Income Excluding Vacant Land')
ymin = non_vacant_income_quintile_summary['median_tax_change_pct'].min()
ymax = non_vacant_income_quintile_summary['median_tax_change_pct'].max()
# Ensure 0 is included on the y-axis
plt.ylim(min(ymin, 0), max(ymax, 0) if ymax < 0 else max(ymax, 0, 1.05*ymax))
plt.axhline(0, color='black', linewidth=1, linestyle='dotted')
plt.tight_layout()
plt.show()

# Plot: Median Tax Change by Minority Percentage Quintile Excluding Vacant Land

plt.figure(figsize=(10, 6))
plt.plot(
    non_vacant_minority_quintile_summary['minority_pct_quintile'],
    non_vacant_minority_quintile_summary['median_tax_change_pct'],
    marker='o',
    label='Excluding Vacant Land'
)
plt.xlabel('Minority Percentage Quintile')
plt.ylabel('Median Tax Change ($)')
plt.title('Median Tax Change by Minority Percentage Quintile Excluding Vacant Land')
ymin2 = non_vacant_minority_quintile_summary['median_tax_change_pct'].min()
ymax2 = non_vacant_minority_quintile_summary['median_tax_change_pct'].max()
# Ensure 0 is included on the y-axis
plt.ylim(min(ymin2, 0), max(ymax2, 0) if ymax2 < 0 else max(ymax2, 0, 1.05*ymax2))
plt.axhline(0, color='black', linewidth=1, linestyle='dotted')
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Set a modern style
sns.set_theme(style="whitegrid", font_scale=1.15)

# Upside Down Bar Graph: Median Tax Change by Neighborhood Median Income Excluding Vacant Land
fig, ax = plt.subplots(figsize=(10, 6))

vals = non_vacant_income_quintile_summary['median_tax_change_pct']
labels = non_vacant_income_quintile_summary['median_income_quintile']

# Color mapping: dark green (more negative) to light green (less negative)
colors = sns.color_palette("Greens", n_colors=len(vals))
# Sort so that the most negative (largest magnitude) is darkest
color_map = [colors[i] for i in np.argsort(np.argsort(-vals))]

# To make bars start at the top and go down, invert the y-axis and plot positive heights
bars = ax.bar(
    labels,
    np.abs(vals),
    color=color_map,
    edgecolor='black',
    width=0.7
)

# Invert the y-axis so bars start at the top and go down
ax.invert_yaxis()

# Remove y-axis
ax.yaxis.set_visible(False)
ax.set_ylabel("")
ax.set_xlabel("")
ax.set_title('Median Tax Change by Neighborhood Median Income (Excl. Vacant Land)', weight='bold', pad=30)

# Remove all spines (including bottom)
sns.despine(left=True, right=True, top=True, bottom=True)

# Add value labels (bold, % sign) centered inside each bar (no line below the bar)
for bar, val in zip(bars, vals):
    ax.annotate(
        f"{val:.1f}%",
        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height() / 2),
        xytext=(0, 0),
        textcoords="offset points",
        ha='center', va='center',
        fontsize=13, color='black', fontweight='bold'
    )

# Move x-tick labels to the top
ax.xaxis.set_ticks_position('top')
ax.xaxis.set_label_position('top')
plt.xticks(fontweight='bold')

# Set y-limits to show bars going down from the top
ymax = np.abs(vals).max() * 1.1
ax.set_ylim(ymax, 0)

plt.tight_layout()
plt.show()

# Upside Down Bar Graph: Median Tax Change by Minority Percentage Quintile Excluding Vacant Land
fig, ax = plt.subplots(figsize=(10, 6))

vals2 = non_vacant_minority_quintile_summary['median_tax_change_pct']
labels2 = non_vacant_minority_quintile_summary['minority_pct_quintile']

colors2 = sns.color_palette("Greens", n_colors=len(vals2))
color_map2 = [colors2[i] for i in np.argsort(np.argsort(-vals2))]

bars2 = ax.bar(
    labels2,
    np.abs(vals2),
    color=color_map2,
    edgecolor='black',
    width=0.7
)

ax.invert_yaxis()
ax.yaxis.set_visible(False)
ax.set_ylabel("")
ax.set_xlabel("")
ax.set_title('Median Tax Change by Minority Percentage Quintile (Excl. Vacant Land)', weight='bold', pad=30)
sns.despine(left=True, right=True, top=True, bottom=True)

for bar, val in zip(bars2, vals2):
    ax.annotate(
        f"{val:.1f}%",
        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height() / 2),
        xytext=(0, 0),
        textcoords="offset points",
        ha='center', va='center',
        fontsize=13, color='black', fontweight='bold'
    )

ax.xaxis.set_ticks_position('top')
ax.xaxis.set_label_position('top')
plt.xticks(fontweight='bold')

ymax2 = np.abs(vals2).max() * 1.1
ax.set_ylim(ymax2, 0)

plt.tight_layout()
plt.show()


In [None]:
# Restrict df to only residential property categories
df_residential = df[df['PROPERTY_CATEGORY'].str.startswith("R")].copy()

# --- Repeat the block group summary and quintile analysis for residential only ---

# Filter data for residential (positive income, non-vacant)
gdf_residential_filtered, non_vacant_residential_gdf = filter_data(df_residential)

# Calculate block group summaries (all with positive median_income only, residential only)
census_block_groups_res = calculate_block_group_summary(gdf_residential_filtered)
non_vacant_block_summary_res = calculate_block_group_summary(non_vacant_residential_gdf)

# Create comparison plots (all with positive median_income only, residential only)
plot_comparison(
    census_block_groups_res, non_vacant_block_summary_res, 
    'median_income', 'median_tax_change_pct', 
    'Median Tax Change vs. Median Income (Residential Only)', 
    'Median Income by Census Block Group ($)'
)

plot_comparison(
    census_block_groups_res, non_vacant_block_summary_res,
    'minority_pct', 'median_tax_change_pct',
    'Median Tax Change vs. Minority Percentage (Residential Only)',
    'Minority Population Percentage by Census Block Group'
)

plot_comparison(
    census_block_groups_res, non_vacant_block_summary_res,
    'black_pct', 'median_tax_change_pct',
    'Median Tax Change vs. Black Percentage (Residential Only)',
    'Black Population Percentage by Census Block Group'
)

# Calculate and print correlations (all with positive median_income only, residential only)
correlations_res = calculate_correlations(census_block_groups_res, non_vacant_block_summary_res)
for key, value in correlations_res.items():
    print(f"[Residential] Correlation {key}: {value:.4f}")

# Create and display quintile summaries (income quintiles exclude negative/zero incomes, residential only)
income_quintile_summary_res = create_quintile_summary(gdf_residential_filtered, 'median_income', 'median_income')
non_vacant_income_quintile_summary_res = create_quintile_summary(non_vacant_residential_gdf, 'median_income', 'median_income')
minority_quintile_summary_res = create_quintile_summary(gdf_residential_filtered, 'minority_pct', 'minority_pct')
non_vacant_minority_quintile_summary_res = create_quintile_summary(non_vacant_residential_gdf, 'minority_pct', 'minority_pct')

print("\n[Residential] Tax impact by income quintile (all properties):")
display(income_quintile_summary_res)
print("\n[Residential] Tax impact by income quintile (excluding vacant land):")
display(non_vacant_income_quintile_summary_res)
print("\n[Residential] Tax impact by minority percentage quintile (all properties):")
display(minority_quintile_summary_res)
print("\n[Residential] Tax impact by minority percentage quintile (excluding vacant land):")
display(non_vacant_minority_quintile_summary_res)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Set a modern style
sns.set_theme(style="whitegrid", font_scale=1.15)

# Upside Down Bar Graph: Median Tax Change by Neighborhood Median Income Excluding Vacant Land (Residential Only)
fig, ax = plt.subplots(figsize=(10, 6))

vals = non_vacant_income_quintile_summary_res['median_tax_change_pct']
labels = non_vacant_income_quintile_summary_res['median_income_quintile']

# Color mapping: dark green (more negative) to light green (less negative)
colors = sns.color_palette("Greens", n_colors=len(vals))
# Sort so that the most negative (largest magnitude) is darkest
color_map = [colors[i] for i in np.argsort(np.argsort(-vals))]

# To make bars start at the top and go down, invert the y-axis and plot positive heights
bars = ax.bar(
    labels,
    np.abs(vals),
    color=color_map,
    edgecolor='black',
    width=0.7
)

# Invert the y-axis so bars start at the top and go down
ax.invert_yaxis()

# Remove y-axis
ax.yaxis.set_visible(False)
ax.set_ylabel("")
ax.set_xlabel("")
ax.set_title('Median Tax Change by Neighborhood Median Income (Excl. Vacant Land, Residential Only)', weight='bold', pad=30)

# Remove all spines (including bottom)
sns.despine(left=True, right=True, top=True, bottom=True)

# Add value labels (bold, % sign) centered inside each bar (no line below the bar)
for bar, val in zip(bars, vals):
    ax.annotate(
        f"{val:.1f}%",
        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height() / 2),
        xytext=(0, 0),
        textcoords="offset points",
        ha='center', va='center',
        fontsize=13, color='black', fontweight='bold'
    )

# Move x-tick labels to the top
ax.xaxis.set_ticks_position('top')
ax.xaxis.set_label_position('top')
plt.xticks(fontweight='bold')

# Set y-limits to show bars going down from the top
ymax = np.abs(vals).max() * 1.1
ax.set_ylim(ymax, 0)

plt.tight_layout()
plt.show()

# Upside Down Bar Graph: Median Tax Change by Minority Percentage Quintile Excluding Vacant Land (Residential Only)
fig, ax = plt.subplots(figsize=(10, 6))

vals2 = non_vacant_minority_quintile_summary_res['median_tax_change_pct']
labels2 = non_vacant_minority_quintile_summary_res['minority_pct_quintile']

colors2 = sns.color_palette("Greens", n_colors=len(vals2))
color_map2 = [colors2[i] for i in np.argsort(np.argsort(-vals2))]

bars2 = ax.bar(
    labels2,
    np.abs(vals2),
    color=color_map2,
    edgecolor='black',
    width=0.7
)

ax.invert_yaxis()
ax.yaxis.set_visible(False)
ax.set_ylabel("")
ax.set_xlabel("")
ax.set_title('Median Tax Change by Minority Percentage Quintile (Excl. Vacant Land, Residential Only)', weight='bold', pad=30)
sns.despine(left=True, right=True, top=True, bottom=True)

for bar, val in zip(bars2, vals2):
    ax.annotate(
        f"{val:.1f}%",
        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height() / 2),
        xytext=(0, 0),
        textcoords="offset points",
        ha='center', va='center',
        fontsize=13, color='black', fontweight='bold'
    )

ax.xaxis.set_ticks_position('top')
ax.xaxis.set_label_position('top')
plt.xticks(fontweight='bold')

ymax2 = np.abs(vals2).max() * 1.1
ax.set_ylim(ymax2, 0)

plt.tight_layout()
plt.show()
