In [None]:
import sys
import pandas as pd
import geopandas as gpd
import os
from datetime import datetime

sys.path.append('..')  # Add parent directory to path

# Import from local utility modules
from lvt_utils import ensure_geodataframe
from cloud_utils import get_feature_data_with_geometry

# Show every column when a frame is displayed
pd.set_option('display.max_columns', None)

# Baltimore Realproperty_OB layer endpoint components
dataset_name = "CityView/Realproperty_OB"
base_url = "https://geodata.baltimorecity.gov/egis/rest/services"
layer_id = 0

# 1 = download a fresh scrape and save it; any other value = reload the newest saved scrape
data_scrape = 1  # set to 0 or 1 as required

save_dir = os.path.join("data", "baltimore")
os.makedirs(save_dir, exist_ok=True)

if data_scrape == 1:
    # Download data with geometry (paginate=True to pull all records)
    baltimore_gdf = get_feature_data_with_geometry(dataset_name, base_url, layer_id, paginate=True)
    # Make sure it's a GeoDataFrame
    baltimore_gdf = ensure_geodataframe(baltimore_gdf)
    # Save under a date-stamped name so later runs can pick the newest scrape
    today_str = datetime.now().strftime("%Y%m%d")
    fname = f"baltimore_{today_str}.gpq"
    fpath = os.path.join(save_dir, fname)
    baltimore_gdf.to_parquet(fpath)
    display(baltimore_gdf.head())
else:
    # Reload the most recent geoparquet scrape from data/baltimore
    try:
        files = [f for f in os.listdir(save_dir) if f.lower().endswith(".gpq")]
        if not files:
            raise FileNotFoundError("No baltimore geoparquet scrapes found in data/baltimore/")

        # Parse the YYYYMMDD stamp that follows the first underscore in each filename
        files_with_dates = []
        for fname in files:
            try:
                date_str = fname.split("_")[1].split(".")[0]
                dt = datetime.strptime(date_str, "%Y%m%d")
            except (IndexError, ValueError):
                # No underscore in the name, or the stamp is not a valid date: skip this file.
                # Only these two errors are expected here; anything else should surface.
                continue
            files_with_dates.append((dt, fname))
        if not files_with_dates:
            raise FileNotFoundError("No valid baltimore geoparquet scrapes found in data/baltimore/")
        latest_fname = max(files_with_dates, key=lambda x: x[0])[1]
        fpath = os.path.join(save_dir, latest_fname)
        baltimore_gdf = gpd.read_parquet(fpath)
        display(baltimore_gdf.head())
    except Exception as e:
        # Chain explicitly so the traceback shows the original failure as the direct cause
        raise RuntimeError(f"Failed to find any previous scraped data: {e}") from e


In [None]:
# Build each *FMV total as the sum of its matching land and improvement columns
for total_col, land_col, impr_col in (
    ('CURRFMV', 'CURRLAND', 'CURRIMPR'),
    ('EXMPFMV', 'EXMPLAND', 'EXMPIMPR'),
    ('BFCVFMV', 'BFCVLAND', 'BFCVIMPR'),
):
    baltimore_gdf[total_col] = baltimore_gdf[land_col] + baltimore_gdf[impr_col]

In [None]:
# Import all required modules and functions
from cloud_utils import get_feature_data, get_feature_data_with_geometry
from lvt_utils import (model_split_rate_tax, calculate_current_tax, model_full_building_abatement, 
                       model_stacking_improvement_exemption, categorize_property_type, extract_date_from_filename)
from census_utils import (get_census_data, get_census_blockgroups_shapefile, get_census_data_with_boundaries, 
                          match_to_census_blockgroups, calculate_median_percentage_by_quintile, 
                          match_parcels_to_demographics, create_demographic_summary)
from viz import (create_scatter_plot, plot_comparison, calculate_correlations, weighted_median, 
                 create_quintile_summary, plot_quintile_analysis, create_property_category_chart, 
                 create_map_visualization, calculate_block_group_summary, filter_data_for_analysis)

# NOTE(review): `scrape_data` is not read anywhere else in the visible notebook; the loading
# cell is controlled by `data_scrape` instead. Confirm whether this flag is still needed.
scrape_data = 0

## Step 1: Getting the Data


Parcel data comes from the City of Baltimore ArcGIS REST service (`CityView/Realproperty_OB`, layer 0):  https://geodata.baltimorecity.gov/egis/rest/services


In [None]:

# Flag parcels (1/0) whose exempt value is at least their BFCVFMV total
is_fully_exempt = baltimore_gdf['EXMPFMV'].ge(baltimore_gdf['BFCVFMV'])
baltimore_gdf['full_exmp'] = is_fully_exempt.astype(int)


In [None]:
# Frequency of each exemption code across all parcels; missing codes are counted too.
# The label now names the column and frame that are actually summarised.
print("Counts of EXMPCODE in baltimore_gdf:")
print(baltimore_gdf['EXMPCODE'].value_counts(dropna=False).to_string())


In [None]:
# For what share of parcels does BFCVLAND + BFCVIMPR add up to ARTAXBAS?

# Usable rows: ARTAXBAS is non-zero and none of the three columns is missing
complete = baltimore_gdf[['ARTAXBAS', 'BFCVLAND', 'BFCVIMPR']].notnull().all(axis=1)
valid = baltimore_gdf['ARTAXBAS'].ne(0) & complete

checked = baltimore_gdf.loc[valid, ['ARTAXBAS', 'BFCVLAND', 'BFCVIMPR']]
bfcv_land_impr_sum = checked['BFCVLAND'] + checked['BFCVIMPR']
artaxbas = checked['ARTAXBAS']

# Per-parcel ratio of the land + improvement sum to ARTAXBAS, in percent
percent = (bfcv_land_impr_sum / artaxbas) * 100

# Share of usable rows where the sum equals ARTAXBAS exactly
matches = bfcv_land_impr_sum.eq(artaxbas)
percent_exact = matches.sum() / valid.sum() * 100
print(f"Percent of parcels where BFCVLAND + BFCVIMPR adds to exactly ARTAXBAS: {percent_exact:.2f}%")




In [None]:
# City tax per 1,000 of ARTAXBAS for every parcel
baltimore_gdf['citytax_per_arbase'] = (baltimore_gdf['CITY_TAX'] / baltimore_gdf['ARTAXBAS']) * 1000

# Exclude +/-inf (division by a zero base) and NaN from the descriptive statistics.
# Infinities are mapped to float NaN rather than pd.NA: replacing with pd.NA can upcast a float
# series to object dtype, in which case describe() returns count/unique/top/freq and the
# 'min' / '25%' lookups below raise KeyError.
stats_series = (
    baltimore_gdf['citytax_per_arbase']
    .replace([float('inf'), float('-inf')], float('nan'))
    .dropna()
)

summary = stats_series.describe(percentiles=[0.25, 0.5, 0.75])
print("CITY_TAX / ARTAXBASE * 1000 summary stats:")
print(f"min:    {summary['min']}")
print(f"25%:    {summary['25%']}")
print(f"median: {summary['50%']}")
print(f"75%:    {summary['75%']}")
print(f"max:    {summary['max']}")


## Step 3: Recreating Current Property Tax Revenue

Before we can model an LVT shift, we need to accurately recreate the current property tax system. This validation step ensures our dataset correctly reflects the real-world tax landscape.


In [None]:
# How often does CURRFMV minus EXMPFMV equal BFCVFMV?

# Boolean Series: True where the exemption-adjusted current value matches BFCVFMV
net_of_exemptions = baltimore_gdf["CURRFMV"].sub(baltimore_gdf["EXMPFMV"])
equal_fmv = net_of_exemptions.eq(baltimore_gdf["BFCVFMV"])

num_matches = equal_fmv.sum()
total = equal_fmv.count()  # number of non-null comparison results

# Guard against an empty frame before dividing
if total:
    percent_matches = (num_matches / total) * 100
else:
    percent_matches = 0

print(f"Number where CURRFMV - EXMPFMV == BFCVFMV: {num_matches} out of {total} ({percent_matches:.2f}%)")


In [None]:
# --- Recreate current tax revenue at one flat millage rate and store it per parcel as current_tax ---
# NOTE(review): 22.48 is hard-coded; presumably the rate per 1,000 of value. Confirm the source.
baltimore_gdf['millage_rate'] = 22.48
print(f"Median millage_rate: {baltimore_gdf['millage_rate'].median()}")

# Count fully exempt parcels (full_exmp is 1 for exempt, 0 otherwise), then drop them
num_full_exmp = baltimore_gdf['full_exmp'].sum()
print(f"Number of properties with full_exmp == True: {int(num_full_exmp)}")
# .copy() makes the filtered frame independent of the original, so columns added to it later
# do not trigger pandas' SettingWithCopyWarning
baltimore_gdf = baltimore_gdf[baltimore_gdf['full_exmp'] != 1].copy()

# calculate_current_tax returns total revenue, a second revenue figure, and the frame itself;
# the per-parcel 'current_tax' column on the returned frame is used by later cells
current_revenue, second_revenue, baltimore_gdf = calculate_current_tax(
    df=baltimore_gdf,
    tax_value_col='CURRFMV',
    millage_rate_col='millage_rate',
    exemption_col='EXMPFMV',
    exemption_flag_col='full_exmp'
)

print(f"Total number of properties: {len(baltimore_gdf):,}")
print(f"Current annual revenue with millage rate: ${current_revenue:,.2f}")
print(f"Total land value: ${baltimore_gdf['BFCVLAND'].sum():,.2f}")
print(f"Total overall value: ${baltimore_gdf['BFCVFMV'].sum():,.2f}")
# NOTE(review): this prints the same BFCVFMV sum as "Total overall value" above; confirm
# whether exemptions should be subtracted here.
print(f"Total taxable value: ${(baltimore_gdf['BFCVFMV'] ).sum():,.2f}")

total_assessed_minus_exempt = (baltimore_gdf['BFCVFMV'] - baltimore_gdf['EXMPFMV']).sum()
print("Sum of BFCVFMV minus EXMPFMV:", total_assessed_minus_exempt)

# Implied millage: revenue per 1,000 of (BFCVFMV - EXMPFMV)
millage_rate = (current_revenue / total_assessed_minus_exempt) * 1000
print("Calculated millage_rate:", millage_rate)


In [None]:
def categorize_property_type(prop_use_desc):
    """Map a property-use description to a broad property category.

    Matching is exact (case-sensitive). Descriptions that appear in no
    category are returned as "Other".

    Note: this local definition replaces the ``categorize_property_type``
    imported from ``lvt_utils`` earlier in the notebook.
    """
    # (category, exact descriptions belonging to it), checked in this order
    descriptions_by_category = (
        ("Single Family", ("Single Unit",)),
        ("Small Multi-Family (2-4 units)", ("Two-to-Four Unit",)),
        ("Large Multi-Family (5+ units)", ("Five-Plus Unit",)),
        ("Other Residential", ("Other Residential", "Vacation Home")),
        ("Mobile Home Park", ("Mobile Home Park",)),
        ("Vacant Land", ("Vacant Land",)),
        ("Agricultural", ("Cur - Use - Ag", "Agricultural Not Classified", "Agricultural")),
        ("Retail/Service/Commercial", (
            "Retail - General Mrchds", "Retail - Other", "Retail - Hardware", "Retail - Food", "Retail - Eating",
            "Retail - Auto", "Retail - Furniture", "Service - Finance", "Service - Professional", "Service - Repair",
            "Service - Education", "Service - Governmental", "Service - Construction", "Service - Personal",
            "Service - Business", "Wholesale", "Hotel/Motel", "Hotel/Condo", "Inst Lodging", "Recreational",
            "Resort - Camping", "Public Assembly", "Churches", "Park", "Other Cultural",
        )),
        ("Manufacturing/Industrial", (
            "Manf - Other", "Manf - Fabricated Material", "Manf - Petroleum", "Manf - Printed Material",
            "Manf - Stone/Glass", "Manf - Printing", "Manf - Instrumentation", "Manf - Leather", "Manf - Paper",
            "Manufacturing - Food", "Manufacturing - Lumber", "Mining", "Utilities", "Communication",
        )),
        ("Transportation - Parking", ("Trans - Parking",)),
        ("Transportation/Other", (
            "Trans - Highway", "Trans - Railroad", "Trans - Aircraft", "Trans - Motor", "Trans - Other",
        )),
        ("Designated Forest", ("Designated Forest Lnd",)),
        ("Water Areas", ("Water Area",)),
        ("Marijuana", ("Marijuana Growing",)),
        ("Current Use Open", ("Cur - Use - Open",)),
    )

    # First category listing the description wins; otherwise fall back to "Other"
    return next(
        (category for category, descriptions in descriptions_by_category
         if prop_use_desc in descriptions),
        "Other",
    )

# Use the raw ZONECODE value as each parcel's PROPERTY_CATEGORY.
# NOTE(review): categorize_property_type (defined above) is NOT applied here; confirm that
# grouping by zoning code rather than by mapped property-use category is intended.
baltimore_gdf['PROPERTY_CATEGORY'] = baltimore_gdf['ZONECODE']

## Step 4: Modeling the Split-Rate Land Value Tax

Now for the exciting part - modeling the LVT shift! We'll create a revenue-neutral policy that taxes land at 2 times the rate of buildings.



In [None]:
# Revenue-neutral split-rate land value tax at a 2:1 land:improvement ratio
from lvt_utils import model_split_rate_tax

# Land is taxed at 2x the rate of improvements
land_improvement_ratio = 2

# Column and policy settings handed to model_split_rate_tax
split_rate_options = dict(
    land_value_col="CURRLAND",
    improvement_value_col="CURRIMPR",
    current_revenue=current_revenue,
    land_improvement_ratio=land_improvement_ratio,
    exemption_col="EXMPFMV",
    exemption_flag_col="full_exmp",
    percentage_cap_col=None,  # set to a column name for a cap if desired
)

land_millage, improvement_millage, split_rate_revenue, baltimore_gdf = model_split_rate_tax(
    df=baltimore_gdf, **split_rate_options
)

# Report revenue as the sum of the per-parcel new_tax column on the returned frame
# (this replaces the revenue figure returned by the model call above)
split_rate_revenue = baltimore_gdf['new_tax'].sum()

print(f"Total split-rate tax revenue (2:1 ratio): ${split_rate_revenue:,.2f}")


In [None]:
# Total tax shifted between parcels: the sum of |current_tax - new_tax|, also expressed as a
# percent of total current tax.

# Absolute difference per parcel
baltimore_gdf['abs_tax_diff'] = (baltimore_gdf['current_tax'] - baltimore_gdf['new_tax']).abs()

# Sum of absolute differences
total_abs_tax_diff = baltimore_gdf['abs_tax_diff'].sum()

# Share of current revenue. float('nan') is used for the zero-revenue case because numpy is not
# imported until a later cell, so `np.nan` here would raise NameError on a fresh kernel.
percent_of_current = (total_abs_tax_diff / current_revenue) * 100 if current_revenue != 0 else float('nan')

print(f"Sum of absolute value of current_tax minus new_tax: ${total_abs_tax_diff:,.2f}")
print(f"That is {percent_of_current:.2f}% of the sum of current_tax.")


In [None]:

# Before summarising: label parcels with no improvement value (CURRIMPR == 0) as 'Vacant Land'.
# The guard checks CURRIMPR, the column the filter actually reads; the previous guard tested
# for CURRLAND, so a frame lacking CURRIMPR would have raised KeyError.
if "CURRIMPR" in baltimore_gdf.columns and "PROPERTY_CATEGORY" in baltimore_gdf.columns:
    baltimore_gdf.loc[baltimore_gdf["CURRIMPR"] == 0, "PROPERTY_CATEGORY"] = "Vacant Land"

# Calculate and print the summary table for total tax impact (using lvt_utils)
from lvt_utils import calculate_category_tax_summary, print_category_tax_summary

# Group by PROPERTY_CATEGORY when present; otherwise fall back to the frame's first column
output_summary = calculate_category_tax_summary(
    baltimore_gdf,
    category_col='PROPERTY_CATEGORY' if 'PROPERTY_CATEGORY' in baltimore_gdf.columns else baltimore_gdf.columns[0],
    current_tax_col='current_tax',
    new_tax_col='new_tax'
)
print_category_tax_summary(output_summary, "Total Tax Impact by Property Category (All sp_ Levies)")



In [None]:
import matplotlib.pyplot as plt
import numpy as np

def _format_signed_dollars(amount):
    """Format a dollar amount with thousands separators; negatives get a leading minus sign."""
    if amount >= 0:
        return f"${amount:,.0f}"
    return f"-${abs(amount):,.0f}"


# Horizontal bar chart of the median percent tax change per property category, with a
# "Net Change" column on the right. Only categories with more than 50 parcels are shown.
filtered = output_summary[output_summary['property_count'] > 50].copy()
if filtered.empty:
    # Fail early with a clear message; otherwise min()/max() below raise an opaque
    # "arg is an empty sequence" ValueError after an empty figure has already been created
    raise ValueError("No property categories with more than 50 parcels to plot.")

# Order rows by median percent change (ascending) once, instead of re-indexing parallel lists
sorted_idx = np.argsort(filtered['median_tax_change_pct'].tolist())
filtered = filtered.iloc[sorted_idx]

categories = filtered['PROPERTY_CATEGORY'].tolist()
counts = filtered['property_count'].tolist()
median_pct_change = filtered['median_tax_change_pct'].tolist()
median_dollar_change = filtered['median_tax_change'].tolist()
if 'total_tax_change' in filtered.columns:
    total_tax_change = filtered['total_tax_change'].tolist()
else:
    # Fallback when the summary has no total column: mean change times parcel count
    total_tax_change = (filtered['mean_tax_change'] * filtered['property_count']).tolist()

# Dark red for increases (> 0), green otherwise
bar_colors = ["#8B0000" if val > 0 else "#228B22" for val in median_pct_change]

# Bar settings
bar_height = 0.75
fig_height = len(categories) * 0.8 + 1.2
right_col_pad = 120  # gap (in x-axis units) between the widest bar and the right-hand column
fig, ax = plt.subplots(figsize=(17, fig_height))

y = np.arange(len(categories))

ax.barh(
    y, median_pct_change, color=bar_colors, edgecolor='none',
    height=bar_height, alpha=0.92, linewidth=0, zorder=2
)

# Remove all spines and ticks for a clean look
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

# Vertical offsets of the three text rows drawn beside each bar
cat_offset = 0.18     # category name
med_offset = -0.03    # median line, just below the category
count_offset = -0.23  # parcel count, below the median

# x position of the right-hand "Net Change" column
max_abs = max(abs(min(median_pct_change)), abs(max(median_pct_change)))
right_col_x = max_abs + right_col_pad

ax.text(
    right_col_x, len(categories) - 0.5, "Net Change", va='bottom', ha='left',
    fontsize=15, fontweight='bold', color='black', fontname='Arial'
)

for i, (cat, val, count, med_dol, tot_change) in enumerate(
    zip(categories, median_pct_change, counts, median_dollar_change, total_tax_change)
):
    median_combo = f"Median: {_format_signed_dollars(med_dol)}, {val:+.1f}%"

    # Labels sit to the right of positive bars and to the left of negative ones
    if val < 0:
        xpos, ha = val - 2.5, 'right'
    else:
        xpos, ha = val + 2.5, 'left'

    # (vertical offset, text, font size, colour) for the three stacked labels
    label_rows = (
        (cat_offset, cat, 14, '#222'),
        (med_offset, median_combo, 12, 'black'),
        (count_offset, f"{count:,} parcels", 11, '#888'),
    )
    for offset, label, size, color in label_rows:
        ax.text(
            xpos, y[i] + offset, label, va='center', ha=ha,
            fontsize=size, fontweight='bold', color=color,
            fontname='Arial'
        )

    # Net change for the category, in the right-hand column
    ax.text(
        right_col_x, y[i], _format_signed_dollars(tot_change), va='center', ha='left',
        fontsize=13, fontweight='bold', color='black',
        fontname='Arial'
    )

# Symmetric x limits around zero, with extra room for the right-hand column
ax.set_xlim(-right_col_x, right_col_x + 60)

# Remove axis labels/ticks
ax.set_yticks([])
ax.set_xticks([])

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Diverging bar chart built from output_summary: for each category, the share of parcels whose
# tax decreases (left, green) or increases (right, red) beyond the summary's threshold.
# NOTE(review): the titles below hard-code ">10%"; confirm that matches the threshold behind
# the pct_*_gt_threshold columns.

# Keep categories with more than 50 parcels, smallest increase share first
summary_filtered = output_summary.loc[output_summary['property_count'] > 50].copy()
summary_sorted = summary_filtered.sort_values('pct_increase_gt_threshold', ascending=True)

categories_sorted = summary_sorted['PROPERTY_CATEGORY'].tolist()
pct_increase_sorted = summary_sorted['pct_increase_gt_threshold'].tolist()
pct_decrease_sorted = summary_sorted['pct_decrease_gt_threshold'].tolist()

# Whole-number versions used for the text labels
pct_increase_int_sorted = [int(round(share)) for share in pct_increase_sorted]
pct_decrease_int_sorted = [int(round(share)) for share in pct_decrease_sorted]

y = np.arange(len(categories_sorted))

fig, ax = plt.subplots(figsize=(8, 6))

color_increase = "#8B0000"  # dark red
color_decrease = "#228B22"  # professional green

# Decrease bars extend left (negated values), increase bars extend right
bar_series = (
    ([-share for share in pct_decrease_sorted], color_decrease),
    (pct_increase_sorted, color_increase),
)
for bar_values, bar_color in bar_series:
    ax.barh(y, bar_values, color=bar_color, edgecolor='none', height=0.7)

# Integer percent labels just beyond the end of each non-zero bar
pct_label_style = dict(va='center', fontsize=8, fontweight='normal', fontname='Arial')
for row, inc, dec in zip(y, pct_increase_int_sorted, pct_decrease_int_sorted):
    if dec > 0:
        ax.text(-dec - 2, row, f"{dec}%", ha='right', color=color_decrease, **pct_label_style)
    if inc > 0:
        ax.text(inc + 2, row, f"{inc}%", ha='left', color=color_increase, **pct_label_style)

# Category name to the right of the increase bar, clear of the percent label
for row, cat, inc in zip(y, categories_sorted, pct_increase_sorted):
    if inc > 0:
        xpos = inc + 18
    else:
        xpos = 18
    ax.text(
        xpos, row, cat,
        va='center', ha='left',
        fontsize=9, fontweight='bold', color='#222', fontname='Arial'
    )

# Minimalist look: no spines, ticks, axis labels or title
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)
ax.set_yticks([])
ax.set_xticks([])
ax.set_ylabel('')
ax.set_xlabel('')
ax.set_title('')

# x limits sized from the largest share, with extra room on the right for category names
max_val = max(max(pct_increase_sorted), max(pct_decrease_sorted))
ax.set_xlim(-max_val - 20, max_val + 48)

# --- Custom titles above the left and right bar groups ---
title_fontsize = 10
title_color = 'black'
title_fontweight = 'normal'
title_fontname = 'Arial'

# Titles sit just above the top bar, offset slightly left/right of centre
title_y = len(categories_sorted) - 0.2
left_title_x = -max_val * 0.45
right_title_x = max_val * 0.45

column_titles = (
    (left_title_x, "Percent of parcels\ndecreasing >10%"),
    (right_title_x, "Percent of parcels\nincreasing >10%"),
)
for title_x, title_text in column_titles:
    ax.text(
        title_x, title_y,
        title_text,
        ha='center', va='bottom', fontsize=title_fontsize, fontweight=title_fontweight,
        color=title_color, fontname=title_fontname,
        bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.15')
    )

plt.tight_layout()
plt.show()


## Step 6: Adding Geographic Context

To make our analysis spatially-aware, we need to add geographic boundaries to our parcel data. This enables us to:

- **Create maps** showing tax changes across the city
- **Analyze patterns by neighborhood** or district  
- **Combine with demographic data** for equity analysis
- **Present results visually** to stakeholders

We'll fetch the parcel boundary data from the same ArcGIS service that contains the geometric information for each property.


### Merging Tax Analysis with Geographic Data

Here we combine our tax analysis results with the geographic boundaries. This creates a spatially-enabled dataset that allows us to:

1. **Map tax changes** across Baltimore
2. **Identify spatial patterns** in tax impacts
3. **Prepare for demographic analysis** by having geographic context

The merge should give us the same number of records as our original analysis, now with geographic coordinates for each parcel.


In [None]:
import matplotlib.pyplot as plt
import geopandas as gpd

# Bounding box of all parcels as (minx, miny, maxx, maxy), in baltimore_gdf's own CRS
minx, miny, maxx, maxy = baltimore_gdf.total_bounds

# Rectangle covering those bounds, used only for visualisation
from shapely.geometry import box
bounds_gdf = gpd.GeoDataFrame(
    {"geometry": [box(minx, miny, maxx, maxy)]},
    crs=baltimore_gdf.crs
)

# Draw the bounding box over an OpenStreetMap basemap when contextily is available
try:
    import contextily as ctx

    # Reproject the box to Web Mercator (EPSG:3857). Everything else on this axis (the world
    # outline, the axis limits and the basemap tiles) is in EPSG:3857; projecting to EPSG:4326
    # here put the red box in degree coordinates, far outside the visible extent.
    bounds_webm = bounds_gdf.to_crs(epsg=3857)
    fig, ax = plt.subplots(figsize=(8, 8))

    # Faint outline of the full Web Mercator extent for context
    dummy_world_poly = gpd.GeoDataFrame(
        geometry=[box(-20026376, -20048966, 20026376, 20048966)],  # Full Mercator extent
        crs="EPSG:3857"
    )
    dummy_world_poly.boundary.plot(ax=ax, color="lightgray", linewidth=0.3, zorder=1)

    # Baltimore bounding box on top
    bounds_webm.boundary.plot(ax=ax, color="red", linewidth=2, zorder=10)

    # View window: the bounding box widened by 50% around its centre (computed in the source CRS)
    expand = 1.5
    xmid = (minx + maxx) / 2
    ymid = (miny + maxy) / 2
    width = (maxx - minx) * expand
    height = (maxy - miny) * expand

    bounds = box(
        xmid - width / 2,
        ymid - height / 2,
        xmid + width / 2,
        ymid + height / 2
    )
    bounds_webm_margin = gpd.GeoDataFrame({'geometry': [bounds]}, crs=baltimore_gdf.crs).to_crs(epsg=3857)

    # total_bounds is (minx, miny, maxx, maxy); reshape into bottom-left / top-right corners
    bl, tr = bounds_webm_margin.total_bounds.reshape(2,2)
    ax.set_xlim(bl[0], tr[0])
    ax.set_ylim(bl[1], tr[1])

    # OSM tiles behind the outlines
    ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, alpha=1)
    ax.set_title("Baltimore Bounding Box on OpenStreetMap (global context!)")
    plt.xlabel("")
    plt.ylabel("")
except ImportError:
    # contextily is not installed: plot the bare bounding box without a basemap
    ax = bounds_gdf.plot(
        facecolor='none', edgecolor='red', linewidth=2, figsize=(8, 8)
    )
    ax.set_title("Baltimore Parcel Data: Bounding Box (no OSM)")
plt.show()

# Print the bounding box of baltimore_gdf (in its own CRS)
print("Baltimore parcel data geographic bounds (minx, miny, maxx, maxy):")
print(baltimore_gdf.total_bounds)

# Print the first few parcel centroids.
# NOTE(review): centroids are computed in baltimore_gdf's CRS, so the "(longitude, latitude)"
# label is only accurate if that CRS is geographic; confirm.
centroids = baltimore_gdf.geometry.centroid
print("Sample parcel centroids (longitude, latitude):")
print(centroids.head())


In [None]:
# Fetch 2022 census data and block-group boundaries for Baltimore City
# (FIPS 24510 = Maryland 24 + Baltimore City 510)
census_data, census_boundaries = get_census_data_with_boundaries(
    fips_code='24510',
    year=2022
)

# Put both layers in the same CRS before the spatial match.
# set_crs only labels the census boundaries (assumed to already be WGS84 coordinates),
# while to_crs reprojects the parcel geometries.
target_epsg = 4326
census_boundaries = census_boundaries.set_crs(epsg=target_epsg)
boundary_gdf = baltimore_gdf.to_crs(epsg=target_epsg)

# Attach census block-group attributes to each parcel (left join keeps every parcel)
df = match_to_census_blockgroups(
    gdf=boundary_gdf,
    census_gdf=census_boundaries,
    join_type="left"
)

print(f"Number of census blocks: {len(census_boundaries)}")
print(f"Number of census data: {len(census_data)}")
print(f"Number of parcels with census data: {len(df)}")

## Step 7: Demographic and Equity Analysis

One of the most important aspects of LVT analysis is understanding the **equity implications** - how does the tax shift affect different income levels and demographic groups?

### Adding Census Data

We'll match each property to its Census Block Group and pull demographic data including:
- **Median household income** 
- **Racial/ethnic composition**
- **Population characteristics**

### Why This Matters

Policy makers need to understand:
- Does the LVT shift disproportionately burden low-income neighborhoods?
- Are there racial equity implications?  
- Does the policy align with broader equity goals?

**Note**: You'll need a Census API key for this section. Get one free at: https://api.census.gov/data/key_signup.html


In [None]:
# List every column available after the census match (heading and list on separate lines)
print("DataFrame columns:", df.columns.tolist(), sep="\n")


### Exploring the Enhanced Dataset

With census data merged in, our dataset now contains both property tax information and demographic context. Let's explore what variables we now have available for analysis.

This enhanced dataset allows us to examine relationships between:
- Property characteristics and demographics
- Tax impacts and neighborhood income levels
- Geographic patterns in tax burden shifts


In [None]:
# Lift pandas' column-count and width limits so the preview shows every column
for option_name in ('display.max_columns', 'display.width'):
    pd.set_option(option_name, None)
display(df.head())


### Viewing the Complete Dataset

Let's examine our enhanced dataset with all the variables we've created and merged. This gives us a comprehensive view of each property with:

- **Property characteristics** (type, value, location)
- **Current tax calculations** 
- **New LVT calculations**
- **Tax change impacts**
- **Demographic context** (income, race/ethnicity)

This rich dataset forms the foundation for sophisticated equity and impact analysis.


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def filter_data(df):
    """Split parcels into two positive-income subsets.

    Returns:
        (df_filtered, non_vacant_df): copies of the rows with ``median_income > 0``,
        and of those rows whose ``PROPERTY_CATEGORY`` is not 'Vacant Land'.
    """
    has_income = df['median_income'] > 0
    not_vacant = df['PROPERTY_CATEGORY'] != 'Vacant Land'
    return df[has_income].copy(), df[not_vacant & has_income].copy()

def calculate_block_group_summary(df):
    """Aggregate parcel-level tax results to one row per census block group.

    Rows with non-positive ``median_income`` are dropped first. Groups are keyed on
    ``std_geoid``; demographics take the group's first value, taxes are summed, and
    ``mean_tax_change_pct`` is the percent change between the summed new and current tax.

    Note: this local definition replaces the ``calculate_block_group_summary``
    imported from ``viz`` earlier in the notebook.
    """
    positive_income = df[df['median_income'] > 0].copy()

    aggregations = {
        'median_income': ('median_income', 'first'),
        'minority_pct': ('minority_pct', 'first'),
        'black_pct': ('black_pct', 'first'),
        'total_current_tax': ('current_tax', 'sum'),
        'total_new_tax': ('new_tax', 'sum'),
        'mean_tax_change': ('tax_change', 'mean'),
        'median_tax_change': ('tax_change', 'median'),
        'median_tax_change_pct': ('tax_change_pct', 'median'),
        'parcel_count': ('tax_change', 'count'),
        # True when at least one parcel in the group is categorised as vacant land
        'has_vacant_land': ('PROPERTY_CATEGORY', lambda cats: 'Vacant Land' in cats.values),
    }
    summary = positive_income.groupby('std_geoid').agg(**aggregations).reset_index()

    # Safety net: keep only groups with positive median income
    summary = summary.loc[summary['median_income'] > 0].copy()

    tax_delta = summary['total_new_tax'] - summary['total_current_tax']
    summary['mean_tax_change_pct'] = (tax_delta / summary['total_current_tax']) * 100
    return summary

def create_scatter_plot(data, x_col, y_col, ax, title, xlabel, ylabel):
    """Draw a sized scatter plot with a zero line and a linear trend line on ``ax``.

    Args:
        data: frame with ``x_col``, ``y_col`` and a ``parcel_count`` column (used for point size).
        x_col: column for the x axis; rows where it is not > 0 are excluded.
        y_col: column for the y axis.
        ax: matplotlib Axes to draw on.
        title, xlabel, ylabel: text applied to the axes.
    """
    # Exclude rows with non-positive x_col (e.g., median_income)
    data = data[data[x_col] > 0].copy()
    sns.scatterplot(
        data=data,
        x=x_col,
        y=y_col,
        size='parcel_count',
        sizes=(20, 200),
        alpha=0.7,
        ax=ax
    )

    # Reference line at "no change"
    ax.axhline(y=0, color='r', linestyle='--')

    # Fit the trend line on rows where BOTH coordinates are finite. Dropping missing values
    # from x and y separately relied on implicit index alignment to re-pair the points, and
    # let +/-inf values (e.g. a percent change computed against a zero base) reach np.polyfit.
    pairs = (
        data[[x_col, y_col]]
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
        .sort_values(x_col)
    )

    if len(pairs) > 1:
        x_vals = pairs[x_col].to_numpy()
        y_vals = pairs[y_col].to_numpy()
        trend = np.poly1d(np.polyfit(x_vals, y_vals, 1))
        ax.plot(x_vals, trend(x_vals), "r--")

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

def plot_comparison(data1, data2, x_col, y_col, title_prefix, xlabel):
    """Show two scatter plots side by side: ``data1`` (all properties) and ``data2`` (excluding vacant land).

    Both panels plot ``y_col`` against ``x_col`` via ``create_scatter_plot`` and share
    the y-axis label 'Mean Tax Change (%)'.
    """
    fig, axes = plt.subplots(1, 2, figsize=(18, 8))

    # Left panel: all properties; right panel: vacant land excluded
    panels = ((data1, 'All Properties'), (data2, 'Excluding Vacant Land'))
    for panel_ax, (panel_data, panel_label) in zip(axes, panels):
        create_scatter_plot(
            panel_data, x_col, y_col, panel_ax,
            f'{title_prefix} - {panel_label}', xlabel, 'Mean Tax Change (%)'
        )

    plt.tight_layout()
    plt.show()

def calculate_correlations(data1, data2):
    """Correlate demographics with tax change for two block-group summaries.

    ``data1`` results are keyed with the suffix 'all' and ``data2`` with 'non_vacant'.
    Rows with non-positive ``median_income`` are excluded before correlating.

    Returns:
        dict mapping '<metric>_<suffix>' to the pairwise correlation coefficient.
    """
    # (key prefix, demographic column, tax-change column)
    metric_pairs = (
        ('income_mean', 'median_income', 'mean_tax_change_pct'),
        ('income_median', 'median_income', 'median_tax_change_pct'),
        ('minority_mean', 'minority_pct', 'mean_tax_change_pct'),
        ('black_mean', 'black_pct', 'mean_tax_change_pct'),
    )

    correlations = {}
    for frame, suffix in ((data1, 'all'), (data2, 'non_vacant')):
        positive_income = frame[frame['median_income'] > 0].copy()
        for prefix, demo_col, change_col in metric_pairs:
            # Off-diagonal entry of the 2x2 correlation matrix
            correlations[f'{prefix}_{suffix}'] = positive_income[[demo_col, change_col]].corr().iloc[0, 1]
    return correlations

def weighted_median(values, weights):
    """Return the weighted median of ``values`` given matching ``weights``.

    Pairs where either the value or the weight is NaN are ignored. Returns NaN
    when nothing remains. The result is the first value (in ascending order)
    whose cumulative weight reaches half of the total weight.
    """
    # Keep a pair only when neither its value nor its weight is NaN
    keep = ~(np.isnan(values) | np.isnan(weights))
    kept_values = np.array(values)[keep]
    kept_weights = np.array(weights)[keep]
    if len(kept_values) == 0:
        return np.nan

    # Sort values ascending and carry the weights along
    order = np.argsort(kept_values)
    ordered_values = kept_values[order]
    ordered_weights = kept_weights[order]

    running_weight = np.cumsum(ordered_weights)
    half_weight = ordered_weights.sum() / 2.0
    return ordered_values[np.searchsorted(running_weight, half_weight)]

def create_quintile_summary(df, group_col, value_col):
    """Summarise tax-change statistics by quintile of ``group_col``.

    Rows are split into five equal-frequency bins of ``group_col`` with
    ``pd.qcut`` (labelled "Q1 (Lowest)" through "Q5 (Highest)"). When
    ``group_col`` is ``'median_income'``, rows with non-positive income are
    dropped before binning. The input frame is never modified.

    Args:
        df: Table containing ``group_col``, ``value_col``, ``tax_change`` and
            ``tax_change_pct`` columns. Every row counts as one observation.
        group_col: Column whose quintiles define the groups.
        value_col: Column whose per-quintile mean is reported as
            ``mean_value``.

    Returns:
        DataFrame with one row per quintile and the columns
        ``<group_col>_quintile``, ``count`` (non-null ``tax_change`` values),
        ``mean_tax_change_pct``, ``median_tax_change_pct`` and ``mean_value``.
        The median comes from ``weighted_median`` with every row weighted
        equally.
    """
    quintile_col = f'{group_col}_quintile'

    # If grouping by income, exclude non-positive values
    if group_col == 'median_income':
        df = df[df['median_income'] > 0]

    # assign() builds a new frame, so the caller's table never gains the
    # helper quintile column as a side effect.
    df = df.assign(**{
        quintile_col: pd.qcut(df[group_col], 5,
                              labels=["Q1 (Lowest)", "Q2", "Q3", "Q4", "Q5 (Highest)"])
    })

    def summarize(group):
        # Each row is weighted by 1 for the median.
        return pd.Series({
            'count': group['tax_change'].count(),
            'mean_tax_change_pct': group['tax_change_pct'].mean(),
            'median_tax_change_pct': weighted_median(group['tax_change_pct'], np.ones(len(group))),
            'mean_value': group[value_col].mean()
        })

    # observed=False is spelled out because the default for categorical
    # groupers differs between pandas versions.
    summary = df.groupby(quintile_col, observed=False).apply(summarize).reset_index()

    return summary

# Main execution: split the parcels, summarise them, then plot and tabulate.
gdf_filtered, non_vacant_gdf = filter_data(df)
print(f"Number of rows in gdf_filtered: {len(gdf_filtered)}")
print(f"Number of rows in non_vacant_gdf: {len(non_vacant_gdf)}")

# Block group summaries for the full filtered set and its non-vacant subset
census_block_groups = calculate_block_group_summary(gdf_filtered)
non_vacant_block_summary = calculate_block_group_summary(non_vacant_gdf)

# One pair of comparison scatter plots per demographic variable:
# (x column, title prefix, x-axis label)
comparison_specs = (
    ('median_income',
     'Mean Tax Change vs. Median Income',
     'Median Income by Census Block Group ($)'),
    ('minority_pct',
     'Mean Tax Change vs. Minority Percentage',
     'Minority Population Percentage by Census Block Group'),
    ('black_pct',
     'Mean Tax Change vs. Black Percentage',
     'Black Population Percentage by Census Block Group'),
)
for x_column, title_prefix, x_label in comparison_specs:
    plot_comparison(census_block_groups, non_vacant_block_summary,
                    x_column, 'mean_tax_change_pct', title_prefix, x_label)

# Correlations between block group demographics and tax change
correlations = calculate_correlations(census_block_groups, non_vacant_block_summary)
for name, coefficient in correlations.items():
    print(f"Correlation {name}: {coefficient:.4f}")

# Quintile summaries (create_quintile_summary drops non-positive incomes when
# grouping by median_income)
income_quintile_summary = create_quintile_summary(gdf_filtered, 'median_income', 'median_income')
non_vacant_income_quintile_summary = create_quintile_summary(non_vacant_gdf, 'median_income', 'median_income')
minority_quintile_summary = create_quintile_summary(gdf_filtered, 'minority_pct', 'minority_pct')
non_vacant_minority_quintile_summary = create_quintile_summary(non_vacant_gdf, 'minority_pct', 'minority_pct')

# Show each table under its heading
quintile_tables = (
    ("\nTax impact by income quintile (all properties):", income_quintile_summary),
    ("\nTax impact by income quintile (excluding vacant land):", non_vacant_income_quintile_summary),
    ("\nTax impact by minority percentage quintile (all properties):", minority_quintile_summary),
    ("\nTax impact by minority percentage quintile (excluding vacant land):", non_vacant_minority_quintile_summary),
)
for heading, table in quintile_tables:
    print(heading)
    display(table)


In [None]:
# Mean tax change percent by quintile, comparing all properties with the
# set that excludes vacant land. One figure is drawn per grouping variable.
# Each entry: (all-properties summary, non-vacant summary, quintile label
# column, x-axis label, figure title).
mean_quintile_plots = [
    (income_quintile_summary, non_vacant_income_quintile_summary,
     'median_income_quintile', 'Median Income Quintile',
     'Mean Tax Change by Median Income Quintile (Census Block Groups)'),
    (minority_quintile_summary, non_vacant_minority_quintile_summary,
     'minority_pct_quintile', 'Minority Percentage Quintile',
     'Mean Tax Change by Minority Percentage Quintile (Census Block Groups)'),
]

for all_summary, non_vacant_summary, quintile_col, x_label, figure_title in mean_quintile_plots:
    fig, ax = plt.subplots(figsize=(10, 6))
    for summary, series_label in ((all_summary, 'All Properties'),
                                  (non_vacant_summary, 'Excluding Vacant Land')):
        ax.plot(
            summary[quintile_col],
            summary['mean_tax_change_pct'],
            marker='o',
            label=series_label
        )
    ax.set_xlabel(x_label)
    # The plotted column is a percentage, so the axis is labelled in %, not $.
    ax.set_ylabel('Mean Tax Change (%)')
    ax.set_title(figure_title)
    ax.legend()

    # Draw a dotted zero line only when the data straddles zero
    lowest = min(all_summary['mean_tax_change_pct'].min(),
                 non_vacant_summary['mean_tax_change_pct'].min())
    highest = max(all_summary['mean_tax_change_pct'].max(),
                  non_vacant_summary['mean_tax_change_pct'].max())
    if lowest < 0 < highest:
        ax.axhline(0, color='black', linewidth=1, linestyle='dotted')

    fig.tight_layout()
    plt.show()


In [None]:
# Median tax change percent by quintile, excluding vacant land. One figure is
# drawn per grouping variable.
# Each entry: (summary table, quintile label column, x-axis label, title).
median_quintile_plots = [
    (non_vacant_income_quintile_summary, 'median_income_quintile',
     'Median Income Quintile',
     'Median Tax Change by Neighborhood Median Income Excluding Vacant Land'),
    (non_vacant_minority_quintile_summary, 'minority_pct_quintile',
     'Minority Percentage Quintile',
     'Median Tax Change by Minority Percentage Quintile Excluding Vacant Land'),
]

for summary, quintile_col, x_label, figure_title in median_quintile_plots:
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(
        summary[quintile_col],
        summary['median_tax_change_pct'],
        marker='o',
        label='Excluding Vacant Land'
    )
    ax.set_xlabel(x_label)
    # The plotted column is a percentage, so the axis is labelled in %, not $.
    ax.set_ylabel('Median Tax Change (%)')
    ax.set_title(figure_title)

    # Ensure 0 is included on the y-axis, with 5% headroom on whichever side
    # holds data so the extreme marker is not drawn on the frame itself.
    lowest = summary['median_tax_change_pct'].min()
    highest = summary['median_tax_change_pct'].max()
    lower_limit = min(1.05 * lowest, 0)
    upper_limit = max(1.05 * highest, 0)
    if lower_limit < upper_limit:
        # Skipped when every value is 0 (or missing): identical limits would
        # give a degenerate axis, so matplotlib's default range is kept.
        ax.set_ylim(lower_limit, upper_limit)
    ax.axhline(0, color='black', linewidth=1, linestyle='dotted')

    fig.tight_layout()
    plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Set a modern style
sns.set_theme(style="whitegrid", font_scale=1.15)

# Upside-down bar graphs of the median tax change percent per quintile,
# excluding vacant land. One chart is drawn per grouping variable.
# Each entry: (summary table, quintile label column, chart title).
inverted_bar_charts = [
    (non_vacant_income_quintile_summary, 'median_income_quintile',
     'Median Tax Change by Neighborhood Median Income (Excl. Vacant Land)'),
    (non_vacant_minority_quintile_summary, 'minority_pct_quintile',
     'Median Tax Change by Minority Percentage Quintile (Excl. Vacant Land)'),
]

for summary, quintile_col, chart_title in inverted_bar_charts:
    # Plain float array so ranking and abs() below are purely positional.
    pct_changes = summary['median_tax_change_pct'].to_numpy(dtype=float)
    quintile_labels = summary[quintile_col].astype(str).tolist()
    # NOTE(review): bars show magnitudes only, so a positive change would be
    # drawn exactly like a negative one; the printed label keeps the sign.
    magnitudes = np.abs(pct_changes)

    # Color mapping: the largest value gets the lightest green and the most
    # negative value the darkest (argsort of argsort yields each bar's rank).
    palette = sns.color_palette("Greens", n_colors=len(pct_changes))
    shade_ranks = np.argsort(np.argsort(-pct_changes))
    bar_colors = [palette[rank] for rank in shade_ranks]

    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(
        quintile_labels,
        magnitudes,
        color=bar_colors,
        edgecolor='black',
        width=0.7
    )

    # Remove y-axis and axis labels; the values are printed inside the bars
    ax.yaxis.set_visible(False)
    ax.set_ylabel("")
    ax.set_xlabel("")
    ax.set_title(chart_title, weight='bold', pad=30)

    # Remove all spines (including bottom)
    sns.despine(ax=ax, left=True, right=True, top=True, bottom=True)

    # Add value labels (bold, % sign) centered inside each bar
    for bar, pct in zip(bars, pct_changes):
        ax.annotate(
            f"{pct:.1f}%",
            xy=(bar.get_x() + bar.get_width() / 2, bar.get_height() / 2),
            xytext=(0, 0),
            textcoords="offset points",
            ha='center', va='center',
            fontsize=13, color='black', fontweight='bold'
        )

    # Move x-tick labels to the top
    ax.xaxis.set_ticks_position('top')
    ax.xaxis.set_label_position('top')
    plt.xticks(fontweight='bold')

    # Reversed limits (max, 0) make the bars hang down from the top edge,
    # with 10% headroom below the longest bar.
    depth = np.nanmax(magnitudes) * 1.1 if magnitudes.size else 0.0
    if depth > 0:
        ax.set_ylim(depth, 0)
    else:
        # Nothing to scale against (all zero or missing): just flip the axis.
        ax.invert_yaxis()

    fig.tight_layout()
    plt.show()


In [None]:
# Restrict df to property categories whose code starts with "R" (residential).
# na=False treats rows with a missing PROPERTY_CATEGORY as non-residential;
# without it .str.startswith yields NaN for those rows and the boolean
# selection raises instead of filtering.
df_residential = df[df['PROPERTY_CATEGORY'].str.startswith("R", na=False)].copy()

# --- Repeat the block group summary and quintile analysis for residential only ---

# Split the residential parcels with the same filter used for the full data set
gdf_residential_filtered, non_vacant_residential_gdf = filter_data(df_residential)

# Calculate block group summaries (residential only)
census_block_groups_res = calculate_block_group_summary(gdf_residential_filtered)
non_vacant_block_summary_res = calculate_block_group_summary(non_vacant_residential_gdf)

# Create comparison plots of the median tax change (residential only)
plot_comparison(
    census_block_groups_res, non_vacant_block_summary_res, 
    'median_income', 'median_tax_change_pct', 
    'Median Tax Change vs. Median Income (Residential Only)', 
    'Median Income by Census Block Group ($)'
)

plot_comparison(
    census_block_groups_res, non_vacant_block_summary_res,
    'minority_pct', 'median_tax_change_pct',
    'Median Tax Change vs. Minority Percentage (Residential Only)',
    'Minority Population Percentage by Census Block Group'
)

plot_comparison(
    census_block_groups_res, non_vacant_block_summary_res,
    'black_pct', 'median_tax_change_pct',
    'Median Tax Change vs. Black Percentage (Residential Only)',
    'Black Population Percentage by Census Block Group'
)

# Calculate and print correlations (residential only)
correlations_res = calculate_correlations(census_block_groups_res, non_vacant_block_summary_res)
for key, value in correlations_res.items():
    print(f"[Residential] Correlation {key}: {value:.4f}")

# Create and display quintile summaries (income quintiles exclude negative/zero incomes, residential only)
income_quintile_summary_res = create_quintile_summary(gdf_residential_filtered, 'median_income', 'median_income')
non_vacant_income_quintile_summary_res = create_quintile_summary(non_vacant_residential_gdf, 'median_income', 'median_income')
minority_quintile_summary_res = create_quintile_summary(gdf_residential_filtered, 'minority_pct', 'minority_pct')
non_vacant_minority_quintile_summary_res = create_quintile_summary(non_vacant_residential_gdf, 'minority_pct', 'minority_pct')

print("\n[Residential] Tax impact by income quintile (all properties):")
display(income_quintile_summary_res)
print("\n[Residential] Tax impact by income quintile (excluding vacant land):")
display(non_vacant_income_quintile_summary_res)
print("\n[Residential] Tax impact by minority percentage quintile (all properties):")
display(minority_quintile_summary_res)
print("\n[Residential] Tax impact by minority percentage quintile (excluding vacant land):")
display(non_vacant_minority_quintile_summary_res)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Set a modern style
sns.set_theme(style="whitegrid", font_scale=1.15)

# Upside-down bar graphs of the median tax change percent per quintile,
# excluding vacant land, residential only. One chart per grouping variable.
# Each entry: (summary table, quintile label column, chart title).
inverted_bar_charts_res = [
    (non_vacant_income_quintile_summary_res, 'median_income_quintile',
     'Median Tax Change by Neighborhood Median Income (Excl. Vacant Land, Residential Only)'),
    (non_vacant_minority_quintile_summary_res, 'minority_pct_quintile',
     'Median Tax Change by Minority Percentage Quintile (Excl. Vacant Land, Residential Only)'),
]

for summary, quintile_col, chart_title in inverted_bar_charts_res:
    # Plain float array so ranking and abs() below are purely positional.
    pct_changes = summary['median_tax_change_pct'].to_numpy(dtype=float)
    quintile_labels = summary[quintile_col].astype(str).tolist()
    # NOTE(review): bars show magnitudes only, so a positive change would be
    # drawn exactly like a negative one; the printed label keeps the sign.
    magnitudes = np.abs(pct_changes)

    # Color mapping: the largest value gets the lightest green and the most
    # negative value the darkest (argsort of argsort yields each bar's rank).
    palette = sns.color_palette("Greens", n_colors=len(pct_changes))
    shade_ranks = np.argsort(np.argsort(-pct_changes))
    bar_colors = [palette[rank] for rank in shade_ranks]

    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(
        quintile_labels,
        magnitudes,
        color=bar_colors,
        edgecolor='black',
        width=0.7
    )

    # Remove y-axis and axis labels; the values are printed inside the bars
    ax.yaxis.set_visible(False)
    ax.set_ylabel("")
    ax.set_xlabel("")
    ax.set_title(chart_title, weight='bold', pad=30)

    # Remove all spines (including bottom)
    sns.despine(ax=ax, left=True, right=True, top=True, bottom=True)

    # Add value labels (bold, % sign) centered inside each bar
    for bar, pct in zip(bars, pct_changes):
        ax.annotate(
            f"{pct:.1f}%",
            xy=(bar.get_x() + bar.get_width() / 2, bar.get_height() / 2),
            xytext=(0, 0),
            textcoords="offset points",
            ha='center', va='center',
            fontsize=13, color='black', fontweight='bold'
        )

    # Move x-tick labels to the top
    ax.xaxis.set_ticks_position('top')
    ax.xaxis.set_label_position('top')
    plt.xticks(fontweight='bold')

    # Reversed limits (max, 0) make the bars hang down from the top edge,
    # with 10% headroom below the longest bar.
    depth = np.nanmax(magnitudes) * 1.1 if magnitudes.size else 0.0
    if depth > 0:
        ax.set_ylim(depth, 0)
    else:
        # Nothing to scale against (all zero or missing): just flip the axis.
        ax.invert_yaxis()

    fig.tight_layout()
    plt.show()
