# LandVote Analysis
## Visualizations + Statistical Tests 

For details on data processing, please refer to the code for 
- [<u>LandVote</u>](https://github.com/cassiebuhler/datasets/tree/main/landvote)
- [<u>Census boundaries</u>](https://github.com/cassiebuhler/datasets/tree/main/census)
- [<u>Political party</u>](https://github.com/cassiebuhler/datasets/tree/main/political_parties)

In this script, we are visualizing the resulting LandVote data and running statistical tests.

In [1]:
import ibis
from ibis import _
import ibis.expr.datatypes as dt  
import re
from cng.utils import *
from cng.h3 import *
from minio import Minio
import altair as alt
from utils import *

# Helper from the cng star imports — presumably installs DuckDB's H3 extension; confirm in cng.
duckdb_install_h3()
# Open a DuckDB connection through ibis, requesting the "spatial" extension.
con = ibis.duckdb.connect(extensions = ["spatial"])
# Helper from the star imports — presumably registers the S3 credentials used by
# the read_parquet calls below; verify against its definition.
set_secrets(con)

# Load data 

In [2]:
# Read the party-annotated LandVote table, discard the geometry column, and
# collapse multi-county measures to one row per landvote_id.
votes = get_unique_rows(
    con.read_parquet("s3://public-tpl/landvote/landvote_party.parquet").drop("geom")
)


# Charts

## Pass rates by jurisdiction party 

In [3]:
# Yearly pass rate per jurisdiction party; only Democrat and Republican
# jurisdictions are kept.
df = (
    votes.filter(_.party.isin(["Democrat", "Republican"]))
    .group_by(["year", "party"])
    .aggregate(
        # Mean of a 0/1 indicator == fraction of measures whose status is Pass or Pass*.
        pass_fraction=_.status.isin(["Pass", "Pass*"]).cast("int").mean()
    )
    .order_by("year")
    .execute()
)

# year_line is a plotting helper from the star imports (defined outside this notebook).
chart = year_line(
    df,
    y="pass_fraction",
    group="party",
    stat="percent",
    y_title="% Passed",
    title="Conservation Measure Pass Rates by Jurisdiction Party (1988–2025)",
)

# Write a PNG copy, then leave the chart as the cell's last expression so it renders inline.
chart.save("percent_passed_party.png", ppi=200)
chart

## Outcome by finance mechanism

In [4]:
# Pass rate per (finance mechanism group, jurisdiction party).
df = (
    votes.mutate(
        # Lower-cased, null-safe copies used for the substring matching below.
        other_comment_lc=_.other_comment.fill_null("").lower(),
        fm_lc=_.finance_mechanism.fill_null("").lower(),
    )
    .mutate(
        # "Bond" takes precedence; otherwise "Tax" when the mechanism mentions tax,
        # or when the mechanism is exactly "Other" and the free-text comment
        # mentions tax; everything else is "Other".
        mechanism_group=_.fm_lc.contains("bond").ifelse(
            "Bond",
            (
                _.fm_lc.contains("tax")
                | ((_.finance_mechanism == "Other") & _.other_comment_lc.contains("tax"))
            ).ifelse("Tax", "Other"),
        )
    )
    .filter(
        # Keep only Bond/Tax measures in Democrat/Republican jurisdictions.
        _.mechanism_group != "Other",
        _.party.isin(["Democrat", "Republican"]),
    )
    .group_by(["mechanism_group", "party"])
    .aggregate(pass_fraction=_.status.isin(["Pass", "Pass*"]).cast("int").mean())
    .execute()
)

# bar_chart is a plotting helper from the star imports (defined outside this notebook).
chart = bar_chart(
    df,
    y="pass_fraction",
    group="party",
    stat="percent",
    y_title="% Passed",
    title="Conservation Measure Pass Rates by Jurisdiction Party and Finance Mechanism",
)

# Write a PNG copy, then leave the chart as the cell's last expression so it renders inline.
chart.save("party_mechanism.png", ppi=200)
chart

## Equivalence Test - two one-sided tests (TOST) for a difference in proportions

Let $\Delta = p_D - p_R$ be the difference in pass rates between Democratic and Republican jurisdictions, and let $\delta$ be the equivalence margin.

$H_{0}: \Delta \leq - \delta$ or $\Delta \geq \delta\\$ 
$H_{A}: -\delta < \Delta < \delta\\$

Short conclusion:
Both one-sided p-values are $<0.05$, so we reject the null hypothesis of non-equivalence.
Pass rates are statistically equivalent within $\pm5$ percentage points.


In [5]:
import numpy as np
from statsmodels.stats.proportion import test_proportions_2indep, confint_proportions_2indep

def tost(votes, delta=0.05, alpha=0.05):
    """Run a TOST equivalence test on Democrat vs. Republican pass rates.

    Two one-sided tests for a difference in independent proportions are run
    against the margins ``-delta`` and ``+delta``. The pass rates are declared
    equivalent when both one-sided p-values fall below ``alpha``. Results are
    printed; nothing is returned.

    Parameters
    ----------
    votes
        Table expression whose ``.execute()`` yields a DataFrame with a
        ``party`` column and a ``status`` column. Only rows whose party is
        "Democrat" or "Republican" are used; a measure counts as passed when
        its status is "Pass" or "Pass*".
    delta : float, default 0.05
        Equivalence margin on the difference in proportions (0.05 means
        ±5 percentage points). Must be positive.
    alpha : float, default 0.05
        Significance level for each one-sided test, also used as the alpha of
        the reported confidence interval. Must lie strictly between 0 and 1.

    Raises
    ------
    ValueError
        If ``delta`` is not positive or ``alpha`` is not in (0, 1).
    KeyError
        If either party has no rows in ``votes``.
    """
    # Validate before touching the data so a bad margin fails fast.
    if not delta > 0:
        raise ValueError(f"delta must be positive, got {delta!r}")
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be in (0, 1), got {alpha!r}")

    df = votes.execute()  # one row per landvote_id
    df = df[df["party"].isin(["Democrat", "Republican"])].copy()
    df["passed"] = df["status"].isin(["Pass", "Pass*"]).astype(int)

    # Per-party successes (sum of the 0/1 indicator) and trials (row count).
    g = df.groupby("party")["passed"].agg(["sum", "count"])
    xD, nD = int(g.loc["Democrat", "sum"]), int(g.loc["Democrat", "count"])
    xR, nR = int(g.loc["Republican", "sum"]), int(g.loc["Republican", "count"])

    pD = xD / nD
    pR = xR / nR
    diff = pD - pR

    # TOST:
    # 1) H0: (pD - pR) <= -delta  vs  H1: (pD - pR) > -delta
    p_lo = test_proportions_2indep(
        xD, nD, xR, nR, value=-delta, alternative="larger"
    ).pvalue

    # 2) H0: (pD - pR) >=  delta  vs  H1: (pD - pR) <  delta
    p_hi = test_proportions_2indep(
        xD, nD, xR, nR, value=delta, alternative="smaller"
    ).pvalue

    equivalent = (p_lo < alpha) and (p_hi < alpha)

    # Score CI for the difference in proportions at the same alpha (95% by default).
    # NOTE(review): a TOST at level alpha conventionally pairs with a
    # (1 - 2*alpha) interval; this interval is descriptive only and does not
    # drive the equivalence decision above.
    ci_lo, ci_hi = confint_proportions_2indep(
        xD, nD, xR, nR, method="score", alpha=alpha
    )

    print(f"Dem pass rate: {pD:.3f} ({xD}/{nD})")
    print(f"Rep pass rate: {pR:.3f} ({xR}/{nR})")
    print(f"Difference (D-R): {diff:.4f}")
    print(f"{100 * (1 - alpha):g}% CI: ({ci_lo:.4f}, {ci_hi:.4f})")
    print(f"TOST p-values: lower={p_lo:.4g}, upper={p_hi:.4g}")
    print(f"Equivalent within ±{delta}? {equivalent}")

In [6]:
# Baseline run on the de-duplicated table; tost() itself keeps only
# Democrat/Republican rows and prints its results.
tost(votes)

Dem pass rate: 0.776 (1504/1937)
Rep pass rate: 0.760 (786/1034)
Difference (D-R): 0.0163
95% CI: (-0.0152, 0.0488)
TOST p-values: lower=2.252e-05, upper=0.02002
Equivalent within ±0.05? True


## Sensitivity analysis
Assigning jurisdictions without a Democrat/Republican label (Mixed, None, Other, or missing) to Republican


In [7]:
## rerun TOST but reclassify parties
# Start from a fresh, de-duplicated copy so earlier reassignments do not carry over.
votes = get_unique_rows(
    con.read_parquet('s3://public-tpl/landvote/landvote_party.parquet').drop('geom')
)

party_val = "Republican"
# Every non-major-party label maps to party_val; the None key is meant to
# catch null party values.
votes = votes.mutate(
    party=_.party.substitute(
        dict.fromkeys(['Mixed', "None", "Other", None], party_val)
    )
)
tost(votes)

Dem pass rate: 0.776 (1504/1937)
Rep pass rate: 0.766 (869/1135)
Difference (D-R): 0.0108
95% CI: (-0.0197, 0.0421)
TOST p-values: lower=5.298e-05, upper=0.006602
Equivalent within ±0.05? True


Assigning jurisdictions without a Democrat/Republican label (Mixed, None, Other, or missing) to Democrat



In [8]:
## rerun TOST but reclassify parties
# Start from a fresh, de-duplicated copy so earlier reassignments do not carry over.
votes = get_unique_rows(
    con.read_parquet('s3://public-tpl/landvote/landvote_party.parquet').drop('geom')
)

party_val = "Democrat"
# Every non-major-party label maps to party_val; the None key is meant to
# catch null party values.
votes = votes.mutate(
    party=_.party.substitute(
        dict.fromkeys(['Mixed', "None", "Other", None], party_val)
    )
)
tost(votes)

Dem pass rate: 0.779 (1587/2038)
Rep pass rate: 0.760 (786/1034)
Difference (D-R): 0.0185
95% CI: (-0.0126, 0.0507)
TOST p-values: lower=1.026e-05, upper=0.0266
Equivalent within ±0.05? True
