# LandVote Analysis
## Visualizations + Statistical Tests 

For details on data processing, please refer to the code for 
- [<u>LandVote</u>](https://github.com/cassiebuhler/datasets/tree/main/landvote)
- [<u>Census boundaries</u>](https://github.com/cassiebuhler/datasets/tree/main/census)
- [<u>Political party</u>](https://github.com/cassiebuhler/datasets/tree/main/political_parties)

In this script, we are visualizing the resulting LandVote data and running statistical tests.

In [1]:
import ibis
from ibis import _
import ibis.expr.datatypes as dt  
import re
from cng.utils import *
from cng.h3 import *
from minio import Minio
import altair as alt
from utils import *

# Database setup: call the H3 install helper, open an ibis DuckDB connection
# with the "spatial" extension loaded, then apply secrets to that connection.
# NOTE(review): duckdb_install_h3 and set_secrets arrive via the wildcard
# imports above — confirm which module defines them and what they configure.
duckdb_install_h3()
con = ibis.duckdb.connect(extensions = ["spatial"])
# presumably sets the credentials used for the s3:// reads below — TODO confirm
set_secrets(con)


# Load data 

In [2]:
# LandVote table with party labels, read straight from object storage.
landvote_raw = con.read_parquet('s3://public-tpl/landvote/landvote_party.parquet')

# Drop the geometry column, then collapse multi-county measures so that each
# landvote_id is represented by a single row.
votes = get_unique_rows(landvote_raw.drop('geom'))


### Sensitivity analysis


In [3]:
## Sensitivity analysis: uncomment the two lines below to reclassify every
## jurisdiction whose party is "Mixed", "None", "Other", or missing as `party_val`.
# party_val = "Democrat" 
# votes = votes.mutate(party=_.party.substitute({'Mixed':party_val,"None":party_val,"Other":party_val,None:party_val}))

### Overall Pass Rate

In [4]:
# Percentage of measures that passed over the entire dataset
# (no party filter is applied to `votes` here).
get_passed(votes)

77.25% Measures Passed from 1988 - 2024 



# Pass rates by jurisdiction party 

In [5]:
# Yearly pass fraction per jurisdiction party.  A measure counts as passed when
# its status is "Pass" or "Pass*"; only Democrat and Republican rows are kept.
is_pass = _.status.isin(["Pass", "Pass*"])
major_party_votes = votes.filter(_.party.isin(["Democrat", "Republican"]))

pass_rates = (
    major_party_votes
    .group_by("year", "party")
    .agg(pass_fraction=is_pass.cast("int").mean())
)
df = pass_rates.order_by("year").execute()

# One line per party, drawn with the project's year_line helper.
line_options = dict(
    y="pass_fraction",
    group="party",
    title="Conservation Measure Pass Rates by Jurisdiction Party (1988–2025)",
    y_title="% Passed",
    stat='percent',
)
chart = year_line(df, **line_options)

# Write the figure to disk, then leave the chart as the cell's display value.
chart.save('percent_passed_party.png', ppi=200)
chart

### Two-tailed Z test

Let $p_D$ and $p_R$ be the ballot measure pass rates in Democratic and Republican jurisdictions, respectively.

$H_0: p_D=p_R \\$
$H_A: p_D\neq p_R$

A two-tailed z-test shows no statistically significant difference in passage rates between Democratic and Republican measures (z = 1.01, p = 0.31, two-sided).


In [6]:
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

# Pull the table into pandas (one row per landvote_id) and keep the two
# major parties only.
df = votes.execute()
df = df.loc[df["party"].isin(["Democrat", "Republican"])].copy()

# 0/1 indicator: 1 when the status is "Pass" or "Pass*".
df["passed"] = df["status"].isin(["Pass", "Pass*"]).astype(int)

# Per-party tallies.  groupby sorts its keys, so the rows (and therefore the
# arrays below) are ordered Democrat, Republican.
summary = df.groupby("party")["passed"].agg(["sum", "count"])
count = summary["sum"].to_numpy()     # number passed
nobs = summary["count"].to_numpy()    # total measures

# Two-sample z-test for equal proportions; "two-sided" is the default
# alternative and is spelled out here for clarity.
z_stat, p_two_sided = proportions_ztest(count, nobs, alternative="two-sided")

print("z =", z_stat)
print("two-sided p =", p_two_sided)


z = 1.007097081588427
two-sided p = 0.31388812353156803


## Equivalence Test - two one-sided tests (TOST)

Let $\Delta = p_D-p_R$ and let $\delta$ be the equivalence margin.

$H_{0}: \Delta \leq - \delta$ or $\Delta \geq \delta\\$ 
$H_{A}: -\delta < \Delta < \delta\\$

Short conclusion:
Both one-sided p-values are $<0.05$, so we reject non-equivalence: pass rates are statistically equivalent within $\pm5$ percentage points.

Detailed:
The estimated difference in conservation measure passage rates between Democratic and Republican jurisdictions is 1.6 percentage points (95% CI: $-1.6$ to $4.8$). Using a two one-sided tests (TOST) equivalence framework with a $\pm5$ percentage point margin, we reject non-equivalence ($p < 0.05$ for both bounds), indicating statistically equivalent passage rates across parties.

In [7]:
import numpy as np
from statsmodels.stats.proportion import test_proportions_2indep, confint_proportions_2indep

# One row per landvote_id, two major parties only, plus a 0/1 pass indicator
# ("Pass" or "Pass*" counts as passed).
df = votes.execute()
df = df.loc[df["party"].isin(["Democrat", "Republican"])].copy()
df["passed"] = df["status"].isin(["Pass", "Pass*"]).astype(int)

# Passed count (x) and total measures (n) for each party.
g = df.groupby("party")["passed"].agg(["sum", "count"])
xD, nD = (int(g.at["Democrat", col]) for col in ("sum", "count"))
xR, nR = (int(g.at["Republican", col]) for col in ("sum", "count"))
counts = (xD, nD, xR, nR)

delta = 0.05   # equivalence margin: ±5 percentage points
alpha = 0.05   # threshold each one-sided p-value is compared against

# TOST on (pD - pR): equivalence is declared only if both one-sided nulls
# are rejected.
# Lower bound -- H0: (pD - pR) <= -delta  vs  H1: (pD - pR) > -delta
p_lo = test_proportions_2indep(*counts, value=-delta, alternative="larger").pvalue
# Upper bound -- H0: (pD - pR) >=  delta  vs  H1: (pD - pR) <  delta
p_hi = test_proportions_2indep(*counts, value=delta, alternative="smaller").pvalue

equivalent = p_lo < alpha and p_hi < alpha

# Point estimate and Wald confidence interval for the difference, reported
# next to the test result.
diff = xD / nD - xR / nR
ci_lo, ci_hi = confint_proportions_2indep(*counts, method="wald")

print("diff (pD - pR) =", diff)
print("95% CI =", (ci_lo, ci_hi))
print("TOST p-values =", (p_lo, p_hi))
print("Equivalent within ±delta?", equivalent)


diff (pD - pR) = 0.016303702009828003
95% CI = (-0.015658286938978437, 0.04826569095863444)
TOST p-values = (2.2524453542396927e-05, 0.020017487126888136)
Equivalent within ±delta? True


# Are measures proposed more in Democratic-voting jurisdictions?

In [8]:
# Number of measures per year and jurisdiction party (Democrat / Republican only).
major_party_votes = votes.filter(_.party.isin(["Democrat", "Republican"]))
yearly_counts = major_party_votes.group_by(_.year, _.party).aggregate(freq=_.count())
df = yearly_counts.order_by('year').execute()

# One line per party, drawn with the project's year_line helper.
line_options = dict(
    y="freq",
    group="party",
    title="Frequency of Conservation Measures by Jurisdiction Party (1988–2025)",
    y_title="Number of Measures",
    stat='count',
)
chart = year_line(df, **line_options)

# Write the figure to disk, then leave the chart as the cell's display value.
chart.save('number_measures_party.png', ppi=200)
chart

# Do Democrat-voting jurisdictions approve more conservation funding?

In [9]:
# Median approved funding per year and party, computed over passed measures
# ("Pass" or "Pass*") in Democrat and Republican jurisdictions only.
is_major_party = _.party.isin(["Democrat", "Republican"])
is_pass = _.status.isin(["Pass", "Pass*"])

passed_major_party = votes.filter(is_major_party & is_pass)
yearly_median = (
    passed_major_party
    .group_by(_.year, _.party)
    .aggregate(amount_approved=_.conservation_funds_approved.median())
)
df = yearly_median.order_by('year').execute()

# One line per party, drawn with the project's year_line helper.
chart = year_line(
    df,
    y="amount_approved",
    group="party",
    title="Median Conservation Funds Approved by Jurisdiction Party (1988-2025)",
    y_title="Funds ($ millions)",
    stat='median',
)

# Write the figure to disk, then leave the chart as the cell's display value.
chart.save('amount_approved_party.png', ppi=200)
chart


# Conservation funds approved is heavily skewed

In [10]:

import numpy as np

# Passed measures only ("Pass" or "Pass*"), pulled into pandas.
df = votes.filter(_.status.isin(["Pass", "Pass*"])).execute()

# Keep the two major parties and strictly positive amounts in a single step.
# The comparison `> 0` is False for missing values, so rows without an amount
# are dropped as well.  The explicit .copy() makes df an independent frame:
# adding columns to a filtered slice without it raises pandas'
# SettingWithCopyWarning (log_amt below, and any column added to df later).
keep = (
    df["party"].isin(["Democrat", "Republican"])
    & (df["conservation_funds_approved"] > 0)
)
df = df[keep].copy()

# Natural log of the approved amount; safe because only positive values remain.
df["log_amt"] = np.log(df["conservation_funds_approved"])

# Per-party summary on both the raw and the log scale.
table = df.groupby("party").agg(
    n_measures=("conservation_funds_approved", "count"),
    mean_amt=("conservation_funds_approved", "mean"),
    median_amt=("conservation_funds_approved", "median"),
    mean_log_amt=("log_amt", "mean"),
    median_log_amt=("log_amt", "median"),
    total_amt=("conservation_funds_approved", "sum"),
)

print(table)



            n_measures      mean_amt  median_amt  mean_log_amt  \
party                                                            
Democrat          1460  4.587654e+07   4000000.0     15.439897   
Republican         750  3.287483e+07   5940000.0     15.744658   

            median_log_amt     total_amt  
party                                     
Democrat         15.201805  6.697974e+10  
Republican       15.597217  2.465613e+10  


In [11]:
# Histogram of approved amounts on the raw scale.  The x-axis labelExpr
# divides each tick value by 1e9, matching the "$ billions" axis title.
funds_axis = alt.Axis(
    format="$,.0f",
    labelExpr="datum.value / 1000000000",
    titleFontSize=16,
    labelFontSize=12,
)
count_axis = alt.Axis(titleFontSize=16, labelFontSize=12)

x_funds = alt.X(
    "conservation_funds_approved:Q",
    bin=alt.Bin(maxbins=10),
    title="Conservation funds approved ($ billions)",
    axis=funds_axis,
)
y_count = alt.Y("count()", title="Number of measures", axis=count_axis)

chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(x=x_funds, y=y_count)
    .properties(
        title=alt.TitleParams(text="Distribution of Conservation Funds", fontSize=16),
        width=340,
        height=400,
    )
)

# Write the figure to disk, then leave the chart as the cell's display value.
chart.save('dist_conservation_funds.png', ppi=300)
chart

In [12]:
import numpy as np

# Base-10 log of the approved amount, used for this histogram only.
# NOTE: the summary table and the Welch test in this notebook use the natural
# log (np.log); this plot uses np.log10.
# Assumes df still holds the positive-amount rows prepared earlier — TODO confirm
# when running cells out of order.
df["log_funds"] = np.log10(df["conservation_funds_approved"])

label_fonts = dict(titleFontSize=16, labelFontSize=12)

x_log_funds = alt.X(
    "log_funds:Q",
    bin=alt.Bin(maxbins=40),
    title="log(Conservation funds approved)",
    axis=alt.Axis(**label_fonts),
)
y_count = alt.Y("count()", title="Number of measures", axis=alt.Axis(**label_fonts))

chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(x=x_log_funds, y=y_count)
    .properties(
        title=alt.TitleParams(text="Distribution of Conservation Funds (Log)", fontSize=16),
        width=340,
        height=400,
    )
)

# Write the figure to disk, then leave the chart as the cell's display value.
chart.save('log_dist_conservation_funds.png', ppi=300)
chart

## Welch's t-test on log-scale funds

In [13]:
import numpy as np
from scipy.stats import ttest_ind

funds_col = "conservation_funds_approved"

# Passed measures ("Pass" or "Pass*") in Democrat / Republican jurisdictions
# with a recorded, strictly positive approved amount.
df = votes.execute()
keep = (
    df["party"].isin(["Democrat", "Republican"])
    & df["status"].isin(["Pass", "Pass*"])
    & df[funds_col].notna()
    & (df[funds_col] > 0)
)
df = df[keep].copy()

# Natural-log amounts for each party.
xD = np.log(df.loc[df["party"] == "Democrat", funds_col])
xR = np.log(df.loc[df["party"] == "Republican", funds_col])

# equal_var=False selects Welch's unequal-variance t-test.
t, p = ttest_ind(xD, xR, equal_var=False)
print("Welch t =", t, "p =", p)


Welch t = -3.932390064971647 p = 8.779667085253896e-05
