# League of Conservation Voters (LCV) Analysis
## Visualizations + Statistical Tests 

For details on data processing, please refer to the processing code for [<u>LCV</u>](https://github.com/cassiebuhler/datasets/tree/main/lcv). 

In this script, we are visualizing the resulting LCV data and running statistical tests.

In [None]:
import os
import geopandas as gpd
import ibis
from ibis import _
from cng.utils import *
from cng.h3 import *
import ibis.expr.datatypes as dt  
import re
from minio import Minio
import altair as alt

from utils import * 

# Connection setup: open a DuckDB connection through ibis and load the LCV
# roll-call vote CSVs as a lazy ibis table.
# duckdb_install_h3 comes from the cng helpers (not shown here); presumably it
# installs the DuckDB h3 extension so the connection below can load it -- TODO confirm.
duckdb_install_h3()
con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
# NOTE(review): the thread count is hard-coded to 100; confirm this suits the
# machine the notebook runs on.
con.raw_sql("SET THREADS=100;")
# set_secrets is a project helper (not shown); presumably it configures the
# object-store credentials on this connection -- verify against its definition.
set_secrets(con)

# Location of the LCV congressional vote files in object storage.
bucket = "public-election"
s3_prefix = "league_of_conservation_voters/congressional_votes"

# Glob pattern matching the CSV files under the roll_call_votes prefix.
url = f's3://{bucket}/{s3_prefix}/roll_call_votes/**.csv'
# .distinct() removes rows that are exact duplicates across all columns.
lcv_df = con.read_csv(url).distinct()

# LCV votes

Computing the pro-conservation vote rate for each party and year.

In [None]:
# Annual pro-conservation vote rate for each member of Congress.
#
# Only 'pro' and 'anti' votes are counted. Keeping exactly these two values is
# equivalent to first excluding 'missing'/'unknown'/'excused' and then keeping
# 'pro'/'anti'/'missing'. Rows without a district get the placeholder
# "STATEWIDE" (presumably senators -- confirm against the data) so that the
# concatenated member_id still has a district component.
member_year = (
    lcv_df
    .filter(_.vote_type.isin(["pro", "anti"]))
    .mutate(district_str=_.district.cast("string").fillna("STATEWIDE"))
    .mutate(
        member_id=(
            _.first_name + '-' + _.last_name + '-' + _.state
            + '-' + _.chamber + '-' + _.district_str
        )
    )
    # One row per member per year (party and chamber are carried as keys).
    .group_by("year", "party", "chamber", "member_id")
    .agg(
        total_votes=_.count(),
        total_pro_votes=(_.vote_type == "pro").sum(),
    )
    .mutate(pro_rate=_.total_pro_votes / _.total_votes)
    # Expand the one-letter party codes into display names.
    .mutate(
        party=_.party.substitute(
            {'I': "Independent", "R": "Republican", "D": "Democrat"}
        )
    )
    .order_by("total_votes")
)

# Collapse the member-level rates to one row per (year, party):
#   med_pro_rate   - median of the members' annual pro-conservation rates
#   n_members      - number of distinct member ids in the group
#   n_member_years - number of non-null member rates in the group
party_year = (
    member_year.group_by("year", "party")
    .aggregate(
        med_pro_rate=_.pro_rate.median(),
        n_members=_.member_id.nunique(),
        n_member_years=_.pro_rate.count(),
    )
    .order_by("year")
)


# Plot the median pro-conservation vote rate by party.
# year_line is imported from the project's utils module (not shown here);
# presumably it draws one line per `group` value across years and formats the
# y axis according to `stat` -- TODO confirm against utils.year_line.
chart_cons_time = year_line(
    party_year,
    y="med_pro_rate",
    group="party",
    # Titles are passed as lists of strings; presumably each element is
    # rendered as its own line of text -- verify against year_line.
    title=["Median Pro-Conservation Vote Rate by Party" ],
    y_title=["Pro-Conservation","Vote Rate (Median)"],
    stat="percent",
)

# Uncomment to write the chart to a PNG file.
# chart_cons_time.save('lcv_party.png', ppi=300)

# Bare expression as the last statement so the notebook cell displays the chart.
chart_cons_time

# Binomial Panel Regression of Pro-Conservation Voting by Party

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd

# Binomial regression of pro-conservation voting on party, with year and
# chamber indicators and standard errors clustered by member.

# Materialise the member-year table as a pandas DataFrame.
df = member_year.execute().copy()

# Drop member-years missing any model input, and those with no counted votes
# (the vote rate is undefined when total_votes is 0).
df = df.dropna(subset=["total_votes", "total_pro_votes", "party", "year", "chamber", "member_id"])
df = df[df["total_votes"] > 0].copy()

# Recompute the proportion of pro votes from the counts.
df["pro_rate"] = df["total_pro_votes"] / df["total_votes"]

# Normalise party labels to upper case; one-letter codes are expanded and
# anything outside the three recognised parties is dropped.
df["party_clean"] = df["party"].astype(str).str.strip().str.upper().replace(
    {"D": "DEMOCRAT", "R": "REPUBLICAN", "I": "INDEPENDENT"}
)
df = df[df["party_clean"].isin(["DEMOCRAT", "REPUBLICAN", "INDEPENDENT"])].copy()
# Explicit category order puts DEMOCRAT first, which the formula's default
# treatment coding uses as the reference level.
df["party_clean"] = pd.Categorical(
    df["party_clean"], categories=["DEMOCRAT", "REPUBLICAN", "INDEPENDENT"]
)
# Drop levels with no rows so an absent party cannot add an all-zero column
# to the design matrix.
df["party_clean"] = df["party_clean"].cat.remove_unused_categories()

# Grouped-binomial GLM (logit link) on the proportion of pro votes.
# The number of votes behind each proportion is supplied as var_weights, which
# is how statsmodels expects trial counts for a proportion response.
# freq_weights would instead treat each member-year as total_votes replicated
# observations and inflate the residual degrees of freedom.
m = smf.glm(
    "pro_rate ~ C(party_clean) + C(year) + C(chamber)",
    data=df,
    family=sm.families.Binomial(),
    var_weights=df["total_votes"],
).fit(
    # Members appear in several years, so cluster the covariance by member.
    cov_type="cluster",
    cov_kwds={"groups": df["member_id"]},
)

print(m.summary())
