In [1]:
import pandas
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

%matplotlib inline
%config InlineBackend.figure_format = "retina"

sns.set(rc={"figure.figsize" : (25, 15)})
sns.set(font_scale=2)
sns.set_style("ticks")

In [2]:
CROSSTAB_COLS = ["CD1", "CD2", "CD3", "CD4", "VOTED_2020_TRUMP", "VOTED_2020_BIDEN", "VOTED_2020_DIDNT",
                 "CHILD_LT_18", "EDU_NO_COLL", "EDU_COLL"]
CROSSTAB_COL_GROUPS = [["CD1", "CD2", "CD3", "CD4"], ["VOTED_2020_TRUMP", "VOTED_2020_BIDEN", "VOTED_2020_DIDNT"],
                       ["CHILD_LT_18"], ["EDU_NO_COLL", "EDU_COLL"]]
CONST_COLS = ["CANDIDATE", "LV"]

In [108]:
# source: https://sos.iowa.gov/elections/pdf/VRStatsArchive/2024/CongOct24.pdf
CD_REG_DF = pandas.DataFrame(
    [[129541 + 48380, 127321 + 42982, 134188 + 44986, 85143 + 38710],
     [149869 + 34385, 149972 + 30288, 151254 + 33128, 191956 + 45281],
     [2394 + 584 + 1670 + 549, 2434 + 534 + 1509 + 528,
      2831 + 535 + 1674 + 514, 2388 + 493 + 1754 + 572]],
    index=["DEM", "REP", "OTHER"], columns=["CD1", "CD2", "CD3", "CD4"])

CD_REG_DF

Unnamed: 0,CD1,CD2,CD3,CD4
DEM,177921,170303,179174,123853
REP,184254,180260,184382,237237
OTHER,5197,5005,5554,5207


In [12]:
def f(weights):
    # TODO: I don't think this is right
    return 1 + ((np.std(weights) / np.mean(weights)) ** 2)

---

## Load Crosstab

In [13]:
crosstab_df = pandas.read_csv("data/iowa_2024/iowa_2024_crosstab.csv")

In [14]:
crosstab_df = crosstab_df[CONST_COLS + CROSSTAB_COLS].copy().fillna(0).set_index("CANDIDATE")

In [15]:
total_wgt = crosstab_df["LV"].loc["WGT"]
total_wgt

849

In [16]:
total_wgt - (crosstab_df["LV"].loc["HARRIS"] + crosstab_df["LV"].loc["TRUMP"])

78

In [17]:
other_df = crosstab_df[~crosstab_df.index.isin(["HARRIS", "TRUMP", "Total Unweighted Respondents", "WGT"])].copy()
other = other_df.sum(axis=0)
other.name = "OTHER"
other["LV"] = other["LV"] + 2
other

LV                  78.0
CD1                 20.0
CD2                 21.0
CD3                 18.0
CD4                 19.0
VOTED_2020_TRUMP    25.0
VOTED_2020_BIDEN    10.0
VOTED_2020_DIDNT    10.0
CHILD_LT_18         21.0
EDU_NO_COLL         51.0
EDU_COLL            24.0
Name: OTHER, dtype: float64

In [18]:
crosstab_df = pandas.concat(
    [crosstab_df[crosstab_df.index.isin(["HARRIS", "TRUMP"])], pandas.DataFrame([other])]).reset_index(names=["CANDIDATE"])

In [19]:
temp = None

for group_cols in CROSSTAB_COL_GROUPS:
    df = crosstab_df[CONST_COLS + group_cols].copy()
    if group_cols[0] != "CD1":
        unk_col = group_cols[0].split("_")[0] + "_" + "UNK"
        df[unk_col] = df["LV"] - df[group_cols].sum(axis=1)
    if temp is None:
        temp = df.copy()
    else:
        temp = temp.merge(df, on=CONST_COLS, how="left")

crosstab_df = temp.copy()

In [20]:
crosstab_df

Unnamed: 0,CANDIDATE,LV,CD1,CD2,CD3,CD4,VOTED_2020_TRUMP,VOTED_2020_BIDEN,VOTED_2020_DIDNT,VOTED_UNK,CHILD_LT_18,CHILD_UNK,EDU_NO_COLL,EDU_COLL,EDU_UNK
0,HARRIS,399.0,113.0,99.0,100.0,87.0,12.0,322.0,48.0,17.0,98.0,301.0,205.0,191.0,3.0
1,TRUMP,372.0,78.0,97.0,91.0,107.0,299.0,13.0,45.0,15.0,114.0,258.0,270.0,97.0,5.0
2,OTHER,78.0,20.0,21.0,18.0,19.0,25.0,10.0,10.0,33.0,21.0,57.0,51.0,24.0,3.0


In [21]:
crosstab_df.set_index("CANDIDATE")["LV"] / crosstab_df["LV"].sum()

CANDIDATE
HARRIS    0.469965
TRUMP     0.438163
OTHER     0.091873
Name: LV, dtype: float64

In [22]:
0.469965 - 0.438163

0.031802

---

## Cell Weighting

In [109]:
temp = crosstab_df.set_index("CANDIDATE")[["CD1", "CD2", "CD3", "CD4"]].copy()
# temp = temp.div(temp.sum(axis=1), axis=0).rename(columns={c : c + "_pct" for c in temp.columns})

In [110]:
temp2 = CD_REG_DF.copy()
temp2.index = ["HARRIS", "TRUMP", "OTHER"]
# temp2 = temp2.div(temp2.sum(axis=1), axis=0).rename(columns={c : c + "_reg" for c in temp2.columns})
temp = temp.join(temp2, rsuffix="_reg")

In [111]:
temp["CD1_weight"] = temp["CD1_reg"] / temp["CD1"]
temp["CD2_weight"] = temp["CD2_reg"] / temp["CD2"]
temp["CD3_weight"] = temp["CD3_reg"] / temp["CD3"]
temp["CD4_weight"] = temp["CD4_reg"] / temp["CD4"]

In [112]:
f(temp[["CD1_weight", "CD2_weight", "CD3_weight", "CD4_weight"]].values)

1.3512612756225795

In [113]:
temp

Unnamed: 0_level_0,CD1,CD2,CD3,CD4,CD1_reg,CD2_reg,CD3_reg,CD4_reg,CD1_weight,CD2_weight,CD3_weight,CD4_weight
CANDIDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
HARRIS,113.0,99.0,100.0,87.0,177921,170303,179174,123853,1574.522124,1720.232323,1791.74,1423.597701
TRUMP,78.0,97.0,91.0,107.0,184254,180260,184382,237237,2362.230769,1858.350515,2026.175824,2217.168224
OTHER,20.0,21.0,18.0,19.0,5197,5005,5554,5207,259.85,238.333333,308.555556,274.052632


In [114]:
(temp[["CD1_weight", "CD2_weight", "CD3_weight", "CD4_weight"]].sum(axis=1) /
 temp[["CD1_weight", "CD2_weight", "CD3_weight", "CD4_weight"]].sum(axis=1).sum())

CANDIDATE
HARRIS    0.405492
TRUMP     0.527189
OTHER     0.067319
dtype: float64