-
Notifications
You must be signed in to change notification settings - Fork 1
/
model-votes.R
129 lines (106 loc) · 5.18 KB
/
model-votes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
rm(list = ls())
library(tidyverse)
library(sf)
# This script models state legislative election outcomes in hypothetical districts
################################################################################
# Set-up the actual election data
# the original election results in the actual districts used in 2022
election.results <- st_read("election-data/ReportingUnitPolygons.geojson") |>
st_drop_geometry() |>
as_tibble()
election.results.by.district <- election.results |>
pivot_longer(cols = c(assembly_district, senate_district),
names_to = "house", values_to = "district") |>
select(house, district, ends_with("22")) |>
pivot_longer(cols = ends_with("22")) |>
group_by(house, district, name) |>
summarise(votes = sum(value),
.groups = "drop") |>
separate(col = name, into = c("office","party","year"), sep = c(3,-2)) |>
pivot_wider(names_from = party, values_from = votes) |>
mutate(house = word(house, 1, sep = "_"),
margin = (DEM/TOT - REP/TOT)*100)
contested.legis <- election.results.by.district |>
filter((house == "assembly" & office == "WSA") |
(house == "senate" & office == "WSS"),
DEM > 0,
REP > 0,
!is.na(TOT)) |>
select(house, district)
# margins for all elections in contested legislative races
margins.actual <- election.results.by.district |>
select(house, district, office, margin) |>
pivot_wider(names_from = office, values_from = margin) |>
inner_join(contested.legis) |>
mutate(actual = if_else(house == "assembly", WSA, WSS))
################################################################################
# build the model using the real election results
# Two-step modelling process
# Use the "v1" model to predict Seat winner
# Use the "v2" model to predict Seat margin using interaction term with predicted winner from "v1"
lm.margin.v1 <- lm(actual ~ GOV + USS + WAG + WST, data = margins.actual)
# demonstrate that any differences in structure of WSA and WSS races are not detectable
lm.margin.v1.1 <- lm(actual ~ GOV*hd + USS*hd + WAG*hd + WST*hd, data = margins.actual |>
mutate(hd = if_else(house == "senate", 1, 0)))
summary(lm.margin.v1.1)
margins.actual.v2 <- margins.actual %>%
mutate(predicted_margin_v1 = predict.lm(lm.margin.v1, newdata = .[]),
predicted_dem_win = if_else(predicted_margin_v1 > 0, 1, 0))
lm.margin.v2 <- lm(actual ~ predicted_dem_win + GOV*predicted_dem_win +
USS*predicted_dem_win + WAG*predicted_dem_win +
WST*predicted_dem_win, data = margins.actual.v2)
################################################################################
# compare the residuals from each model, as well as the simple average of the 4 statewide races
compare.residuals <- margins.actual.v2 %>%
mutate(predict_v1 = predict.lm(lm.margin.v1, newdata = .[]),
predict_v2 = predict.lm(lm.margin.v2, newdata = .[]),
predict_avg = (GOV + USS + WAG + WST)/4,
residuals_v1 = actual - predict_v1,
residuals_v2 = actual - predict_v2,
residuals_avg = actual - predict_avg) |>
select(house, district, actual, starts_with("predict_"),
starts_with("resid")) |>
pivot_longer(cols = -c(house, district, actual)) |>
separate(name, into = c("stat", "model"), sep = "_") |>
pivot_wider(names_from = stat, values_from = value)
compare.residuals |>
ggplot(aes(actual, residuals)) +
geom_point() +
ggrepel::geom_text_repel(data = function(x){filter(x, abs(residuals) > 5)},
aes(label = district), min.segment.length = 0.01) +
facet_grid(rows = vars(house), cols = vars(model))
################################################################################
# predict results
votes.by.district <- read_csv("election-data/votes-in-proposed-districts_2012-22.csv") |>
separate(col = race, into = c("office","party","year"), sep = c(3,-2)) |>
pivot_wider(names_from = party, values_from = votes) |>
mutate(margin = (DEM/TOT - REP/TOT)*100)
votes.by.district.22 <- votes.by.district |> filter(year == "22")
predicted <- votes.by.district.22 |>
select(plan, district, office, margin) |>
pivot_wider(names_from = office, values_from = margin) %>%
mutate(predict_v1 = predict.lm(lm.margin.v1, newdata = .[]),
predicted_dem_win = if_else(predict_v1 > 0, 1, 0)) %>%
mutate(predict_v2 = predict.lm(lm.margin.v2, newdata = .[]))
# total winners by plan
predicted |>
group_by(plan) |>
summarise(dem_seats = sum(predict_v2 > 0))
################################################################################
# save output
final.output <- inner_join(
# 2022 results
predicted |>
select(plan, district, modelled_outcome_22 = predict_v2, GOV_22 = GOV,
USS_22 = USS, WAG_22 = WAG, WST_22 = WST),
# 2012-2020
votes.by.district |>
filter(year < 22) |>
select(plan, district, year, office, margin) |>
pivot_wider(names_from = c(office, year), values_from = margin)
) |>
select(plan, district, modelled_outcome_22, ends_with("22"),
ends_with("20"), ends_with("18"), ends_with("16"), ends_with("14"),
ends_with("12")) |>
filter(district != "ZZZ")
write_csv(final.output, "analysis-r/tables/plan-vote-margins.csv")