In [33]:
import pandas as pd
import math
import random as rand
import time
from dateutil import parser
from scipy.stats import t
import datetime as dtime
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 25)

In [3]:
df = pd.read_csv("C:/Users/horat/Python Project/Senate 2022/Test Forecast/data.csv")
candidates = pd.read_csv("C:/Users/horat/Python Project/Senate 2022/Test Forecast/senate-candidates.csv")
generic_ballot = pd.read_csv("C:/Users/horat/Python Project/Senate 2022/Test Forecast/generic-ballot.csv")
biden_approval = pd.read_csv("C:/Users/horat/Python Project/Senate 2022/Test Forecast/biden-approval.csv")

In [4]:
start_time = time.time()
today = dtime.datetime(2022, 6, 1)
sims = 10 # no. of simulations run
national_third_party = 2.5
fund_weight = 40
exp_weight = 20

In [5]:
exp_scale = pd.DataFrame([
    {"rating": "Tossup", "margin": 0},
    {"rating": "Tilt R", "margin": 3},
    {"rating": "Lean R", "margin": 5.4},
    {"rating": "Likely R", "margin": 10.0},
    {"rating": "Very Likely R", "margin": 13.0},
    {"rating": "Solid R", "margin": 20},
    {"rating": "Tilt D", "margin": -3},
    {"rating": "Lean D", "margin": -5.4},
    {"rating": "Likely D", "margin": -10.0},
    {"rating": "Very Likely D", "margin": -13.0},
    {"rating": "Solid D", "margin": -20},
    {"rating": "", "margin": 0},
])

Unnamed: 0,rating,margin
0,Tossup,0.0
1,Tilt R,3.0
2,Lean R,5.4
3,Likely R,10.0
4,Very Likely R,13.0
5,Solid R,20.0
6,Tilt D,-3.0
7,Lean D,-5.4
8,Likely D,-10.0
9,Very Likely D,-13.0


In [6]:
## CONVERT EXPERT RATINGS TO MARGINS ##
experts = ["cnalysis", "cook", "inside", "sabato"]
categories = ["fund", "exp", "poll", "state_sim"]

In [9]:
for i, d in df.iterrows():
    exp_ratings = []
    for x in experts: # ["cnalysis", "cook", "inside", "sabato"]
        pct = exp_scale[exp_scale["rating"] == d[x]]["margin"].values[0] #5 20.0, value[0] return 20
        n = d["neutral_margin"] # 29.9, Solid R
        if d[x] == "Solid D":
            if abs(pct)>abs(n):
                m=pct
            else:
                m=n
        elif d[x] == "Solid R": # Solid R
            if abs(pct)>abs(n): #20 !> 29.9
                m=pct
            else:
                m=n
        else:
            m = pct
        exp_ratings.append(m)
    df.at[i, "exp_margin"] = (exp_ratings[0] * .7 + exp_ratings[1] * 1.1 + exp_ratings[2] * 1.1 + exp_ratings[3] * 1.1) / 4
    df.at[i, "variance"] = 7

In [10]:
sim_date=dtime.datetime(2022, 6, 1)

In [11]:
ballot = generic_ballot[generic_ballot["date"] == sim_date.strftime("%Y-%m-%d")]["margin"].values[0]
ballot

1.33

In [12]:
approval = -biden_approval[biden_approval["date"] == sim_date.strftime("%Y-%m-%d")]["margin"].values[0]
approval

12.35

In [13]:
proj_gen_ballot = ballot * .8 + approval * .1
proj_gen_ballot

2.2990000000000004

In [14]:
## FUNDAMENTALS & EXPERTS ###
for i, d in df.iterrows():
    df.at[i, "fund_margin"] = (d["neutral_margin"] + proj_gen_ballot * d["elasticity"])

In [21]:
for i, d in candidates.iterrows():
    s = df[df["state_id"] == d["state_id"]] # get the state information in df
    f_margin = s["fund_margin"].values[0] # get fundamental margin from df
    e_margin = s["exp_margin"].values[0] # get expert margin from df
    p = d["party"] # get party affiliation for the candidate
    third_vote = national_third_party * s["third_index"].values[0] #2.5*0.73
    no_cands = len(candidates[candidates["state_id"] == d["state_id"]]) # number of candidates in the state (alabama)
    leftover = third_vote / ((no_cands + .001) - 2) #2.5*0.73/(3+0.001-2)
    if p == "R":
        f_pct = (50 - leftover) + (f_margin / 2)
        e_pct = (50 - leftover) + (e_margin / 2)
    elif p == "D":
        f_pct = (50 - leftover) - (f_margin / 2)
        e_pct = (50 - leftover) - (e_margin / 2)
    else:
        f_pct = leftover
        e_pct = leftover
    candidates.at[i, "fund_weight"] = fund_weight
    candidates.at[i, "exp_weight"] = exp_weight
    candidates.at[i, "fund_pct"] = round(f_pct, 2)
    candidates.at[i, "exp_pct"] = round(e_pct, 2)

In [26]:
for i, d in candidates.iterrows():
    candidates.at[i, "poll_weight"] = 0
    candidates.at[i, "state_sim_weight"] = 0
    candidates.at[i, "poll_pct"] = 0
    candidates.at[i, "state_sim_pct"] = 0

In [31]:
for i, d in candidates.iterrows():
    pct_points = 0
    weight_points = 0
    for x in categories:
        pct_points += d[x + "_pct"] * d[x + "_weight"] #["fund", "exp", "poll", "state_sim"] (64.15*40=2566.0)+(63.13*20)+..../60
        weight_points += d[x + "_weight"] #40+20+0+0=60
    candidates.at[i, "proj_vote"] = round(pct_points / weight_points, 2) 
    candidates.at[i, "variance"] = 7
    candidates.at[i, "index"] = i
    candidates.at[i, "win"] = 0

In [78]:
unique_results = pd.DataFrame() #result simulation -> column: states1, states2, ...., result compilation, no. of seats, prob distribution
rep_seats = 29 # no of rep seats not up for election
dem_seats = 36 # no of dem seats not up for election

In [79]:
for z in range(sims):
    nat_rand = rand.random()
    rep_seats_sim=rep_seats
    identifier = ""
    for i, d in df.iterrows():
        state_rand = rand.random()
        cands = candidates[candidates["state_id"] == d["state_id"]].copy() # get the three candidates in alabama
        cand_rand = nat_rand * .6 + state_rand * .4
        sim_variance = t.ppf(cand_rand, 10) * d["variance"] # TODO: look into t.ppf * variance https://stackoverflow.com/questions/67340028/how-to-use-t-ppf-which-are-the-arguments
        st_out = []
        for k, j in cands.iterrows():
            p = j["party"]
            proj = j["proj_vote"]
            if p == "R":
                sim_pct = proj + sim_variance
            elif p == "D":
                sim_pct = proj - sim_variance
            else:
                sim_pct = proj
            st_out.append([sim_pct, j["index"], p]) # [[61.7648557859007, 0.0, 'R'],[31.154125286436305, 1.0, 'D'],[1.82, 2.0, 'I']]
        st_out.sort(reverse=True) # sort the list based on the first element of each list in reversing order
        winner = st_out[0][2] # get winner party affiliation
        index = st_out[0][1] # get winner index

        if winner == "R": # number of seats won by Republican
            rep_seats_sim = rep_seats_sim + 1
        else:
            rep_seats_sim = rep_seats_sim
        unique_results.at[z, d["state_id"]] = winner # at sim iter_z, under column name state, the winner is winner
        identifier = identifier + winner # string of results ie. RDDRRRRDD...
        candidates.at[index, "win"] += 1 # no. of wins
    unique_results.at[z, "id"] = identifier
    unique_results.at[z, "seats"] = rep_seats_sim

In [80]:
unique_results

Unnamed: 0,AL,AK,AZ,AR,CA,CO,CT,FL,GA,HI,ID,IL,IN,IA,KS,KY,LA,MD,MO,NV,NH,NY,NC,ND,OH,OK,OKS,OR,PA,SC,SD,UT,VT,WA,WI,id,seats
0,R,R,D,R,D,D,D,R,R,D,R,D,R,R,R,R,R,D,R,D,D,D,D,R,R,R,R,D,D,R,R,R,D,D,R,RRDRDDDRRDRDRRRRRDRDD...,49.0
1,R,R,D,R,D,D,D,R,R,D,R,D,R,R,R,R,R,D,R,D,D,D,R,R,R,R,R,D,D,R,R,R,D,D,R,RRDRDDDRRDRDRRRRRDRDD...,50.0
2,R,R,R,R,D,D,D,R,R,D,R,D,R,R,R,R,R,D,R,R,D,D,R,R,R,R,R,D,R,R,R,R,D,D,R,RRRRDDDRRDRDRRRRRDRRD...,53.0
3,R,R,D,R,D,D,D,R,D,D,R,D,R,R,R,R,R,D,R,D,D,D,R,R,R,R,R,D,D,R,R,R,D,D,D,RRDRDDDRDDRDRRRRRDRDD...,48.0
4,R,R,D,R,D,D,D,D,D,D,R,D,R,R,R,R,R,D,R,D,D,D,D,R,R,R,R,D,R,R,R,R,D,D,R,RRDRDDDDDDRDRRRRRDRDD...,48.0
5,R,R,D,R,D,D,D,D,D,D,R,D,R,R,R,R,R,D,R,D,D,D,D,R,D,R,R,D,D,R,R,R,D,D,D,RRDRDDDDDDRDRRRRRDRDD...,45.0
6,R,R,D,R,D,D,D,R,R,D,R,D,R,R,R,R,R,D,R,R,D,D,R,R,R,R,R,D,D,R,R,R,D,D,R,RRDRDDDRRDRDRRRRRDRRD...,51.0
7,R,R,R,R,D,D,D,R,R,D,R,D,R,R,R,R,R,D,R,R,R,D,R,R,R,R,R,D,R,R,R,R,D,D,R,RRRRDDDRRDRDRRRRRDRRR...,54.0
8,R,R,R,R,D,D,D,R,D,D,R,D,R,R,R,R,R,D,R,R,R,D,R,R,R,R,R,D,D,R,R,R,D,D,R,RRRRDDDRDDRDRRRRRDRRR...,52.0
9,R,R,R,R,D,D,D,R,D,D,R,D,R,R,R,R,R,D,R,R,D,D,R,R,R,R,R,D,R,R,R,R,D,D,R,RRRRDDDRDDRDRRRRRDRRD...,52.0


In [81]:
for i, d in candidates.iterrows():
    candidates.at[i, "win"] = round(d["win"] / sims * 100, 2) # convert no. of wins by candidates to percentage

In [82]:
outcomes = list(unique_results["id"])
outcomes

['RRDRDDDRRDRDRRRRRDRDDDDRRRRDDRRRDDR',
 'RRDRDDDRRDRDRRRRRDRDDDRRRRRDDRRRDDR',
 'RRRRDDDRRDRDRRRRRDRRDDRRRRRDRRRRDDR',
 'RRDRDDDRDDRDRRRRRDRDDDRRRRRDDRRRDDD',
 'RRDRDDDDDDRDRRRRRDRDDDDRRRRDRRRRDDR',
 'RRDRDDDDDDRDRRRRRDRDDDDRDRRDDRRRDDD',
 'RRDRDDDRRDRDRRRRRDRRDDRRRRRDDRRRDDR',
 'RRRRDDDRRDRDRRRRRDRRRDRRRRRDRRRRDDR',
 'RRRRDDDRDDRDRRRRRDRRRDRRRRRDDRRRDDR',
 'RRRRDDDRDDRDRRRRRDRRDDRRRRRDRRRRDDR']

In [83]:
u_outcomes = list(set(outcomes)) # filter out repeated outcomes
u_outcomes

['RRDRDDDDDDRDRRRRRDRDDDDRRRRDRRRRDDR',
 'RRRRDDDRRDRDRRRRRDRRRDRRRRRDRRRRDDR',
 'RRRRDDDRDDRDRRRRRDRRDDRRRRRDRRRRDDR',
 'RRDRDDDRRDRDRRRRRDRDDDDRRRRDDRRRDDR',
 'RRRRDDDRRDRDRRRRRDRRDDRRRRRDRRRRDDR',
 'RRDRDDDDDDRDRRRRRDRDDDDRDRRDDRRRDDD',
 'RRRRDDDRDDRDRRRRRDRRRDRRRRRDDRRRDDR',
 'RRDRDDDRRDRDRRRRRDRRDDRRRRRDDRRRDDR',
 'RRDRDDDRRDRDRRRRRDRDDDRRRRRDDRRRDDR',
 'RRDRDDDRDDRDRRRRRDRDDDRRRRRDDRRRDDD']

In [84]:
for i, d in unique_results.iterrows():
    count = len(unique_results[unique_results["id"] == d["id"]]) # cross reference and get a count of instance
    unique_results.at[i, "prob"] = count / sims * 100 # get senate control probability

1

In [85]:
unique_results = unique_results.drop_duplicates() # TODO: necessary? Make the above parts more concise
unique_results

Unnamed: 0,AL,AK,AZ,AR,CA,CO,CT,FL,GA,HI,ID,IL,IN,IA,KS,KY,LA,MD,MO,NV,NH,NY,NC,ND,OH,OK,OKS,OR,PA,SC,SD,UT,VT,WA,WI,id,seats
0,R,R,D,R,D,D,D,R,R,D,R,D,R,R,R,R,R,D,R,D,D,D,D,R,R,R,R,D,D,R,R,R,D,D,R,RRDRDDDRRDRDRRRRRDRDD...,49.0
1,R,R,D,R,D,D,D,R,R,D,R,D,R,R,R,R,R,D,R,D,D,D,R,R,R,R,R,D,D,R,R,R,D,D,R,RRDRDDDRRDRDRRRRRDRDD...,50.0
2,R,R,R,R,D,D,D,R,R,D,R,D,R,R,R,R,R,D,R,R,D,D,R,R,R,R,R,D,R,R,R,R,D,D,R,RRRRDDDRRDRDRRRRRDRRD...,53.0
3,R,R,D,R,D,D,D,R,D,D,R,D,R,R,R,R,R,D,R,D,D,D,R,R,R,R,R,D,D,R,R,R,D,D,D,RRDRDDDRDDRDRRRRRDRDD...,48.0
4,R,R,D,R,D,D,D,D,D,D,R,D,R,R,R,R,R,D,R,D,D,D,D,R,R,R,R,D,R,R,R,R,D,D,R,RRDRDDDDDDRDRRRRRDRDD...,48.0
5,R,R,D,R,D,D,D,D,D,D,R,D,R,R,R,R,R,D,R,D,D,D,D,R,D,R,R,D,D,R,R,R,D,D,D,RRDRDDDDDDRDRRRRRDRDD...,45.0
6,R,R,D,R,D,D,D,R,R,D,R,D,R,R,R,R,R,D,R,R,D,D,R,R,R,R,R,D,D,R,R,R,D,D,R,RRDRDDDRRDRDRRRRRDRRD...,51.0
7,R,R,R,R,D,D,D,R,R,D,R,D,R,R,R,R,R,D,R,R,R,D,R,R,R,R,R,D,R,R,R,R,D,D,R,RRRRDDDRRDRDRRRRRDRRR...,54.0
8,R,R,R,R,D,D,D,R,D,D,R,D,R,R,R,R,R,D,R,R,R,D,R,R,R,R,R,D,D,R,R,R,D,D,R,RRRRDDDRDDRDRRRRRDRRR...,52.0
9,R,R,R,R,D,D,D,R,D,D,R,D,R,R,R,R,R,D,R,R,D,D,R,R,R,R,R,D,R,R,R,R,D,D,R,RRRRDDDRDDRDRRRRRDRRD...,52.0


In [86]:
unique_results.to_csv("C:/Users/horat/Python Project/Senate 2022/Test Forecast/unique-results.csv", index=False)
candidates.to_csv("C:/Users/horat/Python Project/Senate 2022/Test Forecast/candidates-output.csv", index=False)

In [87]:
print("--- %s seconds ---" % (time.time() - start_time)) # Measure running time of the model

--- 29661.881382465363 seconds ---
