## Import Libraries

In [1]:
import pandas as pd # library for data analysis
import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML documents
from datetime import datetime
import numpy as np
import random


In [2]:
url_dummy = f'https://en.wikipedia.org/wiki/2024_United_States_presidential_election_in_Arizona'
all_tables = pd.read_html(url_dummy)

    # Extract the specific table using the index stored in state_key_values
dummy_table = all_tables[15]
dummy_table.head()
#dummy_table.shape

Unnamed: 0,Poll source,Date(s)administered,Samplesize[b],Marginof error,KamalaHarrisDemocratic,DonaldTrumpRepublican,Other /Undecided
0,New York Times/Siena College[18],"August 8 – 15, 2024",677 (RV),± 4.4%,49%,45%,7%
1,New York Times/Siena College[18],"August 8 – 15, 2024",677 (LV),± 4.4%,50%,45%,5%
2,Trafalgar Group (R)[19],"August 6 – 8, 2024","1,092 (LV)",± 2.9%,47%,48%,5%
3,Cook Political Report/BSG (R)/GS Strategy Grou...,"July 26 – August 8, 2024",435 (LV),–,48%,46%,6%
4,HighGround[21],"July 30 – August 5, 2024",500 (LV),± 4.4%,44%,42%,14%[c]


## Create State Key/Values

In [3]:
# state_key_values = {
#     'GA':['Georgia',13],
#     'PA':['Pennsylvania',16]
# }

In [4]:
state_key_values = {
    'Georgia':13,
    'Pennsylvania':17,
    'Wisconsin':15,
    'Michigan':15,
    'Arizona':15,
    'Nevada':16,
    'North_Carolina':14,
    'Florida':18
    
}

In [5]:
def extract_and_reformat_date(date_range):
    # Extract the start date part
    start_date_str = date_range.split('–')[0].strip()
    
    # Remove the year if it exists
    if ',' in start_date_str:
        start_date_str = start_date_str.split(',')[0].strip()
    
    # Add a default year if not provided (e.g., 2024)
    if len(start_date_str.split()) == 2:  # Format like "July 22"
        start_date_str += ' 2024'
    
    return start_date_str

In [6]:
dummy_table.columns

Index(['Poll source', 'Date(s)administered', 'Samplesize[b]', 'Marginof error',
       'KamalaHarrisDemocratic', 'DonaldTrumpRepublican', 'Other /Undecided'],
      dtype='object')

In [65]:
poll_data = []
for state in state_key_values:
    url_dummy = f'https://en.wikipedia.org/wiki/2024_United_States_presidential_election_in_{state}'
    all_tables = pd.read_html(url_dummy)

    # Extract the specific table using the index stored in state_key_values
    dummy_table = all_tables[state_key_values[state]]
    
    # Add a new column 'state' with the current state name
    dummy_table["state"] = state
    
    # Get start date of the poll
    dummy_table['start_date'] = dummy_table['Date(s)administered'].apply(extract_and_reformat_date)

    # Reformat dates
    dummy_table['start_date'] = pd.to_datetime(dummy_table['start_date'], format='%B %d %Y', errors='coerce')
    
    
    dummy_table = dummy_table[dummy_table['Date(s)administered'].str[-2:] == "24"]
    
    # Define the cutoff date for filtering
    cutoff_date = datetime(2024, 7, 21)

    # Filter rows where the date is on or before the cutoff date
    dummy_table = dummy_table[dummy_table['start_date'] > cutoff_date]

    # Append the DataFrame to the list (no need to assign it back to poll_data)
    poll_data.append(dummy_table)

# Combine all DataFrames into a single DataFrame
final_poll_data = pd.concat(poll_data, ignore_index=True)

In [66]:
final_poll_data.shape

(83, 13)

In [67]:
final_poll_data.dtypes

Poll source                                                                    object
Date(s)administered                                                            object
Samplesize[b]                                                                  object
Marginof error                                                                 object
KamalaHarrisDemocratic                                                         object
DonaldTrumpRepublican                                                          object
Other /Undecided                                                               object
Unnamed: 7                                                                     object
state                                                                          object
start_date                                                             datetime64[ns]
Samplesize[c]                                                                  object
KamalaHarris.mw-parser-output .nobold{font-weight:norm

In [68]:
final_poll_data

Unnamed: 0,Poll source,Date(s)administered,Samplesize[b],Marginof error,KamalaHarrisDemocratic,DonaldTrumpRepublican,Other /Undecided,Unnamed: 7,state,start_date,Samplesize[c],KamalaHarris.mw-parser-output .nobold{font-weight:normal}Democratic,DonaldTrump.mw-parser-output .nobold{font-weight:normal}Republican
0,New York Times/Siena College[21],"August 9–14, 2024",661 (RV),± 4.4%,44%,51%,5%,,Georgia,2024-08-09,,,
1,New York Times/Siena College[21],"August 9–14, 2024",661 (LV),± 4.4%,46%,50%,4%,,Georgia,2024-08-09,,,
2,Cook Political Report/BSG (R)/GS Strategy Grou...,"July 26–August 8, 2024",405 (LV),–,48%,48%,4%,,Georgia,2024-07-26,,,
3,Fabrizio Ward (R)/Impact Research (D)[A],"July 24–31, 2024",600 (LV),± 4.0%,48%,48%,4%,,Georgia,2024-07-24,,,
4,Trafalgar Group (R)/InsiderAdvantage (R),"July 29–30, 2024",– (LV),± 3.5%,47%,49%,4%,,Georgia,2024-07-29,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,Cook Political Report/BSG (R)/GS Strategy Grou...,"July 26–August 8, 2024",,–,48%,,5%,,North_Carolina,2024-07-26,403 (LV),,47%
79,Bloomberg/Morning Consult[26],"July 24–28, 2024",,± 4.0%,46%,,6%,,North_Carolina,2024-07-24,706 (RV),,48%
80,Mainstreet Research/Florida Atlantic Universit...,"August 10–11, 2024","1,055 (RV)",± 3.0%,46%,49%,5%[c],,Florida,2024-08-10,,,
81,Mainstreet Research/Florida Atlantic Universit...,"August 10–11, 2024","1,040 (LV)",± 3.0%,47%,50%,3%[d],,Florida,2024-08-10,,,


## Build Webscraper

In [69]:
final_poll_data['KamalaHarrisPct'] = final_poll_data['KamalaHarrisDemocratic'].where(pd.notna(final_poll_data['KamalaHarrisDemocratic']), final_poll_data['KamalaHarris.mw-parser-output .nobold{font-weight:normal}Democratic']).str.replace(r'%\[\w\]', '', regex=True).str.replace('%', '').astype(int)


In [70]:
final_poll_data['DonaldTrumpPct'] = final_poll_data['DonaldTrumpRepublican'].where(pd.notna(final_poll_data['DonaldTrumpRepublican']), final_poll_data['DonaldTrump.mw-parser-output .nobold{font-weight:normal}Republican']).str.replace(r'%\[\w\]', '', regex=True).str.replace('%', '').astype(int)


In [71]:
final_poll_data.dtypes

Poll source                                                                    object
Date(s)administered                                                            object
Samplesize[b]                                                                  object
Marginof error                                                                 object
KamalaHarrisDemocratic                                                         object
DonaldTrumpRepublican                                                          object
Other /Undecided                                                               object
Unnamed: 7                                                                     object
state                                                                          object
start_date                                                             datetime64[ns]
Samplesize[c]                                                                  object
KamalaHarris.mw-parser-output .nobold{font-weight:norm

In [72]:
final_poll_data['KamalaHarrisPct'].value_counts()

49    20
48    15
47    14
46    10
50     8
45     7
44     4
51     2
42     1
43     1
53     1
Name: KamalaHarrisPct, dtype: int64

In [73]:
final_poll_data.head()

Unnamed: 0,Poll source,Date(s)administered,Samplesize[b],Marginof error,KamalaHarrisDemocratic,DonaldTrumpRepublican,Other /Undecided,Unnamed: 7,state,start_date,Samplesize[c],KamalaHarris.mw-parser-output .nobold{font-weight:normal}Democratic,DonaldTrump.mw-parser-output .nobold{font-weight:normal}Republican,KamalaHarrisPct,DonaldTrumpPct
0,New York Times/Siena College[21],"August 9–14, 2024",661 (RV),± 4.4%,44%,51%,5%,,Georgia,2024-08-09,,,,44,51
1,New York Times/Siena College[21],"August 9–14, 2024",661 (LV),± 4.4%,46%,50%,4%,,Georgia,2024-08-09,,,,46,50
2,Cook Political Report/BSG (R)/GS Strategy Grou...,"July 26–August 8, 2024",405 (LV),–,48%,48%,4%,,Georgia,2024-07-26,,,,48,48
3,Fabrizio Ward (R)/Impact Research (D)[A],"July 24–31, 2024",600 (LV),± 4.0%,48%,48%,4%,,Georgia,2024-07-24,,,,48,48
4,Trafalgar Group (R)/InsiderAdvantage (R),"July 29–30, 2024",– (LV),± 3.5%,47%,49%,4%,,Georgia,2024-07-29,,,,47,49


In [74]:
final_poll_data["Samplesize[b]"].value_counts()

800 (RV)      6
400 (LV)      4
500 (RV)      3
804 (RV)      2
500 (LV)      2
800 (LV)      2
801 (LV)      2
850 (RV)      2
845 (RV)      2
661 (RV)      2
600 (LV)      2
877 (RV)      2
1,000 (RV)    2
661 (LV)      2
– (LV)        1
662 (RV)      1
774 (LV)      1
618 (RV)      1
1,092 (LV)    1
411 (LV)      1
700 (RV)      1
1,034 (RV)    1
1,738 (LV)    1
1,046 (RV)    1
693 (RV)      1
505 (RV)      1
677 (RV)      1
405 (LV)      1
1,040 (LV)    1
677 (LV)      1
627 (RV)      1
404 (LV)      1
693 (LV)      1
1,015 (RV)    1
799 (RV)      1
1,078 (LV)    1
1,055 (RV)    1
435 (LV)      1
976 (LV)      1
Name: Samplesize[b], dtype: int64

In [75]:
final_poll_data['poll_type_dummy1'] = final_poll_data['Samplesize[b]'].str[-3] + final_poll_data['Samplesize[b]'].str[-2]
final_poll_data['poll_type_dummy2'] = final_poll_data['Samplesize[c]'].str[-3] + final_poll_data['Samplesize[c]'].str[-2]


In [76]:
final_poll_data['poll_type'] = final_poll_data['poll_type_dummy1'].where(pd.notna(final_poll_data['poll_type_dummy1']), final_poll_data['poll_type_dummy2'])


In [77]:
final_poll_data['poll_type'].value_counts()

RV    44
LV    39
Name: poll_type, dtype: int64

In [78]:
final_poll_data.loc[final_poll_data['poll_type']=="LV",:]['state'].value_counts()

Pennsylvania      8
Wisconsin         7
Arizona           5
Michigan          5
Georgia           5
Nevada            4
North_Carolina    3
Florida           2
Name: state, dtype: int64

In [79]:
final_poll_data.loc[final_poll_data['poll_type']=="RV",:]['state'].value_counts()

Pennsylvania      11
Wisconsin         10
Michigan           7
Georgia            6
Arizona            5
Nevada             2
North_Carolina     2
Florida            1
Name: state, dtype: int64

In [80]:
final_poll_data.columns

Index(['Poll source', 'Date(s)administered', 'Samplesize[b]', 'Marginof error',
       'KamalaHarrisDemocratic', 'DonaldTrumpRepublican', 'Other /Undecided',
       'Unnamed: 7', 'state', 'start_date', 'Samplesize[c]',
       'KamalaHarris.mw-parser-output .nobold{font-weight:normal}Democratic',
       'DonaldTrump.mw-parser-output .nobold{font-weight:normal}Republican',
       'KamalaHarrisPct', 'DonaldTrumpPct', 'poll_type_dummy1',
       'poll_type_dummy2', 'poll_type'],
      dtype='object')

In [81]:
final_poll_data['KamalaHarrisPct'] = final_poll_data['KamalaHarrisPct']/100
final_poll_data['DonaldTrumpPct'] = final_poll_data['DonaldTrumpPct']/100

In [82]:
final_poll_data['month'] = final_poll_data['start_date'].dt.month


In [83]:
simulation_parameters = final_poll_data.groupby('state').agg(
    KamalaHarrisAvg = ('KamalaHarrisPct','mean'),
    KamalaHarrisStd = ('KamalaHarrisPct','std'),
    KamalaHarrisCnt = ('KamalaHarrisPct','count'),
    DonaldTrumpAvg = ('DonaldTrumpPct','mean'),
    DonaldTrumpStd = ('DonaldTrumpPct','std'),
    DonaldTrumpCnt = ('DonaldTrumpPct','count')
).reset_index()

In [84]:
month_simulation_parameters = final_poll_data.groupby(['state','month']).agg(
    KamalaHarrisAvg = ('KamalaHarrisPct','mean'),
    KamalaHarrisStd = ('KamalaHarrisPct','std'),
    KamalaHarrisCnt = ('KamalaHarrisPct','count'),
    DonaldTrumpAvg = ('DonaldTrumpPct','mean'),
    DonaldTrumpStd = ('DonaldTrumpPct','std'),
    DonaldTrumpCnt = ('DonaldTrumpPct','count')
).reset_index()

In [85]:
month_simulation_parameters

Unnamed: 0,state,month,KamalaHarrisAvg,KamalaHarrisStd,KamalaHarrisCnt,DonaldTrumpAvg,DonaldTrumpStd,DonaldTrumpCnt
0,Arizona,7,0.46,0.023094,7,0.477143,0.033523,7
1,Arizona,8,0.486667,0.015275,3,0.46,0.017321,3
2,Florida,7,0.42,,1,0.49,,1
3,Florida,8,0.465,0.007071,2,0.495,0.007071,2
4,Georgia,7,0.473333,0.01,9,0.484444,0.013333,9
5,Georgia,8,0.45,0.014142,2,0.505,0.007071,2
6,Michigan,7,0.48,0.028868,7,0.468571,0.030237,7
7,Michigan,8,0.48,0.018708,5,0.464,0.020736,5
8,Nevada,7,0.456667,0.011547,3,0.463333,0.015275,3
9,Nevada,8,0.46,0.01,3,0.48,0.0,3


In [86]:
standard_deviations = simulation_parameters[["state","KamalaHarrisStd","DonaldTrumpStd"]]

In [87]:
standard_deviations

Unnamed: 0,state,KamalaHarrisStd,DonaldTrumpStd
0,Arizona,0.023944,0.02974
1,Florida,0.026458,0.005774
2,Georgia,0.013751,0.014709
3,Michigan,0.024121,0.025702
4,Nevada,0.009832,0.013292
5,North_Carolina,0.018166,0.011402
6,Pennsylvania,0.01615,0.018016
7,Wisconsin,0.013477,0.021727


In [88]:
month_simulation_parameters = month_simulation_parameters.drop(["KamalaHarrisStd","DonaldTrumpStd"],axis=1)

In [89]:
month_simulation_parameters = pd.merge(month_simulation_parameters, standard_deviations, how='left', on='state')

In [90]:
month_simulation_parameters

Unnamed: 0,state,month,KamalaHarrisAvg,KamalaHarrisCnt,DonaldTrumpAvg,DonaldTrumpCnt,KamalaHarrisStd,DonaldTrumpStd
0,Arizona,7,0.46,7,0.477143,7,0.023944,0.02974
1,Arizona,8,0.486667,3,0.46,3,0.023944,0.02974
2,Florida,7,0.42,1,0.49,1,0.026458,0.005774
3,Florida,8,0.465,2,0.495,2,0.026458,0.005774
4,Georgia,7,0.473333,9,0.484444,9,0.013751,0.014709
5,Georgia,8,0.45,2,0.505,2,0.013751,0.014709
6,Michigan,7,0.48,7,0.468571,7,0.024121,0.025702
7,Michigan,8,0.48,5,0.464,5,0.024121,0.025702
8,Nevada,7,0.456667,3,0.463333,3,0.009832,0.013292
9,Nevada,8,0.46,3,0.48,3,0.009832,0.013292


In [91]:
simulation_parameters

Unnamed: 0,state,KamalaHarrisAvg,KamalaHarrisStd,KamalaHarrisCnt,DonaldTrumpAvg,DonaldTrumpStd,DonaldTrumpCnt
0,Arizona,0.468,0.023944,10,0.472,0.02974,10
1,Florida,0.45,0.026458,3,0.493333,0.005774,3
2,Georgia,0.469091,0.013751,11,0.488182,0.014709,11
3,Michigan,0.48,0.024121,12,0.466667,0.025702,12
4,Nevada,0.458333,0.009832,6,0.471667,0.013292,6
5,North_Carolina,0.474,0.018166,5,0.474,0.011402,5
6,Pennsylvania,0.480526,0.01615,19,0.476316,0.018016,19
7,Wisconsin,0.487647,0.013477,17,0.467059,0.021727,17


In [92]:
simulation_parameters['KamalaHarrisUpperEnd'] = simulation_parameters['KamalaHarrisAvg'] + 1.96*(simulation_parameters['KamalaHarrisStd']/np.sqrt(simulation_parameters['KamalaHarrisCnt']))

In [93]:
simulation_parameters['DonaldTrumpUpperEnd'] = simulation_parameters['DonaldTrumpAvg'] + 1.96*(simulation_parameters['DonaldTrumpStd']/np.sqrt(simulation_parameters['DonaldTrumpCnt']))

In [94]:
simulation_parameters['KamalaHarrisLowerEnd'] = simulation_parameters['KamalaHarrisAvg'] - 1.96*(simulation_parameters['KamalaHarrisStd']/np.sqrt(simulation_parameters['KamalaHarrisCnt']))

In [95]:
simulation_parameters['DonaldTrumpLowerEnd'] = simulation_parameters['DonaldTrumpAvg'] - 1.96*(simulation_parameters['DonaldTrumpStd']/np.sqrt(simulation_parameters['DonaldTrumpCnt']))

In [96]:
simulation_parameters["state"]

0           Arizona
1           Florida
2           Georgia
3          Michigan
4            Nevada
5    North_Carolina
6      Pennsylvania
7         Wisconsin
Name: state, dtype: object

In [39]:
# Initialize counters

states = simulation_parameters["state"]

for state in states:
    print(state)
    dem_win = 0
    rep_win = 0
    other_win = 0
    for x in range(1, 100000):
        # Get parameters for the specific state
        state_parameters = simulation_parameters.loc[simulation_parameters["state"] == state, :]

        # Extract the bounds for percentages
        dem_lower = state_parameters['KamalaHarrisLowerEnd'].values[0]
        dem_upper = state_parameters['KamalaHarrisUpperEnd'].values[0]
        rep_lower = state_parameters['DonaldTrumpLowerEnd'].values[0]
        rep_upper = state_parameters['DonaldTrumpUpperEnd'].values[0]

        # Generate random percentages
        dem_pct = random.uniform(dem_lower, dem_upper)
        rep_pct = random.uniform(rep_lower, rep_upper)

        # print(f"Harris: {dem_pct} | Trump: {rep_pct}")

        # Determine the winner
        if dem_pct > rep_pct:
            dem_win += 1
        elif rep_pct > dem_pct:
            rep_win += 1
        else:
            other_win += 1
            
    
    other_win = 100000-dem_win-rep_win
    print(f"Democratic wins: {dem_win}")
    print(f"Republican wins: {rep_win}")
    print(f"Other wins: {other_win}")

Arizona
Democratic wins: 39077
Republican wins: 60922
Other wins: 1
Florida
Democratic wins: 0
Republican wins: 99999
Other wins: 1
Georgia
Democratic wins: 0
Republican wins: 99999
Other wins: 1
Michigan
Democratic wins: 86045
Republican wins: 13954
Other wins: 1
Nevada
Democratic wins: 3996
Republican wins: 96003
Other wins: 1
North_Carolina
Democratic wins: 49862
Republican wins: 50137
Other wins: 1
Pennsylvania
Democratic wins: 71180
Republican wins: 28819
Other wins: 1
Wisconsin
Democratic wins: 99999
Republican wins: 0
Other wins: 1


## Weighted Simulation Work

In [97]:
month_simulation_parameters.head()

Unnamed: 0,state,month,KamalaHarrisAvg,KamalaHarrisCnt,DonaldTrumpAvg,DonaldTrumpCnt,KamalaHarrisStd,DonaldTrumpStd
0,Arizona,7,0.46,7,0.477143,7,0.023944,0.02974
1,Arizona,8,0.486667,3,0.46,3,0.023944,0.02974
2,Florida,7,0.42,1,0.49,1,0.026458,0.005774
3,Florida,8,0.465,2,0.495,2,0.026458,0.005774
4,Georgia,7,0.473333,9,0.484444,9,0.013751,0.014709


In [98]:
current_month = datetime.now().month
current_month

8

In [99]:
month_simulation_parameters['month_diff'] = current_month-month_simulation_parameters['month']

In [100]:
weights = {
    7:[1],
    8:[.60,.40],
    9:[.55,.25,.20],
    10:[.55,.20,.15,.10],
    11:[.50,.20,.15,.10,.5]
}

In [101]:
weights[8][0]

0.6

In [102]:
month_simulation_parameters['month_diff'][1]

0

In [103]:
weights[max(month_simulation_parameters['month'])][month_simulation_parameters['month_diff'][1]]

0.6

In [104]:
most_recent_month = max(month_simulation_parameters['month'])

In [105]:
month_simulation_parameters['weighted_average_kamala_harris'] = month_simulation_parameters.apply(
    lambda row: row['KamalaHarrisAvg'] * weights[max(month_simulation_parameters['month'])][row['month_diff']],
    axis=1
)

In [106]:
month_simulation_parameters['weighted_average_donald_trump'] = month_simulation_parameters.apply(
    lambda row: row['DonaldTrumpAvg'] * weights[max(month_simulation_parameters['month'])][row['month_diff']],
    axis=1
)

In [107]:
month_simulation_parameters

Unnamed: 0,state,month,KamalaHarrisAvg,KamalaHarrisCnt,DonaldTrumpAvg,DonaldTrumpCnt,KamalaHarrisStd,DonaldTrumpStd,month_diff,weighted_average_kamala_harris,weighted_average_donald_trump
0,Arizona,7,0.46,7,0.477143,7,0.023944,0.02974,1,0.184,0.190857
1,Arizona,8,0.486667,3,0.46,3,0.023944,0.02974,0,0.292,0.276
2,Florida,7,0.42,1,0.49,1,0.026458,0.005774,1,0.168,0.196
3,Florida,8,0.465,2,0.495,2,0.026458,0.005774,0,0.279,0.297
4,Georgia,7,0.473333,9,0.484444,9,0.013751,0.014709,1,0.189333,0.193778
5,Georgia,8,0.45,2,0.505,2,0.013751,0.014709,0,0.27,0.303
6,Michigan,7,0.48,7,0.468571,7,0.024121,0.025702,1,0.192,0.187429
7,Michigan,8,0.48,5,0.464,5,0.024121,0.025702,0,0.288,0.2784
8,Nevada,7,0.456667,3,0.463333,3,0.009832,0.013292,1,0.182667,0.185333
9,Nevada,8,0.46,3,0.48,3,0.009832,0.013292,0,0.276,0.288


In [108]:
weighted_month_simulation_parameters = month_simulation_parameters.groupby(['state']).agg(
    KamalaHarrisAvg = ('weighted_average_kamala_harris','sum'),
    KamalaHarrisStd = ('KamalaHarrisStd','mean'),
    KamalaHarrisCnt = ('KamalaHarrisCnt','sum'),
    DonaldTrumpAvg = ('weighted_average_donald_trump','sum'),
    DonaldTrumpStd = ('DonaldTrumpStd','mean'),
    DonaldTrumpCnt = ('DonaldTrumpCnt','sum')
).reset_index()

In [109]:
weighted_month_simulation_parameters['KamalaHarrisUpperEnd'] = weighted_month_simulation_parameters['KamalaHarrisAvg'] + 1.96*(weighted_month_simulation_parameters['KamalaHarrisStd']/np.sqrt(weighted_month_simulation_parameters['KamalaHarrisCnt']))

In [110]:
weighted_month_simulation_parameters['DonaldTrumpUpperEnd'] = weighted_month_simulation_parameters['DonaldTrumpAvg'] + 1.96*(weighted_month_simulation_parameters['DonaldTrumpStd']/np.sqrt(weighted_month_simulation_parameters['DonaldTrumpCnt']))

In [111]:
weighted_month_simulation_parameters['KamalaHarrisLowerEnd'] = weighted_month_simulation_parameters['KamalaHarrisAvg'] - 1.96*(weighted_month_simulation_parameters['KamalaHarrisStd']/np.sqrt(weighted_month_simulation_parameters['KamalaHarrisCnt']))

In [112]:
weighted_month_simulation_parameters['DonaldTrumpLowerEnd'] = weighted_month_simulation_parameters['DonaldTrumpAvg'] - 1.96*(weighted_month_simulation_parameters['DonaldTrumpStd']/np.sqrt(weighted_month_simulation_parameters['DonaldTrumpCnt']))

In [113]:
weighted_month_simulation_parameters

Unnamed: 0,state,KamalaHarrisAvg,KamalaHarrisStd,KamalaHarrisCnt,DonaldTrumpAvg,DonaldTrumpStd,DonaldTrumpCnt,KamalaHarrisUpperEnd,DonaldTrumpUpperEnd,KamalaHarrisLowerEnd,DonaldTrumpLowerEnd
0,Arizona,0.476,0.023944,10,0.466857,0.02974,10,0.490841,0.48529,0.461159,0.448424
1,Florida,0.447,0.026458,3,0.493,0.005774,3,0.476939,0.499533,0.417061,0.486467
2,Georgia,0.459333,0.013751,11,0.496778,0.014709,11,0.46746,0.50547,0.451207,0.488085
3,Michigan,0.48,0.024121,12,0.465829,0.025702,12,0.493648,0.480371,0.466352,0.451286
4,Nevada,0.458667,0.009832,6,0.473333,0.013292,6,0.466534,0.483969,0.450799,0.462698
5,North_Carolina,0.474,0.018166,5,0.474,0.011402,5,0.489923,0.483994,0.458077,0.464006
6,Pennsylvania,0.481523,0.01615,19,0.47475,0.018016,19,0.488785,0.482851,0.474261,0.466649
7,Wisconsin,0.487909,0.013477,17,0.464364,0.021727,17,0.494315,0.474692,0.481503,0.454035


In [114]:
import pandas as pd
import random

# Assuming you have weighted_month_simulation_parameters already defined
states = weighted_month_simulation_parameters["state"].unique()  # Get unique states
results = []

for state in states:
    print(state)
    dem_win = 0
    rep_win = 0
    other_win = 0
    
    for x in range(100000):  # Simulate 100,000 times
        # Get parameters for the specific state
        state_parameters = weighted_month_simulation_parameters.loc[weighted_month_simulation_parameters["state"] == state]

        # Extract the bounds for percentages
        dem_lower = state_parameters['KamalaHarrisLowerEnd'].values[0]
        dem_upper = state_parameters['KamalaHarrisUpperEnd'].values[0]
        rep_lower = state_parameters['DonaldTrumpLowerEnd'].values[0]
        rep_upper = state_parameters['DonaldTrumpUpperEnd'].values[0]

        # Generate random percentages
        dem_pct = random.uniform(dem_lower, dem_upper)
        rep_pct = random.uniform(rep_lower, rep_upper)

        # Determine the winner
        if dem_pct > rep_pct:
            dem_win += 1
        elif rep_pct > dem_pct:
            rep_win += 1
        else:
            other_win += 1
    
    # Store the results for this state
    results.append({'state': state, 'dem_win': dem_win, 'rep_win': rep_win})

# Convert results to a DataFrame
simulation_results = pd.DataFrame(results)

# Show the resulting DataFrame
print(simulation_results)


Arizona
Florida
Georgia
Michigan
Nevada
North_Carolina
Pennsylvania
Wisconsin
            state  dem_win  rep_win
0         Arizona    73386    26614
1         Florida        0   100000
2         Georgia        0   100000
3        Michigan    87471    12529
4          Nevada     2222    97778
5  North_Carolina    49690    50310
6    Pennsylvania    84440    15560
7       Wisconsin   100000        0


In [115]:
simulation_results.replace("North_Carolina", "North Carolina", inplace=True)
simulation_results[["dem_win","rep_win"]] = simulation_results[["dem_win","rep_win"]]/100000

In [116]:
simulation_results

Unnamed: 0,state,dem_win,rep_win
0,Arizona,0.73386,0.26614
1,Florida,0.0,1.0
2,Georgia,0.0,1.0
3,Michigan,0.87471,0.12529
4,Nevada,0.02222,0.97778
5,North Carolina,0.4969,0.5031
6,Pennsylvania,0.8444,0.1556
7,Wisconsin,1.0,0.0


In [127]:

current_date_dummy = datetime.now()
year = current_date_dummy.year
month = current_date_dummy.month
day = current_date_dummy.day

current_date = str(day)+"_"+month+"_"+year

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [126]:
current_date

datetime.datetime(2024, 8, 18, 18, 50, 18, 110451)

In [117]:

simulation_results.to_csv(f'simulation_results_{}.csv', index=False)


In [118]:
!git init


Reinitialized existing Git repository in /Users/geoffrey/Documents/GitHub/election-forecast/.git/


In [119]:
!git add simulation_results.csv

In [120]:
!git commit -m "Add CSV file of DataFrame"


[main 0793756] Add CSV file of DataFrame
 1 file changed, 5 insertions(+), 5 deletions(-)


In [121]:
!git remote add origin https://github.com/gcdean38/election-forecast.git


fatal: remote origin already exists.
