## Reformat Raw Data

Reformat the data into the following columns: <br/>
date -- date that the model was run<br/>
state -- state (or congressional district, e.g. ME-1) that we are predicting <br/>
dem_chance -- probability that the state is won by Joseph Biden (D)<br/>
rep_chance -- probability that the state is won by Donald Trump (R)<br/>


In [1]:
import csv
import pandas as pd

### 538 Model

In [3]:
# Load the raw 538 export and reshape it into the (date, state, dem_chance)
# layout used for every model in this notebook.
df = pd.read_csv('538_2020.csv')

# Pick the needed columns directly instead of looping over rows: the values
# and their order are the same, without per-row Python overhead.
# 'winstate_chal' is the column this notebook stores as dem_chance.
output = pd.DataFrame({
    'date': pd.to_datetime(df['modeldate']),
    'state': df['state'],
    'dem_chance': df['winstate_chal'],
})

# Order by state then date, and drop the rows labelled 'US' so only the
# per-state (and per-district) rows are written out.
output = output.sort_values(by=['state', 'date'])
output = output[output['state'] != 'US']
output.to_csv('../data/538.csv', index=False)

In [4]:
# Distinct labels in the 'state' column of the current `output` frame,
# kept under their own name so they can be compared with the other models.
states_1 = set(output['state'])
print(states_1)

{'Maine', 'District of Columbia', 'Rhode Island', 'South Carolina', 'ME-2', 'Pennsylvania', 'Arkansas', 'Tennessee', 'Mississippi', 'Hawaii', 'New Hampshire', 'Colorado', 'ME-1', 'NE-1', 'NE-2', 'Nebraska', 'Alaska', 'Nevada', 'New York', 'New Mexico', 'Alabama', 'South Dakota', 'Utah', 'Michigan', 'New Jersey', 'NE-3', 'Kansas', 'Illinois', 'Georgia', 'Wisconsin', 'California', 'Iowa', 'Idaho', 'Delaware', 'Florida', 'Indiana', 'West Virginia', 'Minnesota', 'Kentucky', 'North Carolina', 'Oklahoma', 'North Dakota', 'Montana', 'Washington', 'Louisiana', 'Missouri', 'Connecticut', 'Ohio', 'Massachusetts', 'Virginia', 'Texas', 'Maryland', 'Oregon', 'Wyoming', 'Arizona', 'Vermont'}


### JHK Model

In [5]:
# Load the raw JHK export and reshape it into the (date, state, dem_chance)
# layout used for every model in this notebook.
df = pd.read_csv('jhk_2020.csv')

# Keep only the rows whose party is 'DEM'; their 'win' value is stored as
# dem_chance below.
dem_party = df['party'] == 'DEM'
df = df[dem_party]

# Relabel JHK's congressional-district names to the short codes that appear
# in the 538 output.
district_codes = {
    "Maine CD-1": "ME-1",
    "Maine CD-2": "ME-2",
    "Nebraska CD-1": "NE-1",
    "Nebraska CD-2": "NE-2",
    "Nebraska CD-3": "NE-3",
}

# Build the frame from whole columns rather than row by row. The relabelling
# is applied to 'state' only, so it cannot touch the date or probability
# columns. reset_index gives the same fresh 0..n-1 index the filtered rows
# would get when rebuilt from plain lists.
output = pd.DataFrame({
    'date': pd.to_datetime(df['forecastDate']),
    'state': df['state'].replace(district_codes),
    'dem_chance': df['win'],
}).reset_index(drop=True)

# Drop the rows labelled 'US', then order by state and date before writing.
output = output[output['state'] != 'US']
output = output.sort_values(by=['state', 'date'])
output.to_csv('../data/jhk.csv', index=False)

In [6]:
# Distinct labels in the 'state' column of the current `output` frame,
# kept under their own name so they can be compared with the other models.
states_2 = set(output['state'])
print(states_2)

{'Maine', 'District of Columbia', 'Rhode Island', 'South Carolina', 'ME-2', 'Pennsylvania', 'Arkansas', 'Tennessee', 'Mississippi', 'Hawaii', 'New Hampshire', 'Colorado', 'ME-1', 'NE-1', 'NE-2', 'Nebraska', 'Alaska', 'Nevada', 'New York', 'New Mexico', 'Alabama', 'South Dakota', 'Utah', 'Michigan', 'New Jersey', 'NE-3', 'Kansas', 'Illinois', 'Georgia', 'Wisconsin', 'California', 'Iowa', 'Idaho', 'Delaware', 'Florida', 'Indiana', 'West Virginia', 'Minnesota', 'Kentucky', 'North Carolina', 'Oklahoma', 'North Dakota', 'Montana', 'Washington', 'Louisiana', 'Missouri', 'Connecticut', 'Ohio', 'Massachusetts', 'Virginia', 'Texas', 'Maryland', 'Oregon', 'Wyoming', 'Arizona', 'Vermont'}


### Economist Model

In [7]:
# Load the raw Economist export and reshape it into the (date, state,
# dem_chance) layout used for every model in this notebook.
df = pd.read_csv('economist_2020.csv')

# Keep only the rows whose index label is even.
# NOTE(review): presumably the file alternates two rows per state/date with
# the Democratic row first -- confirm against the raw file. If it has an
# explicit party/candidate column, filtering on that would not depend on
# row order.
even_rows = df[df.index % 2 == 0]

# Build the frame from whole columns rather than row by row. reset_index
# gives the same fresh 0..n-1 index the selected rows would get when rebuilt
# from plain lists.
output = pd.DataFrame({
    'date': pd.to_datetime(even_rows['date']),
    'state': even_rows['state'],
    'dem_chance': even_rows['win_prob'],
}).reset_index(drop=True)

# Drop the rows labelled 'US', rename "Washington DC" to the label that
# appears in the other models' outputs (on the 'state' column only), then
# order by state and date before writing.
output = (
    output[output['state'] != 'US']
    .assign(state=lambda frame: frame['state'].replace("Washington DC",
                                                       "District of Columbia"))
    .sort_values(by=['state', 'date'])
)
output.to_csv('../data/economist.csv', index=False)

In [8]:
# Distinct labels in the 'state' column of the current `output` frame,
# kept under their own name so they can be compared with the other models.
states_3 = set(output['state'])
print(states_3)

{'Maine', 'District of Columbia', 'Rhode Island', 'South Carolina', 'Pennsylvania', 'Arkansas', 'Tennessee', 'Mississippi', 'Hawaii', 'New Hampshire', 'Colorado', 'Nebraska', 'Alaska', 'Nevada', 'New York', 'New Mexico', 'Alabama', 'South Dakota', 'Utah', 'Michigan', 'New Jersey', 'Kansas', 'Illinois', 'Georgia', 'Wisconsin', 'California', 'Iowa', 'Idaho', 'Delaware', 'Florida', 'Indiana', 'West Virginia', 'Minnesota', 'Kentucky', 'North Carolina', 'Oklahoma', 'North Dakota', 'Montana', 'Washington', 'Louisiana', 'Missouri', 'Connecticut', 'Ohio', 'Massachusetts', 'Virginia', 'Texas', 'Maryland', 'Oregon', 'Wyoming', 'Arizona', 'Vermont'}


### Check

In [9]:
# Display the set of state labels collected into states_1 for visual inspection.
states_1 

{'Alabama',
 'Alaska',
 'Arizona',
 'Arkansas',
 'California',
 'Colorado',
 'Connecticut',
 'Delaware',
 'District of Columbia',
 'Florida',
 'Georgia',
 'Hawaii',
 'Idaho',
 'Illinois',
 'Indiana',
 'Iowa',
 'Kansas',
 'Kentucky',
 'Louisiana',
 'ME-1',
 'ME-2',
 'Maine',
 'Maryland',
 'Massachusetts',
 'Michigan',
 'Minnesota',
 'Mississippi',
 'Missouri',
 'Montana',
 'NE-1',
 'NE-2',
 'NE-3',
 'Nebraska',
 'Nevada',
 'New Hampshire',
 'New Jersey',
 'New Mexico',
 'New York',
 'North Carolina',
 'North Dakota',
 'Ohio',
 'Oklahoma',
 'Oregon',
 'Pennsylvania',
 'Rhode Island',
 'South Carolina',
 'South Dakota',
 'Tennessee',
 'Texas',
 'Utah',
 'Vermont',
 'Virginia',
 'Washington',
 'West Virginia',
 'Wisconsin',
 'Wyoming'}