## Reformat Raw Data

Reformat each model's raw data into the following columns: <br/>
date -- date on which the model was run<br/>
state -- state being predicted<br/>
dem_chance -- probability that the state is won by Joseph Biden (D)<br/>
rep_chance -- probability that the state is won by Donald Trump (R)<br/>


In [1]:
import csv
import pandas as pd

### 538 Model

In [19]:
# Reshape the raw 538 forecast file into the (date, state, dem_chance) layout
# and write it to ../data/538.csv.
df = pd.read_csv('538_2020.csv')

# Select and rename whole columns instead of copying values one row at a time
# with iterrows(); 'modeldate' becomes 'date' and 'winstate_chal' becomes
# 'dem_chance'. reset_index gives the result a fresh 0..n-1 index.
output = (
    df[['modeldate', 'state', 'winstate_chal']]
    .rename(columns={'modeldate': 'date', 'winstate_chal': 'dem_chance'})
    .reset_index(drop=True)
)
# Parse the date strings so the sort below is chronological, not lexical.
output['date'] = pd.to_datetime(output['date'])

output = output.sort_values(by=['state', 'date'])
# Rows labelled 'US' are dropped; only the remaining labels are written out.
output = output[output.state != 'US']
output.to_csv('../data/538.csv', index=False)

In [20]:
# Show the distinct state labels present in the 538 output.
states = set(output['state'].unique())
print(states)

{'Vermont', 'Arizona', 'Indiana', 'Illinois', 'Pennsylvania', 'South Carolina', 'NE-3', 'ME-1', 'Nevada', 'North Dakota', 'Utah', 'Iowa', 'Hawaii', 'California', 'NE-1', 'Alabama', 'Wisconsin', 'Arkansas', 'Colorado', 'Montana', 'Maryland', 'Missouri', 'Alaska', 'New Mexico', 'Louisiana', 'Rhode Island', 'Washington', 'New York', 'Texas', 'Massachusetts', 'Michigan', 'District of Columbia', 'Kentucky', 'Mississippi', 'New Jersey', 'Virginia', 'Delaware', 'Ohio', 'Oregon', 'Wyoming', 'Minnesota', 'New Hampshire', 'West Virginia', 'Connecticut', 'Georgia', 'Oklahoma', 'South Dakota', 'NE-2', 'Nebraska', 'ME-2', 'Maine', 'Kansas', 'Florida', 'North Carolina', 'Tennessee', 'Idaho'}


### JHK Model

In [21]:
# Reshape the raw JHK forecast file into the (date, state, dem_chance) layout
# and write it to ../data/jhk.csv.
df = pd.read_csv('jhk_2020.csv')

# Keep only the rows whose party is 'DEM'; their 'win' value is what gets
# written as dem_chance.
dem_party = df['party'] == 'DEM'
df = df[dem_party]

# Select and rename whole columns instead of copying values one row at a time
# with iterrows(). reset_index gives the result a fresh 0..n-1 index.
output = (
    df[['forecastDate', 'state', 'win']]
    .rename(columns={'forecastDate': 'date', 'win': 'dem_chance'})
    .reset_index(drop=True)
)
# Parse the date strings so the sort below is chronological, not lexical.
output['date'] = pd.to_datetime(output['date'])

output = output.sort_values(by=['state', 'date'])
# Rows labelled 'US' are dropped; only the remaining labels are written out.
# NOTE(review): congressional districts keep this file's own labels (e.g.
# 'Maine CD-1'), which differ from the 538 output's 'ME-1' style -- confirm
# that downstream code reconciles the two.
output = output[output.state != 'US']
output.to_csv('../data/jhk.csv', index=False)

In [22]:
# Show the distinct state labels present in the JHK output.
states = set(output['state'].unique())
print(states)

{'Vermont', 'Arizona', 'Indiana', 'Maine CD-2', 'Illinois', 'Maine CD-1', 'Pennsylvania', 'South Carolina', 'North Dakota', 'Nevada', 'Utah', 'Iowa', 'Hawaii', 'California', 'Alabama', 'Wisconsin', 'Arkansas', 'Nebraska CD-1', 'Nebraska CD-3', 'Colorado', 'Montana', 'Maryland', 'Missouri', 'Alaska', 'New Mexico', 'Louisiana', 'Rhode Island', 'Washington', 'New York', 'Texas', 'Massachusetts', 'Michigan', 'District of Columbia', 'Kentucky', 'Mississippi', 'New Jersey', 'Virginia', 'Delaware', 'Ohio', 'Oregon', 'Wyoming', 'Minnesota', 'New Hampshire', 'West Virginia', 'Connecticut', 'Georgia', 'Oklahoma', 'South Dakota', 'Nebraska', 'Nebraska CD-2', 'Maine', 'Kansas', 'Florida', 'North Carolina', 'Tennessee', 'Idaho'}


### Economist Model

In [34]:
# Reshape the raw Economist forecast file into the (date, state, dem_chance)
# layout and write it to ../data/economist.csv.
df = pd.read_csv('economist_2020.csv')

# Only rows with an even index label are kept, and their 'win_prob' is written
# as dem_chance.
# NOTE(review): presumably the file alternates rows between the two candidates
# with the Democrat first -- confirm against the CSV, since this depends
# entirely on row order.
even_rows = df[df.index % 2 == 0]

# Select and rename whole columns instead of copying values one row at a time
# with iterrows(). reset_index gives the result a fresh 0..n-1 index.
output = (
    even_rows[['date', 'state', 'win_prob']]
    .rename(columns={'win_prob': 'dem_chance'})
    .reset_index(drop=True)
)
# Parse the date strings so the sort below is chronological, not lexical.
output['date'] = pd.to_datetime(output['date'])

# Rows labelled 'US' are dropped; only the remaining labels are written out.
output = output[output.state != 'US']
# Rename "Washington DC" to "District of Columbia", the label that appears in
# the other models' printed state sets.
output = output.replace(to_replace="Washington DC",
                        value="District of Columbia")
# Sort after the rename so the rows are ordered by their final state label.
output = output.sort_values(by=['state', 'date'])
output.to_csv('../data/economist.csv', index=False)

In [35]:
# Show the distinct state labels present in the Economist output.
states = set(output['state'].unique())
print(states)

{'Vermont', 'Arizona', 'Indiana', 'Illinois', 'Pennsylvania', 'South Carolina', 'North Dakota', 'Nevada', 'Utah', 'Iowa', 'Hawaii', 'California', 'Alabama', 'Wisconsin', 'Arkansas', 'Colorado', 'Montana', 'Maryland', 'Missouri', 'Alaska', 'New Mexico', 'Louisiana', 'Rhode Island', 'Washington', 'New York', 'Texas', 'Massachusetts', 'Michigan', 'District of Columbia', 'Kentucky', 'Mississippi', 'New Jersey', 'Virginia', 'Delaware', 'Ohio', 'Oregon', 'Wyoming', 'Minnesota', 'New Hampshire', 'West Virginia', 'Connecticut', 'Georgia', 'Oklahoma', 'South Dakota', 'Nebraska', 'Maine', 'Kansas', 'Florida', 'North Carolina', 'Tennessee', 'Idaho'}
