In [1]:
#Import dependencies
import pandas as pd
import os
import requests
import json
from pprint import pprint

In [2]:
# Load the forecast and results tables from CSV files in the local "data" directory
forecast_csv = os.path.join("data", "forecast_df.csv")
results_csv = os.path.join("data", "results_df.csv")

forecast_df = pd.read_csv(forecast_csv)
results_df = pd.read_csv(results_csv)

In [3]:
# Keep only the rows whose poll type is 'now-cast', restricted to the state,
# poll type, and the raw/adjusted Clinton and Trump poll columns.
# The pollster column is left out because of the "," CSV delimiter, and the
# start/end date columns are left out for now.
now_cast_df = forecast_df.loc[
    forecast_df["type"].eq("now-cast"),
    [
        "state",
        "type",
        "rawpoll_clinton",
        "rawpoll_trump",
        "adjpoll_clinton",
        "adjpoll_trump",
    ],
]
now_cast_df

Unnamed: 0,state,type,rawpoll_clinton,rawpoll_trump,adjpoll_clinton,adjpoll_trump
4208,U.S.,now-cast,47.00,43.00,45.23046,41.68534
4209,U.S.,now-cast,38.03,35.69,43.36603,41.20737
4210,U.S.,now-cast,42.00,39.00,42.05478,38.81421
4211,U.S.,now-cast,45.00,41.00,45.60811,40.89765
4212,U.S.,now-cast,47.00,43.00,46.93448,42.38145
...,...,...,...,...,...,...
8411,North Carolina,now-cast,45.00,44.00,44.40263,44.69241
8412,North Carolina,now-cast,42.00,49.00,38.91546,48.83620
8413,North Carolina,now-cast,42.00,46.00,42.90049,48.56213
8414,Utah,now-cast,33.67,38.53,33.17161,40.04859


In [4]:
# Filter out unwanted data: keep every row whose state is not "U.S.".
# The boolean mask is negated with `~` rather than compared against the
# literal False, which is the idiomatic way to invert a pandas mask.
states_now_df = now_cast_df[~now_cast_df["state"].isin(["U.S."])]
states_now_df

Unnamed: 0,state,type,rawpoll_clinton,rawpoll_trump,adjpoll_clinton,adjpoll_trump
4216,New Mexico,now-cast,46.00,44.00,45.04927,41.92541
4221,Virginia,now-cast,48.00,43.00,47.45700,42.35281
4223,Iowa,now-cast,39.00,46.00,39.36898,45.67372
4225,Wisconsin,now-cast,46.00,40.00,46.10277,41.02478
4226,North Carolina,now-cast,44.00,44.00,44.26048,44.98719
...,...,...,...,...,...,...
8411,North Carolina,now-cast,45.00,44.00,44.40263,44.69241
8412,North Carolina,now-cast,42.00,49.00,38.91546,48.83620
8413,North Carolina,now-cast,42.00,46.00,42.90049,48.56213
8414,Utah,now-cast,33.67,38.53,33.17161,40.04859


In [5]:
# Narrow the results table down to its "path" and "count" columns
short_results_df = results_df[["path", "count"]]
short_results_df

Unnamed: 0,path,count
0,Texas,254
1,Georgia,159
2,Virginia,134
3,Kentucky,120
4,Missouri,115
5,Kansas,105
6,Illinois,102
7,North Carolina,100
8,Iowa,99
9,Tennessee,95


In [6]:
# Source column names pulled from the forecast table
columns = [
    "state",
    "type",
    "rawpoll_clinton",
    "rawpoll_trump",
    "adjpoll_clinton",
    "adjpoll_trump",
]
# Descriptive labels, listed in the same order as `columns`
my_cols = [
    "State",
    "Poll Type",
    "Clinton",
    "Trump",
    "Clinton Adjusted",
    "Trump Adjusted",
]

# Descriptive labels used for the results table
my_cols_res = ["Electoral Votes", "State"]

# Rebuild the 'now-cast' subset of the forecast table using the column list above
now_cast_df = forecast_df.loc[forecast_df["type"].eq("now-cast"), columns]

# Make titles more descriptive: pair each source column with its label.
# The pollster and start/end date columns are not selected, so they have no labels.
states_now_df = states_now_df.rename(columns=dict(zip(columns, my_cols)))

# NOTE(review): the displayed "count" values (e.g. Texas 254, Georgia 159) do not
# look like electoral votes -- confirm that "Electoral Votes" is the right label.
short_results_df = short_results_df.rename(
    columns={"path": "State", "count": "Electoral Votes"}
)

In [7]:
# Preview the first five rows of the results table after renaming
short_results_df.head(n=5)

Unnamed: 0,State,Electoral Votes
0,Texas,254
1,Georgia,159
2,Virginia,134
3,Kentucky,120
4,Missouri,115


In [8]:
# Preview the first five rows of the filtered poll table after renaming
states_now_df.head(n=5)

Unnamed: 0,State,Poll Type,Clinton,Trump,Clinton Adjusted,Trump Adjusted
4216,New Mexico,now-cast,46.0,44.0,45.04927,41.92541
4221,Virginia,now-cast,48.0,43.0,47.457,42.35281
4223,Iowa,now-cast,39.0,46.0,39.36898,45.67372
4225,Wisconsin,now-cast,46.0,40.0,46.10277,41.02478
4226,North Carolina,now-cast,44.0,44.0,44.26048,44.98719


In [9]:
# Write the now-cast table to now_cast_df.csv in the "data" directory,
# keeping the header row and leaving out the row index
output_file = os.path.join("data", "now_cast_df.csv")
now_cast_df.to_csv(
    output_file,
    header=True,
    index=False,
)

In [10]:
# Write the filtered, renamed poll table to states_now_df.csv in the "data"
# directory, keeping the header row and leaving out the row index
output_file = os.path.join("data", "states_now_df.csv")
states_now_df.to_csv(
    output_file,
    header=True,
    index=False,
)

In [11]:
# Write the renamed results table to short_results_df.csv in the "data"
# directory, keeping the header row and leaving out the row index
output_file = os.path.join("data", "short_results_df.csv")
short_results_df.to_csv(
    output_file,
    header=True,
    index=False,
)