## Clean and Transform PVI Data

In [11]:
import pandas as pd
from pathlib import Path
import plotly.express as px

In [12]:
# Load the raw Partisan Voting Index (PVI) file into a DataFrame.
# index_col=False keeps pandas from using the first CSV column as the index.
pvi_csv_path = Path('PartisanVotingIndex_WorldPopulationReview.csv')
df = pd.read_csv(pvi_csv_path, index_col=False)
df.head(5)

Unnamed: 0,State,partisanVotingIndexDemocratic
0,Vermont,-15.0
1,Hawaii,-15.0
2,Massachusetts,-14.0
3,Maryland,-14.0
4,California,-14.0


In [13]:
# Rename the "State" column to "state_name"; all other columns keep their names.
df = df.rename(columns={"State": "state_name"})
df.head(5)

Unnamed: 0,state_name,partisanVotingIndexDemocratic
0,Vermont,-15.0
1,Hawaii,-15.0
2,Massachusetts,-14.0
3,Maryland,-14.0
4,California,-14.0


In [14]:
# Create the state rank column by copying the DataFrame's row index into
# "state_rank" (so the first row gets rank 0).
df = df.assign(state_rank=df.index)
df.head(5)

Unnamed: 0,state_name,partisanVotingIndexDemocratic,state_rank
0,Vermont,-15.0,0
1,Hawaii,-15.0,1
2,Massachusetts,-14.0,2
3,Maryland,-14.0,3
4,California,-14.0,4


In [15]:
# Verify the data type pandas assigned to each column
df.dtypes

state_name                        object
partisanVotingIndexDemocratic    float64
state_rank                         int64
dtype: object

In [16]:
# Replace missing (NaN) PVI scores with 0
pvi_column = 'partisanVotingIndexDemocratic'
df[pvi_column] = df[pvi_column].fillna(0)

In [17]:
# Assign a party label to every row from its PVI score:
# a score of 0 or below is labelled "D", a positive score is labelled "R".
Party = [
    "D" if score <= 0 else "R"
    for score in df['partisanVotingIndexDemocratic']
]
Party

['D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'D',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R',
 'R']

In [18]:
# Attach the party labels to the DataFrame as a new "party" column
df = df.assign(party=Party)
df.head()

Unnamed: 0,state_name,partisanVotingIndexDemocratic,state_rank,party
0,Vermont,-15.0,0,D
1,Hawaii,-15.0,1,D
2,Massachusetts,-14.0,2,D
3,Maryland,-14.0,3,D
4,California,-14.0,4,D


In [22]:
# Sanity-check plot: PVI score (x) against state rank (y), one marker per row.
# Markers are coloured by rank and the marker symbol is chosen by party.
pvi_scatter_options = {
    "x": "partisanVotingIndexDemocratic",
    "y": "state_rank",
    "hover_name": "state_name",
    "color": "state_rank",
    "color_continuous_scale": px.colors.diverging.balance,
    "title": "State PVI Ratings",
    "symbol": "party",
    "width": 800,
}
fig = px.scatter(df, **pvi_scatter_options)

# Reposition the legend and set the title's horizontal position to 0.5.
fig.update_layout(legend_x=0.85, legend_y=0.1, title_x=0.5)
fig.show()

In [19]:
# Build PVI_df without the raw PVI score column; df itself is left unchanged.
PVI_df = df.drop(columns=["partisanVotingIndexDemocratic"])
PVI_df.head()

Unnamed: 0,state_name,state_rank,party
0,Vermont,0,D
1,Hawaii,1,D
2,Massachusetts,2,D
3,Maryland,3,D
4,California,4,D


In [23]:
# Sanity-check plot of the trimmed table: state name (x) against state rank (y).
# Markers are coloured by rank and the marker symbol is chosen by party.
rank_scatter_options = {
    "x": "state_name",
    "y": "state_rank",
    "hover_name": "state_name",
    "color": "state_rank",
    "color_continuous_scale": px.colors.diverging.balance,
    "title": "State PVI Rank",
    "symbol": "party",
    "width": 800,
}
fig = px.scatter(PVI_df, **rank_scatter_options)

# Reposition the legend and set the title's horizontal position to 0.5.
fig.update_layout(legend_x=0.85, legend_y=0.1, title_x=0.5)
fig.show()

## Send PVI Data to Database

In [11]:
from sqlalchemy import create_engine
from getpass import getpass

In [12]:
# Prompt for the database password (input is not echoed) and store it
password = getpass(prompt="enter database pw")

enter database pw········


In [13]:
# Connect to the "CovidSearchTrends" PostgreSQL database on localhost:5432
# as user "postgres".
# The password is percent-encoded before it is placed in the URL so that
# characters such as "@", "/", ":" or "%" are not mistaken for URL delimiters.
from urllib.parse import quote

engine = create_engine(
    f"postgresql://postgres:{quote(password, safe='')}@localhost:5432/CovidSearchTrends"
)

In [14]:
# Write the PVI table to the database.
# if_exists='append' adds these rows to the table when it already exists;
# index=False leaves the DataFrame index out of the written columns.
pvi_table_name = "partisan_voting_index"
PVI_df.to_sql(name=pvi_table_name, con=engine, if_exists='append', index=False)

## Save PVI Data as CSV

In [15]:
# Save the PVI table to ../Resources/partisan_voting_index.csv.
# The path is built with pathlib instead of a backslash-separated string:
# "\R" and "\p" are invalid escape sequences in a normal string literal, and
# a backslash-separated path only resolves to the Resources folder on Windows.
pvi_csv_out = Path("..") / "Resources" / "partisan_voting_index.csv"
PVI_df.to_csv(pvi_csv_out)