## Clean and Transform Vaccine Search Term Data

In [3]:
#dependencies
import pandas as pd
from pathlib import Path
import plotly.express as px

In [4]:
# Load the state-by-state search-term comparison export.
# The first two lines of the file are skipped so the header row is parsed as column names.
source_csv = Path('VaccineSearchTerms_ByStateComparison.csv')
df = pd.read_csv(source_csv, skiprows=2)

# preview the first five rows
df.head()


Unnamed: 0,Region,covid after vaccine: (11/30/19 - 4/21/22),vaccine side effects: (11/30/19 - 4/21/22),covid vaccine cvs: (11/30/19 - 4/21/22),Walgreens covid vaccine: (11/30/19 - 4/21/22),vaccine mandate: (11/30/19 - 4/21/22)
0,Massachusetts,18%,16%,45%,13%,8%
1,New Jersey,19%,16%,41%,15%,9%
2,Rhode Island,17%,17%,41%,18%,7%
3,Connecticut,21%,19%,36%,18%,6%
4,Illinois,20%,20%,13%,35%,12%


In [5]:
#check column names
# Use a plain loop: a list comprehension run only for its print() side effect
# builds a throwaway list of None values and echoes it as the cell's result.
for col in df.columns:
    print(col)

Region
covid after vaccine: (11/30/19 - 4/21/22)
vaccine side effects: (11/30/19 - 4/21/22)
covid vaccine cvs: (11/30/19 - 4/21/22)
Walgreens covid vaccine: (11/30/19 - 4/21/22)
vaccine mandate: (11/30/19 - 4/21/22)


[None, None, None, None, None, None]

In [6]:
#rename columns
# Map the raw headers (search term plus date range) to short snake_case names.
column_names = {
    "Region": "state_name",
    "covid after vaccine: (11/30/19 - 4/21/22)": "covid_after_vax",
    "vaccine side effects: (11/30/19 - 4/21/22)": "vax_side_effects",
    "covid vaccine cvs: (11/30/19 - 4/21/22)": "covid_vax_cvs",
    "Walgreens covid vaccine: (11/30/19 - 4/21/22)": "covid_vax_walgreens",
    "vaccine mandate: (11/30/19 - 4/21/22)": "vax_mandate",
}

# Reassign instead of inplace=True: rename() returns a new frame, so the
# object displayed by earlier cells is not mutated behind the reader's back.
df = df.rename(columns=column_names)
df.head(5)

Unnamed: 0,state_name,covid_after_vax,vax_side_effects,covid_vax_cvs,covid_vax_walgreens,vax_mandate
0,Massachusetts,18%,16%,45%,13%,8%
1,New Jersey,19%,16%,41%,15%,9%
2,Rhode Island,17%,17%,41%,18%,7%
3,Connecticut,21%,19%,36%,18%,6%
4,Illinois,20%,20%,13%,35%,12%


In [7]:
# Check data types
# Every column is still `object` at this point: the percentages were read in
# as strings such as "18%", so they must be cleaned before numeric use.
df.dtypes

state_name             object
covid_after_vax        object
vax_side_effects       object
covid_vax_cvs          object
covid_vax_walgreens    object
vax_mandate            object
dtype: object

In [8]:
#convert select columns to int

# columns that hold percentage strings such as "18%"
pct_cols = ['covid_after_vax',
            'vax_side_effects',
            'covid_vax_cvs',
            'covid_vax_walgreens',
            'vax_mandate']

# remove % symbol
df = df.replace("%", "", regex=True)

# convert NaN's to 0
df = df.fillna(0)

# astype() returns a NEW frame; assign it back so the integer dtypes persist
# in df (previously the converted frame was discarded and df kept its strings)
df = df.astype({col: 'int32' for col in pct_cols})

# confirm the conversion took effect
df.dtypes

state_name             object
covid_after_vax         int32
vax_side_effects        int32
covid_vax_cvs           int32
covid_vax_walgreens     int32
vax_mandate             int32
dtype: object

In [9]:
# Put the columns in presentation order: state first, then the two pharmacy
# search terms, then the remaining vaccine search terms.
column_order = [
    'state_name',
    'covid_vax_cvs',
    'covid_vax_walgreens',
    'vax_side_effects',
    'covid_after_vax',
    'vax_mandate',
]
vax_df = df[column_order]
vax_df

Unnamed: 0,state_name,covid_vax_cvs,covid_vax_walgreens,vax_side_effects,covid_after_vax,vax_mandate
0,Massachusetts,45,13,16,18,8
1,New Jersey,41,15,16,19,9
2,Rhode Island,41,18,17,17,7
3,Connecticut,36,18,19,21,6
4,Illinois,13,35,20,20,12
5,Maryland,30,20,20,22,8
6,Delaware,17,32,21,20,10
7,Virginia,31,12,23,24,10
8,Pennsylvania,27,9,25,26,13
9,New York,25,19,19,21,16


In [13]:
#check plot
# 3-D scatter of three search-term columns, one point per state.
fig = px.scatter_3d(vax_df,
                    x="vax_side_effects",
                    y="covid_after_vax",
                    z="vax_mandate",
                    hover_name="state_name",
                    # colour by row position; take the index from the frame
                    # being plotted so the colour array always matches its rows
                    color=vax_df.index,
                    color_continuous_scale=px.colors.sequential.ice,
                    title="Vaccine Search Trends",
                    width=800)
# title_x=0.5 places the title at the horizontal midpoint of the figure
fig.update_layout(legend=dict(x=0, y=1), title_x=0.5)
fig.show()

## Send Vaccine Search Term Data to Database

In [97]:
from sqlalchemy import create_engine
from getpass import getpass

In [98]:
# Prompt for the database password at run time so it is never written into the notebook.
password = getpass(prompt="enter database pw")

enter database pw········


In [99]:
# connect to database
# URL-encode the password before embedding it in the connection string:
# characters such as "@", ":" or "/" would otherwise be read as URL
# delimiters and the connection string would be parsed incorrectly.
from urllib.parse import quote_plus

engine = create_engine(
    f"postgresql://postgres:{quote_plus(password)}@localhost:5432/CovidSearchTrends"
)

In [101]:
# export the vaccine search term data to the "vaccine_search_terms" table
# index=False keeps the DataFrame index out of the table.
# NOTE(review): if_exists='append' adds rows to an existing table, so
# re-running this cell inserts the same rows again — confirm that is intended.
vax_df.to_sql(name="vaccine_search_terms", con=engine, index = False, if_exists='append')

## Save Vaccine Search Term Data as CSV

In [107]:
# Save the cleaned table as CSV.
# index=False matches the database export above and avoids writing the
# meaningless positional index as an extra unnamed first column.
vax_df.to_csv("../Resources/vaccine_search_terms.csv", index=False)