## Clean and Transform Virus Search Term Data

In [1]:
#dependencies
import pandas as pd
from pathlib import Path


In [2]:
#read in data
df = pd.read_csv(Path('VirusNameSearchTerms_ByStateComparison.csv'), skiprows=2)
df.head(5)


Unnamed: 0,Region,coronavirus: (11/30/19 - 4/21/22),covid: (11/30/19 - 4/21/22),covid 19: (11/30/19 - 4/21/22)
0,Vermont,28%,63%,9%
1,Hawaii,29%,60%,11%
2,Alaska,27%,62%,11%
3,New Hampshire,29%,60%,11%
4,Massachusetts,33%,59%,8%


In [3]:
#check column names
[print(col) for col in df.columns]

Region
coronavirus: (11/30/19 - 4/21/22)
covid: (11/30/19 - 4/21/22)
covid 19: (11/30/19 - 4/21/22)


[None, None, None, None]

In [4]:
#rename columns
df.rename(columns = {"Region": "state_name", 
                     "covid: (11/30/19 - 4/21/22)": "covid", 
                     "covid 19: (11/30/19 - 4/21/22)":"covid19",
                     "coronavirus: (11/30/19 - 4/21/22)":"coronavirus"                     
                    }, inplace = True)
df.head(5)

Unnamed: 0,state_name,coronavirus,covid,covid19
0,Vermont,28%,63%,9%
1,Hawaii,29%,60%,11%
2,Alaska,27%,62%,11%
3,New Hampshire,29%,60%,11%
4,Massachusetts,33%,59%,8%


In [5]:
# Check data types 
df.dtypes

state_name     object
coronavirus    object
covid          object
covid19        object
dtype: object

In [6]:
#convert select columns to int

# remove % symbol 
df=df.replace("%", "", regex=True)

# convert Nan's to 0
df =df.fillna(0)

df.astype({'coronavirus':'int32', 
           'covid':'int32', 
           'covid19':'int32'}
         ).dtypes

state_name     object
coronavirus     int32
covid           int32
covid19         int32
dtype: object

In [7]:
# reorder columns
virus_df = df[['state_name',
                    'covid', 
                    'covid19', 
                    'coronavirus']]
virus_df

Unnamed: 0,state_name,covid,covid19,coronavirus
0,Vermont,63,9,28
1,Hawaii,60,11,29
2,Alaska,62,11,27
3,New Hampshire,60,11,29
4,Massachusetts,59,8,33
5,Maine,57,10,33
6,Connecticut,59,8,33
7,Rhode Island,59,9,32
8,Minnesota,58,10,32
9,Oregon,57,10,33


## Send Virus Search Term Data to Database

In [8]:
from sqlalchemy import create_engine
from getpass import getpass

In [9]:
#store password variable
password = getpass("enter database pw")

enter database pw········


In [10]:
# connect to database
engine = create_engine(f"postgresql://postgres:{password}@localhost:5432/CovidSearchTrends")

In [12]:
# import PVI data to database
virus_df.to_sql(name="virus_search_terms", con=engine, index = False, if_exists='append')

## Save Virus Search Term Data as CSV

In [13]:
virus_df.to_csv("../Resources/virus_search_terms.csv")