## Clean and Transform Vaccine Search Term Timeline Data

In [1]:
#dependencies
import pandas as pd
from pathlib import Path
import plotly.express as px

In [2]:
#read in data
df = pd.read_csv(Path('VaccineSearchTerms_ByDate.csv'), skiprows=2)
df.head(5)


Unnamed: 0,Week,covid after vaccine: (United States),vaccine side effects: (United States),covid vaccine cvs: (United States),Walgreens covid vaccine: (United States),vaccine mandate: (United States)
0,2019-12-01,0,1,0,0,<1
1,2019-12-08,0,1,0,0,<1
2,2019-12-15,0,1,0,0,<1
3,2019-12-22,0,1,0,0,<1
4,2019-12-29,0,1,0,0,<1


In [3]:
#check column names
[print(col) for col in df.columns]

Week
covid after vaccine: (United States)
vaccine side effects: (United States)
covid vaccine cvs: (United States)
Walgreens covid vaccine: (United States)
vaccine mandate: (United States)


[None, None, None, None, None, None]

In [4]:
#rename columns
df.rename(columns = {"Week": "week", 
                     "covid after vaccine: (United States)":"covid_after_vax", 
                     "vaccine side effects: (United States)": "vax_side_effects", 
                     "covid vaccine cvs: (United States)":"covid_vax_cvs", 
                     "Walgreens covid vaccine: (United States)": "covid_vax_walgreens",
                     "vaccine mandate: (United States)":"vax_mandate"  
                    }, inplace = True)
df.head(5)

Unnamed: 0,week,covid_after_vax,vax_side_effects,covid_vax_cvs,covid_vax_walgreens,vax_mandate
0,2019-12-01,0,1,0,0,<1
1,2019-12-08,0,1,0,0,<1
2,2019-12-15,0,1,0,0,<1
3,2019-12-22,0,1,0,0,<1
4,2019-12-29,0,1,0,0,<1


In [5]:
# Check data types 
df.dtypes

week                   object
covid_after_vax        object
vax_side_effects       object
covid_vax_cvs          object
covid_vax_walgreens    object
vax_mandate            object
dtype: object

In [7]:
#convert select columns to int

# remove % symbol 
df=df.replace("<1", "0.1", regex=True)

# convert Nan's to 0
df =df.fillna(0)

df.astype({'covid_after_vax':'float',
           'vax_side_effects':'float', 
           'covid_vax_cvs':'float', 
           'vax_side_effects':'float', 
           'covid_vax_walgreens':'float', 
           'vax_mandate':'float'}
         ).dtypes

week                    object
covid_after_vax        float64
vax_side_effects       float64
covid_vax_cvs          float64
covid_vax_walgreens    float64
vax_mandate            float64
dtype: object

In [8]:
df

Unnamed: 0,week,covid_after_vax,vax_side_effects,covid_vax_cvs,covid_vax_walgreens,vax_mandate
0,2019-12-01,0,1,0,0,0.1
1,2019-12-08,0,1,0,0,0.1
2,2019-12-15,0,1,0,0,0.1
3,2019-12-22,0,1,0,0,0.1
4,2019-12-29,0,1,0,0,0.1
...,...,...,...,...,...,...
120,2022-03-20,2,2,1,1,4
121,2022-03-27,2,2,2,1,2
122,2022-04-03,2,2,2,1,3
123,2022-04-10,2,2,2,1,2


## Save Vaccine Search Term Timeline as CSV

In [9]:
df.to_csv("../Resources/vaccine_search_terms_by_date.csv")