In [50]:
#import dependencies
import pandas as pd

In [51]:
#load the normalized weekly deaths table from the Resources folder
deaths_df = pd.read_csv(
    filepath_or_buffer="Resources/normalized_death_data.csv",
)

In [52]:
#preview the deaths table as loaded from the CSV
deaths_df

Unnamed: 0,Week received by CDC,"AI/AN, NH","Asian/PI, NH","Black, NH",Hispanic,"White, NH"
0,3/7/2020,0.14,0.19,0.26,0.14,0.12
1,3/14/2020,0.54,0.53,1.02,0.48,0.38
2,3/21/2020,0.65,1.29,3.15,1.69,1.17
3,3/28/2020,1.27,2.57,5.86,3.44,2.11
4,4/4/2020,1.30,3.47,7.10,4.80,3.08
...,...,...,...,...,...,...
56,4/3/2021,0.22,0.20,0.55,0.35,0.41
57,4/10/2021,0.04,0.22,0.45,0.25,0.32
58,4/17/2021,0.11,0.13,0.22,0.16,0.22
59,4/24/2021,0.14,0.03,0.12,0.05,0.10


In [53]:
#review data types - the week column loads as object rather than datetime,
#the remaining columns load as float64
deaths_df.dtypes

Week received by CDC     object
AI/AN, NH               float64
Asian/PI, NH            float64
Black, NH               float64
Hispanic                float64
White, NH               float64
dtype: object

In [54]:
#one chained pass over the deaths table:
#  1. parse the week strings into datetimes
#  2. append a row-wise Total over the numeric columns (the datetime column is skipped)
#  3. shorten the column names
deaths_df = (
    deaths_df
    .assign(**{"Week received by CDC": lambda frame: pd.to_datetime(frame["Week received by CDC"])})
    .assign(Total=lambda frame: frame.sum(numeric_only=True, axis=1))
    .rename(
        columns={
            "Week received by CDC": "Week",
            "AI/AN, NH": "AI_AN_NH",
            "Asian/PI, NH": "Asian_PI_NH",
            "Black, NH": "Black_NH",
            "White, NH": "White_NH",
        }
    )
)

In [55]:
#check the datetime week, the renamed columns and the new Total column
deaths_df

Unnamed: 0,Week,AI_AN_NH,Asian_PI_NH,Black_NH,Hispanic,White_NH,Total
0,2020-03-07,0.14,0.19,0.26,0.14,0.12,0.85
1,2020-03-14,0.54,0.53,1.02,0.48,0.38,2.95
2,2020-03-21,0.65,1.29,3.15,1.69,1.17,7.95
3,2020-03-28,1.27,2.57,5.86,3.44,2.11,15.25
4,2020-04-04,1.30,3.47,7.10,4.80,3.08,19.75
...,...,...,...,...,...,...,...
56,2021-04-03,0.22,0.20,0.55,0.35,0.41,1.73
57,2021-04-10,0.04,0.22,0.45,0.25,0.32,1.28
58,2021-04-17,0.11,0.13,0.22,0.16,0.22,0.84
59,2021-04-24,0.14,0.03,0.12,0.05,0.10,0.44


In [56]:
#export the cleaned covid death data (with the Total column) to CSV
#a forward slash is used because "\D" is not a valid string escape and a backslash is only a
#path separator on Windows; "/" works on every platform and matches the read paths
deaths_df.to_csv("Resources/Death_Data_Normalized.csv", index=False)

In [39]:
#load the three per-manufacturer allocation tables (read in the order J&J, Moderna, Pfizer)
JJ_df, Moderna_df, Pfizer_df = map(
    pd.read_csv,
    ("Resources/Janssen.csv", "Resources/Moderna.csv", "Resources/Pfizer.csv"),
)

In [40]:
#preview the Janssen (J&J) allocation data as loaded from the CSV
JJ_df

Unnamed: 0,Jurisdiction,Week of Allocations,1st Dose Allocations
0,Connecticut,04/12/2021,6400
1,Maine,04/12/2021,2500
2,Massachusetts,04/12/2021,12300
3,New Hampshire,04/12/2021,2500
4,Rhode Island,04/12/2021,2000
...,...,...,...
373,Alaska,03/01/2021,8900
374,Idaho,03/01/2021,13300
375,Oregon,03/01/2021,34400
376,Washington,03/01/2021,60900


In [41]:
##edit week data to reflect when the vaccination would go into effect - aka 2 weeks after distro
#keys:   week in which the doses are treated as effective (m/d/yy)
#values: the "Week of Allocations" date as it appears in the vaccine CSVs (mm/dd/yyyy)
#every dated key falls 19 days after its allocation date, and both sides advance one week per row
weeks = {
    "1/2/21": "12/14/2020",
    "1/9/21": "12/21/2020",
    "1/16/21": "12/28/2020",
    #was '12/28/2020' (a repeat of the row above), which left 01/04/2021 allocations unmapped
    "1/23/21": "01/04/2021",
    "1/30/21": "01/11/2021",
    "2/6/21": "01/18/2021",
    "2/13/21": "01/25/2021",
    "2/20/21": "02/01/2021",
    "2/27/21": "02/08/2021",
    "3/6/21": "02/15/2021",
    "3/13/21": "02/22/2021",
    "3/20/21": "03/01/2021",
    "3/27/21": "03/08/2021",
    "4/3/21": "03/15/2021",
    "4/10/21": "03/22/2021",
    "4/17/21": "03/29/2021",
    #04/05/2021, 04/12/2021 and 04/19/2021 used to be listed under "N/A" as well, but a dict
    #keeps only the last value of a repeated key, so 04/26/2021 is the only one ever stored.
    #Allocation dates with no entry here get no week from this lookup.
    #NOTE(review): by the 19-day pattern 04/05/2021 would line up with 4/24/21 - confirm
    #that leaving it unmapped is intended
    "N/A": "04/26/2021",
}

In [42]:
#clean JJ data
#invert the weeks lookup once (allocation date -> effective week) instead of scanning every
#key for every row; iterating in reverse means the FIRST key wins if two keys share a date
allocation_to_week = {
    allocation: effective for effective, allocation in reversed(list(weeks.items()))
}

JJ_df = (
    JJ_df
    #edit weeks - exact match on the allocation date; dates missing from weeks become "Other"
    .assign(Week=lambda frame: frame["Week of Allocations"].map(allocation_to_week).fillna("Other"))
    #rename columns
    .rename(columns={"1st Dose Allocations": "Vaccinations"})
    #drop n/a weeks and columns
    .drop(columns=["Jurisdiction", "Week of Allocations"])
    .loc[lambda frame: ~frame["Week"].isin(["N/A", "Other"])]
    #copy so later column assignments write to an independent frame rather than a filtered
    #slice (avoids SettingWithCopyWarning on pandas versions without copy-on-write)
    .copy()
)

In [43]:
#convert week to datetime
#the week labels are m/d/yy strings, so the format is stated explicitly instead of inferred;
#assign() returns a new frame rather than writing into the row-filtered one
JJ_df = JJ_df.assign(Week=pd.to_datetime(JJ_df["Week"], format="%m/%d/%y"))

In [44]:
#add up the J&J doses allocated to each effective week, stored as whole numbers
Total_JJ = JJ_df.groupby("Week")["Vaccinations"].sum().astype(int)

In [45]:
#preview the Pfizer allocation data as loaded from the CSV
Pfizer_df

Unnamed: 0,Jurisdiction,Week of Allocations,1st Dose Allocations,2nd Dose Allocations
0,Connecticut,04/26/2021,54990,54990
1,Maine,04/26/2021,21060,21060
2,Massachusetts,04/26/2021,105300,105300
3,New Hampshire,04/26/2021,21060,21060
4,Rhode Island,04/26/2021,16380,16380
...,...,...,...,...
1255,Virginia,12/14/2020,72150,72150
1256,Washington,12/14/2020,62400,62400
1257,West Virginia,12/14/2020,16575,16575
1258,Wisconsin,12/14/2020,49725,49725


In [46]:
#clean Pfizer data
#invert the weeks lookup once (allocation date -> effective week) instead of scanning every
#key for every row; iterating in reverse means the FIRST key wins if two keys share a date
allocation_to_week = {
    allocation: effective for effective, allocation in reversed(list(weeks.items()))
}

Pfizer_df = (
    Pfizer_df
    #edit weeks - exact match on the allocation date; dates missing from weeks become "Other"
    .assign(Week=lambda frame: frame["Week of Allocations"].map(allocation_to_week).fillna("Other"))
    #rename columns - the 2nd dose allocation is the column counted as vaccinations
    .rename(columns={"2nd Dose Allocations": "Vaccinations"})
    #drop n/a weeks and columns
    .drop(columns=["Jurisdiction", "Week of Allocations"])
    .loc[lambda frame: ~frame["Week"].isin(["N/A", "Other"])]
    #convert week to datetime - labels are m/d/yy; assign() builds a new frame, so nothing is
    #written into the row-filtered slice (the original raised SettingWithCopyWarning here)
    .assign(Week=lambda frame: pd.to_datetime(frame["Week"], format="%m/%d/%y"))
)

#get total Pfizer vaccines distributed
#select the column before summing so the unused "1st Dose Allocations" column is not aggregated
Total_Pfizer = Pfizer_df.groupby("Week")["Vaccinations"].sum().astype(int)


In [47]:
#clean Moderna data
#invert the weeks lookup once (allocation date -> effective week) instead of scanning every
#key for every row; iterating in reverse means the FIRST key wins if two keys share a date
allocation_to_week = {
    allocation: effective for effective, allocation in reversed(list(weeks.items()))
}

Moderna_df = (
    Moderna_df
    #edit weeks - exact match on the allocation date; dates missing from weeks become "Other"
    .assign(Week=lambda frame: frame["Week of Allocations"].map(allocation_to_week).fillna("Other"))
    #rename columns - the 2nd dose allocation is the column counted as vaccinations
    .rename(columns={"2nd Dose Allocations": "Vaccinations"})
    #drop n/a weeks and columns
    .drop(columns=["Jurisdiction", "Week of Allocations"])
    .loc[lambda frame: ~frame["Week"].isin(["N/A", "Other"])]
    #convert week to datetime - labels are m/d/yy; assign() builds a new frame, so nothing is
    #written into the row-filtered slice (the original raised SettingWithCopyWarning here)
    .assign(Week=lambda frame: pd.to_datetime(frame["Week"], format="%m/%d/%y"))
)

#get total Moderna vaccines distributed
#select the column before summing so only the Vaccinations column is aggregated
Total_Moderna = Moderna_df.groupby("Week")["Vaccinations"].sum().astype(int)


In [48]:
#merge all vaccine totals together for total vaccine distro
#Series.add accepts a single `other` series - its second positional parameter is `level` - so
#passing Total_Moderna there never added the Moderna doses. The brand totals are summed
#pairwise instead; fill_value=0 treats a week missing from one brand as zero doses
total_vaccine_distro = (
    Total_JJ
    .add(Total_Pfizer, fill_value=0)
    .add(Total_Moderna, fill_value=0)
)


In [49]:
#export total vaccine data (one row per week) to CSV
#a forward slash is used because "\T" is not a valid string escape and a backslash is only a
#path separator on Windows; "/" works on every platform and matches the read paths
total_vaccine_distro.to_csv("Resources/Total_Vaccine_Distro.csv")
