In [2]:
import json
import requests
import numpy as np
import pandas as pd
from pprint import pprint
import matplotlib.pyplot as plt

COVID tracking project API
https://covidtracking.com/data/download

In [3]:
covid_data = pd.read_csv("Folder1/all-states-history.csv")
covid_data.head()

Unnamed: 0,date,state,death,deathConfirmed,deathIncrease,deathProbable,hospitalized,hospitalizedCumulative,hospitalizedCurrently,hospitalizedIncrease,...,totalTestResults,totalTestResultsIncrease,totalTestsAntibody,totalTestsAntigen,totalTestsPeopleAntibody,totalTestsPeopleAntigen,totalTestsPeopleViral,totalTestsPeopleViralIncrease,totalTestsViral,totalTestsViralIncrease
0,3/7/21,AK,305.0,,0,,1293.0,1293.0,33.0,0,...,1731628.0,0,,,,,,0,1731628.0,0
1,3/7/21,AL,10148.0,7963.0,-1,2185.0,45976.0,45976.0,494.0,0,...,2323788.0,2347,,,119757.0,,2323788.0,2347,,0
2,3/7/21,AR,5319.0,4308.0,22,1011.0,14926.0,14926.0,335.0,11,...,2736442.0,3380,,,,481311.0,,0,2736442.0,3380
3,3/7/21,AS,0.0,,0,,,,,0,...,2140.0,0,,,,,,0,2140.0,0
4,3/7/21,AZ,16328.0,14403.0,5,1925.0,57907.0,57907.0,963.0,44,...,7908105.0,45110,580569.0,,444089.0,,3842945.0,14856,7908105.0,45110


In [4]:
removed_columns_df = covid_data[["date", "state", "death"]]
removed_columns_df.head()

Unnamed: 0,date,state,death
0,3/7/21,AK,305.0
1,3/7/21,AL,10148.0
2,3/7/21,AR,5319.0
3,3/7/21,AS,0.0
4,3/7/21,AZ,16328.0


In [5]:
removed_columns_df.count()

date     20780
state    20780
death    19910
dtype: int64

In [6]:
clean_death_data_df = removed_columns_df.dropna(how='any')

In [7]:
clean_death_data_df.count()

date     19910
state    19910
death    19910
dtype: int64

In [8]:
clean_death_data_df

Unnamed: 0,date,state,death
0,3/7/21,AK,305.0
1,3/7/21,AL,10148.0
2,3/7/21,AR,5319.0
3,3/7/21,AS,0.0
4,3/7/21,AZ,16328.0
...,...,...,...
20684,2/14/20,NJ,0.0
20689,2/13/20,NJ,0.0
20694,2/12/20,NJ,0.0
20699,2/11/20,NJ,0.0


In [9]:
unique_state = clean_death_data_df['state'].unique()
unique_state

array(['AK', 'AL', 'AR', 'AS', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL',
       'GA', 'GU', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA',
       'MD', 'ME', 'MI', 'MN', 'MO', 'MP', 'MS', 'MT', 'NC', 'ND', 'NE',
       'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'PR', 'RI',
       'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VI', 'VT', 'WA', 'WI', 'WV',
       'WY'], dtype=object)

In [10]:
as_data = clean_death_data_df[clean_death_data_df.state != 'AS']
dc_data = as_data[as_data.state != 'DC']
gu_data = dc_data[dc_data.state != 'GU']
mp_data = gu_data[gu_data.state != 'MP']
pr_data = mp_data[mp_data.state != 'PR']
clean_data = pr_data[pr_data.state != 'VI']

In [11]:
unique = clean_data['state'].unique()
unique

array(['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI',
       'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI',
       'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV',
       'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT',
       'VA', 'VT', 'WA', 'WI', 'WV', 'WY'], dtype=object)

In [12]:
clean_data.head()

Unnamed: 0,date,state,death
0,3/7/21,AK,305.0
1,3/7/21,AL,10148.0
2,3/7/21,AR,5319.0
4,3/7/21,AZ,16328.0
5,3/7/21,CA,54124.0


In [13]:
clean_data = clean_data.sort_values("date", ascending=False)

In [14]:
clean_deathperday_data = clean_data.reset_index(drop=True)

In [15]:
clean_deathperday_data

Unnamed: 0,date,state,death
0,9/9/20,OK,863.0
1,9/9/20,PA,7805.0
2,9/9/20,NC,2958.0
3,9/9/20,ND,160.0
4,9/9/20,NE,406.0
...,...,...,...
17821,1/1/21,NE,1651.0
17822,1/1/21,ND,1289.0
17823,1/1/21,NC,6748.0
17824,1/1/21,MT,961.0


In [16]:
clean_deathperday_data.to_csv("Folder2/cleandeathperday.csv", index=False, header=True)

COVID Tracking Project - Race CSV

In [17]:
race_data = pd.read_csv("Folder1/race-data.csv")
race_data.head()

Unnamed: 0,Date,State,Cases_Total,Cases_White,Cases_Black,Cases_Latinx,Cases_Asian,Cases_AIAN,Cases_NHPI,Cases_Multiracial,...,Tests_Latinx,Tests_Asian,Tests_AIAN,Tests_NHPI,Tests_Multiracial,Tests_Other,Tests_Unknown,Tests_Ethnicity_Hispanic,Tests_Ethnicity_NonHispanic,Tests_Ethnicity_Unknown
0,20210307,AK,59332.0,18300.0,1499.0,,2447.0,12238.0,1508.0,4453.0,...,,,,,,,,,,
1,20210307,AL,499819.0,160347.0,82790.0,,2273.0,,,,...,,,,,,,,,,
2,20210307,AR,324818.0,207596.0,50842.0,,2913.0,1070.0,3358.0,1804.0,...,,,,,,,,,,
3,20210307,AS,,,,,,,,,...,,,,,,,,,,
4,20210307,AZ,826454.0,308453.0,25775.0,244539.0,11921.0,40707.0,,,...,,,,,,,,,,


In [18]:
race_data_df = race_data[["Date", "State", "Deaths_Total", "Deaths_White", "Deaths_Black", "Deaths_Latinx", 
                          "Deaths_Asian", "Deaths_AIAN", "Deaths_NHPI", "Deaths_Multiracial", "Deaths_Other", 
                          "Deaths_Ethnicity_Hispanic", "Deaths_Ethnicity_NonHispanic", "Deaths_Ethnicity_Unknown"]]
race_data_df.head()

Unnamed: 0,Date,State,Deaths_Total,Deaths_White,Deaths_Black,Deaths_Latinx,Deaths_Asian,Deaths_AIAN,Deaths_NHPI,Deaths_Multiracial,Deaths_Other,Deaths_Ethnicity_Hispanic,Deaths_Ethnicity_NonHispanic,Deaths_Ethnicity_Unknown
0,20210307,AK,305.0,127.0,9.0,,29.0,111.0,17.0,4.0,1.0,13.0,280.0,12.0
1,20210307,AL,10148.0,4730.0,2223.0,,24.0,,,,300.0,137.0,6083.0,3928.0
2,20210307,AR,5319.0,4171.0,784.0,,36.0,18.0,56.0,13.0,203.0,186.0,5133.0,0.0
3,20210307,AS,,,,,,,,,,,,
4,20210307,AZ,16328.0,8066.0,433.0,4684.0,220.0,1361.0,,,465.0,4684.0,10545.0,1099.0


In [19]:
as_data2 = race_data_df[race_data_df.State != 'AS']
dc_data2 = as_data2[as_data2.State != 'DC']
gu_data2 = dc_data2[dc_data2.State != 'GU']
mp_data2 = gu_data2[gu_data2.State != 'MP']
pr_data2 = mp_data2[mp_data2.State != 'PR']
race_data_clean = pr_data2[pr_data2.State != 'VI']
#race_data_clean.head()

In [20]:
race_data_clean = race_data_clean.fillna(0)

AIAN or is an abbreviation used by the United States Census Bureau to count population within the Native American and Alaska Natives areas within the United States. The US Census uses other abbreviations such as CDP or census-designated places as well in determining populations within different states.

NHIS - Native Hawaiian and Pacific Islander (NHPI) - CDC

In [21]:
race_data_clean.rename(columns={"Deaths_Total": "Total Deaths", "Deaths_White": "Deaths: White", "Deaths_Black":
                               "Deaths: Black", "Deaths_Latinx": "Deaths: Latinx", "Deaths_Asian": "Deaths: Asian",
                               "Deaths_AIAN": "Deaths: AIAN", "Deaths_NHPI": "Deaths: NHPI", "Deaths_Multiracial": "Deaths: Multiracial",
                               "Deaths_Other": "Deaths: Other", "Deaths_Ethnicity_Hispanic": "Deaths: Hispanic", "Deaths_Ethnicity_NonHispanic":
                               "Deaths: NonHispanic", "Deaths_Ethnicity_Unknown": "Deaths: Ethnicity Unknown"})

Unnamed: 0,Date,State,Total Deaths,Deaths: White,Deaths: Black,Deaths: Latinx,Deaths: Asian,Deaths: AIAN,Deaths: NHPI,Deaths: Multiracial,Deaths: Other,Deaths: Hispanic,Deaths: NonHispanic,Deaths: Ethnicity Unknown
0,20210307,AK,305.0,127.0,9.0,0.0,29.0,111.0,17.0,4.0,1.0,13.0,280.0,12.0
1,20210307,AL,10148.0,4730.0,2223.0,0.0,24.0,0.0,0.0,0.0,300.0,137.0,6083.0,3928.0
2,20210307,AR,5319.0,4171.0,784.0,0.0,36.0,18.0,56.0,13.0,203.0,186.0,5133.0,0.0
4,20210307,AZ,16328.0,8066.0,433.0,4684.0,220.0,1361.0,0.0,0.0,465.0,4684.0,10545.0,1099.0
5,20210307,CA,54124.0,16586.0,3275.0,24402.0,6105.0,184.0,322.0,693.0,1103.0,24402.0,28268.0,1454.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5315,20200412,VT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5316,20200412,WA,508.0,290.0,9.0,24.0,36.0,5.0,1.0,3.0,7.0,24.0,351.0,133.0
5317,20200412,WI,144.0,77.0,61.0,0.0,4.0,1.0,0.0,0.0,0.0,4.0,138.0,2.0
5318,20200412,WV,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
race_data_clean['Date'] = pd.to_datetime(race_data_clean['Date'].astype(str), format='%Y%m%d')
#race_data_clean

In [23]:
race_data_clean = race_data_clean.sort_values("Date", ascending=True)
#race_data_clean

In [24]:
race_data_clean = race_data_clean.reset_index(drop=True)
race_data_clean

Unnamed: 0,Date,State,Deaths_Total,Deaths_White,Deaths_Black,Deaths_Latinx,Deaths_Asian,Deaths_AIAN,Deaths_NHPI,Deaths_Multiracial,Deaths_Other,Deaths_Ethnicity_Hispanic,Deaths_Ethnicity_NonHispanic,Deaths_Ethnicity_Unknown
0,2020-04-12,WY,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-04-12,MI,1483.0,535.0,594.0,0.0,14.0,0.0,0.0,14.0,29.0,14.0,951.0,520.0
2,2020-04-12,ME,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-04-12,MD,235.0,83.0,104.0,0.0,8.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0
4,2020-04-12,MA,756.0,199.0,20.0,28.0,13.0,0.0,0.0,0.0,16.0,28.0,49.0,480.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4745,2021-03-07,NH,1184.0,1110.0,13.0,30.0,12.0,1.0,0.0,2.0,12.0,30.0,1150.0,4.0
4746,2021-03-07,NE,2091.0,1360.0,58.0,0.0,30.0,26.0,1.0,0.0,26.0,161.0,1095.0,835.0
4747,2021-03-07,ND,1478.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1478.0
4748,2021-03-07,RI,2541.0,1816.0,104.0,207.0,37.0,1.0,0.0,1.0,1.0,207.0,1960.0,374.0


In [25]:
race_data_clean.to_csv("Folder2/race_data_clean.csv", index=False, header=True)

CDC by age

In [27]:
age_sex_data = pd.read_csv("Folder1/Death_Counts_by_Sex_Age_and_State.csv")
age_sex_data.head()

Unnamed: 0,Data As Of,Start Date,End Date,Group,Year,Month,State,Sex,Age Group,COVID-19 Deaths,Total Deaths,Pneumonia Deaths,Pneumonia and COVID-19 Deaths,Influenza Deaths,"Pneumonia, Influenza, or COVID-19 Deaths",Footnote
0,4/28/21,1/1/20,4/24/21,By Total,,,United States,All Sexes,All Ages,557486.0,4355577.0,484412.0,272424.0,9083.0,777296.0,
1,4/28/21,1/1/20,4/24/21,By Total,,,United States,All Sexes,Under 1 year,66.0,23862.0,257.0,10.0,21.0,334.0,
2,4/28/21,1/1/20,4/24/21,By Total,,,United States,All Sexes,0-17 years,277.0,41811.0,703.0,54.0,179.0,1105.0,
3,4/28/21,1/1/20,4/24/21,By Total,,,United States,All Sexes,1-4 years,34.0,4286.0,145.0,6.0,61.0,234.0,
4,4/28/21,1/1/20,4/24/21,By Total,,,United States,All Sexes,5-14 years,98.0,6930.0,203.0,21.0,76.0,356.0,


In [31]:
by_age_group = age_sex_data.loc[(age_sex_data['Sex'] == 'All Sexes')]
age_month = by_age_group.loc[by_age_group['Group']=='By Month']
age_state = age_month.loc[age_month['State']!='Puerto Rico']
age_state = age_state.loc[age_state['State']!='United States']
age_state = age_state.loc[age_state['State']!='District of Columbia']
age_data = age_state.loc[age_state['Age Group']!='All Ages']
age_data.head()

Unnamed: 0,Data As Of,Start Date,End Date,Group,Year,Month,State,Sex,Age Group,COVID-19 Deaths,Total Deaths,Pneumonia Deaths,Pneumonia and COVID-19 Deaths,Influenza Deaths,"Pneumonia, Influenza, or COVID-19 Deaths",Footnote
9079,4/28/21,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,Under 1 year,0.0,31.0,0.0,0.0,0.0,0.0,
9080,4/28/21,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,0-17 years,0.0,71.0,,0.0,,,One or more data cells have counts between 1-9...
9081,4/28/21,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,1-4 years,0.0,12.0,,0.0,0.0,,One or more data cells have counts between 1-9...
9082,4/28/21,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,5-14 years,0.0,16.0,,0.0,0.0,,One or more data cells have counts between 1-9...
9083,4/28/21,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,15-24 years,0.0,56.0,0.0,0.0,,,One or more data cells have counts between 1-9...


In [34]:
del age_data['Footnote']
del age_data['Data As Of']
age_data.head()

Unnamed: 0,Start Date,End Date,Group,Year,Month,State,Sex,Age Group,COVID-19 Deaths,Total Deaths,Pneumonia Deaths,Pneumonia and COVID-19 Deaths,Influenza Deaths,"Pneumonia, Influenza, or COVID-19 Deaths"
9079,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,Under 1 year,0.0,31.0,0.0,0.0,0.0,0.0
9080,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,0-17 years,0.0,71.0,0.0,0.0,0.0,0.0
9081,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,1-4 years,0.0,12.0,0.0,0.0,0.0,0.0
9082,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,5-14 years,0.0,16.0,0.0,0.0,0.0,0.0
9083,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,15-24 years,0.0,56.0,0.0,0.0,0.0,0.0


In [35]:
age_data = age_data.fillna(0)
age_data.head()

Unnamed: 0,Start Date,End Date,Group,Year,Month,State,Sex,Age Group,COVID-19 Deaths,Total Deaths,Pneumonia Deaths,Pneumonia and COVID-19 Deaths,Influenza Deaths,"Pneumonia, Influenza, or COVID-19 Deaths"
9079,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,Under 1 year,0.0,31.0,0.0,0.0,0.0,0.0
9080,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,0-17 years,0.0,71.0,0.0,0.0,0.0,0.0
9081,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,1-4 years,0.0,12.0,0.0,0.0,0.0,0.0
9082,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,5-14 years,0.0,16.0,0.0,0.0,0.0,0.0
9083,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,15-24 years,0.0,56.0,0.0,0.0,0.0,0.0


In [36]:
by_age_data = age_state.reset_index(drop=True)
by_age_data

Unnamed: 0,Start Date,End Date,Group,Year,Month,State,Sex,Age Group,COVID-19 Deaths,Total Deaths,Pneumonia Deaths,Pneumonia and COVID-19 Deaths,Influenza Deaths,"Pneumonia, Influenza, or COVID-19 Deaths"
0,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,All Ages,0.0,4728.0,282.0,0.0,35.0,317.0
1,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,Under 1 year,0.0,31.0,0.0,0.0,0.0,0.0
2,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,0-17 years,0.0,71.0,,0.0,,
3,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,1-4 years,0.0,12.0,,0.0,0.0,
4,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,All Sexes,5-14 years,0.0,16.0,,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13867,4/1/21,4/24/21,By Month,2021.0,4.0,Wyoming,All Sexes,50-64 years,,36.0,0.0,0.0,0.0,
13868,4/1/21,4/24/21,By Month,2021.0,4.0,Wyoming,All Sexes,55-64 years,,33.0,0.0,0.0,0.0,
13869,4/1/21,4/24/21,By Month,2021.0,4.0,Wyoming,All Sexes,65-74 years,,36.0,,,0.0,
13870,4/1/21,4/24/21,By Month,2021.0,4.0,Wyoming,All Sexes,75-84 years,0.0,31.0,0.0,0.0,0.0,0.0


In [37]:
by_age_data.to_csv("Folder2/age_data_clean.csv", index=False, header=True)

CDC by sex


In [38]:
sex_data = pd.read_csv("Folder1/Death_Counts_by_Sex_Age_and_State.csv")
#sex_data.head()

In [39]:
all_ages = sex_data.loc[(sex_data['Age Group'] == 'All Ages')]
sex_month = all_ages.loc[all_ages['Group']=='By Month']
sex_state = sex_month.loc[sex_month['State']!='Puerto Rico']
sex_state = sex_state.loc[sex_state['State']!='United States']
sex_state = sex_state.loc[sex_state['State']!='District of Columbia']
sex_data = sex_state.loc[sex_state['Sex'] != 'All Sexes']
sex_data.head()

Unnamed: 0,Data As Of,Start Date,End Date,Group,Year,Month,State,Sex,Age Group,COVID-19 Deaths,Total Deaths,Pneumonia Deaths,Pneumonia and COVID-19 Deaths,Influenza Deaths,"Pneumonia, Influenza, or COVID-19 Deaths",Footnote
9095,4/28/21,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,Male,All Ages,0.0,2400.0,151.0,0.0,23.0,174.0,
9112,4/28/21,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,Female,All Ages,0.0,2328.0,131.0,0.0,12.0,143.0,
9146,4/28/21,2/1/20,2/29/20,By Month,2020.0,2.0,Alabama,Male,All Ages,,2391.0,137.0,0.0,24.0,162.0,One or more data cells have counts between 1-9...
9163,4/28/21,2/1/20,2/29/20,By Month,2020.0,2.0,Alabama,Female,All Ages,0.0,2236.0,139.0,0.0,15.0,154.0,
9197,4/28/21,3/1/20,3/31/20,By Month,2020.0,3.0,Alabama,Male,All Ages,31.0,2489.0,164.0,,19.0,205.0,One or more data cells have counts between 1-9...


In [40]:
del sex_data['Footnote']
del sex_data['Data As Of']

sex_data = sex_data.fillna(0)
sex_data.head()


Unnamed: 0,Start Date,End Date,Group,Year,Month,State,Sex,Age Group,COVID-19 Deaths,Total Deaths,Pneumonia Deaths,Pneumonia and COVID-19 Deaths,Influenza Deaths,"Pneumonia, Influenza, or COVID-19 Deaths"
9095,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,Male,All Ages,0.0,2400.0,151.0,0.0,23.0,174.0
9112,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,Female,All Ages,0.0,2328.0,131.0,0.0,12.0,143.0
9146,2/1/20,2/29/20,By Month,2020.0,2.0,Alabama,Male,All Ages,0.0,2391.0,137.0,0.0,24.0,162.0
9163,2/1/20,2/29/20,By Month,2020.0,2.0,Alabama,Female,All Ages,0.0,2236.0,139.0,0.0,15.0,154.0
9197,3/1/20,3/31/20,By Month,2020.0,3.0,Alabama,Male,All Ages,31.0,2489.0,164.0,0.0,19.0,205.0


In [41]:
sex_data = sex_data.reset_index(drop=True)
sex_data.head()

Unnamed: 0,Start Date,End Date,Group,Year,Month,State,Sex,Age Group,COVID-19 Deaths,Total Deaths,Pneumonia Deaths,Pneumonia and COVID-19 Deaths,Influenza Deaths,"Pneumonia, Influenza, or COVID-19 Deaths"
0,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,Male,All Ages,0.0,2400.0,151.0,0.0,23.0,174.0
1,1/1/20,1/31/20,By Month,2020.0,1.0,Alabama,Female,All Ages,0.0,2328.0,131.0,0.0,12.0,143.0
2,2/1/20,2/29/20,By Month,2020.0,2.0,Alabama,Male,All Ages,0.0,2391.0,137.0,0.0,24.0,162.0
3,2/1/20,2/29/20,By Month,2020.0,2.0,Alabama,Female,All Ages,0.0,2236.0,139.0,0.0,15.0,154.0
4,3/1/20,3/31/20,By Month,2020.0,3.0,Alabama,Male,All Ages,31.0,2489.0,164.0,0.0,19.0,205.0


In [42]:
sex_data.to_csv("Folder2/sex_data_clean.csv", index=False, header=True)