In [147]:
import json
import requests
import numpy as np
import pandas as pd
from sodapy import Socrata
import datetime as datetime
import matplotlib.pyplot as plt
from scipy.stats import linregress
pd.set_option('display.max_rows', None)

In [148]:
us_state_abbrev = {
    'United States': 'USA',
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York City':"NYC",
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

In [149]:
base_url = "https://data.cdc.gov/resource/9bhg-hcku.json"
response = requests.get(base_url)
cdc_data = response.json()

In [150]:
client = Socrata("data.cdc.gov", None)

results = client.get("9bhg-hcku", limit=6000)

results_df = pd.DataFrame.from_records(results)
#results_df



In [151]:
results_df = results_df.loc[results_df['group']=="By Total"]

In [152]:
sex_df = results_df.loc[results_df['age_group']=="All Ages"]

In [153]:
del sex_df['data_as_of']
del sex_df['start_date']
del sex_df['end_date']
del sex_df['group']
del sex_df['age_group']
del sex_df['total_deaths']
del sex_df['pneumonia_deaths']
del sex_df['pneumonia_and_covid_19_deaths']
del sex_df['influenza_deaths']
del sex_df['pneumonia_influenza_or_covid']
del sex_df['footnote']

In [154]:
male_df = sex_df.loc[sex_df["sex"]=="Male"]
#male_df

In [155]:
female_df = sex_df.loc[sex_df["sex"]=="Female"]
#female_df

In [156]:
merged_df = pd.merge(male_df, female_df, on='state')

In [157]:
#merged_df

In [158]:
all_df = sex_df.loc[sex_df["sex"]=="All Sexes"]
#all_df.head()

In [159]:
new_df = pd.merge(merged_df, all_df, how='left', on='state')

In [160]:
#new_df.head()

In [161]:
del new_df['sex_x']
del new_df['sex_y']
del new_df['sex']
del new_df['year']
del new_df['year_x']
del new_df['year_y']

In [162]:
new_df = new_df.rename(columns={"state":"State", "covid_19_deaths_x":"Male COVID-19 Deaths", "covid_19_deaths_y": "Female COVID-19 Deaths", "covid_19_deaths":"All Sexes COVID-19 Deaths"})

In [163]:
new_df = new_df.loc[new_df['State']!="Puerto Rico"]

In [164]:
#new_df

In [165]:
client = Socrata("data.cdc.gov", None)

results = client.get("9bhg-hcku", limit=6000)

results_df = pd.DataFrame.from_records(results)




In [166]:
results_df = results_df.loc[results_df['group']=="By Total"]

In [167]:
age_df = results_df.loc[results_df['sex']=="All Sexes"]

In [168]:
del age_df['data_as_of']
del age_df['start_date']
del age_df['end_date']
del age_df['group']
del age_df['sex']
del age_df['total_deaths']
del age_df['pneumonia_deaths']
del age_df['pneumonia_and_covid_19_deaths']
del age_df['influenza_deaths']
del age_df['pneumonia_influenza_or_covid']
del age_df['footnote']
del age_df['year']

In [169]:
#age_df

In [170]:
age_df["age_group"].value_counts()

25-34 years          54
30-39 years          54
All Ages             54
75-84 years          54
0-17 years           54
55-64 years          54
65-74 years          54
1-4 years            54
50-64 years          54
35-44 years          54
85 years and over    54
18-29 years          54
Under 1 year         54
45-54 years          54
40-49 years          54
15-24 years          54
5-14 years           54
Name: age_group, dtype: int64

In [171]:
age_df = age_df.loc[age_df['age_group'] != "18-29 years"]
age_df = age_df.loc[age_df['age_group'] != "0-17 years"]
age_df = age_df.loc[age_df['age_group'] != "Under 1 year"]
age_df = age_df.loc[age_df['age_group'] != "30-39 years"]
age_df = age_df.loc[age_df['age_group'] != "40-49 years"]
age_df = age_df.loc[age_df['age_group'] != "50-64 years"]

In [172]:
age_df = age_df.replace({"1-4 years":"Under 5 years"})
age_df['age_group'].value_counts()

25-34 years          54
35-44 years          54
55-64 years          54
15-24 years          54
45-54 years          54
Under 5 years        54
75-84 years          54
All Ages             54
85 years and over    54
5-14 years           54
65-74 years          54
Name: age_group, dtype: int64

In [173]:
#age_df.fillna(0)

In [174]:
under5_df = age_df.loc[age_df["age_group"]=="Under 5 years"]

In [175]:
year5_14_df = age_df.loc[age_df["age_group"]=="5-14 years"]

In [176]:
year15_24_df = age_df.loc[age_df["age_group"]=="15-24 years"]

In [177]:
year25_34_df = age_df.loc[age_df["age_group"]=="25-34 years"]

In [178]:
year35_44_df = age_df.loc[age_df["age_group"]=="35-44 years"]

In [179]:
year45_54_df = age_df.loc[age_df["age_group"]=="45-54 years"]

In [180]:
year55_64_df = age_df.loc[age_df["age_group"]=="55-64 years"]

In [181]:
year65_74_df = age_df.loc[age_df["age_group"]=="65-74 years"]

In [182]:
year75_84_df = age_df.loc[age_df["age_group"]=="75-84 years"]

In [183]:
year85_df = age_df.loc[age_df["age_group"]=="85 years and over"]

In [184]:
all_ages_df = age_df.loc[age_df["age_group"]=="All Ages"]

In [185]:
merged_age = pd.merge(under5_df, year5_14_df, on='state')
merged_age = pd.merge(merged_age, year15_24_df, on ='state')
#merged_age.head()

In [186]:
merged_age = merged_age.rename(columns={
                                "covid_19_deaths_x":"Under 5 Years COVID-19 Deaths", 
                                "covid_19_deaths_y": "5-14 Years COVID-19 Deaths", 
                                "covid_19_deaths":"15-24 Years COVID-19 Deaths"
                                       })
del merged_age['age_group_x']
del merged_age['age_group_y']
del merged_age['age_group']
#merged_age.head()

In [187]:
merged_age = pd.merge(merged_age, year25_34_df, on ='state')
merged_age = pd.merge(merged_age, year35_44_df, on ='state')
merged_age = merged_age.rename(columns={
                                "covid_19_deaths_x":"25-34 Years COVID-19 Deaths", 
                                "covid_19_deaths_y": "35-44 Years COVID-19 Deaths"
                                       })
del merged_age['age_group_x']
del merged_age['age_group_y']
#merged_age.head()

In [188]:
merged_age = pd.merge(merged_age, year45_54_df, on ='state')
merged_age = pd.merge(merged_age, year55_64_df, on ='state')
merged_age = merged_age.rename(columns={
                                "covid_19_deaths_x":"45-54 Years COVID-19 Deaths", 
                                "covid_19_deaths_y": "55-64 Years COVID-19 Deaths"
                                       })
del merged_age['age_group_x']
del merged_age['age_group_y']
#merged_age.head()

In [189]:
merged_age = pd.merge(merged_age, year65_74_df, on ='state')
merged_age = pd.merge(merged_age, year75_84_df, on ='state')
merged_age = merged_age.rename(columns={
                                "covid_19_deaths_x":"65-74 Years COVID-19 Deaths", 
                                "covid_19_deaths_y": "75-84 Years COVID-19 Deaths"
                                       })
del merged_age['age_group_x']
del merged_age['age_group_y']
#merged_age.head()

In [190]:
merged_age = pd.merge(merged_age, year85_df, on ='state')
merged_age = pd.merge(merged_age, all_ages_df, on='state')
merged_age = merged_age.rename(columns={
                                "covid_19_deaths_x":"85 Years and Over COVID-19 Deaths",
                                "covid_19_deaths_y":"All Ages COVID-19 Deaths"
                                       })
del merged_age['age_group_x']
del merged_age['age_group_y']
#merged_age.head()

In [191]:
merged_age = merged_age.fillna(0)

In [192]:
merged_age = merged_age.loc[merged_age['state']!='Puerto Rico']

In [193]:
merged_age = merged_age.rename(columns={"state":"State"})
#merged_age.head()

In [194]:
age_sex_df = pd.merge(new_df, merged_age, on='State')
#age_sex_df

In [195]:
age_sex_df = age_sex_df.fillna(0)
#age_sex_df

In [196]:
for index, row in age_sex_df.iterrows():
    state = row[0]
    age_sex_df.loc[index,"State"] = us_state_abbrev[state]

In [197]:
#age_sex_df

In [198]:
#age_sex_df.to_csv("../data-file/total-age-sex-data.csv", index=False, header=True)

In [199]:
population_df = pd.read_csv("../jennie-folder/Population_byCDCAgeGroup_byState.csv")

In [200]:
for index, row in population_df.iterrows():
    state = row[2]
    population_df.loc[index,"State"] = us_state_abbrev[state]

In [201]:
population_df = population_df[['State', 'CDC_AgeGroups', 'Population']]

In [202]:

under5_pop = population_df.loc[population_df['CDC_AgeGroups']=="Under 5 years"]
years5_14_pop = population_df.loc[population_df['CDC_AgeGroups']=="5-14 years"]
years15_24_pop = population_df.loc[population_df['CDC_AgeGroups']=="15-24 years"]
years25_34_pop = population_df.loc[population_df['CDC_AgeGroups']=="25-34 years"]
years35_44_pop = population_df.loc[population_df['CDC_AgeGroups']=="35-44 years"]
years45_54_pop = population_df.loc[population_df['CDC_AgeGroups']=="45-54 years"]
years55_64_pop = population_df.loc[population_df['CDC_AgeGroups']=="55-64 years"]
years65_74_pop = population_df.loc[population_df['CDC_AgeGroups']=="65-74 years"]
years75_84_pop = population_df.loc[population_df['CDC_AgeGroups']=="75-84 years"]
years85_pop = population_df.loc[population_df['CDC_AgeGroups']=="85 years and over"]
all_ages_pop = population_df.loc[population_df['CDC_AgeGroups']=="Total"]



In [203]:
merged_pop = pd.merge(under5_pop, years5_14_pop, on ='State')
merged_pop = pd.merge(merged_pop, years15_24_pop, on='State')
merged_pop = merged_pop.rename(columns={
                                "Population_x":"Under 5 Years Population",
                                "Population_y":"5-14 Years Population",
                                "Population":"15-24 Years Population"
                                       })
del merged_pop['CDC_AgeGroups_x']
del merged_pop['CDC_AgeGroups_y']
del merged_pop['CDC_AgeGroups']
#merged_pop.head()

In [204]:
merged_pop = pd.merge(merged_pop, years25_34_pop, on ='State')
merged_pop = pd.merge(merged_pop, years35_44_pop, on='State')
merged_pop = pd.merge(merged_pop, years45_54_pop, on='State')
merged_pop = merged_pop.rename(columns={
                                "Population_x":"25-34 Years Population",
                                "Population_y":"35-44 Years Population",
                                "Population":"45-54 Years Population"
                                       })
del merged_pop['CDC_AgeGroups_x']
del merged_pop['CDC_AgeGroups_y']
del merged_pop['CDC_AgeGroups']
#merged_pop.head()

In [205]:
merged_pop = pd.merge(merged_pop, years55_64_pop, on ='State')
merged_pop = pd.merge(merged_pop, years65_74_pop, on='State')
merged_pop = pd.merge(merged_pop, years75_84_pop, on='State')
merged_pop = merged_pop.rename(columns={
                                "Population_x":"55-64 Years Population",
                                "Population_y":"65-74 Years Population",
                                "Population":"75-84 Years Population"
                                       })
del merged_pop['CDC_AgeGroups_x']
del merged_pop['CDC_AgeGroups_y']
del merged_pop['CDC_AgeGroups']
#merged_pop.head()

In [206]:
merged_pop = pd.merge(merged_pop, years85_pop, on ='State')
merged_pop = pd.merge(merged_pop, all_ages_pop, on ='State')


merged_pop = merged_pop.rename(columns={
                                "Population_x":"85 Years and Over Population",
                                "Population_y":"Total Population"
                                       })

del merged_pop['CDC_AgeGroups_x']
del merged_pop['CDC_AgeGroups_y']
#merged_pop

In [207]:
merged_pop['Total Population'].sum()

327533774.0

In [208]:
new_row_pop = {'State':'USA', 'Under 5 Years Population':19531315.0, 
              '5-14 Years Population':40926765.0, 
              '15-24 Years Population':42599542.0,
              '25-34 Years Population':45775780.0,
              '35-44 Years Population':41550175.0,
              '45-54 Years Population':40800616.0,
              '55-64 Years Population':42378661.0,
              '65-74 Years Population':31433214.0,
              '75-84 Years Population':15944990.0,
              '85 Years and Over Population':6592716.0,
                'Total Population': 327533774.0
              
             }
#append row to the dataframe
merged_pop = merged_pop.append(new_row_pop, ignore_index=True)

In [209]:
new_dfff = pd.merge(age_sex_df, merged_pop, on='State')

In [210]:
new_dfff = new_dfff[['State', 'Male COVID-19 Deaths', 'Female COVID-19 Deaths', 'All Sexes COVID-19 Deaths',
                     'Under 5 Years COVID-19 Deaths', 'Under 5 Years Population',
                    '5-14 Years COVID-19 Deaths', '5-14 Years Population',
                    '15-24 Years COVID-19 Deaths', '15-24 Years Population',
                     '25-34 Years COVID-19 Deaths', '25-34 Years Population',
                     '35-44 Years COVID-19 Deaths', '35-44 Years Population',
                     '45-54 Years COVID-19 Deaths', '45-54 Years Population',
                     '55-64 Years COVID-19 Deaths', '55-64 Years Population',
                     '65-74 Years COVID-19 Deaths', '65-74 Years Population',
                     '75-84 Years COVID-19 Deaths', '75-84 Years Population',
                     '85 Years and Over COVID-19 Deaths', '85 Years and Over Population',
                     'All Ages COVID-19 Deaths', 'Total Population'
                    ]]
new_dfff.head()

Unnamed: 0,State,Male COVID-19 Deaths,Female COVID-19 Deaths,All Sexes COVID-19 Deaths,Under 5 Years COVID-19 Deaths,Under 5 Years Population,5-14 Years COVID-19 Deaths,5-14 Years Population,15-24 Years COVID-19 Deaths,15-24 Years Population,...,55-64 Years COVID-19 Deaths,55-64 Years Population,65-74 Years COVID-19 Deaths,65-74 Years Population,75-84 Years COVID-19 Deaths,75-84 Years Population,85 Years and Over COVID-19 Deaths,85 Years and Over Population,All Ages COVID-19 Deaths,Total Population
0,USA,317561,261468,579029,36,19531315.0,108,40926765.0,905,42599542.0,...,72612,42378661.0,128643,31433214.0,159427,15944990.0,174000,6592716.0,579029,327533774.0
1,AL,5723,4982,10705,0,294357.0,0,608466.0,11,635707.0,...,1493,657667.0,2787,501447.0,3070,256847.0,2455,91543.0,10705,4903185.0
2,AK,198,140,338,0,51080.0,0,100662.0,0,96393.0,...,41,93339.0,86,61300.0,99,23107.0,78,7181.0,338,731545.0
3,AZ,9277,6482,15759,0,429788.0,0,930745.0,48,973547.0,...,2299,880794.0,3761,751699.0,4493,411197.0,3548,145737.0,15759,7278717.0
4,AR,3107,2876,5983,0,188464.0,0,393213.0,0,397538.0,...,728,385994.0,1347,302829.0,1767,161141.0,1743,59912.0,5983,3017804.0


In [211]:
#new_dfff.to_csv("../data-file/total-pop-death-data.csv", index=False, header=True)

In [212]:
base_url = "https://api.census.gov/data/2019/acs/acs1?get=NAME,B01001_002E,B01001_026E&for=state:*"
response = requests.get(base_url)


response = requests.get(base_url)
census_data = response.json()

In [213]:
df = pd.DataFrame(census_data[1:], columns=census_data[0])
df = df.sort_values(by=['state'])

In [214]:
df = df.rename(columns={"B01001_002E":'Male Population', 'B01001_026E':'Female Population'})

In [215]:
df["Male Population"] = pd.to_numeric(df["Male Population"], downcast="float")
df["Female Population"] = pd.to_numeric(df["Female Population"], downcast="float")

In [216]:
new_row = {'NAME':'United States', 'Male Population':163104960.0, 
              'Female Population':168328240.0
             }
#append row to the dataframe
df = df.append(new_row, ignore_index=True)

In [217]:
df = df.drop(columns=['state'])

In [218]:
df = df.drop([8, 51])

In [219]:
df = df.reset_index(drop=True)

In [220]:
for index, row in df.iterrows():
    state = row[0]
    df.loc[index,"NAME"] = us_state_abbrev[state]

In [221]:
df = df.rename(columns={'NAME': 'State'})

In [222]:
pop_merged = pd.merge(df, new_dfff, on = 'State')

In [223]:
pop_merged = pop_merged[['State', 'Male COVID-19 Deaths', 'Male Population', 'Female COVID-19 Deaths', 'Female Population',
                     'All Sexes COVID-19 Deaths', 'Under 5 Years COVID-19 Deaths', 'Under 5 Years Population',
                    '5-14 Years COVID-19 Deaths', '5-14 Years Population',
                    '15-24 Years COVID-19 Deaths', '15-24 Years Population',
                     '25-34 Years COVID-19 Deaths', '25-34 Years Population',
                     '35-44 Years COVID-19 Deaths', '35-44 Years Population',
                     '45-54 Years COVID-19 Deaths', '45-54 Years Population',
                     '55-64 Years COVID-19 Deaths', '55-64 Years Population',
                     '65-74 Years COVID-19 Deaths', '65-74 Years Population',
                     '75-84 Years COVID-19 Deaths', '75-84 Years Population',
                     '85 Years and Over COVID-19 Deaths', '85 Years and Over Population',
                     'All Ages COVID-19 Deaths', 'Total Population'
                    ]]
#pop_merged.head()

In [224]:
sex_df = pop_merged[["State", "Male COVID-19 Deaths", "Male Population", "Female COVID-19 Deaths", "Female Population"]]
sex_df.head()
sex_df["Male COVID-19 Deaths"] = pd.to_numeric(sex_df["Male COVID-19 Deaths"], downcast="float")
sex_df["Female COVID-19 Deaths"] = pd.to_numeric(sex_df["Female COVID-19 Deaths"], downcast="float")
#sex_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sex_df["Male COVID-19 Deaths"] = pd.to_numeric(sex_df["Male COVID-19 Deaths"], downcast="float")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sex_df["Female COVID-19 Deaths"] = pd.to_numeric(sex_df["Female COVID-19 Deaths"], downcast="float")


In [225]:
sex_df['Crude Male Mortality Rate (%)'] = sex_df['Male COVID-19 Deaths']/sex_df['Male Population']*100
sex_df['Crude Female Mortality Rate (%)'] = sex_df['Female COVID-19 Deaths']/sex_df['Female Population']*100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sex_df['Crude Male Mortality Rate (%)'] = sex_df['Male COVID-19 Deaths']/sex_df['Male Population']*100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sex_df['Crude Female Mortality Rate (%)'] = sex_df['Female COVID-19 Deaths']/sex_df['Female Population']*100


In [226]:
sex_df = sex_df[['State', 'Male COVID-19 Deaths', 'Male Population', 'Crude Male Mortality Rate (%)', 'Female COVID-19 Deaths', 'Female Population', 'Crude Female Mortality Rate (%)']]

In [227]:
sex_df['total pop'] = sex_df['Male Population'] + sex_df['Female Population']
#sex_df.head()

In [228]:
sex_df['Male Population (%)'] = sex_df['Male Population']/sex_df['total pop'] *100
sex_df['Female Population (%)'] = sex_df['Female Population']/sex_df['total pop']*100
#sex_df.head()

In [229]:
sex_df = sex_df[['State', 'Male Population (%)', 'Crude Male Mortality Rate (%)', 'Female Population (%)', 'Crude Female Mortality Rate (%)']]
#sex_df.head()

In [230]:
#sex_df.to_csv("../jennie-folder/demographic_sex.csv", index=False, header=True)

In [231]:
race_url = "https://data.cdc.gov/resource/pj7m-y5uh.json"
response = requests.get(race_url)
race_data = response.json()

In [232]:
client = Socrata("data.cdc.gov", None)

race_results = client.get("pj7m-y5uh", limit=6000)

race_df = pd.DataFrame.from_records(race_results)



In [233]:
race_df = race_df.loc[race_df['group']=="By Total"]
race_df = race_df.loc[race_df['indicator']=="Count of COVID-19 deaths"]

In [234]:
race_df = race_df.drop(columns=['data_as_of', 'start_week', 'end_week', 'year', 'group', 'indicator', 'month', 'footnote'])

In [235]:
race_df = race_df.rename(columns={'state':'State', 'non_hispanic_white': "White Deaths",
                                  'non_hispanic_black_african_american':'Black/African American Deaths',
                                 'non_hispanic_american_indian_alaska_native':'American Indian/Alaska Native Deaths',
                                  'non_hispanic_asian_pacific_islander':'Asian Pacific Islander Deaths',
                                  'nh_nhopi':'Native Hawaiian/Other Pacific Islander Deaths',
                                  'non_hispanic_more_than_one_race':'More than one Race Deaths',
                                  'hispanic_latino_total':'Hispanic/Latino Deaths'
                                 })
#race_df.head()

In [236]:
race_df = race_df.reset_index(drop=True)

In [237]:
for index, row in race_df.iterrows():
    state = row[0]
    race_df.loc[index,"State"] = us_state_abbrev[state]

In [238]:
race_df = race_df.fillna(0)

In [239]:
race_pop_url = "https://api.census.gov/data/2019/acs/acs1?get=NAME,B01001A_001E,B01001B_001E,B01001C_001E,B01001D_001E,B01001E_001E,B01001G_002E,B01001I_001E&for=state:*"
response = requests.get(race_pop_url)


race_pop_response = requests.get(race_pop_url)
race_pop_data = race_pop_response.json()

In [240]:
race_pop_df = pd.DataFrame(race_pop_data[1:], columns=race_pop_data[0])
race_pop_df = race_pop_df.sort_values(by=['state'])

In [241]:
race_pop_df = race_pop_df.reset_index(drop=True)

In [242]:
race_pop_df = race_pop_df.rename(columns={'NAME':'State','B01001A_001E':'White Population',
                                          'B01001B_001E':'Black/African American Population',
                                          'B01001C_001E':'American Indian/Alaska Native Population',
                                          'B01001D_001E':'Asian Population',
                                          'B01001E_001E':'Native Hawaiian/Other Pacific Islander Population',
                                          'B01001G_002E':'More than one Race Population',
                                          'B01001I_001E':'Hispanic/Latino Population'
                                         })
#race_pop_df.head()

In [243]:
race_pop_df = race_pop_df.fillna(0)

In [244]:
race_pop_df["White Population"] = pd.to_numeric(race_pop_df["White Population"], downcast="float")
race_pop_df["Black/African American Population"] = pd.to_numeric(race_pop_df["Black/African American Population"], downcast="float")
race_pop_df["American Indian/Alaska Native Population"] = pd.to_numeric(race_pop_df["American Indian/Alaska Native Population"], downcast="float")
race_pop_df["Asian Population"] = pd.to_numeric(race_pop_df["Asian Population"], downcast="float")
race_pop_df["Native Hawaiian/Other Pacific Islander Population"] = pd.to_numeric(race_pop_df["Native Hawaiian/Other Pacific Islander Population"], downcast="float")
race_pop_df["More than one Race Population"] = pd.to_numeric(race_pop_df["More than one Race Population"], downcast="float")
race_pop_df["Hispanic/Latino Population"] = pd.to_numeric(race_pop_df["Hispanic/Latino Population"], downcast="float")

In [245]:
new_row_race = {'State':'United States', 'White Population':238530160.0, 
              'Black/African American Population':42357500.0,
               'American Indian/Alaska Native Population':2815045.0,
               'Asian Population':18632352.0,
               'Native Hawaiian/Other Pacific Islander Population':496290.0,
               'More than one Race Population':5752848.0,
               'Hispanic/Latino Population':63616508.0
             }
#append row to the dataframe
race_pop_df = race_pop_df.append(new_row_race, ignore_index=True)
#race_pop_df.head()

In [246]:
race_pop_df = race_pop_df.drop(columns=['state'])

In [247]:
race_pop_df = race_pop_df.drop([8, 51])

In [248]:
race_pop_df = race_pop_df.reset_index(drop=True)

In [249]:
for index, row in race_pop_df.iterrows():
    state = row[0]
    race_pop_df.loc[index,"State"] = us_state_abbrev[state]

In [250]:
#race_pop_df

In [251]:
race_merge = pd.merge(race_df, race_pop_df, on ='State')

In [252]:
race_merge = race_merge[['State', 
                         'White Deaths', 'White Population', 
                         'Black/African American Deaths', 'Black/African American Population',
                         'American Indian/Alaska Native Deaths', 'American Indian/Alaska Native Population', 
                         'Asian Pacific Islander Deaths', 'Asian Population',
                         'Native Hawaiian/Other Pacific Islander Deaths', 'Native Hawaiian/Other Pacific Islander Population',
                         'More than one Race Deaths', 'More than one Race Population',
                         'Hispanic/Latino Deaths', 'Hispanic/Latino Population'
                    ]]
race_merge.head()

Unnamed: 0,State,White Deaths,White Population,Black/African American Deaths,Black/African American Population,American Indian/Alaska Native Deaths,American Indian/Alaska Native Population,Asian Pacific Islander Deaths,Asian Population,Native Hawaiian/Other Pacific Islander Deaths,Native Hawaiian/Other Pacific Islander Population,More than one Race Deaths,More than one Race Population,Hispanic/Latino Deaths,Hispanic/Latino Population
0,USA,351618,238530160.0,86569,42357500.0,6533,2815045.0,22313,18632352.0,1057,496290.0,1888,5752848.0,107105,63616508.0
1,AL,7280,3326375.0,3150,1319551.0,17,23265.0,48,66129.0,0,0.0,0,42835.0,191,219296.0
2,AK,135,469771.0,0,22551.0,114,115544.0,34,43678.0,20,0.0,10,28459.0,15,52548.0
3,AZ,8116,5701810.0,509,343729.0,1609,332273.0,328,241721.0,36,14168.0,143,146550.0,4863,2310592.0
4,AR,4688,2315020.0,890,467468.0,40,17216.0,47,46078.0,58,12829.0,24,42358.0,236,231951.0


In [253]:
demographic_age = pd.merge(pop_merged, race_merge, how='left', on='State')
#demographic_age.head()

In [254]:
demographic_df = pd.read_csv("../jennie-folder/CRDT Data - CRDT.csv")

In [255]:
print(demographic_df.columns.tolist())

['Date', 'State', 'Cases_Total', 'Cases_White', 'Cases_Black', 'Cases_Latinx', 'Cases_Asian', 'Cases_AIAN', 'Cases_NHPI', 'Cases_Multiracial', 'Cases_Other', 'Cases_Unknown', 'Cases_Ethnicity_Hispanic', 'Cases_Ethnicity_NonHispanic', 'Cases_Ethnicity_Unknown', 'Deaths_Total', 'Deaths_White', 'Deaths_Black', 'Deaths_Latinx', 'Deaths_Asian', 'Deaths_AIAN', 'Deaths_NHPI', 'Deaths_Multiracial', 'Deaths_Other', 'Deaths_Unknown', 'Deaths_Ethnicity_Hispanic', 'Deaths_Ethnicity_NonHispanic', 'Deaths_Ethnicity_Unknown', 'Hosp_Total', 'Hosp_White', 'Hosp_Black', 'Hosp_Latinx', 'Hosp_Asian', 'Hosp_AIAN', 'Hosp_NHPI', 'Hosp_Multiracial', 'Hosp_Other', 'Hosp_Unknown', 'Hosp_Ethnicity_Hispanic', 'Hosp_Ethnicity_NonHispanic', 'Hosp_Ethnicity_Unknown', 'Tests_Total', 'Tests_White', 'Tests_Black', 'Tests_Latinx', 'Tests_Asian', 'Tests_AIAN', 'Tests_NHPI', 'Tests_Multiracial', 'Tests_Other', 'Tests_Unknown', 'Tests_Ethnicity_Hispanic', 'Tests_Ethnicity_NonHispanic', 'Tests_Ethnicity_Unknown']


In [256]:
us_row = {'State':'USA', 'Cases_Total':973233885.0, 
              'Cases_White':328779224.0,
          'Cases_Black':90625703.0,
          'Cases_Latinx':126799688.0,
          'Cases_Asian':17872268.0,
          'Cases_AIAN':7717493.0,
          'Cases_NHPI':2036023.0,
          'Cases_Multiracial':4972311.0,
          'Cases_Ethnicity_Hispanic':155679737.0
             }
#append row to the dataframe
demographic_df = demographic_df.append(us_row, ignore_index=True)

In [257]:
demographic_df = demographic_df.groupby('State').sum()
demographic_df = demographic_df[["Cases_Total", "Cases_White", "Cases_Black","Cases_Latinx", "Cases_Asian", "Cases_AIAN", "Cases_NHPI", "Cases_Multiracial", "Cases_Ethnicity_Hispanic"]]
#demographic_df

In [258]:
death_rate_eth = pd.merge(race_merge, demographic_df, on='State')

In [259]:
death_rate_eth = death_rate_eth[['State',
                         'Cases_White','White Deaths', 'White Population', 
                         'Cases_Black', 'Black/African American Deaths', 'Black/African American Population',
                         'Cases_AIAN','American Indian/Alaska Native Deaths', 'American Indian/Alaska Native Population', 
                         'Cases_Asian','Asian Pacific Islander Deaths', 'Asian Population',
                         'Cases_NHPI','Native Hawaiian/Other Pacific Islander Deaths', 'Native Hawaiian/Other Pacific Islander Population',
                         'Cases_Multiracial', 'More than one Race Deaths', 'More than one Race Population',
                         'Cases_Ethnicity_Hispanic','Hispanic/Latino Deaths', 'Hispanic/Latino Population'
                    ]]
death_rate_eth.head()

death_rate_eth['total pop'] = death_rate_eth['White Population'] + death_rate_eth['Black/African American Population'] + death_rate_eth['American Indian/Alaska Native Population'] + death_rate_eth['Asian Population'] + death_rate_eth['Native Hawaiian/Other Pacific Islander Population'] + death_rate_eth['More than one Race Population'] + death_rate_eth['Hispanic/Latino Population']
#death_rate_eth.head()

In [260]:
death_rate_eth["White Deaths"] = pd.to_numeric(death_rate_eth["White Deaths"], downcast="float")
death_rate_eth["Black/African American Deaths"] = pd.to_numeric(death_rate_eth["Black/African American Deaths"], downcast="float")
death_rate_eth["American Indian/Alaska Native Deaths"] = pd.to_numeric(death_rate_eth["American Indian/Alaska Native Deaths"], downcast="float")
death_rate_eth["Native Hawaiian/Other Pacific Islander Deaths"] = pd.to_numeric(death_rate_eth["Native Hawaiian/Other Pacific Islander Deaths"], downcast="float")
death_rate_eth["Asian Pacific Islander Deaths"] = pd.to_numeric(death_rate_eth["Asian Pacific Islander Deaths"], downcast="float")
death_rate_eth["More than one Race Deaths"] = pd.to_numeric(death_rate_eth["More than one Race Deaths"], downcast="float")
death_rate_eth["Hispanic/Latino Deaths"] = pd.to_numeric(death_rate_eth["Hispanic/Latino Deaths"], downcast="float")

In [261]:
white_dr = death_rate_eth['White Deaths']/death_rate_eth['Cases_White']*100
death_rate_eth['White Fatality Rate (%)'] = white_dr

white_pop = death_rate_eth['White Population']/death_rate_eth['total pop']*100
death_rate_eth['White Population (%)'] = white_pop

black_dr = death_rate_eth['Black/African American Deaths']/death_rate_eth['Cases_Black']*100
death_rate_eth['Black Fatality Rate (%)'] = black_dr

black_pop = death_rate_eth['Black/African American Population']/death_rate_eth['total pop']*100
death_rate_eth['Black/African American Population (%)'] = black_pop

AIAN_dr = death_rate_eth['American Indian/Alaska Native Deaths']/death_rate_eth['Cases_AIAN']*100
death_rate_eth['AIAN Fatality Rate (%)'] = AIAN_dr

AIAN_pop = death_rate_eth['American Indian/Alaska Native Population']/death_rate_eth['total pop']*100
death_rate_eth['AIAN Population (%)'] = AIAN_pop

ASIAN_dr = death_rate_eth['Asian Pacific Islander Deaths']/death_rate_eth['Cases_Asian']*100
death_rate_eth['Asian Fatality Rate (%)'] = ASIAN_dr

asian_pop = death_rate_eth['Asian Population']/death_rate_eth['total pop']*100
death_rate_eth['Asian Population (%)'] = asian_pop

NHPI_dr = death_rate_eth['Native Hawaiian/Other Pacific Islander Deaths']/death_rate_eth['Cases_NHPI']*100
death_rate_eth['NHPI Fatality Rate (%)'] = NHPI_dr

NHPI_pop = death_rate_eth['Native Hawaiian/Other Pacific Islander Population']/death_rate_eth['total pop']*100
death_rate_eth['NHPI Population (%)'] = NHPI_pop

multiple_dr = death_rate_eth['More than one Race Deaths']/death_rate_eth['Cases_Multiracial']*100
death_rate_eth['Multiracial Fatality Rate (%)'] = multiple_dr

multiple_pop = death_rate_eth['More than one Race Population']/death_rate_eth['total pop']*100
death_rate_eth['Multiracial Population (%)'] = multiple_pop

hispanic_dr = death_rate_eth['Hispanic/Latino Deaths']/death_rate_eth['Cases_Ethnicity_Hispanic']*100
death_rate_eth['Hispanic Fatality Rate (%)'] = hispanic_dr

hispanic_pop = death_rate_eth['Hispanic/Latino Population']/death_rate_eth['total pop']*100
death_rate_eth['Hispanic Population (%)'] = hispanic_pop

In [262]:
death_rate_eth = death_rate_eth[['State',
                         'White Fatality Rate (%)', 'White Population (%)',
                         'Black Fatality Rate (%)', 'Black/African American Population (%)',
                         'AIAN Fatality Rate (%)', 'AIAN Population (%)',
                         'Asian Fatality Rate (%)', 'Asian Population (%)',
                         'NHPI Fatality Rate (%)', 'NHPI Population (%)',
                         'Multiracial Fatality Rate (%)', 'Multiracial Population (%)',
                         'Hispanic Fatality Rate (%)', 'Hispanic Population (%)'
                    ]]

death_rate_eth = death_rate_eth.replace([np.inf, -np.inf], np.nan)
death_rate_eth = death_rate_eth.fillna(0)
#death_rate_eth

In [263]:
#death_rate_eth.to_csv("../jennie-folder/demographic_ethnicity.csv", index=False, header=True)

In [264]:
aged_df = demographic_age[["State", "Under 5 Years Population", "5-14 Years Population", "15-24 Years Population", "25-34 Years Population", "35-44 Years Population", "45-54 Years Population", "55-64 Years Population", "65-74 Years Population", "65-74 Years COVID-19 Deaths", "75-84 Years Population","75-84 Years COVID-19 Deaths", "85 Years and Over Population", "85 Years and Over COVID-19 Deaths", "Total Population"]]
#aged_df.head()

In [265]:
aged_df["65-74 Years COVID-19 Deaths"] = aged_df["65-74 Years COVID-19 Deaths"].astype(float)
aged_df["75-84 Years COVID-19 Deaths"] = aged_df["75-84 Years COVID-19 Deaths"].astype(float)
aged_df["85 Years and Over COVID-19 Deaths"] = aged_df["85 Years and Over COVID-19 Deaths"].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aged_df["65-74 Years COVID-19 Deaths"] = aged_df["65-74 Years COVID-19 Deaths"].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aged_df["75-84 Years COVID-19 Deaths"] = aged_df["75-84 Years COVID-19 Deaths"].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aged_df["85 Years

In [266]:
aged_df['65 Years and Over Population'] = aged_df['65-74 Years Population'] + aged_df['75-84 Years Population'] + aged_df['85 Years and Over Population']
#aged_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aged_df['65 Years and Over Population'] = aged_df['65-74 Years Population'] + aged_df['75-84 Years Population'] + aged_df['85 Years and Over Population']


In [267]:
aged_df['65 Years and Over COVID Deaths'] = aged_df['65-74 Years COVID-19 Deaths'] + aged_df['75-84 Years COVID-19 Deaths'] + aged_df['85 Years and Over COVID-19 Deaths']
#aged_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aged_df['65 Years and Over COVID Deaths'] = aged_df['65-74 Years COVID-19 Deaths'] + aged_df['75-84 Years COVID-19 Deaths'] + aged_df['85 Years and Over COVID-19 Deaths']


In [268]:
population_over = (aged_df['65 Years and Over Population']/53970920.0)*10
aged_df["Population % for 65 Years and Over"] = population_over

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aged_df["Population % for 65 Years and Over"] = population_over


In [269]:
pop_perc = aged_df['65 Years and Over Population']/aged_df['Total Population'] *100
aged_df["Over 65 Population (%)"] = pop_perc

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aged_df["Over 65 Population (%)"] = pop_perc


In [270]:
deaths_data = (aged_df['65 Years and Over COVID Deaths']/aged_df['Total Population'] * 100)
aged_df["65 Years and Over Crude Mortality Rate (%)"] = deaths_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aged_df["65 Years and Over Crude Mortality Rate (%)"] = deaths_data


In [271]:
aged_df = aged_df[["State", "Over 65 Population (%)", "65 Years and Over Crude Mortality Rate (%)" ]]
#aged_df

In [272]:
#aged_df.to_csv("../jennie-folder/demographic_age.csv", index=False, header=True)

In [273]:
merge_dff = pd.merge(aged_df, sex_df, how='left', on='State')
#merge_dff

In [274]:
final_df = pd.merge(merge_dff, death_rate_eth, how='left', on='State')
final_df = final_df.round(decimals=2)
final_df.head()

Unnamed: 0,State,Over 65 Population (%),65 Years and Over Crude Mortality Rate (%),Male Population (%),Crude Male Mortality Rate (%),Female Population (%),Crude Female Mortality Rate (%),White Fatality Rate (%),White Population (%),Black Fatality Rate (%),...,AIAN Fatality Rate (%),AIAN Population (%),Asian Fatality Rate (%),Asian Population (%),NHPI Fatality Rate (%),NHPI Population (%),Multiracial Fatality Rate (%),Multiracial Population (%),Hispanic Fatality Rate (%),Hispanic Population (%)
0,AL,17.33,0.17,48.33,0.24,51.67,0.2,0.13,66.56,0.09,...,0.0,0.47,0.07,1.32,0.0,0.0,0.0,0.86,0.03,4.39
1,AK,12.52,0.04,52.0,0.05,48.0,0.04,0.02,64.13,0.0,...,0.03,15.77,0.05,5.96,0.04,0.0,0.01,3.88,0.01,7.17
2,AZ,17.98,0.16,49.75,0.26,50.25,0.18,0.1,62.72,0.06,...,0.11,3.66,0.1,2.66,0.0,0.16,0.0,1.61,0.06,25.42
3,AR,17.36,0.16,48.87,0.21,51.13,0.19,0.07,73.89,0.05,...,0.13,0.55,0.04,1.47,0.03,0.41,0.04,1.35,0.02,7.4
4,CA,14.78,0.12,49.71,0.19,50.29,0.13,0.13,48.24,0.12,...,0.12,0.66,0.16,12.05,0.08,0.32,0.04,2.05,0.07,31.99


In [275]:
test_url = "https://covid.cdc.gov/covid-data-tracker/COVIDData/getAjaxData?id=vaccination_data"
response = requests.get(test_url)
test_data = response.json()

client = Socrata("data.cdc.gov", None)


results = client.get("9mfq-cb36", limit=25000)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)



In [276]:
r_dff = results_df[["submission_date","state","tot_cases","new_case","tot_death","new_death"]]
new_cases = r_dff["new_case"].astype(float)
total_cases = r_dff["tot_cases"].astype(float)
total_deaths = r_dff["tot_death"].astype(float)
new_deaths = r_dff["new_death"].astype(float)

In [277]:
date = r_dff["submission_date"]
states = r_dff["state"]
new_dfff = pd.DataFrame({"Date":date,"Year":"", "Month":"","Week":"","State": states,"Covid_19_Cases": new_cases, "Cum_Covid_19_Cases":total_cases,"Covid_19_Deaths": new_deaths,"Cum_Covid19_Deaths": total_deaths,"Death_Rate":100*total_deaths/total_cases})

In [278]:
for index, row in new_dfff.iterrows():
    # Calculate Year, Month and Week
    year = int(row[0][:4])
    month = int(row[0][5:7])
    new_dfff.loc[index,"Year"] = year
    new_dfff.loc[index,"Month"] = month
    
    week = datetime.date(int(row[0][:4]),int(row[0][5:7]),int(row[0][8:10])).isocalendar()[1]
    new_dfff.loc[index,"Week"] = week

In [279]:
dffff_new = new_dfff.groupby(by=["State","Year","Month"])
dffff_new["Cum_Covid_19_Cases","Cum_Covid19_Deaths"].max()
dffff_new.head()

  dffff_new["Cum_Covid_19_Cases","Cum_Covid19_Deaths"].max()


Unnamed: 0,Date,Year,Month,Week,State,Covid_19_Cases,Cum_Covid_19_Cases,Covid_19_Deaths,Cum_Covid19_Deaths,Death_Rate
0,2021-01-10T00:00:00.000,2021,1,1,TX,15855.0,1954406.0,302.0,34346.0,1.757363
1,2021-02-12T00:00:00.000,2021,2,6,UT,1060.0,359641.0,11.0,1785.0,0.496328
2,2020-04-16T00:00:00.000,2020,4,16,TX,963.0,16455.0,32.0,551.0,3.348526
3,2020-05-17T00:00:00.000,2020,5,20,RMI,0.0,0.0,0.0,0.0,
4,2020-07-27T00:00:00.000,2020,7,31,NY,318.0,188293.0,2.0,8822.0,4.685251
5,2021-05-09T00:00:00.000,2021,5,18,PW,0.0,0.0,0.0,0.0,
6,2020-08-12T00:00:00.000,2020,8,33,AS,0.0,0.0,0.0,0.0,
7,2020-08-15T00:00:00.000,2020,8,33,FL,6291.0,563628.0,204.0,9345.0,1.658008
8,2021-04-03T00:00:00.000,2021,4,13,OH,2293.0,1024011.0,0.0,18646.0,1.820879
9,2021-03-14T00:00:00.000,2021,3,10,PR,193.0,102995.0,3.0,2080.0,2.019516


In [280]:
Cum_Covid_Cases = dffff_new["Cum_Covid_19_Cases"].max()
Cum_Covid_Deaths = dffff_new["Cum_Covid19_Deaths"].max()
Covid_Death_Rate = 100*Cum_Covid_Deaths/Cum_Covid_Cases

In [281]:
df_Final = pd.DataFrame({"Cum_Covid_Cases":Cum_Covid_Cases,"Cum_Covid_Deaths":Cum_Covid_Deaths,
                         "Covid_Death_Rate":Covid_Death_Rate})
#df_Final

In [282]:
df_Final.unstack(level=[2,1])
df_Final.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cum_Covid_Cases,Cum_Covid_Deaths,Covid_Death_Rate
State,Year,Month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AK,2020,1,0.0,0.0,
AK,2020,2,0.0,0.0,
AK,2020,3,128.0,6.0,4.6875
AK,2020,4,355.0,9.0,2.535211
AK,2020,5,463.0,13.0,2.807775


In [283]:
dff_Final = df_Final.unstack(level=[2,1]).iloc[:,49].sort_values(ascending=True)
dff_Final.head()
dff_Final.to_csv("DeathRate_byState.csv")
dr_df = pd.read_csv("../jennie-folder/DeathRate_byState.csv")

In [284]:
dr_df.columns = ['State', 'Overall Death Rate (%)']
dr_df.head()

Unnamed: 0,State,Overall Death Rate (%)
0,,4.0
1,,2021.0
2,State,
3,RMI,0.0
4,FSM,0.0


In [304]:
death_rate_df = pd.merge(dr_df, final_df, how='left', on='State')
#death_rate_df

In [305]:
death_rate_df = death_rate_df.drop([0 , 1, 2, 3, 4, 7, 12, 36, 41, 56, 60, 61, 62])
death_rate_df = death_rate_df.reset_index(drop=True)
death_rate_df.head()

Unnamed: 0,State,Overall Death Rate (%),Over 65 Population (%),65 Years and Over Crude Mortality Rate (%),Male Population (%),Crude Male Mortality Rate (%),Female Population (%),Crude Female Mortality Rate (%),White Fatality Rate (%),White Population (%),...,AIAN Fatality Rate (%),AIAN Population (%),Asian Fatality Rate (%),Asian Population (%),NHPI Fatality Rate (%),NHPI Population (%),Multiracial Fatality Rate (%),Multiracial Population (%),Hispanic Fatality Rate (%),Hispanic Population (%)
0,AK,0.521335,12.52,0.04,52.0,0.05,48.0,0.04,0.02,64.13,...,0.03,15.77,0.05,5.96,0.04,0.0,0.01,3.88,0.01,7.17
1,UT,0.554209,11.41,0.06,50.24,0.09,49.76,0.06,0.02,80.08,...,0.05,1.01,0.03,2.19,0.02,0.88,0.0,1.57,0.01,13.22
2,NE,1.019694,16.15,0.13,49.97,0.17,50.03,0.13,0.06,80.33,...,0.04,0.91,0.02,2.3,0.0,0.0,0.0,1.32,0.02,10.58
3,VT,1.071802,20.04,0.03,49.55,0.04,50.45,0.04,0.07,93.94,...,0.0,0.0,0.0,1.38,0.0,0.0,0.0,1.17,0.0,1.99
4,ID,1.090293,16.27,0.1,50.25,0.14,49.75,0.1,0.05,83.85,...,0.05,1.2,0.05,1.36,0.0,0.0,0.0,1.53,0.03,12.05


In [333]:
adjusted_df = pd.read_csv("../jennie-folder/normalized_deathrate.csv")

In [334]:
adjusted_df= adjusted_df.drop(columns=['Unnamed: 0'])

In [335]:
adjusted_df= adjusted_df.drop(columns=['Actual_DR', 'Normalized_DR', 'Delta', 'Cum_Covid_19_Death_Rate'])
adjusted_df = adjusted_df.rename(columns={"Adj_for_age_Covid_Death_Rate": "Death Rate Normalized by Age (%)"})

In [336]:
deathrate_df = pd.merge(adjusted_df, death_rate_df, how='left', on='State')

In [337]:
deathrate_df= deathrate_df.round(decimals=2)
deathrate_df

Unnamed: 0,State,Death Rate Normalized by Age (%),Overall Death Rate (%),Over 65 Population (%),65 Years and Over Crude Mortality Rate (%),Male Population (%),Crude Male Mortality Rate (%),Female Population (%),Crude Female Mortality Rate (%),White Fatality Rate (%),...,AIAN Fatality Rate (%),AIAN Population (%),Asian Fatality Rate (%),Asian Population (%),NHPI Fatality Rate (%),NHPI Population (%),Multiracial Fatality Rate (%),Multiracial Population (%),Hispanic Fatality Rate (%),Hispanic Population (%)
0,AK,0.76,0.52,12.52,0.04,52.0,0.05,48.0,0.04,0.02,...,0.03,15.77,0.05,5.96,0.04,0.0,0.01,3.88,0.01,7.17
1,AL,2.02,2.05,17.33,0.17,48.33,0.24,51.67,0.2,0.13,...,0.0,0.47,0.07,1.32,0.0,0.0,0.0,0.86,0.03,4.39
2,AR,1.66,1.71,17.36,0.16,48.87,0.21,51.13,0.19,0.07,...,0.13,0.55,0.04,1.47,0.03,0.41,0.04,1.35,0.02,7.4
3,AZ,1.93,2.01,17.98,0.16,49.75,0.26,50.25,0.18,0.1,...,0.11,3.66,0.1,2.66,0.0,0.16,0.0,1.61,0.06,25.42
4,CA,1.8,1.68,14.78,0.12,49.71,0.19,50.29,0.13,0.13,...,0.12,0.66,0.16,12.05,0.08,0.32,0.04,2.05,0.07,31.99
5,CO,1.45,1.23,14.63,0.1,50.44,0.13,49.56,0.1,0.08,...,0.11,0.86,0.1,2.82,0.05,0.0,0.01,1.69,0.04,18.82
6,CT,2.09,2.39,17.68,0.2,48.91,0.23,51.09,0.23,0.2,...,0.0,0.23,0.11,4.27,0.0,0.0,0.0,1.72,0.05,15.41
7,DE,1.4,1.56,19.4,0.13,48.3,0.16,51.7,0.15,0.09,...,0.0,0.0,0.03,3.57,0.0,0.0,0.0,1.55,0.01,9.11
8,FL,1.28,1.6,20.94,0.13,48.88,0.18,51.12,0.13,0.09,...,0.0,0.23,0.0,2.3,0.0,0.06,0.0,1.16,0.04,21.7
9,GA,2.15,1.84,14.29,0.13,48.69,0.18,51.31,0.15,0.09,...,0.07,0.38,0.06,3.93,0.0,0.0,633.33,1.31,0.03,9.31
