In [None]:
# Dependencies
from sodapy import Socrata
import json
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from config import appToken

In [None]:
# Connect to the CDC open-data portal with the app token from config and
# download the COVID-19 death records (the dataset carries age and sex columns)
client = Socrata("data.cdc.gov", appToken)
results = client.get("9bhg-hcku", limit=4_000_000)

# Load the returned records into a pandas DataFrame and display it
results_df = pd.DataFrame.from_records(data=results)
results_df

In [None]:
# Keep only the per-state grand totals: drop the national 'United States' row
# and keep the rows that aggregate every age group, every sex and the whole
# reporting period ('By Total')
is_state_total = (
    results_df['state'].ne('United States')
    & results_df['age_group'].eq('All Ages')
    & results_df['group'].eq('By Total')
    & results_df['sex'].eq('All Sexes')
)
results_states = results_df.loc[is_state_total, ['state', 'covid_19_deaths']]

# Convert the death counts to integers (astype returns a new frame, so the
# filtered frame above is left untouched), then rank states from most to
# fewest deaths
converted_states_df = results_states.astype({"covid_19_deaths": "int64"})
states_df = converted_states_df.sort_values(by='covid_19_deaths', ascending=False)
states_df

In [None]:
# Plot COVID-19 death totals by State
# Bars are drawn with align="edge" at matplotlib's default width of 0.8, so
# shifting each tick by 0.4 puts the label under the middle of its bar
x_axis = np.arange(len(states_df['state']))
tick_locations = [value+0.4 for value in x_axis]

# Use a wide figure so every state label fits along the x axis
plt.figure(figsize=(14,6))
barplot = plt.bar(x_axis, states_df['covid_19_deaths'], color='blue', alpha=0.5, align="edge")
plt.xticks(tick_locations, states_df['state'], rotation="vertical")

# Highlight the bars for the (up to) three highest death counts. This relies
# on states_df already being sorted in descending order; slicing instead of
# indexing avoids an IndexError when fewer than three rows are present
for bar in barplot[:3]:
    bar.set_color('red')

# Set x and y limits, leaving some headroom above the tallest bar
plt.xlim(-0.25, len(x_axis))
plt.ylim(0, states_df['covid_19_deaths'].max()+4000)

# Set Title and Labels
plt.title("Total COVID-19 Deaths by State")
plt.xlabel("State")
plt.ylabel("Deaths")

# Display the chart, and export for later use
plt.tight_layout()
plt.savefig("Images/deathct_by_state.png")
plt.show()

In [None]:
# Select the per-sex totals (all ages, whole reporting period) for the three
# states singled out in the state-level chart
top_three_states = ['Texas', 'California', 'Florida']
is_sex_total = (
    results_df['state'].isin(top_three_states)
    & results_df['age_group'].eq('All Ages')
    & results_df['group'].eq('By Total')
    & results_df['sex'].ne('All Sexes')
)
results_sex = results_df.loc[is_sex_total, ['state', 'sex', 'covid_19_deaths']]

# Convert the death counts to integers so they can be summed and plotted;
# astype returns a new frame, leaving results_sex unchanged
converted_sex_df = results_sex.astype({"covid_19_deaths": "int64"})
converted_sex_df

In [None]:
# Create lists for plotting
# States are kept in the order they first appear in the data
state_lst = converted_sex_df['state'].drop_duplicates().tolist()

# Reshape to one row per state with a 'Male' and a 'Female' column so every
# count is matched to its state by label rather than by row position; a
# missing state/sex combination becomes 0 instead of shifting the other values
sex_by_state = (
    converted_sex_df
    .pivot_table(index='state', columns='sex', values='covid_19_deaths', aggfunc='sum')
    .reindex(index=state_lst, columns=['Male', 'Female'])
    .fillna(0)
    .astype('int64')
)
male_ct = sex_by_state['Male'].tolist()
female_ct = sex_by_state['Female'].tolist()

# Plot COVID-19 death counts by State, stacking bars by Sex, assigning title/labels/legend
plt.bar(state_lst, male_ct, color='b', label="Male", alpha=0.5)
plt.bar(state_lst, female_ct, bottom=male_ct, color='r', label="Female", alpha=0.5)
plt.title("COVID-19 Deaths by Sex")
plt.xlabel("State")
plt.ylabel("Deaths")
plt.legend(loc="upper center")

# Display total counts by sex just below the top of each bar segment
# (va="top" hangs the text beneath the given y position)
for i, (male, female) in enumerate(zip(male_ct, female_ct)):
    plt.text(i, male, str(male), ha="center", va="top", color="w", fontweight="bold")
    plt.text(i, male + female, str(female), ha="center", va="top", color="w", fontweight="bold")

# Display the chart, and export for later use
plt.savefig("Images/deathct_by_sex.png")
plt.show()

In [None]:
# Filter CDC data for the three states with highest COVID-19 deaths, keeping
# the per-age-group totals (all sexes, whole reporting period)
top_states = ['Texas', 'California', 'Florida']
results_age = results_df.loc[
    results_df['state'].isin(top_states)
    & (results_df['age_group'] != 'All Ages')
    & (results_df['group'] == 'By Total')
    & (results_df['sex'] == 'All Sexes')
]

# Keep only the age bands that do not overlap one another. The mask is built
# from results_age itself so it shares the index of the frame being filtered,
# instead of relying on pandas realigning a mask taken from results_df
age_bands = [
    '0-17 years', '18-29 years', '30-39 years', '40-49 years',
    '50-64 years', '65-74 years', '75-84 years', '85 years and over',
]
results_age2 = results_age.loc[results_age['age_group'].isin(age_bands)]
results_age2 = results_age2[['state', 'age_group', 'covid_19_deaths']]

# Convert death totals to integers for summation; astype already returns a
# new frame, so no separate copy is needed
age_df = results_age2.astype({"covid_19_deaths": "int64"})
age_df

In [None]:
# Create lists for plotting
# Age groups are kept in the order they first appear in the data
age_range = age_df['age_group'].drop_duplicates().tolist()

# Reshape to one row per age group and one column per state so each count is
# matched to its age group by label rather than by row position; a missing
# state/age-group combination becomes 0 instead of shifting the other values
deaths_by_age = (
    age_df
    .pivot_table(index='age_group', columns='state', values='covid_19_deaths', aggfunc='sum')
    .reindex(index=age_range, columns=['California', 'Texas', 'Florida'])
    .fillna(0)
    .astype('int64')
)
cali_ct = deaths_by_age['California'].tolist()
tex_ct = deaths_by_age['Texas'].tolist()
flor_ct = deaths_by_age['Florida'].tolist()

# One bar group per age group actually present (not a hard-coded 8)
index = np.arange(len(age_range))
bar_width = 0.25

# Plot COVID-19 death counts by State grouped by Age Group
fig, ax = plt.subplots()
cali = ax.bar(index, cali_ct, bar_width, label="California", color="blue", alpha=0.5)
texas = ax.bar(index + bar_width, tex_ct, bar_width, label="Texas", color="red", alpha=0.5)
florida = ax.bar(index + 2 * bar_width, flor_ct, bar_width, label="Florida", color="orange", alpha=0.5)

# Assign title, labels, and legend
ax.set_xlabel('Age Group')
ax.set_ylabel('Deaths')
ax.set_title('COVID-19 Deaths by Age Group')
# The three bars are centred at index, index + bar_width and
# index + 2 * bar_width, so the middle of each group is index + bar_width
ax.set_xticks(index + bar_width)
ax.set_xticklabels(age_range, rotation=45, ha="right")
ax.legend()

# Display the chart, and export for later use
plt.tight_layout()
plt.savefig("Images/deathct_by_agegrp.png")
plt.show()

In [None]:
# Reuse the existing Socrata client to download the second CDC dataset
# (the one later broken down by race and Hispanic origin)
results2 = client.get("ks3g-spdg", limit=4_000_000)

# Load the returned records into a pandas DataFrame and display it
results2_df = pd.DataFrame.from_records(data=results2)
results2_df

In [None]:
# Filter CDC data for the three states with highest COVID-19 deaths, to include
# death data for all race groups except the 'Total Deaths' roll-up and the
# nominal 'Unknown' figures
top_states = ['Texas', 'California', 'Florida']
excluded_race_rows = ['Total Deaths', 'Unknown']
results_race = results2_df.loc[
    results2_df['state'].isin(top_states)
    & ~results2_df['race_and_hispanic_origin'].isin(excluded_race_rows)
]

# Keep only these eight age bands (the same list used for the age-group chart)
# so that no death is counted twice when the bands are summed
age_bands = [
    '0-17 years', '18-29 years', '30-39 years', '40-49 years',
    '50-64 years', '65-74 years', '75-84 years', '85 years and over',
]
results_race = results_race.loc[results_race['age_group_new'].isin(age_bands)]

# Treat missing death counts as 0. Only the count column is filled so a
# missing state or race label is never silently turned into the number 0
results_race2 = (
    results_race[['state', 'race_and_hispanic_origin', 'covid_19_deaths']]
    .fillna({'covid_19_deaths': 0})
)

# Convert death totals to integers for summation (astype returns a new frame)
converted_race_df = results_race2.astype({"covid_19_deaths": "int64"})

# Group DataFrame by State, Race categories and total the deaths across age bands
group_race_df = converted_race_df.groupby(['state', 'race_and_hispanic_origin'])
race_df = group_race_df[["covid_19_deaths"]].sum()

# Reshape to one row per race group and one column per state. The sums are
# already unique per (state, race) pair, so unstack is enough; it keeps the
# counts as integers and fills absent pairs with 0 rather than NaN
dfr = race_df['covid_19_deaths'].unstack('state', fill_value=0)
dfr

In [None]:
# Create lists for plotting
race_range = dfr.index.tolist()
cali_ct2 = dfr["California"].tolist()
tex_ct2 = dfr["Texas"].tolist()
flor_ct2 = dfr["Florida"].tolist()

# One bar group per race category actually present (not a hard-coded 7)
index = np.arange(len(race_range))
bar_width = 0.25

# Plot COVID-19 death counts for each State by Race
# California is given the largest offset so it is drawn as the top bar of
# each horizontal group, with Texas in the middle and Florida at the bottom
fig, ax = plt.subplots()
cali2 = ax.barh(index + 2 * bar_width, cali_ct2, bar_width, label="California", color="blue", alpha=0.5)
texas2 = ax.barh(index + bar_width, tex_ct2, bar_width, label="Texas", color="red", alpha=0.5)
florida2 = ax.barh(index, flor_ct2, bar_width, label="Florida", color="orange", alpha=0.5)

# Assign title, labels, and legend
ax.set_xlabel('Deaths')
ax.set_title('COVID-19 Deaths by Race and Hispanic Origin')
# The three bars are centred at index, index + bar_width and
# index + 2 * bar_width, so the middle of each group is index + bar_width
ax.set_yticks(index + bar_width)
ax.set_yticklabels(race_range)
ax.legend()

# Display the chart, and export for later use.
# The layout is tightened on this figure before saving. Calling plt.figure()
# here would open a second, empty figure, so a different size should be
# requested via plt.subplots(figsize=...) above instead
fig.tight_layout()
fig.savefig("Images/deathct_by_racegrp.png", bbox_inches='tight')
plt.show()