In [None]:
import matplotlib.pyplot as plt
import pandas as pd
# Read the dataset from a CSV file located in the current working directory.
df = pd.read_csv('conditions_contributing_to.csv')
# Bare expression: as the last line of the cell it renders the frame as the cell output.
df

markdown

In [None]:
# Number of missing values in each column of the frame.
df.isna().sum()

markdown

In [None]:
# Show the dtype pandas holds for each column.
df.dtypes

markdown

In [None]:
# Print the distinct values of every column, one column per output line.
for name, values in df.items():
    print(name, ': ', values.unique())

markdown

In [None]:
# Discard the columns listed below, then shorten the death-count column name to 'Deaths'.
unused_columns = [
    'Data As Of',
    'Start Date',
    'End Date',
    'Group',
    'Condition Group',
    'ICD10_codes',
    'Number of Mentions',
]
df = df.drop(columns=unused_columns).rename(columns={'COVID-19 Deaths': 'Deaths'})
df

markdown

In [None]:
# Recount the missing values per column on the current frame.
df.isna().sum()

markdown

In [None]:
# Rows in which 'Deaths' and 'Flag' are both missing.
df.loc[df[['Deaths', 'Flag']].isna().all(axis=1)]

markdown

In [None]:
# Remove the 'Flag' column and replace missing 'Deaths' values with 0
# (only the 'Deaths' column is filled; other columns keep their NaNs).
df = df.drop(columns=['Flag']).fillna({'Deaths': 0})
df

markdown

In [None]:
# Cross-check of the COVID-19 / 'All Ages' death totals on the rows that carry no 'Year'.
# NOTE: only the final expression is rendered as the cell output; the first two
# sums are evaluated and discarded.
overall = df.loc[df['Year'].isnull() & (df['Condition']=='COVID-19') & (df['Age Group']=='All Ages')]
is_national = overall['State']=='United States'

# the count for the United States is 1146242
overall.loc[is_national, 'Deaths'].sum()

# the count for the fifty states, Puerto Rico, District of Columbia, and New York City is 1152658
overall.loc[~is_national, 'Deaths'].sum()

# the count for the fifty states, District of Columbia, and New York City is 1146242
overall.loc[~is_national & (overall['State']!='Puerto Rico'), 'Deaths'].sum()

markdown

In [None]:
# Grouping keys: every column except the last one (the 'Deaths' column, per the original note).
columns = list(df.columns[:-1])

# Drop the Puerto Rico and District of Columbia rows, relabel 'New York City' as
# 'New York', then re-aggregate so rows that now share the same keys are summed.
# dropna=False keeps groups whose keys contain NaN.
df = (
    df.loc[~df['State'].isin(['Puerto Rico', 'District of Columbia'])]
    .replace('New York City', 'New York')
    .groupby(columns, dropna=False)
    .sum()
    .reset_index()
    .sort_values(by=columns, na_position='first')
)
df

markdown

In [None]:
# Show the dtype of each column of the current frame.
df.dtypes

markdown

In [None]:
# Month number -> abbreviated month name, and the years that get a panel each.
month_list = dict(enumerate(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], start=1))
year_list = list(range(2020, 2024))

# National, all-ages COVID-19 rows; filtered once and reused for every (year, month) pair.
us_covid = df.loc[(df['State']=='United States') & (df['Condition']=='COVID-19') & (df['Age Group']=='All Ages')]

# deaths_bymonth[year] is a 12-item list; position month-1 holds that month's death total.
deaths_bymonth = {
    year: [
        us_covid.loc[(us_covid['Year']==year) & (us_covid['Month']==month), 'Deaths'].sum()
        for month in month_list
    ]
    for year in year_list
}

figs, axes = plt.subplots(2,2)
figs.suptitle('COVID-19 Deaths by Month, By Year')

# One panel per year, filled in row-major order (2020, 2021 on top; 2022, 2023 below).
bar_colors = ['blue', 'darkorange', 'green', 'red']
for ax, year, bar_color in zip(axes.flat, year_list, bar_colors):
    ax.set_title(year)
    ax.bar(month_list.values(), deaths_bymonth[year], color=bar_color)
    ax.set(xlabel='Month', ylabel='Number of Deaths')
    ax.tick_params('x', labelrotation=90)
    # Same y-range on every panel so bar heights are comparable across years.
    ax.set_ylim((0,120000))
    # Keep axis labels only on the outer edge of the grid.
    ax.label_outer()

markdown

In [None]:
def prevmonth_func(year, month):
    """Return the ``(year, month)`` pair of the calendar month preceding the given one.

    January (``month == 1``) wraps around to month 12 of ``year - 1``; every
    other month stays in the same year with the month number reduced by one.
    """
    return (year-1, 12) if month==1 else (year, month-1)

# percent_bymonth[year] is a 12-item list of month-over-month percentage changes,
# computed from deaths_bymonth. A change is recorded as 0 when it cannot be computed:
# months after August 2023, a previous month outside year_list, or a previous total of 0.
percent_bymonth = {}

for year in year_list:
    changes = []
    for month in month_list:
        percent = 0
        if not ((year==2023) and (month>8)):
            prevyear, prevmonth = prevmonth_func(year,month)
            if prevyear in year_list:
                previous = deaths_bymonth[prevyear][prevmonth-1]
                if previous != 0:
                    current = deaths_bymonth[year][month-1]
                    percent = (current - previous)/(previous) * 100
        changes.append(percent)
    percent_bymonth[year] = changes

figs, axes = plt.subplots(2,2)
figs.suptitle('Percentage Change of COVID-19 Deaths from Previous Month')

# One panel per year, filled in row-major order.
bar_colors = ['blue', 'darkorange', 'green', 'red']
for ax, year, bar_color in zip(axes.flat, year_list, bar_colors):
    ax.set_title(year)
    ax.bar(month_list.values(), percent_bymonth[year], color=bar_color)
    ax.set(xlabel='Month', ylabel='Percentage Change')
    ax.tick_params('x', labelrotation=90)
    # Fixed range: bars beyond +/-100% are clipped at the panel edge.
    ax.set_ylim((-100,100))
    ax.label_outer()

markdown

In [None]:
# Per-state COVID-19 totals (all ages) taken from the rows that have no 'Month',
# ordered by year and then death count, both descending.
is_state_total = df['Month'].isnull() & (df['State']!='United States') & (df['Condition']=='COVID-19') & (df['Age Group']=='All Ages')
df2 = df.loc[is_state_total, ['Year', 'State', 'Deaths']].sort_values(by=['Year', 'Deaths'], ascending=False)

for year in year_list:
    df3 = df2.loc[df2['Year']==year]

    print(int(year))

    # Highest counts sit at the top of the descending sort.
    print('Most deaths:')
    for rank in range(3):
        row = df3.iloc[rank]
        print(rank+1, '. ', row['State'], ' (', int(row['Deaths']), ')', sep='')

    # Lowest counts are read upwards from the bottom row.
    print('Least deaths:')
    for rank in range(3):
        row = df3.iloc[-(rank+1)]
        # The last entry carries a trailing blank line to separate the years.
        closing = ') \n' if rank==2 else ')'
        print(rank+1, '. ', row['State'], ' (', int(row['Deaths']), closing, sep='')

markdown

In [None]:
# Every age group present in the data except the 'All Ages' aggregate.
age_list = df['Age Group'].unique().tolist()
age_list.remove('All Ages')

# National COVID-19 rows without a 'Month'; filtered once and reused for every (year, age) pair.
us_yearly = df.loc[df['Month'].isnull() & (df['State']=='United States') & (df['Condition']=='COVID-19')]

# deaths_byage[year] lists the death totals in the same order as age_list.
deaths_byage = {
    year: [
        us_yearly.loc[(us_yearly['Year']==year) & (us_yearly['Age Group']==age), 'Deaths'].sum()
        for age in age_list
    ]
    for year in year_list
}

figs, axes = plt.subplots(2,2)
figs.suptitle('COVID-19 Deaths by Age Group, By Year')

# One panel per year, filled in row-major order.
bar_colors = ['blue', 'darkorange', 'green', 'red']
for ax, year, bar_color in zip(axes.flat, year_list, bar_colors):
    ax.set_title(year)
    ax.bar(age_list, deaths_byage[year], color=bar_color)
    ax.set(xlabel='Age Group', ylabel='Number of Deaths')
    ax.tick_params('x', labelrotation=90)
    # Same y-range on every panel so bar heights are comparable across years.
    ax.set_ylim((0,125000))
    ax.label_outer()

markdown

In [None]:
# Wedge colours, passed to every pie so a given age group keeps its colour across panels.
color_list = ['#6E5E4D', '#887561', '#A08C77', '#BFAB95', '#E7DA61', '#9AB8C8', '#7392BD', '#535E84', 'black']

figs, axes = plt.subplots(2,2)
figs.suptitle('Percentage of COVID-19 Deaths by Age Group, By Year')

# One pie per year, filled in row-major order; wedges follow the order of age_list.
for ax, year in zip(axes.flat, year_list):
    ax.set_title(year)
    ax.pie(deaths_byage[year], labels=age_list, autopct='%1.1f%%', colors=color_list)

plt.show()

markdown

In [None]:
condition_list = df['Condition'].unique().tolist()

# National rows without a 'Year', restricted to the individual age groups
# (the 'All Ages' aggregate is excluded so totals are built from the groups themselves).
df2 = df.loc[
    df['Year'].isnull() & (df['State']=='United States') & (df['Age Group']!='All Ages'),
    ['Condition', 'Age Group', 'Deaths'],
]

# Total deaths per condition, summed over every age group.
total_bycondition = {
    condition: df2.loc[df2['Condition']==condition, 'Deaths'].sum()
    for condition in condition_list
}

new_data = {'Condition':condition_list, 'Total Deaths':total_bycondition.values()}

# One column per age group: that group's share of each condition's total deaths.
for age in age_list:
    age_rows = df2.loc[df2['Age Group']==age]
    proportions = [
        age_rows.loc[age_rows['Condition']==condition, 'Deaths'].sum() / total_bycondition[condition]
        for condition in condition_list
    ]

    key = 'Proportion of Unstated Age' if age=='Not stated' else 'Proportion Aged ' + age
    new_data[key] = proportions

new_df = pd.DataFrame(new_data)
new_df

markdown

In [None]:
# Last row after an ascending sort on the 0-24 proportion column.
new_df.sort_values(by='Proportion Aged 0-24', ascending=True).iloc[-1:]

markdown

In [23]:
# State-level COVID-19 records (all ages) with a 'Year' and fewer than 10 deaths.
# Rows for 2023 are kept only when their month is before September
# (presumably where the data ends - TODO confirm).
has_year = df['Year'].notnull()
in_window = (df['Year']!=2023) | (df['Month']<9)
is_state_covid = (df['State']!='United States') & (df['Condition']=='COVID-19') & (df['Age Group']=='All Ages')
is_low = df['Deaths']<10

# Newest records first: year descending, then month descending.
df2 = (
    df.loc[has_year & in_window & is_state_covid & is_low, ['Year', 'Month', 'State', 'Deaths']]
    .sort_values(by=['Year', 'Month'], ascending=False)
)

def length_func(currentyear, currentmonth, length):
    """Walk backwards month by month while ``df2`` has a record for the state.

    Reads two module-level names: ``df2`` (the record table) and ``state``
    (the state being followed, which the caller must set beforehand).

    Starting from ``(currentyear, currentmonth)`` with a streak of ``length``
    months, step to the previous month for as long as ``df2`` contains a row
    for that month and ``state``, adding one to the length per step.

    Returns:
        ``(year, month, length)`` - the earliest month reached and the total
        number of months in the streak.
    """
    while True:
        prevyear, prevmonth = prevmonth_func(currentyear, currentmonth)
        has_prev = (df2['Year']==prevyear) & (df2['Month']==prevmonth) & (df2['State']==state)

        # No record for the previous month: the streak starts at the current month.
        if df2.loc[has_prev].empty:
            return currentyear, currentmonth, length

        currentyear, currentmonth, length = prevyear, prevmonth, length+1

# Find the longest run(s) of consecutive months present in df2 for a single state,
# considering only runs whose final month falls in 2023.
# streak_list always holds every streak tied for the greatest length seen so far.
streak_list = []

# Select the 2023 records by value, using df2's own 'Year' column, rather than by
# position with a mask built from another frame.
records_2023 = df2.loc[df2['Year']==2023, ['Year', 'Month', 'State']]

# `state` is deliberately bound at module level: length_func reads it as a global.
for endyear, endmonth, state in records_2023.itertuples(index=False, name=None):
    startyear, startmonth, length = length_func(endyear, endmonth, 1)

    new_streak = {'state':state, 'start year':startyear, 'start month': startmonth, 'end year':endyear, 'end month':endmonth, 'length':length}

    if not streak_list or length > streak_list[0]['length']:
        # A strictly longer streak replaces everything collected so far.
        # It must stay a list: the comparisons above index streak_list[0].
        streak_list = [new_streak]

    elif length == streak_list[0]['length']:
        # A tie is kept alongside the current best.
        streak_list.append(new_streak)

streak_list

[{'state': 'Alaska',
  'start year': 2023.0,
  'start month': 3.0,
  'end year': 2023.0,
  'end month': 8.0,
  'length': 6}]