In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

import plotly as py
import plotly.graph_objects as go

%matplotlib inline

# Raise pandas' display limits so frames of up to 500 rows / 500 columns are
# rendered in full instead of being elided with "...".
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500


In [2]:
# Read the COVID time-series export; the `date` column is parsed to datetimes
# at load time.
df = pd.read_csv(
    "owid-covid-data.csv",
    parse_dates=["date"],
)

In [3]:
# Keep only the rows whose ISO country code is 'USA'.
df_usa = df.loc[df["iso_code"] == "USA"]

In [4]:
# Work on an independent (deep) copy so later changes to df_week cannot leak
# back into df_usa.
df_week = df_usa.copy(deep=True)

In [5]:
# Bucket the daily `new_cases` values into weekly bins on `date` and keep the
# min / mean / max of each bin. Selecting the column before aggregating yields
# the flat 'min', 'mean', 'max' column labels directly.
df_week_new_cases = (
    df_week
    .resample('W', on='date')['new_cases']
    .agg(['min', 'mean', 'max'])
)

In [52]:
# Weekly new-case band chart: the shaded band spans the weekly minimum and
# maximum of `new_cases`, and the bold line on top is the weekly mean.
# Each trace is named so the hover label says which series a point belongs to
# (the legend itself stays hidden).
trace1 = go.Scatter(
    x=df_week_new_cases.index,
    y=df_week_new_cases['mean'],
    mode="lines+markers",
    name="Weekly mean",
    line=dict(color='firebrick', width=4),
)

# Lower edge of the band.
trace2 = go.Scatter(
    x=df_week_new_cases.index,
    y=df_week_new_cases['min'],
    mode="lines",
    name="Weekly min",
    line_color='darksalmon',
)

# Upper edge of the band. fill='tonexty' shades the area between this trace
# and the trace added immediately before it, so trace2 must directly precede
# trace3 in `data`.
trace3 = go.Scatter(
    x=df_week_new_cases.index,
    y=df_week_new_cases['max'],
    mode='lines',
    name="Weekly max",
    line_color='darksalmon',
    fill='tonexty',
    fillcolor='mistyrose',
)

# Band edges first, mean last so the mean line is drawn on top of the fill.
data = [trace2, trace3, trace1]
layout = dict(
    title='New Cases of COVID 19 in USA',
    xaxis=dict(title='Date', ticklen=5, zeroline=False),
    yaxis=dict(title='New Cases Count'),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)

fig.show()

In [7]:
# Load the per-age-group case summary with the specimen collection date parsed
# as datetime, and discard the 'Last Updated at' column.
df_age = (
    pd.read_csv(
        'COVID-19_Cases_Summarized_by_Age_Group.csv',
        parse_dates=['Specimen Collection Date'],
    )
    .drop(columns='Last Updated at')
)


In [8]:
# Monthly total of new confirmed cases per age group, as a flat frame with the
# columns 'Age Group', 'Specimen Collection Date' and 'New Confirmed Cases'.
#
# Only the date column (required by `on=`) and the value column are selected.
# The grouping key 'Age Group' must not be part of the selection: pandas >= 2.0
# no longer silently drops non-numeric columns in sum(), so the key would be
# aggregated as data and then clash with the index level of the same name in
# reset_index().
grouped_age = (
    df_age
    .groupby('Age Group')[['Specimen Collection Date', 'New Confirmed Cases']]
    .resample('M', on='Specimen Collection Date')
    .sum()
    .reset_index()
)

In [53]:
# Monthly new confirmed cases over time, one coloured line per age group.
fig = px.line(
    grouped_age,
    x='Specimen Collection Date',
    y='New Confirmed Cases',
    color='Age Group',
)
fig.show()

In [10]:
# Load the weekly death counts by sex and age (week-ending date parsed as
# datetime), drop the 'Data as of' and 'State' columns, and remove every row
# whose Sex is 'All Sex' (presumably the combined total across sexes).
df3 = pd.read_csv(
    'Provisional_COVID-19_Death_Counts_by_Sex__Age__and_Week.csv',
    parse_dates=['Week ending Date'],
)
df3 = (
    df3
    .drop(columns=['Data as of', 'State'])
    .loc[lambda frame: frame['Sex'] != 'All Sex']
)
# Lookup from the dataset's fine-grained age labels to the coarser buckets used
# in this notebook: the three youngest brackets collapse into "Under 15", the
# two oldest into "Over 75", and the decades in between only lose " years".
age_dict = {
    **dict.fromkeys(("Under 1 year", "1-4 years", "5-14 years"), "Under 15"),
    "15-24 years": "15-24",
    "25-34 years": "25-34",
    "35-44 years": "35-44",
    "45-54 years": "45-54",
    "55-64 years": "55-64",
    "65-74 years": "65-74",
    **dict.fromkeys(("75-84 years", "85 years and over"), "Over 75"),
}

In [11]:
# Collapse the raw age labels into the coarser buckets defined in age_dict.
# Labels with no entry in age_dict become NaN, exactly as with a plain .map().
# The fillna step restores values that are *already* one of the coarse buckets,
# so re-running this cell is a no-op instead of wiping the column to NaN.
df3['Age Group'] = df3['Age Group'].map(age_dict).fillna(
    df3['Age Group'].where(df3['Age Group'].isin(list(age_dict.values())))
)

In [12]:
# Monthly share (in %) of COVID-19 deaths by sex: one row per month, one column
# per sex, each row summing to 100 (up to rounding).
#
# * Only the date column (required by `on=`) and the death counts are selected.
#   Including the grouping key 'Sex' or the text column 'Age Group' makes
#   pandas >= 2.0 aggregate them as data, and 'Sex' then clashes with the index
#   level of the same name in reset_index().
# * aggfunc is given as the string 'sum'; passing the builtin `sum` is
#   deprecated in newer pandas.
# * The row normalisation is vectorised but keeps the original arithmetic
#   order, (100 * value) / row_total.
grouped_age2 = (
    df3
    .groupby('Sex')[['Week ending Date', 'COVID-19 Deaths']]
    .resample('M', on='Week ending Date')
    .sum()
    .reset_index()
    .pivot_table(
        index='Week ending Date',
        columns='Sex',
        values='COVID-19 Deaths',
        aggfunc='sum',
    )
    .pipe(lambda monthly: monthly.mul(100).div(monthly.sum(axis=1), axis=0))
    .round(2)
)

In [59]:
# Relabel the month-end timestamps with full month names ('%B') for display and
# plotting. Guarded so the cell can be re-run safely: after the first run the
# index holds plain strings, which have no .strftime.
# NOTE: '%B' drops the year, so labels would repeat if the data ever spanned
# more than twelve months.
if isinstance(grouped_age2.index, pd.DatetimeIndex):
    grouped_age2.index = grouped_age2.index.strftime('%B')
grouped_age2

Sex,Female,Male
February,54.55,45.45
March,39.46,60.54
April,44.5,55.5
May,49.29,50.71
June,48.86,51.14


In [63]:
# Stacked bars of the monthly male / female share of COVID-19 deaths
# (Male is added first, so it forms the bottom segment of each bar).
fig = go.Figure(
    data=[
        go.Bar(name=sex, x=grouped_age2.index, y=grouped_age2[sex], opacity=0.6)
        for sex in ('Male', 'Female')
    ]
)
fig.update_layout(
    barmode='stack',
    title='Proportion of Male and Female Death due to COVID 19',
    xaxis=dict(title='Month', tickangle=-45),
    yaxis=dict(title='%of COVID19 Death'),
)

fig.show()

In [74]:
# Total COVID-19 deaths per sex across all rows of df3; display the raw totals
# as an array.
grouped_sex = df3['COVID-19 Deaths'].groupby(df3['Sex']).sum()
grouped_sex.to_numpy()

array([52241, 59982], dtype=int64)

In [73]:
# Pie of total COVID-19 deaths by sex. Slices are labelled with the raw count;
# hovering shows the label and its percentage. Slice colours follow the order
# of grouped_sex.index.
colors = ['lightcoral', 'cornflowerblue']

fig = go.Figure(
    data=[
        go.Pie(
            labels=grouped_sex.index,
            values=grouped_sex.values,
            hoverinfo='label+percent',
            textinfo='value',
            textfont_size=20,
            marker=dict(colors=colors, line=dict(color='#000000', width=2)),
        )
    ]
)
fig.show()

In [16]:
# Load the per-state table from states.csv with pandas' default parsing.
df_state = pd.read_csv(filepath_or_buffer='states.csv')

In [17]:
# Keep only the rows whose Indicator is the raw death count, then discard the
# 'Data as of', 'Footnote' and 'Indicator' columns, renumber the rows from 0
# and treat missing cells as 0.
df_state_clean = (
    df_state
    .loc[df_state['Indicator'] == 'Count of COVID-19 deaths']
    .drop(columns=['Data as of', 'Footnote', 'Indicator'])
    .reset_index(drop=True)
    .fillna(0)
)

In [18]:
# Give the seven columns short display names (positional rename).
df_state_clean.columns = [
    'State', 'White', 'Black', 'Native American', 'Asian', 'Hispanic', 'Other',
]

# Convert each state's counts into percentages of that state's row total
# (rounded to 2 decimals), then strip everything from the first '<' onwards in
# any text cell (regex '[<].+').
df_state_clean = (
    df_state_clean
    .set_index('State')
    .apply(lambda row: 100 * row / float(row.sum()), axis=1)
    .round(2)
    .reset_index()
    .replace('[<].+', '', regex=True)
)

In [126]:
# One donut chart per state showing how COVID-19 deaths split across the
# columns of df_state_clean, laid out on a 4-column grid.
from plotly.subplots import make_subplots

N_COLS = 4
state_shares = df_state_clean.set_index('State')  # computed once, reused below

# Ceiling division: a trailing, partially filled row still needs a grid row.
# Flooring here placed the last pies on a row that did not exist whenever the
# number of states was not a multiple of N_COLS. max(1, ...) keeps
# make_subplots valid even for an empty frame.
n_rows = max(1, (len(state_shares) + N_COLS - 1) // N_COLS)

fig = make_subplots(
    rows=n_rows,
    cols=N_COLS,
    # Pie traces can only be placed in 'domain'-type subplot cells.
    specs=[[{'type': 'domain'}] * N_COLS for _ in range(n_rows)],
    # Plain list rather than a pandas Index, whose truth value is ambiguous.
    subplot_titles=state_shares.index.tolist(),
)
labels = state_shares.columns

for i, (idx, row) in enumerate(state_shares.iterrows()):
    # Fill the grid left-to-right, top-to-bottom; plotly positions are 1-based.
    grid_row, grid_col = divmod(i, N_COLS)
    fig.add_trace(
        go.Pie(labels=labels, values=row, name=idx, rotation=-90),
        grid_row + 1,
        grid_col + 1,
    )
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Death due to COVID-19 in various states",
    autosize=False,
    width=1100,
    height=4000,
    margin=dict(l=50, r=50, b=100, t=100, pad=4),
)
fig.show()

In [165]:
# Reshape the long-format state file into a date x state matrix of `cases`
# (pivot_table's default aggregation, the mean, applies if a date/state pair
# occurs more than once); date/state pairs with no data become 0.
# NOTE: this rebinds `df_state`, which previously held the states.csv table.
df_state = (
    pd.read_csv('us-states.csv', parse_dates=['date'])
    .pivot_table(index='date', columns='state', values='cases', fill_value=0)
)
df_state.tail()

state,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,District of Columbia,Florida,Georgia,Guam,Hawaii,Idaho,Illinois,Indiana,Iowa,Kansas,Kentucky,Louisiana,Maine,Maryland,Massachusetts,Michigan,Minnesota,Mississippi,Missouri,Montana,Nebraska,Nevada,New Hampshire,New Jersey,New Mexico,New York,North Carolina,North Dakota,Northern Mariana Islands,Ohio,Oklahoma,Oregon,Pennsylvania,Puerto Rico,Rhode Island,South Carolina,South Dakota,Tennessee,Texas,Utah,Vermont,Virgin Islands,Virginia,Washington,West Virginia,Wisconsin,Wyoming
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1
2020-06-26,34183,1001,66676,18740,207027,31780,46059,11017,10185,122952,69542,1217,850,5149,141812,45269,27828,13729,15033,54882,3102,66704,108070,69473,34651,25066,20567,829,18524,15359,5671,172438,11408,395972,59253,3430,30,48638,12343,7943,88950,6922,16661,30335,6535,38802,142582,20218,1198,81,60570,32489,2730,29725,1368
2020-06-27,35083,1027,70152,19310,211453,32006,46206,11091,10216,132537,71066,1217,856,5319,142536,45707,28203,13767,15337,54882,3154,67032,108443,69802,35068,25531,20909,852,18775,16400,5717,172727,11619,396669,60710,3467,30,49455,12642,8101,89400,7066,16661,31939,6626,39813,148639,20735,1200,81,61247,33031,2782,30242,1392
2020-06-28,35441,1060,73961,19818,216955,32290,46303,11226,10248,141067,72800,1217,883,5543,143117,46071,28520,14016,15480,56349,3191,67360,108667,70042,35584,25899,21254,863,18900,17187,5747,173036,11809,397293,62178,3504,30,50309,12944,8348,89785,7189,16661,33320,6681,39945,152437,21155,1202,81,61736,33341,2832,30723,1417
2020-06-29,37175,1085,74602,20257,223995,32495,46362,11376,10292,146333,74816,1224,884,5770,143885,46370,28874,14569,15657,57189,3219,67837,108768,70356,35896,26568,21748,919,19064,17971,5760,173126,11982,397684,63684,3548,30,51046,13172,8494,90553,7250,16764,34644,6716,41388,158756,21818,1208,81,62189,34036,2870,31110,1450
2020-06-30,38045,1132,79399,20777,232153,32698,46514,11474,10327,152426,77280,1228,901,6124,144661,46740,29152,14630,15916,58203,3253,68152,108882,70850,36338,27248,22275,967,19194,18582,5782,173521,12147,398142,65062,3585,30,51789,13757,8663,91226,7465,16813,36399,6764,42815,166703,22364,1208,81,62787,34541,2905,31723,1487


In [309]:
# Step-by-step derivation of the per-state monthly growth table.
sample_state = (
    df_state
    .resample('M').max()          # largest value seen in each calendar month
    .apply(lambda col: 100 * col / float(col.sum()), axis=0)  # month as % of the state's column total
    .round(2)
    .diff()                       # month-over-month change of that percentage
    .T                            # states become rows, months become columns
    .pct_change(axis=1)           # relative change between consecutive months; a zero base yields inf
)

In [310]:
# Show up to 55 rows — one per state/territory column of df_state (the pivot
# output above lists 55 of them), i.e. the whole table.
sample_state.head(55)

date,2020-01-31,2020-02-29,2020-03-31,2020-04-30,2020-05-31,2020-06-30
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alabama,,,inf,5.070513,0.794087,0.84638
Alaska,,,inf,0.654631,-0.472486,4.714029
Arizona,,,inf,3.883333,0.93686,3.838767
Arkansas,,,inf,3.813559,0.462441,2.405297
California,,,210.0,3.909953,0.495174,0.900581
Colorado,,,inf,3.085271,-0.086654,-0.432133
Connecticut,,,inf,6.843511,-0.409732,-0.702391
Delaware,,,inf,12.788618,0.079599,-0.585472
District of Columbia,,,inf,6.719807,0.170213,-0.658824
Florida,,,inf,2.99262,-0.166359,3.286031


In [326]:
# Percentage figure per pre-existing condition, in display order.
# NOTE(review): the values are hard-coded and their source is not recorded
# in this cell — confirm and cite.
pre_ext = dict([
    ('Cardiovascular Disease', 10.5),
    ('Diabetes', 7.3),
    ('Chronic Respiratory Disease', 6.3),
    ('High Blood Pressure / Hypertension', 6),
    ('Cancer', 5.60),
    ('No Underlying condition', 0.90),
])


In [344]:
# Horizontal bar chart of pre_ext: one bar per condition, with the percentage
# written just outside the end of each bar.
fig = go.Figure(
    go.Bar(
        x=list(pre_ext.values()),
        y=list(pre_ext),
        orientation='h',
        text=[f'{share}%' for share in pre_ext.values()],
        textposition='outside',
        marker=dict(
            color='rgba(255, 80, 80, 0.6)',
            line=dict(color='rgba(255, 80, 80, 1.0)', width=3),
        ),
    )
)

# Titles and a fixed 800x500 canvas with explicit margins.
fig.update_layout(
    title='Effect of Existing Condition',
    xaxis=dict(title='% of Deceased with Serious Ailments'),
    yaxis=dict(title='Existing Condition'),
    autosize=False,
    width=800,
    height=500,
    margin=dict(l=50, r=50, b=100, t=100, pad=4),
)
fig.show()

['10.5%', '7.3%', '6.3%', '6%', '5.6%', '0.9%']