In [1]:
import pandas as pd
import numpy as np

In [2]:
url = "https://raw.githubusercontent.com/ZulfiiaDitto/VizualizationFolder/master/Viz_With_Bokeh/Weekly_Provisional_Counts_of_Deaths_by_State_and_Select_Causes__2020-2022.csv"

In [3]:
df = pd.read_csv(url)

In [4]:
df.head(3)

Unnamed: 0,Data As Of,Jurisdiction of Occurrence,MMWR Year,MMWR Week,Week Ending Date,All Cause,Natural Cause,Septicemia (A40-A41),Malignant neoplasms (C00-C97),Diabetes mellitus (E10-E14),...,flag_alz,flag_inflpn,flag_clrd,flag_otherresp,flag_nephr,flag_otherunk,flag_hd,flag_stroke,flag_cov19mcod,flag_cov19ucod
0,12/28/2022,United States,2020,1,2020-01-04,60176,55009,843.0,11569.0,1829.0,...,,,,,,,,,,
1,12/28/2022,United States,2020,2,2020-01-11,60736,55755,861.0,11963.0,1942.0,...,,,,,,,,,,
2,12/28/2022,United States,2020,3,2020-01-18,59362,54516,829.0,11701.0,1819.0,...,,,,,,,,,,


In [5]:
list_columns= ['All Cause', 'Natural Cause',
       'Septicemia (A40-A41)', 'Malignant neoplasms (C00-C97)',
       'Diabetes mellitus (E10-E14)', 'Alzheimer disease (G30)',
       'Influenza and pneumonia (J09-J18)',
       'Chronic lower respiratory diseases (J40-J47)',
       'Other diseases of respiratory system (J00-J06,J30-J39,J67,J70-J98)',
       'Nephritis, nephrotic syndrome and nephrosis (N00-N07,N17-N19,N25-N27)',
       'Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified (R00-R99)',
       'Diseases of heart (I00-I09,I11,I13,I20-I51)',
       'Cerebrovascular diseases (I60-I69)',
       'COVID-19 (U071, Multiple Cause of Death)',
       'COVID-19 (U071, Underlying Cause of Death)',]

In [6]:
chronicDs = [
       'Diabetes mellitus (E10-E14)', 'Alzheimer disease (G30)',
       'Chronic lower respiratory diseases (J40-J47)',
       'Nephritis, nephrotic syndrome and nephrosis (N00-N07,N17-N19,N25-N27)',
       'Diseases of heart (I00-I09,I11,I13,I20-I51)',
       'Cerebrovascular diseases (I60-I69)',
      ]

chronicDsClean = [i.split(' (')[0] for i in chronicDs]

In [7]:
# cleaning 
df['Week Ending Date'] = df['Week Ending Date'].astype('datetime64[ns]')
df = df[~df['Jurisdiction of Occurrence'].isin(['United States','District of Columbia','New York City','Puerto Rico'])]

### Time series (amount of deaths caused by the chronic desieses)

In [21]:
chronic = df.groupby(['Week Ending Date'], as_index = False)[chronicDs].sum()

In [22]:
chronic.head(2)

Unnamed: 0,Week Ending Date,Diabetes mellitus (E10-E14),Alzheimer disease (G30),Chronic lower respiratory diseases (J40-J47),"Nephritis, nephrotic syndrome and nephrosis (N00-N07,N17-N19,N25-N27)","Diseases of heart (I00-I09,I11,I13,I20-I51)",Cerebrovascular diseases (I60-I69)
0,2020-01-04,1724.0,2466.0,3431.0,1003.0,13773.0,3048.0
1,2020-01-11,1843.0,2483.0,3628.0,1000.0,13510.0,3096.0


In [23]:
from bokeh.models.annotations import Tooltip
from pandas.core import tools
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20_20, Category10_6,  Pastel1_3
from bokeh.layouts import column, gridplot
from bokeh.models import ColumnDataSource, Legend, HoverTool,Whisker, Panel, Tabs
from bokeh.plotting import figure, show, output_file

source = ColumnDataSource(chronic)

chronicTS = figure(x_axis_type="datetime",
                    width=1500, height=500,
                    x_axis_label='Date',
                    y_axis_label='Number of deaths', 
                    title = 'Number of deaths caused by chromic diseases in one State')
chronicTS.add_layout(Legend(), 'right')
# appending the lines 
for i in range(len(chronicDs)):
    chronicTS.line('Week Ending Date', '{}'.format(chronicDs[i]), source=source, color= Category20_20[i],
                  legend_label = chronicDs[i], line_width=2)

show(chronicTS)

### Stacked bar chart amount of chronic deaths by the years 

In [11]:

disease = df.groupby(["MMWR Year"])[chronicDs].sum()
disease

Unnamed: 0_level_0,Diabetes mellitus (E10-E14),Alzheimer disease (G30),Chronic lower respiratory diseases (J40-J47),"Nephritis, nephrotic syndrome and nephrosis (N00-N07,N17-N19,N25-N27)","Diseases of heart (I00-I09,I11,I13,I20-I51)",Cerebrovascular diseases (I60-I69)
MMWR Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020,98412.0,132717.0,150988.0,48075.0,685708.0,158247.0
2021,98440.0,115149.0,138390.0,49087.0,676635.0,158023.0
2022,88438.0,109289.0,133026.0,49447.0,632071.0,150817.0


In [12]:

from math import pi

years = ["2020", "2021", "2022"]

data = {'years' : ["2020", "2021", "2022"]}
for i in chronicDs:
    data[i] = disease.loc[: , i].values 
    
StackedDisease = figure(x_range=years, height=350,width= 1000, title="Amount of deaths cases by chronic desiase in period of 3 years",)
StackedDisease.add_layout(Legend(), 'right')

StackedDisease.vbar_stack(chronicDs, x='years', width=0.9, color=Category10_6, source=data,
              legend_label=chronicDsClean)

StackedDisease.y_range.start = 0
StackedDisease.x_range.range_padding = 0.1
StackedDisease.xgrid.grid_line_color = None
StackedDisease.axis.minor_tick_line_color = None
StackedDisease.outline_line_color = None



StackedDisease.xaxis.major_label_orientation = pi/4

show(StackedDisease)

### Box plot distribution of Septicemia by years

In [13]:
septicemia = df[['MMWR Year', 'Septicemia (A40-A41)', ]].copy()

septicemia.head(2)

Unnamed: 0,MMWR Year,Septicemia (A40-A41)
155,2020,15.0
156,2020,25.0


In [14]:
qnt = septicemia.groupby(['MMWR Year'])['Septicemia (A40-A41)'].quantile([0.25, 0.5, 0.75])
qnt = qnt.unstack().reset_index()
qnt.rename({0.25 : "q1", 0.50 :"q2", 0.75: "q3"}, axis = 1, inplace = True)
qnt.head()

Unnamed: 0,MMWR Year,q1,q2,q3
0,2020,13.5,20.0,32.0
1,2021,13.0,21.0,33.0
2,2022,13.0,21.0,31.0


In [15]:
sept = septicemia.merge(qnt, on = ['MMWR Year'], how = 'left')
sept['MMWR Year'] = sept['MMWR Year'].astype(str) # need to be categorical type in order to build the graph

sept['upper'] = sept.q3 + 1.5*(sept.q3-sept.q1)
sept['lower'] = sept.q3 - 1.5*(sept.q3-sept.q1)
sept.head()

Unnamed: 0,MMWR Year,Septicemia (A40-A41),q1,q2,q3,upper,lower
0,2020,15.0,13.5,20.0,32.0,59.75,4.25
1,2020,25.0,13.5,20.0,32.0,59.75,4.25
2,2020,28.0,13.5,20.0,32.0,59.75,4.25
3,2020,16.0,13.5,20.0,32.0,59.75,4.25
4,2020,15.0,13.5,20.0,32.0,59.75,4.25


In [16]:
dates = sept['MMWR Year'].astype(str).unique().tolist()
dates

['2020', '2021', '2022']

In [17]:
source = ColumnDataSource(sept)

p = figure(x_range=dates, tools="", toolbar_location=None,
           title="Distribution of the septicemia per years amoung states",
           background_fill_color="#eaefef", y_axis_label="Number of patients with Septicemia")

# outlier 
whisker = Whisker(base="MMWR Year", upper="upper", lower="lower", source=source)
whisker.upper_head.size = whisker.lower_head.size = 20
p.add_layout(whisker)

# quantile 
cmap = factor_cmap("MMWR Year", "Set3_3", dates)
p.vbar("MMWR Year", 0.7, "q2", "q3", source=source, color=cmap, line_color="black", )
p.vbar("MMWR Year", 0.7, "q1", "q2", source=source, color=cmap, line_color="black",)

# outliers
outliers = sept[~sept['Septicemia (A40-A41)'].between(sept.lower, sept.upper)]
p.scatter("MMWR Year", "Septicemia (A40-A41)", source=outliers, size=6, color="black", alpha=0.3)

p.xgrid.grid_line_color = None
p.axis.major_label_text_font_size="14px"
p.axis.axis_label_text_font_size="12px"
show(p)

### Respiratory diseases 

In [18]:
resp = df[['MMWR Year','Jurisdiction of Occurrence',
           'Week Ending Date', 'MMWR Week', 
           'Other diseases of respiratory system (J00-J06,J30-J39,J67,J70-J98)',
            'Chronic lower respiratory diseases (J40-J47)']].copy()
resp['MMWR Year'] =resp['MMWR Year'].astype(str)

In [19]:

plots = []
markers = ['hex', 'star', 'triangle']
years = resp['MMWR Year'].unique().tolist()

for i in range(len(years)):
    
    yearDf = resp.loc[resp['MMWR Year']==years[i]]
    source = ColumnDataSource(yearDf)
    scatterPlot = figure(title = "Scatter plot Other vs Chronic resp diseases, year {}".format(years[i]),
                        x_axis_label='number of death from other resp disease',
                        y_axis_label='number of death from chronic resp desiase')
    
    scatterPlot.scatter("Other diseases of respiratory system (J00-J06,J30-J39,J67,J70-J98)", 
                        "Chronic lower respiratory diseases (J40-J47)",  
                        source=source, legend_group= 'MMWR Year', marker = markers[i], 
                        color = Pastel1_3[i], line_color = 'black', size = 7)
    plots.append(scatterPlot)
    
show(column(*plots))
    

###  Assembling dash

In [24]:
# Time series of chronic desiases 
source = ColumnDataSource(chronic)

chronicTS = figure(x_axis_type="datetime",
                    width=1500, height=500,
                    x_axis_label='Date',
                    y_axis_label='Number of deaths', 
                    title = 'Number of deaths caused by chromic diseases in one State')
chronicTS.add_layout(Legend(), 'right')

for i in range(len(chronicDs)):
    chronicTS.line('Week Ending Date', '{}'.format(chronicDs[i]), source=source, color= Category20_20[i],
                  legend_label = chronicDs[i], line_width=2)

# stacked bar chronick desiase by year 
years = ["2020", "2021", "2022"]
data = {'years' : ["2020", "2021", "2022"]}
for i in chronicDs:
    data[i] = disease.loc[: , i].values 
    
StackedDisease = figure(x_range=years, height=350,width= 1000, title="Amount of deaths cases by chronic desiase in period of 3 years",)
StackedDisease.add_layout(Legend(), 'right')

StackedDisease.vbar_stack(chronicDs, x='years', width=0.9, color=Category10_6, source=data,
              legend_label=chronicDsClean)

StackedDisease.y_range.start = 0
StackedDisease.x_range.range_padding = 0.1
StackedDisease.xgrid.grid_line_color = None
StackedDisease.axis.minor_tick_line_color = None
StackedDisease.outline_line_color = 'black'
StackedDisease.xaxis.major_label_orientation = pi/4

# box plot 
source = ColumnDataSource(sept)
dates = sept['MMWR Year'].astype(str).unique().tolist()
p = figure(x_range=dates, tools="", toolbar_location=None,
           title="Distribution of the septicemia per years amoung states",
           background_fill_color="#eaefef", y_axis_label="Number of patients with Septicemia")

# outlier 
whisker = Whisker(base="MMWR Year", upper="upper", lower="lower", source=source)
whisker.upper_head.size = whisker.lower_head.size = 20
p.add_layout(whisker)

# quantile 
cmap = factor_cmap("MMWR Year", "Set3_3", dates)
p.vbar("MMWR Year", 0.7, "q2", "q3", source=source, color=cmap, line_color="black", )
p.vbar("MMWR Year", 0.7, "q1", "q2", source=source, color=cmap, line_color="black",)

# outliers
outliers = sept[~sept['Septicemia (A40-A41)'].between(sept.lower, sept.upper)]
p.scatter("MMWR Year", "Septicemia (A40-A41)", source=outliers, size=6, color="black", alpha=0.3)

p.xgrid.grid_line_color = None
p.axis.major_label_text_font_size="14px"
p.axis.axis_label_text_font_size="12px"

# scatterplot 
plots = []
markers = ['hex', 'star', 'triangle']
years = resp['MMWR Year'].unique().tolist()

for i in range(len(years)):
    
    yearDf = resp.loc[resp['MMWR Year']==years[i]]
    source = ColumnDataSource(yearDf)
    scatterPlot = figure(title = "Scatter plot Other vs Chronic resp diseases, year {}".format(years[i]),
                        x_axis_label='number of death from other resp disease',
                        y_axis_label='number of death from chronic resp desiase')
    
    scatterPlot.scatter("Other diseases of respiratory system (J00-J06,J30-J39,J67,J70-J98)", 
                        "Chronic lower respiratory diseases (J40-J47)",  
                        source=source, legend_group= 'MMWR Year', marker = markers[i], 
                        color = Pastel1_3[i], line_color = 'black', size = 7)
    plots.append(scatterPlot)

# assemble grid 

chronicTab = gridplot([[chronicTS, None ],[StackedDisease, None], ])
SepticemiaTab = gridplot([[p, None], ])
RespTab = gridplot([plots])

# assemble tab 
tab1 = Panel(child=chronicTab,title="Chronic desiases")
tab2 = Panel(child=SepticemiaTab,title="Septicemia")
tab3 = Panel(child=RespTab,title="Resp desiases")
# assemble the tabs
tabs = Tabs(tabs=[ tab1, tab2, tab3 ])

show(tabs)