In [1]:
!pip install adjustText

Collecting adjustText
  Downloading adjustText-0.7.3.tar.gz (7.5 kB)
Building wheels for collected packages: adjustText
  Building wheel for adjustText (setup.py) ... [?25l[?25hdone
  Created wheel for adjustText: filename=adjustText-0.7.3-py3-none-any.whl size=7097 sha256=fc7b3dbb69c18f2805fb2b082493dbf651365e8cef822356904a545d3b370c95
  Stored in directory: /root/.cache/pip/wheels/2f/98/32/afbf902d8f040fadfdf0a44357e4ab750afe165d873bf5893d
Successfully built adjustText
Installing collected packages: adjustText
Successfully installed adjustText-0.7.3


In [2]:
# Data from here:
# https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
from adjustText import adjust_text
from matplotlib.ticker import ScalarFormatter

from bokeh.models import ColumnDataSource, LabelSet, Arrow, NormalHead, OpenHead, VeeHead, ColorBar, Span
from bokeh.models import Label, Title, NumeralTickFormatter
from bokeh.plotting import figure, output_file, show
from bokeh.transform import linear_cmap
from bokeh.palettes import RdBu, Spectral10, Spectral6

In [3]:
# Mount Google Drive into the Colab runtime at /content/gdrive so files stored
# under that path can be opened like local files.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
# Load the cumulative COVID table from Drive and discard the 'Unnamed: 0' column
# (presumably a row index written out when the workbook was saved -- confirm).
covid_cumulative = (
    pd.read_excel('/content/gdrive/MyDrive/Colab Notebooks/covid_a_master_cumulative_table.xlsx')
    .drop(columns=['Unnamed: 0'])
)

In [5]:
# # READ, RENAME
# covid_cumulative = pd.read_excel("covid_a_master_cumulative_table.xlsx")
# covid_cumulative.drop(columns=['Unnamed: 0'], inplace=True)

## FILTER THE LAST DAY OF THE RESULTS AND PICK THE TOP N IN # OF CASES

In [6]:
# N: number of countries to keep, ranked by case count on their latest row.
N = 50

# Latest row per country: order by date, take the last record of each group,
# expose the group key as a regular 'Countries' column, then rank by cases.
last_day = (
    covid_cumulative
    .sort_values('Date', ascending=True)
    .groupby('Countries')
    .last()
    .assign(Countries=lambda latest: latest.index)
    .sort_values('Cases', ascending=False)
)

# The N highest-ranked countries; Greece is always appended to the selection.
top_countries = last_day['Countries'].iloc[:N].tolist() + ['Greece']

# Every row of the full table that belongs to one of the selected countries.
is_selected = covid_cumulative['Countries'].isin(top_countries)
covid_top_countries = covid_cumulative[is_selected]

## GENERATE THE RELEVANT METRICS: Case fatality rate, Infection rate per 1m AND Mortality rate per 1m


In [7]:
# One row per selected country holding the column-wise maximum of every field
# (presumably the latest totals, since the input table is cumulative -- confirm).
max_cases = covid_top_countries.groupby(by=['Countries']).max()

# NOTE: rows stay in groupby key order. A bare `max_cases.sort_values('Cases',
# ascending=False)` used to sit here; it returned a sorted copy that was never
# assigned or displayed, so it had no effect and has been removed.

# Deaths as a percentage of confirmed cases.
max_cases['Case_fatality_rate'] = max_cases['Deaths']/max_cases['Cases']*100
# Confirmed cases per one million inhabitants.
max_cases['Infection_rate_per_1m'] = max_cases['Cases']/max_cases['Population']*1000000
# Deaths per one million inhabitants.
max_cases['Mortality_rate_per_1m'] = max_cases['Deaths']/max_cases['Population']*1000000
# max_cases.to_excel('covid_b_cases_cumulative_table.xlsx')

In [8]:
# Marker colour per continent, taken from evenly spaced entries of the
# Spectral10 palette. Continents not listed here end up with a missing (NaN)
# colour.
continent_palette = {
    'Asia': Spectral10[0],
    'Europe': Spectral10[3],
    'America': Spectral10[6],
    'Africa': Spectral10[9],
}
max_cases['Color'] = max_cases['Continent'].map(continent_palette)

In [9]:
# Pooled totals over all selected countries. The "averages" below are ratios of
# these totals, not means of the per-country rates.
cases_total = max_cases['Cases'].sum()
deaths_total = max_cases['Deaths'].sum()
population_total = max_cases['Population'].sum()

Average_infection_rate_per_1m = cases_total/population_total*1000000
Average_death_rate_per_1m = deaths_total/population_total*1000000
Average_case_fatality_rate = deaths_total/cases_total*100

print("The average infection rate is: {:.1f} per 1m people".format(Average_infection_rate_per_1m))
print("The average death rate is: {:.1f} per 1m people".format(Average_death_rate_per_1m))
print("The average fatality rate is: {:.1f} per 100 people infected".format(Average_case_fatality_rate))

The average infection rate is: 3852.8 per 1m people
The average death rate is: 134.2 per 1m people
The average fatality rate is: 3.5 per 100 people infected


In [10]:
# Wrap the per-country metrics table in a Bokeh ColumnDataSource so that plot
# elements can refer to its columns by name.
source_1 = ColumnDataSource(max_cases)

In [11]:
# --- Figure 1: infection rate per 1m (log x) against case fatality rate (%) ---
TITLE = "Infection rate vs Case fatality rate"
TOOLS = "hover, pan, wheel_zoom, box_zoom, reset, save"
p = figure(
    title=TITLE,
    tools=TOOLS,
    toolbar_location="above",
    plot_width=1200,
    x_axis_type="log",
)

# Canvas look: grey toolbar logo, light grey background, white grid lines.
p.toolbar.logo = "grey"
p.background_fill_color = "#dddddd"
p.grid.grid_line_color = "white"

# Axis titles; `p.axis` addresses both axes at once for the shared font sizes.
p.xaxis.axis_label = "Infection rate per 1m"
p.yaxis.axis_label = "Case fatality rate (%)"
p.axis.axis_label_text_font_size = "14pt"
p.axis.major_label_text_font_size = "14pt"
# Plain thousands-separated tick labels on the log x axis.
p.xaxis.formatter = NumeralTickFormatter(format="0,0")

# One marker per country, coloured by the 'Color' column and grouped in the
# legend by continent.
p.circle(
    x="Infection_rate_per_1m",
    y="Case_fatality_rate",
    size=12,
    source=source_1,
    legend_group='Continent',
    color='Color',
    line_color='black',
    fill_alpha=0.8,
)

# Hover read-out; the {...} suffix is the number format applied to the field.
p.hover.tooltips = [
    ("Country", "@{Countries}"),
    ("Infection rate per 1m", "@Infection_rate_per_1m{0,0}"),
    ("Case fatality rate (%)", "@Case_fatality_rate{0.1}"),
    ("Cases", "@Cases{0,0}"),
    ("Deaths", "@Deaths{0,0}"),
]

# Country name centred just above each marker.
labels = LabelSet(
    x="Infection_rate_per_1m",
    y="Case_fatality_rate",
    text="Countries",
    y_offset=8,
    text_font_size="11px",
    text_color="black",
    text_font_style='bold',
    source=source_1,
    text_align='center',
)
p.add_layout(labels)

# Dashed reference lines at the pooled averages of each axis.
reference_line_style = dict(line_color='black', line_dash='dashed', line_width=1)
hline = Span(location=Average_case_fatality_rate, dimension='width', **reference_line_style)
vline = Span(location=Average_infection_rate_per_1m, dimension='height', **reference_line_style)
p.add_layout(hline)
p.add_layout(vline)

# Boxed note explaining the dashed lines, anchored at x=100, y=12.
citation = Label(
    x=100,
    y=12,
    text='Dashed lines show the average of each axis',
    text_font_size="12px",
    text_color="black",
    text_font_style='bold',
    render_mode='css',
    border_line_color='black',
    border_line_alpha=1.0,
    background_fill_color='white',
    background_fill_alpha=1.0,
)
p.add_layout(citation)

# Write the figure to a standalone HTML file and display it.
output_file("infection_rate_vs_case_fatality.html", title="infection rate vs case fatality")

show(p)

# 7 Task #3

In [12]:
# Scale factor for "per one million" rates.
PER_MILLION = 1000000

# Deaths per 1m *confirmed cases* for each country. The denominator is Cases,
# not Population, so this is the case fatality rate rescaled from % to per 1m.
max_cases['Fatality_rate_per_1m'] = max_cases['Deaths']/max_cases['Cases']*PER_MILLION

# Pooled value over all selected countries (total deaths over total cases).
Average_case_fatality_rate_1m = max_cases['Deaths'].sum()/max_cases['Cases'].sum()*PER_MILLION
print("The average fatality rate per 1m is: {:.1f} per 1m people".format(Average_case_fatality_rate_1m))

The average fatality rate per 1m is: 34843.2 per 1m people


In [13]:
# Build a second ColumnDataSource from the metrics table as it stands now, so
# that columns added to max_cases since the first source was created are
# available to the next figure.
source_2 = ColumnDataSource(max_cases)

In [14]:
# --- Figure 2 (Task 3): infection rate per 1m against fatality rate per 1m ---
TITLE = "Infection Rate per 1m vs Fatality Rate per 1m"
TOOLS = "hover, pan, wheel_zoom, box_zoom, reset, save"
pp = figure(
    title=TITLE,
    tools=TOOLS,
    toolbar_location="above",
    plot_width=1200,
    x_axis_type="log",
    y_axis_type='log',  # both axes are in log scale
)

# Canvas look: grey toolbar logo, light grey background, white grid lines.
pp.toolbar.logo = "grey"
pp.background_fill_color = "#dddddd"
pp.grid.grid_line_color = "white"

# Axis titles, font sizes and thousands-separated tick labels on both log axes.
pp.xaxis.axis_label = "Infection Rate per 1m"
pp.yaxis.axis_label = "Fatality Rate per 1m"
pp.xaxis.axis_label_text_font_size = "14pt"
pp.yaxis.axis_label_text_font_size = "14pt"
pp.xaxis.major_label_text_font_size = "14pt"
pp.yaxis.major_label_text_font_size = "14pt"
pp.xaxis.formatter = NumeralTickFormatter(format="0,0")
pp.yaxis.formatter = NumeralTickFormatter(format="0,0")

# One marker per country, coloured by the 'Color' column and grouped in the
# legend by continent.
pp.circle("Infection_rate_per_1m", "Fatality_rate_per_1m", size=12, source=source_2,
          legend_group='Continent',
          color='Color',
          line_color='black',
          fill_alpha=0.8)

# Hover read-out; the {...} suffix is the number format applied to the field.
# The fatality rate uses the same thousands-separated integer format ("0,0")
# as the other count fields (the earlier "0,1" was a typo).
pp.hover.tooltips = [
    ("Country", "@{Countries}"),
    ("Infection rate per 1m", "@Infection_rate_per_1m{0,0}"),
    ("Fatality rate per 1m", "@Fatality_rate_per_1m{0,0}"),
    ("Cases", "@Cases{0,0}"),
    ("Deaths", "@Deaths{0,0}"),
]

# Country name centred just above each marker.
labels = LabelSet(x="Infection_rate_per_1m", y="Fatality_rate_per_1m", text="Countries", y_offset=8,
                  text_font_size="11px", text_color="black", text_font_style='bold',
                  source=source_2, text_align='center')
pp.add_layout(labels)

# Dashed reference lines at the pooled averages of each axis.
hline = Span(location=Average_case_fatality_rate_1m,
             dimension='width', line_color='black',
             line_dash='dashed', line_width=1)
pp.add_layout(hline)

vline = Span(location=Average_infection_rate_per_1m,
             dimension='height', line_color='black',
             line_dash='dashed', line_width=1)
pp.add_layout(vline)

# Boxed note explaining the dashed lines. Label coordinates are in data units,
# and this y axis is deaths per 1m cases on a log scale, so the note is anchored
# at 120,000 (a 12 % case fatality rate expressed per 1m). The previous y=12
# was a percentage-scale value that sits far below the plotted rates, which
# would leave the note outside the visible area.
citation = Label(x=100, y=120000, text='Dashed lines show the average of each axis',
                 text_font_size="12px",
                 text_color="black",
                 text_font_style='bold',
                 render_mode='css',
                 border_line_color='black',
                 border_line_alpha=1.0,
                 background_fill_color='white',
                 background_fill_alpha=1.0)
pp.add_layout(citation)

# Write the figure to a standalone HTML file and display it.
output_file("Fig_3_Tjoa_Fendi.html", title="Infection Rate per 1m vs Fatality Rate per 1m")

show(pp)