In [None]:
# Data from here:
# https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide
!pip install adjustText
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
from adjustText import adjust_text
from matplotlib.ticker import ScalarFormatter

from bokeh.models import ColumnDataSource, LabelSet, Arrow, NormalHead, OpenHead, VeeHead, ColorBar, Span
from bokeh.models import Label, Title, NumeralTickFormatter
from bokeh.plotting import figure, output_file, show
from bokeh.transform import linear_cmap
from bokeh.palettes import RdBu, Spectral10, Spectral6



In [None]:
# Load the cumulative master table and discard its 'Unnamed: 0' column
# (presumably an index saved along with the sheet -- confirm against the file).
# The drop is chained onto the read so the frame is never mutated in place.
covid_cumulative = (
    pd.read_excel("covid_a_master_cumulative_table.xlsx")
    .drop(columns=['Unnamed: 0'])
)
covid_cumulative

Unnamed: 0,DateRep,Country,Cases,Deaths,Date,Countries,Population,Continent
0,2019-12-31,Afghanistan,0,0,2019-12-31,Afghanistan,38041757.0,Asia
1,2019-12-31,Algeria,0,0,2019-12-31,Algeria,43053054.0,Africa
2,2019-12-31,Armenia,0,0,2019-12-31,Armenia,2957728.0,Europe
3,2019-12-31,Australia,0,0,2019-12-31,Australia,25203200.0,Oceania
4,2019-12-31,Austria,0,0,2019-12-31,Austria,8858775.0,Europe
...,...,...,...,...,...,...,...,...
38906,2020-08-27,Vietnam,1034,30,2020-08-27,Vietnam,96462108.0,Asia
38907,2020-08-27,Western_Sahara,766,1,2020-08-27,Western_Sahara,582458.0,Africa
38908,2020-08-27,Yemen,1930,560,2020-08-27,Yemen,29161922.0,Asia
38909,2020-08-27,Zambia,11376,282,2020-08-27,Zambia,17861034.0,Africa


## FILTER TO EACH COUNTRY'S LAST REPORTED DAY AND PICK THE TOP N COUNTRIES BY NUMBER OF CASES

In [None]:
# N: how many countries to keep, ranked by number of cases
N = 50

# Latest values per country: order chronologically, then take the last entry
# of every 'Countries' group.
last_day = (
    covid_cumulative
    .sort_values('Date', ascending=True)
    .groupby('Countries')
    .last()
)
# The groupby moved 'Countries' into the index; expose it again as a column.
last_day['Countries'] = last_day.index
last_day = last_day.sort_values('Cases', ascending=False)

# The N countries with the most cases, plus Greece, which is always appended.
top_countries = last_day['Countries'].iloc[:N].tolist() + ['Greece']

# Full time series restricted to the selected countries.
is_top_country = covid_cumulative.Countries.isin(top_countries)
covid_top_countries = covid_cumulative[is_top_country]
covid_top_countries

Unnamed: 0,DateRep,Country,Cases,Deaths,Date,Countries,Population,Continent
7,2019-12-31,Belarus,0,0,2019-12-31,Belarus,9.452409e+06,Europe
8,2019-12-31,Belgium,0,0,2019-12-31,Belgium,1.145552e+07,Europe
9,2019-12-31,Brazil,0,0,2019-12-31,Brazil,2.110495e+08,America
11,2019-12-31,Canada,0,0,2019-12-31,Canada,3.741104e+07,America
13,2019-12-31,China,27,0,2019-12-31,China,1.433784e+09,Asia
...,...,...,...,...,...,...,...,...
38894,2020-08-27,Turkey,262507,6183,2020-08-27,Turkey,8.200388e+07,Asia
38896,2020-08-27,UAE,68020,378,2020-08-27,UAE,9.770526e+06,Asia
38897,2020-08-27,UK,328846,41465,2020-08-27,UK,6.664711e+07,Europe
38898,2020-08-27,USA,5821876,179714,2020-08-27,USA,3.290649e+08,America


## GENERATE THE RELEVANT METRICS: Case fatality rate, Infection rate per 1m AND Mortality rate per 1m


In [None]:
# Per-country summary: the column-wise maximum of every field for each country,
# extended with three derived rates.
#   Case_fatality_rate    -- deaths per 100 reported cases
#   Infection_rate_per_1m -- cases per million inhabitants
#   Fatality_rate_per_1m  -- deaths per million inhabitants
#
# The original cell also contained a bare `max_cases.sort_values('Cases', ...)`
# whose result was thrown away, so it never reordered anything. It has been
# removed; rows stay in the group-key (country name) order that later cells see.
# All three rates are added in a single `assign`, so re-running the cell always
# rebuilds `max_cases` from `covid_top_countries` without in-place edits.
max_cases = (
    covid_top_countries
    .groupby(by=['Countries'])
    .max()
    .assign(
        Case_fatality_rate=lambda df: df['Deaths'] / df['Cases'] * 100,
        Infection_rate_per_1m=lambda df: df['Cases'] / df['Population'] * 1000000,
        Fatality_rate_per_1m=lambda df: df['Deaths'] / df['Population'] * 1000000,
    )
)
max_cases

Unnamed: 0_level_0,DateRep,Country,Cases,Deaths,Date,Population,Continent,Case_fatality_rate,Infection_rate_per_1m,Fatality_rate_per_1m
Countries,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Argentina,2020-08-27,Argentina,370175,7839,2020-08-27,44780680.0,America,2.117647,8266.400629,175.05319
Bangladesh,2020-08-27,Bangladesh,302147,4082,2020-08-27,163046200.0,Asia,1.350998,1853.137639,25.035853
Belarus,2020-08-27,Belarus,70974,657,2020-08-27,9452409.0,Europe,0.925691,7508.562103,69.506091
Belgium,2020-08-27,Belgium,82936,9879,2020-08-27,11455520.0,Europe,11.911594,7239.829116,862.379086
Bolivia,2020-08-27,Bolivia,112094,4726,2020-08-27,11513100.0,America,4.216104,9736.211839,410.48885
Brazil,2020-08-27,Brazil,3717156,117665,2020-08-27,211049500.0,America,3.165458,17612.719601,557.523185
Canada,2020-08-27,Canada,126417,9094,2020-08-27,37411040.0,America,7.193653,3379.136393,243.083338
Chile,2020-08-27,Chile,402365,10990,2020-08-27,18952040.0,America,2.731351,21230.701611,579.884957
China,2020-08-27,China,89784,4713,2020-08-27,1433784000.0,Asia,5.249265,62.620324,3.287107
Colombia,2020-08-27,Colombia,572270,18184,2020-08-27,50339440.0,America,3.177521,11368.222727,361.227676


In [None]:
# Assign every country a marker colour based on its continent.
# The four continents below keep exactly the palette entries used before.
CONTINENT_COLORS = {
    'Asia': Spectral10[0],
    'Europe': Spectral10[3],
    'America': Spectral10[6],
    'Africa': Spectral10[9],
}
# Any continent not listed above (e.g. 'Oceania', which is present in the data
# and can enter the selection when N is raised) previously ended up with NaN in
# 'Color', which is not a usable colour for the plot. Such rows now get a
# neutral grey instead.
FALLBACK_COLOR = '#7f7f7f'

max_cases['Color'] = (
    max_cases['Continent']
    .map(CONTINENT_COLORS)
    .fillna(FALLBACK_COLOR)
)

In [None]:
# Pooled averages over all selected countries: totals are summed first and the
# ratios taken afterwards, so these are not the mean of the per-country rates.
total_cases = max_cases['Cases'].sum()
total_deaths = max_cases['Deaths'].sum()
total_population = max_cases['Population'].sum()

Average_infection_rate_per_1m = total_cases / total_population * 1000000
Average_death_rate_per_1m = total_deaths / total_population * 1000000
Average_case_fatality_rate = total_deaths / total_cases * 100

print("The average infection rate is: {:.1f} per 1m people".format(Average_infection_rate_per_1m))
print("The average death rate is: {:.1f} per 1m people".format(Average_death_rate_per_1m))
print("The average mortality rate is: {:.1f} per 100 people infected".format(Average_case_fatality_rate))

The average infection rate is: 3852.8 per 1m people
The average death rate is: 134.2 per 1m people
The average mortality rate is: 3.5 per 100 people infected


In [None]:
# Wrap the per-country metrics table in a Bokeh ColumnDataSource so that the
# glyphs, labels and hover tooltips of the plot can refer to its columns by name.
# NOTE(review): the plot reads a "Countries" field, which in `max_cases` is the
# index rather than a regular column -- presumably Bokeh exposes the named index
# as a column of the source; confirm.
source_1 = ColumnDataSource(max_cases)

In [None]:
# Scatter plot of infection rate vs fatality rate (both per 1m inhabitants),
# one marker per country, with a log-scaled x axis. The figure is written to a
# standalone HTML file and then shown.
TITLE = "Infection rate per million vs Fatality rate per million"
TOOLS = "hover, pan, wheel_zoom, box_zoom, reset, save"
p = figure(
    title=TITLE,
    tools=TOOLS,
    toolbar_location="above",
    plot_width=1200,
    x_axis_type="log",
)

# Overall look of the canvas.
p.toolbar.logo = "grey"
p.background_fill_color = "#dddddd"
p.grid.grid_line_color = "white"

# Axis titles, font sizes (same on both axes) and x tick formatting.
p.xaxis.axis_label = "Infection rate per 1m"
p.yaxis.axis_label = "Fatality rate per 1m"
for axes in (p.xaxis, p.yaxis):
    axes.axis_label_text_font_size = "14pt"
    axes.major_label_text_font_size = "14pt"
p.xaxis.formatter = NumeralTickFormatter(format="0,0")

# One circle per country; the colour is read from the 'Color' column of the
# source and the legend is grouped by the 'Continent' column.
marker_style = dict(
    size=12,
    legend_group='Continent',
    color='Color',
    line_color='black',
    fill_alpha=0.8,
)
p.circle("Infection_rate_per_1m", "Fatality_rate_per_1m", source=source_1, **marker_style)

# Hover box contents.
p.hover.tooltips = [
    ("Country", "@{Countries}"),
    ("Infection rate per 1m", "@Infection_rate_per_1m{0,0}"),
    ("Fatality rate per 1m", "@Fatality_rate_per_1m{0,0}"),
    ("Cases", "@Cases{0,0}"),
    ("Deaths", "@Deaths{0,0}"),
]

# Country name drawn slightly above each marker.
labels = LabelSet(
    x="Infection_rate_per_1m",
    y="Fatality_rate_per_1m",
    text="Countries",
    y_offset=8,
    text_font_size="11px",
    text_color="black",
    text_font_style='bold',
    source=source_1,
    text_align='center',
)
p.add_layout(labels)

# Dashed reference lines at the pooled average death and infection rates.
span_style = dict(line_color='black', line_dash='dashed', line_width=1)

hline = Span(location=Average_death_rate_per_1m, dimension='width', **span_style)
p.add_layout(hline)

vline = Span(location=Average_infection_rate_per_1m, dimension='height', **span_style)
p.add_layout(vline)

# Boxed note explaining the dashed lines, positioned in data coordinates.
citation = Label(
    x=100,
    y=12,
    text='Dashed lines show the average of each axis',
    text_font_size="12px",
    text_color="black",
    text_font_style='bold',
    render_mode='css',
    border_line_color='black',
    border_line_alpha=1.0,
    background_fill_color='white',
    background_fill_alpha=1.0,
)
p.add_layout(citation)

output_file("Fig_3_Desai_Ashutosh.html", title="infection rate vs fatality rate")

show(p)