In [None]:
import html
import os

import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display, Markdown, HTML
from matplotlib import pyplot
from matplotlib.transforms import Bbox

import utils

In [None]:
def video(filename, alt):
    """Build an HTML ``<video>`` element for ``assets/video/<filename>.mp4``.

    Args:
        filename: Name of the video file without the ``.mp4`` extension.
        alt: Text placed in the element's ``alt`` attribute.

    Returns:
        An ``IPython.display.HTML`` object wrapping the markup.
    """
    # Both values end up inside double-quoted attributes, so quotes, angle
    # brackets and ampersands must be escaped to keep the markup well-formed.
    safe_alt = html.escape(str(alt), quote=True)
    safe_filename = html.escape(str(filename), quote=True)
    return HTML(f"""
        <video alt="{safe_alt}" controls style="max-width: 640px; margin-left: 1em; margin-right: 1em; width: 100%">
            <source src="assets/video/{safe_filename}.mp4" type="video/mp4">
        </video>
        """)

def image(filename, alt):
    """Build an HTML ``<img>`` element for ``assets/img/<filename>.png``.

    Args:
        filename: Name of the image file without the ``.png`` extension.
        alt: Text placed in the element's ``alt`` attribute.

    Returns:
        An ``IPython.display.HTML`` object wrapping the markup.
    """
    # Both values end up inside double-quoted attributes, so quotes, angle
    # brackets and ampersands must be escaped to keep the markup well-formed.
    safe_alt = html.escape(str(alt), quote=True)
    safe_filename = html.escape(str(filename), quote=True)
    return HTML(f"""
        <img alt="{safe_alt}" src="assets/img/{safe_filename}.png" style="background: white;" />
        """)

In [None]:
# Countries as returned by the project storage layer.
countries_list = utils.storage.get_countries()

# Colour (hex RGB) used for each metric in the plots of this notebook.
column_colors = dict(
    Confirmed='#ae4444',
    Recovered='#44ae44',
    Deaths='#444444',
)

# Size passed to pyplot.figure(figsize=...) for every figure in this notebook.
figure_size = (10, 6)

def get_countries_palette(countries_num=194, min_c=23, max_c=225):
    """Generate evenly spread RGB colours, one per country.

    The colour cube ``[min_c, max_c]**3`` is enumerated as a three-digit number
    (red, green, blue digits), ``countries_num`` evenly spaced indices are picked
    from it, and each index is decoded back into its three channel values.

    Args:
        countries_num: Number of colours to generate. The default is a fixed
            number on purpose: per the original author's note, do not use
            ``len(countries_list)``, since a change in that list could then go
            unnoticed and lead to a worse palette.
        min_c: Smallest allowed channel value on the 0-255 scale.
        max_c: Largest allowed channel value on the 0-255 scale.

    Returns:
        A structured numpy array with float fields ``red``, ``green`` and
        ``blue`` (each channel divided by 255), sorted by the ``blue`` field.
    """
    # Number of distinct values a single channel can take. The same radix must
    # be used both for the size of the index range and for decoding an index;
    # otherwise the red digit overflows past max_c.
    span = max_c - min_c + 1

    steps = np.linspace(0, span**3 - 1, countries_num, dtype=int)
    red, rest = divmod(steps, span**2)
    green, blue = divmod(rest, span)

    # Shape (countries_num, 3): one row per colour, scaled into 0..1.
    rgb = ((np.array([red, green, blue]) + min_c) / 255).T

    palette = np.array(
        list(map(tuple, rgb)),
        dtype=[('red', float), ('green', float), ('blue', float)],
    )
    return np.sort(palette, order=['blue'])

_countries_palette = get_countries_palette()

# Colours are assigned by position in countries_list, so the palette must be at
# least as long as the list. Fail with an explicit message instead of a bare
# out-of-range index.
if len(countries_list) > len(_countries_palette):
    raise IndexError(
        f"Palette has {len(_countries_palette)} colors but there are "
        f"{len(countries_list)} countries; increase the palette size."
    )

# Pair each country with the colour at the same position. setdefault keeps the
# colour of the first occurrence if a name appears more than once.
countries_colors = {}
for _country, _color in zip(countries_list, _countries_palette):
    countries_colors.setdefault(_country, _color)

del _countries_palette, get_countries_palette

# Make sure the output folder for the rendered plots exists (creates ./assets too).
os.makedirs('./assets/img', exist_ok=True)

In [None]:
# Reference dates used throughout the report, taken from the utils module.
today = utils.last_day
yesterday = today - utils.one_day
this_week = utils.last_week
previous_week = this_week - utils.one_week


def date_to_string(d):
    """Format ``d`` with the pattern ``'%d %B %Y'`` (day, full month name, year)."""
    return d.date().strftime('%d %B %Y')


_first_day_text = date_to_string(utils.first_day)
_last_day_text = date_to_string(today)

# Font Awesome stylesheet; the footer of the report uses its icon classes.
display(HTML("""<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/css/all.css">"""))
display(Markdown(f"""
# COVID-19 Sandbox report / {_last_day_text}
---

Report's data covers a period from '_{_first_day_text}_' till '_{_last_day_text}_'.
Report based on data gathered from Johns Hopkins University and Yandex company.

There are several terms that you will often meet in the report:
- Today - the last available day in the report's data.
- This week - the latest available week includes 'Today.' **NOTE:** Week starts from Monday.
- SMA - simple moving average. Usually used with number meaning amount of days based on.
 For instance, SMA-7 means that this is a simple moving average value based on seven days windows."""))

In [None]:
# World totals per day: sum the per-country daily changes for every date.
# Only the numeric columns are selected. The textual 'Name' column must not go
# through sum(): pandas >= 2.0 concatenates strings instead of dropping the
# column, which then breaks idxmax() and the int() conversion below.
world_stats_daily_df = (
    utils.storage.get_countries_report()[['Date', 'Confirmed_Change', 'Recovered_Change', 'Deaths_Change']]
    .groupby('Date')
    .sum()
    .rename(columns={'Confirmed_Change': 'Confirmed', 'Recovered_Change': 'Recovered', 'Deaths_Change': 'Deaths'})
)

world_stats_weekly_df = utils.data.per_week(world_stats_daily_df)


def _max_metrics(stats_df):
    """Map every column of ``stats_df`` to ``(index label of its maximum, maximum as int)``."""
    return {
        column: (when, int(stats_df.loc[when, column]))
        for column, when in stats_df.idxmax().items()
    }


world_daily_max_metrics = _max_metrics(world_stats_daily_df)
world_weekly_max_metrics = _max_metrics(world_stats_weekly_df)


def people_to_string(number):
    """Format ``number`` with spaces as thousands separators followed by ' people'."""
    return f"{number:,} people".replace(',', ' ')


def week_metrics_to_string(column):
    """Describe the weekly maximum of ``column``: the amount and the seven-day span starting at its index date."""
    week_start, amount = world_weekly_max_metrics[column]
    return f"{people_to_string(amount)} on a week {date_to_string(week_start)} - {date_to_string(week_start + utils.one_day*6)}"


def _current_and_previous(stats_df, current, previous, column):
    """Render ``column`` for the ``current`` row, with the ``previous`` row's value in parentheses."""
    return f"{people_to_string(stats_df.loc[current, column])} ({people_to_string(stats_df.loc[previous, column])})"


display(Markdown(f"""
## World stats
### This day
The number in parentheses represents a previous period - yesterday or the last week
- Today we have:
    - Confirmed: {_current_and_previous(world_stats_daily_df, today, yesterday, 'Confirmed')}
    - Recovered: {_current_and_previous(world_stats_daily_df, today, yesterday, 'Recovered')}
    - Deaths: {_current_and_previous(world_stats_daily_df, today, yesterday, 'Deaths')}
- During this week:
    - Confirmed: {_current_and_previous(world_stats_weekly_df, this_week, previous_week, 'Confirmed')}
    - Recovered: {_current_and_previous(world_stats_weekly_df, this_week, previous_week, 'Recovered')}
    - Deaths: {_current_and_previous(world_stats_weekly_df, this_week, previous_week, 'Deaths')}
- Total from the beginning of reports data:
    - Confirmed: {people_to_string(world_stats_daily_df['Confirmed'].sum())}
    - Recovered: {people_to_string(world_stats_daily_df['Recovered'].sum())}
    - Deaths: {people_to_string(world_stats_daily_df['Deaths'].sum())}

### Max values
- Daily
    - Confirmed: {people_to_string(world_daily_max_metrics['Confirmed'][1])} on {date_to_string(world_daily_max_metrics['Confirmed'][0])}
    - Recovered: {people_to_string(world_daily_max_metrics['Recovered'][1])} on {date_to_string(world_daily_max_metrics['Recovered'][0])}
    - Deaths: {people_to_string(world_daily_max_metrics['Deaths'][1])} on {date_to_string(world_daily_max_metrics['Deaths'][0])}
- Weekly
    - Confirmed: {week_metrics_to_string('Confirmed')}
    - Recovered: {week_metrics_to_string('Recovered')}
    - Deaths: {week_metrics_to_string('Deaths')}
"""))

In [None]:
display(Markdown("""
### World's dynamic
Below is the plot of world cases dynamic. And while deaths are pretty steady, the number of new cases increases.
"""))

# Number of major / minor intervals on every y axis in this cell.
y_ticks = 10
y_minor_ticks = 50

# Left edge of the date axis for every plot in this cell (parsed once).
world_plot_start = utils.str_to_datetime('01-03-2020')

# Per-country report; loaded once and shared by both seaborn panels below.
countries_report_df = utils.storage.get_countries_report()


def _style_metric_axis(ax, column, y_max, date_axis=True, x_grid=False):
    """Apply the common y-axis (and optionally x-axis) styling for one metric.

    Args:
        ax: Matplotlib axes to style.
        column: Metric name; used as the y label and as the key in ``column_colors``.
        y_max: Upper y limit; ticks are placed at ``y_max / y_ticks`` (major)
            and ``y_max / y_minor_ticks`` (minor) intervals.
        date_axis: When true, also set the x limits and the "Date" x label.
        x_grid: When true, draw grid lines along the x axis.
    """
    color = column_colors[column]
    ax.set_ylim(0, y_max)
    ax.set_yticks(np.arange(y_max + 1, step=y_max / y_ticks))
    ax.set_yticks(np.arange(y_max + 1, step=y_max / y_minor_ticks), minor=True)
    ax.set_ylabel(column, color=color)
    ax.tick_params(axis='y', labelcolor=color)
    if date_axis:
        ax.set_xlim(world_plot_start, today)
        ax.set_xlabel("Date")
    if x_grid:
        ax.grid(axis='x')


def _save_and_show(figure, filename, title):
    """Add legend and title to ``figure``, save it as ``./assets/img/<filename>.png`` and display it."""
    figure.legend(loc='upper left', bbox_to_anchor=(0, 0))
    figure.tight_layout(pad=3)
    figure.suptitle(title)
    # The saved area starts at negative coordinates, presumably so that the
    # legend anchored at the figure's (0, 0) corner is part of the image.
    pyplot.savefig(f'./assets/img/{filename}.png', bbox_inches=Bbox([[-.375, -.75], [figure_size[0], figure_size[1]]]), pad_inches=0)
    pyplot.close()
    display(image(filename, title))


# --- World totals: confirmed cases (left axis) and deaths (right axis) ---
# Round the confirmed maximum up to the next multiple of 50 000.
confirmed_y_max = 50_000 * int(np.ceil(world_stats_daily_df['Confirmed'].max() / 50_000))
deaths_y_max = confirmed_y_max / 10

pyplot.figure(figsize=figure_size)
ax = pyplot.subplot(111)

utils.plot.bar_with_sma_line(ax, world_stats_daily_df['Confirmed'], sma_window=10, label='Confirmed', bar_alpha=0.2, color=column_colors['Confirmed'])
_style_metric_axis(ax, 'Confirmed', confirmed_y_max, x_grid=True)

ax = ax.twinx()

utils.plot.bar_with_sma_line(ax, world_stats_daily_df['Deaths'], sma_window=10, label='Deaths', bar_alpha=0.2, color=column_colors['Deaths'])
# The twin axis shares x with the first one, so only its y side is styled.
_style_metric_axis(ax, 'Deaths', deaths_y_max, date_axis=False)

_save_and_show(ax.figure, 'confirmed_and_deaths', 'Confirmed cases and Deaths')

display(Markdown("Another plot to show mean value with a confidence interval of confirmed cases and deaths of all countries."))

# --- Per-country mean: confirmed cases (top panel) and deaths (bottom panel) ---
confirmed_y_max = 9600
deaths_y_max = 240

pyplot.figure(figsize=figure_size)
ax = pyplot.subplot(211)

sns.lineplot(x='Date', y='Confirmed_Change', data=countries_report_df, ax=ax, color=column_colors['Confirmed'], label='Confirmed', legend=None)
_style_metric_axis(ax, 'Confirmed', confirmed_y_max, x_grid=True)

ax = pyplot.subplot(212)
sns.lineplot(x='Date', y='Deaths_Change', data=countries_report_df, ax=ax, color=column_colors['Deaths'], label='Deaths', legend=None)
_style_metric_axis(ax, 'Deaths', deaths_y_max)

_save_and_show(ax.figure, 'confirmed_and_deaths_per_country', 'Confirmed cases and Deaths per country')

display(Markdown("In addition you can see the world dynamic in the following video:"))
for name, alt in [
    ("world_confirmed", "World confirmed cases"),
    ("world_confirmed_100k", "World confirmed cases per 100 000 people"),
    ("world_deaths", "World death cases"),
    ("world_deaths_100k", "World death cases per 100 000 people")
    ]:
    display(Markdown(f"- {alt}"))
    display(video(name, alt))


In [None]:
def get_top(df, sort_by, top=10):
    """Return the ``top`` columns of ``df`` with the largest value in row ``sort_by``.

    The columns are ordered descending by the row labelled ``sort_by``, values
    are cast to ``int`` and the frame is transposed, so the result has one row
    per original column (index named 'Country') and one column per original
    row (columns named 'Date').

    Args:
        df: Frame whose columns are ranked (presumably one column per country
            and one row per date - verify against utils.storage).
        sort_by: Row label used for ranking.
        top: Number of entries to keep.
    """
    ranked = df.sort_values(by=sort_by, axis=1, ascending=False).astype(int)
    return ranked.transpose().head(top).rename_axis(index='Country', columns='Date')


display(Markdown("""
### TOP-10 lists
#### By Confirmed cases
##### Per day
"""))
# Daily table covers the last seven days, weekly table the last thirty days.
top_10_daily = get_top(utils.storage.get_countries_report_by_column("Confirmed_Change", start_date=today - utils.one_day * 6), today)
top_10_weekly = get_top(utils.data.per_week(utils.storage.get_countries_report_by_column("Confirmed_Change", start_date=today - utils.one_day * 29)), this_week)
display(Markdown("You can see the countries list ordered by today's confirmed cases and their confirmed cases history for the last seven days in the table below."))
display(top_10_daily)

display(Markdown("The next table - is a country list ordered by this week's confirmed cases and their confirmed cases history for the last four weeks."))
display(top_10_weekly)

display(Markdown("##### Total"))
# Loaded once and reused for both the absolute and the per-100 000 ranking.
confirmed_total_df = utils.storage.get_countries_report_by_column("Confirmed", start_date=today)
top_10_total = get_top(confirmed_total_df, today)
top_10_total_per_hundreds = get_top(confirmed_total_df.apply(lambda x: utils.data.per_value(x, x.name, per=100_000)), today)
display(Markdown("A table below represents a TOP-10 countries list ordered by Confirmed cases in total."))
display(top_10_total)
display(Markdown("However, the previous table changes dramatically if we calculate the number of cases per 100 000 people in each country."))
display(top_10_total_per_hundreds)

display(Markdown("""
#### By Recovered cases
Since the statistics for recovered cases are pretty poor, there are no TOP lists because they will be very misleading.
"""))


display(Markdown("""
#### By Deaths cases
##### Per day
"""))
top_10_daily = get_top(utils.storage.get_countries_report_by_column("Deaths_Change", start_date=today - utils.one_day * 6), today)
top_10_weekly = get_top(utils.data.per_week(utils.storage.get_countries_report_by_column("Deaths_Change", start_date=today - utils.one_day * 29)), this_week)
display(Markdown("You can see the countries list ordered by today's deaths cases and their deaths cases history for the last seven days in the table below."))
display(top_10_daily)

# Text fixed: this table shows deaths history, not confirmed cases history.
display(Markdown("The next table - is a country list ordered by this week's deaths cases and their deaths cases history for the last four weeks."))
display(top_10_weekly)

display(Markdown("##### Total"))
deaths_total_df = utils.storage.get_countries_report_by_column("Deaths", start_date=today)
top_10_total = get_top(deaths_total_df, today)
top_10_total_per_hundreds = get_top(deaths_total_df.apply(lambda x: utils.data.per_value(x, x.name, per=100_000)), today)
# Text fixed: this table is ordered by deaths, not by confirmed cases.
display(Markdown("A table below represents a TOP-10 countries list ordered by Deaths cases in total."))
display(top_10_total)
display(Markdown("However, the previous table changes dramatically if we calculate number of cases per 100 000 people in each country"))
display(top_10_total_per_hundreds)

In [None]:
# First date shown on every graph in this section; parsed once and reused for
# the intro text, the data query and the x-axis limits.
start_date = utils.str_to_datetime('01-03-2020')

display(Markdown("### TOP-5 dynamic"))
display(Markdown(f"Let's build some graphics to see probably interesting TOP-5 countries changes in Confirmed cases and Deaths. I don't show TOP-10 countries because it makes graphs hard to read. All our graphs in this section are started from {date_to_string(start_date)}."))


def _plot_top5_per_day(column, filename, title, ylabel, ylim, log_scale=False):
    """Plot the daily values of ``column`` for today's TOP-5 countries, save and display the figure.

    Args:
        column: Report column passed to ``utils.storage.get_countries_report_by_column``.
        filename: Image name (without extension) under ``./assets/img``.
        title: Figure title; also used as the image alt text.
        ylabel: Label of the y axis.
        ylim: Tuple of positional arguments for ``ax.set_ylim``.
        log_scale: When true, switch the y axis to a logarithmic scale.

    Returns:
        The frame that was plotted: one column per TOP-5 country.
    """
    top_5 = get_top(utils.storage.get_countries_report_by_column(column, start_date=start_date), today, 5).transpose()

    pyplot.figure(figsize=figure_size)
    ax = pyplot.subplot(111)

    for country in top_5.columns:
        utils.plot.bar_with_sma_line(ax, top_5[country], 7, country, 0.1, countries_colors[country])

    ax.set_xlim(start_date, today)
    ax.set_ylim(*ylim)
    if log_scale:
        ax.set_yscale('log')
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Date')

    ax.figure.legend(loc='upper left', bbox_to_anchor=(0, 0))
    ax.figure.tight_layout(pad=3)
    ax.figure.suptitle(title)
    # The saved area starts at negative coordinates, presumably so that the
    # legend anchored at the figure's (0, 0) corner is part of the image.
    pyplot.savefig(f'./assets/img/{filename}.png', bbox_inches=Bbox([[-.375, -1.5], [figure_size[0], figure_size[1]]]), pad_inches=0)
    pyplot.close()
    display(image(filename, title))
    return top_5


top_5_daily = _plot_top5_per_day(
    "Confirmed_Change",
    'top5_confirmed_per_day',
    'Confirmed cases per day in TOP-5 countries',
    'Cases per day (log scale)',
    ylim=(1,),
    log_scale=True,
)

top_5_daily = _plot_top5_per_day(
    "Deaths_Change",
    'top5_deaths_per_day',
    'Deaths per day in TOP-5 countries',
    'Deaths per day',
    ylim=(0, 5000),
)

In [None]:
# Data sources and attributions, rendered as a Markdown list.
references_md = "\n".join([
    "",
    "## References",
    "- COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University (https://github.com/CSSEGISandData/COVID-19)",
    "- Data for Russia from March till end of August retrieved from Yandex Datalens Covid Dashboard (https://datalens.yandex/7o7is1q6ikh23)",
    "- Made with Natural Earth. Free vector and raster map data @ [naturalearthdata.com](https://naturalearthdata.com).",
    "---",
    "",
])
display(Markdown(references_md))

# Author footer. Written line by line so the trailing space after
# "Created by" is explicit.
footer_html = (
    '\n'
    '<p style="font-size:var(--jp-content-font-size3); color:#4f4f4f; text-align:right">\n'
    '<i>\n'
    'Created by \n'
    '<a href="https://www.linkedin.com/in/guest512/" class="fab fa-linkedin" style="color:#0A66C2; text-decoration:none"></a><a href="https://github.com/guest512" class="fab fa-github-square" style="color:#24292E; text-decoration:none"></a>Denis Legashov.\n'
    'Source code available on <a href="https://github.com/guest512/CovidSandbox">GitHub</a>\n'
    '</i>\n'
    '</p>\n'
)
display(HTML(footer_html))