In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import utils

from IPython.display import display, Markdown, Video
from matplotlib import pyplot

In [None]:
# Countries known to the storage layer (presumably their names — the same
# values are used as keys of countries_colors further down).
countries_list = utils.storage.get_countries()

# One fixed hex colour per reported metric; looked up by the plotting cells.
column_colors = dict(
    Confirmed='#ae4444',
    Recovered='#44ae44',
    Deaths='#444444',
)

def get_countries_palette():
    """Build a palette of distinct RGB colours, one record per country.

    Each channel is restricted to the inclusive range ``min_c..max_c`` (on a
    0..255 scale). The cube of allowed colours is treated as a three-digit
    number in base ``span`` (red is the most significant digit, blue the
    least) and sampled at ``countries_num`` evenly spaced positions.

    Returns:
        numpy.ndarray: structured array of ``countries_num`` records with
        float fields ``red``, ``green`` and ``blue`` scaled to 0..1, sorted
        in ascending order of the ``blue`` field.
    """
    max_c = 225
    min_c = 23
    # Deliberately hard-coded: don't use len(countries_list), since a change
    # in the number of countries could then go unnoticed and silently lead to
    # a worse palette.
    countries_num = 194
    # Number of values a single channel may take (both bounds included).
    span = max_c - min_c + 1

    # Evenly spaced positions inside the span**3 colour cube (truncated to int).
    colors_steps = np.linspace(0, span ** 3 - 1, countries_num, dtype=int)

    # Decode every position into base-`span` digits. The divisors must use the
    # same base as the upper bound above; decoding with (max_c - min_c) let the
    # red digit grow past max_c - min_c, i.e. the channel exceeded max_c.
    red, remainder = np.divmod(colors_steps, span ** 2)
    green, blue = np.divmod(remainder, span)

    # Shift the digits into min_c..max_c and scale to the 0..1 range.
    channels = (np.array([red, green, blue]).T + min_c) / 255

    colors = np.array(
        [tuple(rgb) for rgb in channels.tolist()],
        dtype=[('red', float), ('green', float), ('blue', float)],
    )
    return np.sort(colors, order=['blue'])

# Generate the palette once; it is only needed to fill countries_colors.
_countries_palette = get_countries_palette()

# Each country gets the palette record found at its position in countries_list.
countries_colors = {
    country: _countries_palette[countries_list.index(country)]
    for country in countries_list
}

# Drop the temporary palette and its generator from the notebook namespace.
del _countries_palette, get_countries_palette

In [None]:
# Reference periods used by the rest of the report.
today = utils.last_day
yesterday = today - utils.one_day
this_week = utils.last_week
previous_week = utils.last_week - utils.one_week


def date_to_string(d):
    """Format a timestamp as day, full month name and year.

    Args:
        d: object exposing ``.date()`` (e.g. a ``datetime`` or pandas
            ``Timestamp``).

    Returns:
        str: text in ``'%d %B, %Y'`` form; the month name comes from
        ``strftime`` and therefore follows the active locale.
    """
    return d.date().strftime('%d %B, %Y')


display(Markdown('# COVID-19 Sandbox report'))
display(Markdown('---'))
# NOTE(review): the line below was previously meant to run inside
# `with utils.setlocale_ctx('ru_RU'):` — presumably to localise month names.
display(Markdown(
    "Reports data covers period "
    f"from '_{date_to_string(utils.first_day)}_' "
    f"till '_{date_to_string(utils.last_day)}_'. "
    "Reports based on data gathered from Johns Hopkins University and Yandex company."
))

display(Markdown("There are several terms that you will often meet in these reports:"))
display(Markdown(
    "- Today - the last available day in reports data.\n"
    "- This week - the last available week that includes 'Today'. **NOTE:** Week starts from Monday.\n"
    "- SMA - simple moving average. Usually used with number meaning amount of days based on."
    " For instance, SMA-7 means that this is a simple moving average values based on 7 days windows."
))

In [None]:
display(Markdown("## World stats"))

# Metric columns of the world frames, in the order they are reported.
_world_metrics = ('Confirmed', 'Recovered', 'Deaths')

# Daily world totals: per-country daily changes summed for every date.
# 'Name' is intentionally NOT selected. It is presumably the country name
# (text): pandas < 2.0 silently dropped such columns from groupby().sum(),
# while pandas >= 2.0 concatenates the strings and keeps the column, which
# then breaks idxmax() below.
world_stats_daily_df = (
    utils.storage.get_countries_report()[
        ['Date', 'Confirmed_Change', 'Recovered_Change', 'Deaths_Change']
    ]
    .groupby('Date')
    .sum()
    .rename(columns={
        'Confirmed_Change': 'Confirmed',
        'Recovered_Change': 'Recovered',
        'Deaths_Change': 'Deaths',
    })
)

world_stats_weekly_df = utils.data.per_week(world_stats_daily_df)


def _max_metrics(stats_df):
    """Map each column of ``stats_df`` to ``(index label of its maximum, maximum as int)``."""
    return {
        column: (label, int(stats_df.loc[label, column]))
        for column, label in stats_df.idxmax().items()
    }


world_daily_max_metrics = _max_metrics(world_stats_daily_df)
world_weekly_max_metrics = _max_metrics(world_stats_weekly_df)


def people_to_string(number):
    """Format ``number`` with spaces as thousands separators, followed by ' people'."""
    return f"{number:,} people".replace(',', ' ')


def week_metrics_to_string(column):
    """Describe the weekly maximum of ``column``: the value and the 7-day span it covers."""
    week_start, value = world_weekly_max_metrics[column]
    week_end = week_start + utils.one_day * 6
    return f"{people_to_string(value)} on a week {date_to_string(week_start)} - {date_to_string(week_end)}"


def _current_vs_previous(stats_df, current, previous):
    """Build one markdown bullet per metric: value at ``current`` (value at ``previous``)."""
    return "\n".join(
        f"- {column}: {people_to_string(stats_df.loc[current, column])} "
        f"({people_to_string(stats_df.loc[previous, column])})"
        for column in _world_metrics
    )


display(Markdown("### This day"))
display(Markdown("Number in parentheses represents a previous period - yesterday or previous week"))
display(Markdown("Today we have:"))
display(Markdown(_current_vs_previous(world_stats_daily_df, today, yesterday)))
display(Markdown("During this week:"))
display(Markdown(_current_vs_previous(world_stats_weekly_df, this_week, previous_week)))
display(Markdown("Total from the beginning of reports data:"))
display(Markdown("\n".join(
    f"- {column}: {people_to_string(world_stats_daily_df[column].sum())}"
    for column in _world_metrics
)))


display(Markdown("#### Max values"))
_daily_max_lines = [
    f"    - {column}: {people_to_string(world_daily_max_metrics[column][1])}"
    f" on {date_to_string(world_daily_max_metrics[column][0])}"
    for column in _world_metrics
]
_weekly_max_lines = [
    f"    - {column}: {week_metrics_to_string(column)}"
    for column in _world_metrics
]
display(Markdown("\n".join(["- Daily", *_daily_max_lines, "- Weekly", *_weekly_max_lines])))

In [None]:
display(Markdown("### World's dynamic"))
display(Markdown("Below is the plot of world cases dynamic. And while deaths is pretty steady, the number of new cases increases."))
confirmed_y_max = 700_000
deaths_y_max = confirmed_y_max / 10
y_ticks = 10
y_minor_ticks = 50
# Left edge of the x axis shared by every plot in this cell.
world_plots_start = utils.str_to_datetime('01-03-2020')


def _style_count_axis(ax, y_max, label, color):
    """Apply the common y-axis look: 0..y_max range, major/minor ticks, coloured label.

    Args:
        ax: matplotlib axes to configure.
        y_max: upper y limit; also determines the tick spacing
            (``y_ticks`` major and ``y_minor_ticks`` minor intervals).
        label: y-axis label text.
        color: colour used for the label and the tick labels.
    """
    ax.set_ylim(0, y_max)
    ax.set_yticks(np.arange(y_max + 1, step=y_max / y_ticks))
    ax.set_yticks(np.arange(y_max + 1, step=y_max / y_minor_ticks), minor=True)
    ax.set_ylabel(label, color=color)
    ax.tick_params(axis='y', labelcolor=color)


# --- Figure 1: world daily confirmed cases and deaths on twin y axes ---
pyplot.figure(figsize=(10, 6))
ax = pyplot.subplot(111)

utils.plot.bar_with_sma_line(ax, world_stats_daily_df['Confirmed'], sma_window=10, label='Confirmed', bar_alpha=0.2, color=column_colors['Confirmed'])
_style_count_axis(ax, confirmed_y_max, "Confirmed", column_colors['Confirmed'])
ax.set_xlim(world_plots_start, today)
ax.set_xlabel("Date")
ax.grid(axis='x')

# Deaths share the x axis but get their own (right-hand) y scale.
ax = ax.twinx()

utils.plot.bar_with_sma_line(ax, world_stats_daily_df['Deaths'], sma_window=10, label='Deaths', bar_alpha=0.2, color=column_colors['Deaths'])
_style_count_axis(ax, deaths_y_max, "Deaths", column_colors['Deaths'])

ax.figure.legend(loc='upper left', bbox_to_anchor=(0, 0))
ax.figure.tight_layout(pad=3)
ax.figure.suptitle('Confirmed cases and Deaths')
pyplot.show()

display(Markdown("Another plot to show mean value with confidence interval of confirmed cases and deaths per country."))

# --- Figure 2: per-country daily changes, one panel per metric ---
pyplot.figure(figsize=(10, 6))
ax = pyplot.subplot(211)
confirmed_y_max = 4800
deaths_y_max = 160
# Loaded once; both panels plot from the same report.
countries_report_df = utils.storage.get_countries_report()

sns.lineplot(x='Date', y='Confirmed_Change', data=countries_report_df, ax=ax, color=column_colors['Confirmed'], label='Confirmed', legend=None)
_style_count_axis(ax, confirmed_y_max, "Confirmed", column_colors['Confirmed'])
ax.set_xlim(world_plots_start, today)
ax.set_xlabel("Date")
ax.grid(axis='x')

ax = pyplot.subplot(212)
sns.lineplot(x='Date', y='Deaths_Change', data=countries_report_df, ax=ax, color=column_colors['Deaths'], label='Deaths', legend=None)
_style_count_axis(ax, deaths_y_max, "Deaths", column_colors['Deaths'])
ax.set_xlim(world_plots_start, today)
ax.set_xlabel("Date")

ax.figure.legend(loc='upper left', bbox_to_anchor=(0, 0))
ax.figure.tight_layout(pad=3)
ax.figure.suptitle('Confirmed cases and Deaths per country')
pyplot.show()

In [None]:
def get_top(df, sort_by, top=10):
    """Return the ``top`` columns of ``df`` with the largest values in row ``sort_by``.

    Args:
        df: frame whose columns are ranked (values must be castable to int).
        sort_by: index label of the row that defines the ranking.
        top: number of entries to keep.

    Returns:
        Frame with the selected columns turned into rows (best first), values
        cast to int, index axis named 'Country' and column axis named 'Date'.
    """
    ranked = df.sort_values(by=sort_by, axis=1, ascending=False)
    as_rows = ranked.astype(int).transpose()
    return as_rows.head(top).rename_axis(index='Country', columns='Date')

display(Markdown("### TOP-10 lists"))


def _display_top_10_lists(column, noun):
    """Render the "Per day" and "Total" TOP-10 tables for one metric.

    Args:
        column: report column with the running total ('Confirmed' or
            'Deaths'); daily changes are read from ``f"{column}_Change"``.
        noun: lower-case wording of the metric used in the table captions,
            so that each section describes its own data.

    Returns:
        tuple: the displayed frames in order - daily, weekly, total and
        total per 100 000 people.
    """
    change_column = f"{column}_Change"

    display(Markdown(f"#### By {column} cases"))
    display(Markdown("##### Per day"))
    # Today plus the six preceding days.
    daily = get_top(utils.storage.get_countries_report_by_column(change_column, start_date=today - utils.one_day * 6), today)
    # Thirty days of daily changes aggregated per week.
    weekly = get_top(utils.data.per_week(utils.storage.get_countries_report_by_column(change_column, start_date=today - utils.one_day * 29)), this_week)
    display(Markdown(f"In a table below, you can see countries list ordered by today {noun} and their {noun} history for the last 7 days."))
    display(daily)

    display(Markdown(f"Next table - is a countries list ordered by this week {noun} and their {noun} history for the last 4 weeks."))
    display(weekly)

    display(Markdown("##### Total"))
    total = get_top(utils.storage.get_countries_report_by_column(column, start_date=today), today)
    total_per_100k = get_top(utils.storage.get_countries_report_by_column(column, start_date=today).apply(lambda x: utils.data.per_value(x, x.name, per=100_000)), today)
    display(Markdown(f"A table below represents a TOP-10 countries list ordered by {noun.capitalize()} in total."))
    display(total)
    display(Markdown("However, this table changes dramatically if we calculate number of cases per 100 000 people in each country"))
    display(total_per_100k)

    return daily, weekly, total, total_per_100k


top_10_daily, top_10_weekly, top_10_total, top_10_total_per_hundreds = _display_top_10_lists("Confirmed", "confirmed cases")

display(Markdown("#### By Recovered cases"))
display(Markdown("Since the statistics for recovered cases is pretty poor, there are no any TOP lists because they will be very misleading."))


# The captions used to be copy-pasted from the Confirmed section and described
# the Deaths tables as "confirmed cases"; they now name the right metric.
top_10_daily, top_10_weekly, top_10_total, top_10_total_per_hundreds = _display_top_10_lists("Deaths", "deaths")

In [None]:
display(Markdown("### TOP-5 dynamic"))

# First day shown on every graph of this section; defined before the intro
# text so the text and the plots cannot drift apart.
start_date = utils.str_to_datetime('01-03-2020')

display(Markdown(f"Let's build some graphics to see probably interesting TOP-5 countries changes in Confirmed cases and Deaths. I don't show TOP-10 countries because it makes graphs hard to read. All our graphs in this section are started from {date_to_string(start_date)}."))


def _plot_top_5_daily(column, y_limits, y_label, title, log_scale=False):
    """Plot daily values of ``column`` for today's TOP-5 countries.

    Args:
        column: report column with daily changes (e.g. 'Confirmed_Change').
        y_limits: positional arguments forwarded to ``ax.set_ylim``.
        y_label: y-axis label.
        title: figure title.
        log_scale: switch the y axis to a logarithmic scale when True.

    Returns:
        The plotted frame: one column per TOP-5 country.
    """
    top_5 = get_top(utils.storage.get_countries_report_by_column(column, start_date=start_date), today, 5).transpose()

    pyplot.figure(figsize=(10, 6))
    ax = pyplot.subplot(111)

    for country in top_5.columns:
        # Positional arguments are kept exactly as in the other calls of this
        # helper: presumably sma_window, label, bar_alpha, color — TODO confirm.
        utils.plot.bar_with_sma_line(ax, top_5[country], 7, country, 0.1, countries_colors[country])

    ax.set_xlim(start_date, today)
    ax.set_ylim(*y_limits)
    if log_scale:
        ax.set_yscale('log')
    ax.set_ylabel(y_label)
    ax.set_xlabel('Date')

    ax.figure.legend(loc='upper left', bbox_to_anchor=(0, 0))
    ax.figure.tight_layout(pad=3)
    ax.figure.suptitle(title)
    pyplot.show()
    return top_5


top_5_daily = _plot_top_5_daily(
    "Confirmed_Change",
    y_limits=(1,),
    y_label='Cases per day (log scale)',
    title='Confirmed cases per day in TOP-5 countries',
    log_scale=True,
)

top_5_daily = _plot_top_5_daily(
    "Deaths_Change",
    y_limits=(0, 3000),
    y_label='Deaths per day',
    title='Deaths per day in TOP-5 countries',
)