In [None]:
import pandas as pd
import numpy as np
from utils import storage, plot, helpers

from IPython.display import display, Markdown
from matplotlib import pyplot



In [None]:
# Reporting period as provided by the storage layer; the most recent
# available day is treated as "today" throughout the notebook.
start_day, finish_day = storage.get_dates()
one_day = pd.Timedelta('1 days')
today = finish_day
yesterday = today - one_day

display(Markdown('# Песочница по статистике по коронавирусу COVID-19'))
display(Markdown('---'))

# Dates are formatted inside the locale context so that %B is rendered
# with whatever locale helpers.setlocale_ctx('ru') activates.
with helpers.setlocale_ctx('ru'):
    date_format = '%d %B, %Y'
    period_start = start_day.date().strftime(date_format)
    period_finish = finish_day.date().strftime(date_format)
    display(Markdown(
        f"DISCLAIMER: Данные в отчетах охватывают период с '_{period_start}_' по '_{period_finish}_'. "
        "        За \"Сегодня\" принят последний доступный день в отчетах"
    ))

In [None]:
display(Markdown('## Топ десять стран по количеству заболевших на "Сегодня"'))
display(Markdown('В таблице представлены ТОП-10 стран отсортированны по количеству заболевших на "Сегодня", а также с данными по количеству заболевших за последние 7 дней'))

# Seven-day window ending on "today" (today plus the six preceding days).
week_start = today - one_day * 6
last_week_by_country = storage.get_countries_report_by_column(
    "Confirmed_Change",
    start_date=week_start,
).fillna(0)

# Columns are ordered ascending by today's value, so the ten right-most
# columns are the ten largest; the bare expression is the cell output.
last_week_by_country.sort_values(by=today, axis=1).iloc[:, -10:]

In [None]:
display(Markdown('## Динамика ТОП-5 стран по количеству заболевших на "Сегодня"'))
display(Markdown('Здесь и ниже суффикс SMA означает среднее количество за N дней.'))

column_name = 'Confirmed_Change'
sma_column = column_name + '_SMA3'

# Rank columns by today's value (ascending) and keep the five right-most.
latest_by_country = storage.get_countries_report_by_column(
    "Confirmed_Change",
    start_date=today,
).fillna(0)
countries_list = list(latest_by_country.sort_values(by=today, axis=1).columns[-5:])

ax = pyplot.figure(figsize=(18, 10)).add_subplot(111)
ax.xaxis_date()

for country in countries_list:
    country_df = storage.get_country_report(country)

    # 3-day rolling mean is stored on the frame next to the raw daily values.
    country_df[sma_column] = country_df[column_name].rolling(window=3).mean()

    # Faint bars for raw daily values, a labelled line for the smoothed ones.
    ax.bar(country_df.index, country_df[column_name].values, alpha=0.1)
    ax.plot(country_df.index, country_df[sma_column].values, label=country + '-SMA3')

ax.legend()
ax.set_xlim(pd.to_datetime('15-03-2020', dayfirst=True))
ax.set_ylim(bottom=100)
ax.set_yscale("log")
pyplot.show()

In [None]:
# Overlay the normalized daily-confirmed series of every country as
# semi-transparent bars on one axes.
ax = pyplot.figure(figsize=(18, 10)).add_subplot(111)
ax.xaxis_date()

all_countries = storage.get_countries()

# Loop-invariant opacity: each country contributes an equal share, so fully
# overlapping bars add up to an opaque one. max() guards the empty case.
bar_alpha = 1 / max(len(all_countries), 1)

for country in all_countries:
    country_df = storage.get_country_report(country)

    # Keep the normalized series in a local instead of writing columns back
    # onto the frame returned by storage. Wrapping it in a Series indexed like
    # the frame aligns it the same way a column assignment would.
    # The previously computed SMA7/SMA14 normalizations were never used and
    # have been dropped.
    normalized_daily = pd.Series(
        helpers.normalize(country_df['Confirmed_Change']),
        index=country_df.index,
    )

    ax.bar(country_df.index, normalized_daily.values, alpha=bar_alpha, color="Blue")

ax.set_xlim(pd.to_datetime('01-03-2020', dayfirst=True))
# NOTE(review): the [0, 1] limits assume helpers.normalize scales into that range — confirm.
ax.set_ylim(bottom=0, top=1)
ax.set_title("Нормализованное количество заболевших в день")
pyplot.show()


In [None]:
# Shared inputs for the Russian-regions cells below.
column_name = "Confirmed_Change"
start_date = pd.to_datetime('01-04-2020', dayfirst=True)

# Moscow on its own, trimmed to start_date onwards.
moscow_report = storage.get_region_report("Russia", "Москва")
moscow_df = moscow_report[start_date:]

# Per-region values of the same column with Moscow excluded.
regions_df = storage.get_regions_report_by_column(
    "Russia",
    column_name,
    exclude=["Москва"],
    start_date=start_date,
)


In [None]:
ax = pyplot.figure(figsize=(18, 10)).add_subplot(111)
ax.xaxis_date()

# Zero the column first so that re-running the cell does not fold a previous
# 'Total' into the new row-wise sum.
regions_df['Total'] = 0
regions_df['Total'] = regions_df.sum(axis=1)

# (legend label, daily series) pairs, drawn in this order: bars first,
# then the 3-day rolling mean as a line.
series_to_draw = (
    ('Регионы', regions_df['Total']),
    ('Москва', moscow_df[column_name]),
)
for series_label, daily_series in series_to_draw:
    ax.bar(daily_series.index, daily_series.values, label=series_label, alpha=0.3)
    ax.plot(
        daily_series.index,
        daily_series.rolling(window=3).mean(),
        label=series_label + '-SMA3',
    )

plot.draw_key_russian_dates(ax)

# Y-axis limits are left automatic.
ax.legend(loc='upper left')
ax.set_title('Количество заболевших в день в Москве и остальной России')
ax.grid(axis='y', color='black', linestyle='dashed', alpha=0.4)
pyplot.show()

In [None]:
ax = pyplot.figure(figsize=(20, 15)).add_subplot(111)
ax.xaxis_date()

column_name = 'Confirmed_Change'
highlighted_regions = (
    "Санкт-Петербург",
    "Ростовская обл.",
    "Краснодарский край",
    "Хабаровский край",
)

for region_name in highlighted_regions:
    # Region report trimmed to start_date onwards.
    region_df = storage.get_region_report("Russia", region_name)[start_date:]
    daily_series = region_df[column_name]
    series_label = f"{region_name}-{column_name}"

    # Raw daily values as bars, 3-day rolling mean as a line.
    ax.bar(region_df.index, daily_series.values, label=series_label, alpha=0.3)
    ax.plot(
        region_df.index,
        daily_series.rolling(window=3).mean().values,
        label=series_label + '-SMA3',
        alpha=0.9,
    )

plot.draw_key_russian_dates(ax)

# Y-axis limits are left automatic.
ax.legend(loc='upper left')
pyplot.show()

In [None]:
def foo(df: pd.DataFrame, name: str):
    """Draw a 2x2 figure with daily, active, weekly and monthly statistics.

    Panels (left to right, top to bottom):
      1. daily confirmed / recovered / deaths bars with 7-row rolling means
         (deaths are stacked on top of recovered);
      2. the ``Active`` column with its 7-row rolling mean;
      3. weekly sums of the three daily counters;
      4. monthly sums of the three daily counters.

    Parameters
    ----------
    df : pd.DataFrame
        Frame providing ``Confirmed_Change``, ``Recovered_Change``,
        ``Deaths_Change`` and ``Active`` columns; it is resampled with
        ``"1W"`` and ``"1M"`` rules, so it needs an index ``resample`` accepts.
    name : str
        Text used as the figure's super-title.

    Uses the notebook-level ``one_day`` timedelta, ``pyplot`` and the project
    ``plot`` module. Shows the figure and returns nothing.
    """
    counters = ("Confirmed_Change", "Recovered_Change", "Deaths_Change")
    group_bar_width = 2

    def sma7(series):
        # 7-row rolling mean of a series.
        return series.rolling(window=7).mean()

    def draw_grouped_bars(axes, stamps, totals, caption):
        # Three bar series side by side, each shifted by one more day.
        confirmed_total, recovered_total, deaths_total = totals
        axes.bar(stamps, confirmed_total, label='Заболевшие', width=group_bar_width)
        axes.bar(stamps + one_day, recovered_total, label='Выздоровевшие', width=group_bar_width)
        axes.bar(stamps + one_day * 2, deaths_total, label='Смерти', width=group_bar_width)
        plot.setup_axes_for_russian_regions_stat(axes, caption)

    # Aggregate first, then pull out every series that will be plotted.
    by_week = df.resample("1W").sum()
    by_month = df.resample("1M").sum()

    days = df.index

    new_cases, recoveries, fatalities = (getattr(df, column) for column in counters)
    new_cases_sma = sma7(new_cases)
    recoveries_sma = sma7(recoveries)
    fatalities_sma = sma7(fatalities)

    weekly_totals = [getattr(by_week, column) for column in counters]
    monthly_totals = [getattr(by_month, column) for column in counters]

    currently_ill = df.Active
    currently_ill_sma = sma7(currently_ill)

    pyplot.figure(figsize=(36, 20))

    # Panel 1: day-to-day counters.
    daily_ax = pyplot.subplot(2, 2, 1)
    daily_ax.bar(days, new_cases, label='Заболевшие', alpha=0.3)
    daily_ax.plot(days, new_cases_sma, label='Заболевшие-SMA7')
    daily_ax.bar(days, recoveries, label='Выздоровевшие', alpha=0.3)
    daily_ax.plot(days, recoveries_sma, label='Выздоровевшие-SMA7')
    # Deaths sit on top of the recovered bars, so their line is the sum of
    # both rolling means.
    daily_ax.bar(days, fatalities, label='Смерти', alpha=0.3, bottom=recoveries)
    daily_ax.plot(days, recoveries_sma + fatalities_sma, label='Смерти-SMA7')
    plot.setup_axes_for_russian_regions_stat(daily_ax, "Статистика день ко дню")

    # Panel 2: the Active column.
    active_ax = pyplot.subplot(2, 2, 2)
    active_ax.bar(days, currently_ill, label='Больные', alpha=0.3)
    active_ax.plot(days, currently_ill_sma, label='Больные-SMA7')
    plot.setup_axes_for_russian_regions_stat(active_ax, "Количество больных")

    # Panels 3 and 4: weekly and monthly sums share one layout.
    period_panels = (
        (by_week.index, weekly_totals, "Статистика неделя к неделе"),
        (by_month.index, monthly_totals, "Статистика месяц к месяцу"),
    )
    for slot, (stamps, totals, caption) in enumerate(period_panels, start=3):
        draw_grouped_bars(pyplot.subplot(2, 2, slot), stamps, totals, caption)

    pyplot.suptitle(name)
    pyplot.show()

# Hand-picked regions; the full list could come from
# list(utils.get_country_regions("Russia")) instead.
regions = [
    "Санкт-Петербург",
    "Краснодарский край",
    "Ростовская обл.",
    "Хабаровский край",
    "Москва",
    "Московская обл.",
    "Татарстан",
]

# Number of leading rows cut from every report before plotting
# (presumably the earliest, near-empty days — confirm).
leading_rows_skipped = 60

# Country-wide figure first, then one figure per region.
foo(storage.get_country_report("Russia")[leading_rows_skipped:], "Россия")

for region in regions:
    name = region
    dfdf = storage.get_region_report("Russia", name)[leading_rows_skipped:]
    foo(dfdf, name)



In [None]:
# Overlay the normalized daily-confirmed series of every region of the
# country and draw the cross-region mean and median on top.
ax = pyplot.figure(figsize=(18, 10)).add_subplot(111)
ax.xaxis_date()

country_name = "Russia"
all_regions = storage.get_country_regions(country_name)
period_start = pd.to_datetime("01-04-2020", dayfirst=True)

# Loop-invariant opacity: each region contributes an equal share.
# max() guards the empty case.
bar_alpha = 1 / max(len(all_regions), 1)

# One column per region; later cells read total.Mean and total.index.
total = pd.DataFrame()

for region in all_regions:
    region_df = storage.get_region_report(country_name, region)[period_start:]

    # Keep the normalized series in a local instead of assigning a column on
    # a sliced frame (which triggers SettingWithCopyWarning). Indexing it like
    # the frame aligns it the same way a column assignment would.
    normalized_daily = pd.Series(
        helpers.normalize(region_df['Confirmed_Change']),
        index=region_df.index,
    )

    ax.bar(region_df.index, normalized_daily.values, alpha=bar_alpha, color="Blue")
    total[region] = normalized_daily

# Mean is appended first, so the median must skip that last column.
total["Mean"] = total.mean(axis=1)
total["Median"] = total.iloc[:, :-1].median(axis=1)

# The lines carry labels so that the legend below actually has entries.
ax.plot(total.index, total.Mean, color="Red", label="Среднее")
ax.plot(total.index, total.Median, color="Yellow", label="Медиана")

# NOTE(review): the [0, 1] limits assume helpers.normalize scales into that range — confirm.
ax.set_ylim(bottom=0, top=1)
ax.grid(axis='y', color='black', linestyle='dashed', alpha=0.4)
ax.legend(loc='upper left')
ax.set_title("Нормализованное количество заболевших в день")
pyplot.show()


In [None]:
ax = pyplot.figure(figsize=(18, 10)).add_subplot(111)
ax.xaxis_date()

# Per-region daily values for the country, summed row-wise into one series.
report_start = pd.to_datetime("01-04-2020", dayfirst=True)
russia = storage.get_regions_report_by_column(
    country_name,
    "Confirmed_Change",
    start_date=report_start,
)
russia["Confirmed_Change"] = russia.sum(axis=1)

# Mean of the per-region normalized series (red) against the normalized
# country-wide total (orange); `total` comes from the preceding cell.
curves = (
    (total.index, total.Mean, "Red"),
    (russia.index, russia.Confirmed_Change, "Orange"),
)
for timeline, values, colour in curves:
    ax.plot(timeline, helpers.normalize(values), color=colour)

ax.set_ylim(0, 1)
ax.grid(axis='y', color='black', linestyle='dashed', alpha=0.4)
# No legend and no key-date markers are drawn on this figure.
pyplot.show()