Import modules

In [None]:
import geopandas as gpd
import numpy as np
import imageio
import os
import itertools
import shutil
import argparse

from matplotlib import pyplot, cm, colors
from mpl_toolkits.axes_grid1 import make_axes_locatable
from tqdm import tqdm
from multiprocess import Pool, cpu_count

import utils

In [None]:
# Command-line options of the notebook/script.
parser = argparse.ArgumentParser(
    description="Creates video animations from COVID daily reports",
)
parser.add_argument(
    "--minimal", "--min",
    action='store_true',  # flag is False unless it is given on the command line
    help="Create only necessary animations",
)

# `parse_known_args` returns a `(namespace, unrecognised_arguments)` tuple instead of
# failing on unknown arguments, so the parsed options are read as `args[0]`.
args = parser.parse_known_args()


Prepare environment and load data

In [None]:
# Country outlines and first-level administrative regions (states/provinces).
world_shape_df = gpd.read_file('maps/ne_10m_admin_0_sovereignty.shp')
world_states_shape_df = gpd.read_file('maps/ne_10m_admin_1_states_provinces.shp')

# The Russian regions are only needed for the full (non-minimal) run.
# An explicit copy is taken because this frame is modified later on; editing a
# filtered selection of `world_states_shape_df` directly triggers pandas'
# SettingWithCopyWarning.
russia_shape_df = (
    None if args[0].minimal
    else world_states_shape_df[world_states_shape_df.admin == 'Russia'].copy()
)

world_report_df = utils.storage.get_countries_report()
russia_report_df = None if args[0].minimal else utils.storage.get_regions_report('Russia')

# Output and scratch directories. `exist_ok=True` creates missing parents and
# makes the cell safe to re-run when the directories already exist.
os.makedirs('./assets/video', exist_ok=True)
os.makedirs('./temp', exist_ok=True)

Process data - add more metrics if needed

In [None]:
# Per-country statistics table; it is looked up by country name for its 'Continent' column below.
world_stats = utils.storage.get_countries_stats()

def augment_report(df, country, region = None):
    """Add per-population and normalised metric columns for one country or region.

    The rows of `df` whose `Name` equals `region` (when given) or `country`
    (otherwise) are updated in place. For each of the four base metrics the
    following columns are written:

    * `<metric>_per_capita`, `<metric>_per_1k`, `<metric>_per_100k` - the result
      of `utils.data.per_value` with `per` set to 1, 1000 and 100000;
    * `<metric>_Norm` - the result of `utils.data.normalize`.

    Parameters:
        df: report dataframe with a `Name` column and the base metric columns.
        country: country name, always forwarded to `utils.data.per_value`.
        region: optional region name; when truthy it selects the rows to update.
    """
    metrics = ['Confirmed', 'Deaths', 'Confirmed_Change', 'Deaths_Change']
    # Divisor passed as `per` -> suffix of the generated column.
    suffix_by_scale = {1: 'per_capita', 1000: 'per_1k', 100_000: 'per_100k'}

    target = region if region else country
    rows = df.Name == target

    for metric, (scale, suffix) in itertools.product(metrics, suffix_by_scale.items()):
        df.loc[rows, f'{metric}_{suffix}'] = utils.data.per_value(
            df.loc[rows, metric],
            country,
            region,
            per=scale)

    for metric in metrics:
        df.loc[rows, f'{metric}_Norm'] = utils.data.normalize(df.loc[rows, metric])

# World report: add the derived metric columns country by country and copy each
# country's continent from the statistics table onto its report rows.
world_countries = tqdm(utils.storage.get_countries(), desc='Add new columns to world report')
for country in world_countries:
    augment_report(world_report_df, country)
    country_rows = world_report_df.Name == country
    world_report_df.loc[country_rows, 'Continent'] = world_stats.loc[country, 'Continent']

# Russian regions are processed only in the full (non-minimal) run.
if not args[0].minimal:
    russia_regions = tqdm(utils.storage.get_country_regions('Russia'), desc='Add new columns to russia report')
    for region in russia_regions:
        augment_report(russia_report_df, 'Russia', region)

Rename or remove some countries and regions so that the shape and report dataframes can be joined by name

In [None]:
# Name used in the report -> name used in the `ADMIN` column of the world shapes.
report_to_shape_names = {
    'North Macedonia': 'Macedonia',
    'Holy See': 'Vatican',
    'Cote d\'Ivoire': 'Ivory Coast',
    'Congo (Kinshasa)': 'Democratic Republic of the Congo',
    'Congo (Brazzaville)': 'Republic of the Congo',
    'Bahamas': 'The Bahamas',
    'Serbia': 'Republic of Serbia',
    'Sao Tome and Principe': 'São Tomé and Principe',
    'Tanzania': 'United Republic of Tanzania',
    'UK': 'United Kingdom',
    'US': 'United States of America',
}
for data, shape in report_to_shape_names.items():
    world_report_df.loc[world_report_df.Name == data, 'Name'] = shape

# Report entries that are dropped before joining (presumably they have no
# matching shape - confirm against the shapefile).
removed_names = ['West Bank and Gaza', 'Timor-Leste', 'Hong Kong', 'Macau']
world_report_df = world_report_df[~world_report_df.Name.isin(removed_names)]

# Relabel the separate 'Baykonur Cosmodrome' shape as part of Kazakhstan.
world_shape_df.loc[world_shape_df['ADMIN'] == 'Baykonur Cosmodrome', 'ADMIN'] = 'Kazakhstan'


if not args[0].minimal:
    # Work on an independent copy: the frame is a filtered selection of the
    # states/provinces shapes, and modifying such a selection in place triggers
    # pandas' SettingWithCopyWarning.
    russia_shape_df = russia_shape_df.copy()

    # NOTE(review): 1442 is a hard-coded row label; it presumably identifies the
    # Altai Krai shape whose `name_ru` is missing - confirm against the shapefile
    # version in use (a label that is absent would be appended as a new row).
    russia_shape_df.loc[1442, 'name_ru'] = "Алтайский край"
    # Shapes without a Russian name cannot be matched to the report; drop them.
    russia_shape_df = russia_shape_df.dropna(subset=['name_ru'])

    # (name used in the report, name used in the shapefile): the shape names are
    # rewritten to the report spelling.
    for data, shape in [
        ('Крым', 'Автономная Республика Крым'),
        ('Алтай', 'Республика Алтай'),
        ('Еврейская АО', 'Еврейская автономная область'),
        ('Карачаево-Черкессия', 'Карачаево-Черкесия'),
        ('Карелия', 'Республика Карелия'),
        ('Коми', 'Республика Коми'),
        ('Ненецкий АО', 'Ненецкий автономный округ'),
        ('Северная Осетия', 'Республика Северная Осетия-Алания'),
        ('Саха (Якутия)', 'Якутия'),
        ('ХМАО – Югра', 'Ханты-Мансийский автономный округ — Югра'),
        ('Чукотский АО', 'Чукотский автономный округ'),
        ('Ямало-Ненецкий АО', 'Ямало-Ненецкий автономный округ'),
    ]:
        russia_shape_df.loc[russia_shape_df.name_ru == shape, 'name_ru'] = data

    # Abbreviate a trailing "область" to "обл.": the last four characters are
    # replaced by a dot.
    russia_shape_df['name_ru'] = russia_shape_df['name_ru'].apply(lambda x: x[:-4] +'.' if x.endswith('область') else x)

Prepare shapes dataframes

In [None]:
# Reproject the world shapes to EPSG:4326.
world_shape_df = world_shape_df.to_crs('epsg:4326')

# Europe is the subset of world shapes whose name appears in the report with
# the continent set to 'Europe'.
european_names = set(world_report_df.loc[world_report_df.Continent == 'Europe', 'Name'])
europe_shape_df = world_shape_df.loc[world_shape_df.ADMIN.isin(european_names)]

# The Russian regions use their own projection (EPSG:5940); full run only.
if not args[0].minimal:
    russia_shape_df = russia_shape_df.to_crs('epsg:5940')

Preparations for image rendering:
- Set the `fin` and `step` variables
- Set `pool_size`. CAUTION: large values can cause out-of-memory errors and crash the kernel
- Set the colormaps

In [None]:
# `step` is the increment between consecutive frames; `fin` bounds the rendered
# date range and is the date used to derive the colour scale of cumulative metrics.
step = utils.one_day
fin =  utils.last_day
# Number of worker processes used to render frames: 75% of the CPU count, at least one.
pool_size = max(1, int(cpu_count()*.75))
# Background colour of the saved frames; also used for the cells of the top-10 table.
sea_color = '#CBE8FE'

# Colour stops as (position in [0, 1], colour) pairs.
# General metrics: green at the low end, through yellow and orange, to red at the top.
general_stops = [
    (0, '#00cc00'),
    (0.2, '#28a428'),
    (0.4, '#7ba428'),
    (0.5, '#ccad00'),
    (0.7, '#e69500'),
    (0.9, '#cc3600'),
    (1, '#ba1234'),
]
cmap_general = colors.LinearSegmentedColormap.from_list('test', general_stops)

# Death metrics: light grey at the low end, through yellow and red, to black at the top.
death_stops = [
    (0, '#e9edee'),
    (0.1, '#ffff91'),
    (0.2, '#ff713f'),
    (0.5, '#a42c2b'),
    (0.7, '#595959'),
    (1, '#000000'),
]
cmap_deaths = colors.LinearSegmentedColormap.from_list('test', death_stops)

# Rendering presets keyed by map name. `generate_frames` unpacks each tuple as
# (shape_df, report_df, start, ax_xlim, ax_ylim, table_offset):
#   shape_df     - shapes to draw
#   report_df    - report joined to the shapes by name
#   start        - first rendered day
#   ax_xlim      - x-axis limits of the map, or None to leave them untouched
#   ax_ylim      - y-axis limits of the map, or None to leave them untouched
#   table_offset - (x0, y0) of the top-10 table's bounding box
# NOTE(review): the date strings look like day-month-year; confirm against
# `utils.str_to_datetime`.
# In a minimal run the 'russia' entry holds None for both dataframes.
data_selector = {
    'world' : (
        world_shape_df,
        world_report_df,
        utils.first_day,
        (-180, 180),
        (-90, 90),
        (0.01, 0.1)
    ),
        
    'europe' : (
        europe_shape_df,
        world_report_df,
        utils.str_to_datetime('01-02-2020'),
        (-20, 50),
        (30, 73),
        (-0.05, 0.01)
    ),
        
    'russia' : (
        russia_shape_df, 
        russia_report_df,
        utils.str_to_datetime('15-03-2020'),
        None,
        None,
        (0.01, 0.01)
    )
}

Define helper functions

In [None]:
# Module-level figure and axes reused across `process_day` calls; created lazily on the first call.
fig,ax = None, None

def get_fig_axes(cmap, norm):
    """Create the 24x14 inch figure used for a frame.

    The figure holds a single map axes plus a thin horizontal colorbar
    attached below it, built from the given `cmap` and `norm`.

    Returns:
        tuple: `(figure, map_axes)`; the colorbar axes is not returned.
    """
    figure = pyplot.figure(figsize=(24,14))
    figure.set_tight_layout({"pad":0.1})

    map_axes = figure.add_subplot(1,1,1)

    # Reserve a strip (1% of the map axes) at the bottom for the colorbar.
    colorbar_axes = make_axes_locatable(map_axes).append_axes("bottom", size="1%", pad=0.05)
    color_scale = cm.ScalarMappable(norm=norm, cmap=cmap)
    figure.colorbar(color_scale, ax=map_axes, cax=colorbar_axes, orientation='horizontal')

    return figure, map_axes

def draw_top_10(df, name_column, data_column, ax, 
                name_name='Country', data_name = 'Cases',
                as_int = True, x0 = 0.01, y0 = 0.01):
    """Draw a table with the ten largest values of `data_column` onto `ax`.

    Rows with a missing `data_column` value are ignored. Nothing is drawn when
    no rows remain.

    Parameters:
        df: dataframe holding the `name_column` and `data_column` columns.
        name_column: column with the row captions.
        data_column: column to rank by, largest first.
        ax: axes providing a `table` method.
        name_name: header of the caption column.
        data_name: header of the value column.
        as_int: format values as integers when true, otherwise with two decimals.
        x0, y0: lower-left corner of the table's bounding box.
    """
    if as_int:
        def formatter(value):
            return '{:d}'.format(int(value))
    else:
        def formatter(value):
            return '{:.2f}'.format(float(value))

    ranked = df.dropna(subset=[data_column]).sort_values(by = data_column, ascending = False)
    leaders = ranked.head(10)
    table_data = [
        [caption, formatter(amount)]
        for caption, amount in zip(leaders[name_column], leaders[data_column])
    ]

    count = len(table_data)
    if count == 0:
        return

    # Every cell and header uses the sea colour; the table height grows with
    # the number of rows (plus one for the header).
    table = ax.table(table_data,
                     colLabels=[name_name, data_name],
                     rowLabels=list(range(1, count+1)),
                     colWidths=[2, 1],
                     cellColours=np.full((count, 2), sea_color),
                     rowColours=np.full(count, sea_color),
                     colColours=np.full(2, sea_color),
                     bbox=[x0, y0, .2, .027*(count+1)])

    table.auto_set_font_size(False)
    table.set_fontsize(10)

def process_day (day,
                 shape_df,
                 report_df,
                 column_name,
                 vmin,
                 vmax,
                 cmap,
                 norm,
                 annotation_text,
                 folder_name,
                 frame_title = None,
                 ax_xlim = (-180, 180),
                 ax_ylim = (-90,90),
                 annotation_table_column_name = 'Country',
                 annotation_table_column_data = 'Cases',
                 annotation_table_data_as_int = True,
                 annotation_table_x0 = 0.01,
                 annotation_table_y0 = 0.01,
                 shape_df_index = 'ADMIN', 
                 report_df_index = 'Name'):
    """Render the map of a single day and save it as a JPEG frame.

    The report rows of `day` are joined to the shapes by name, drawn as a
    choropleth of `column_name`, annotated with `annotation_text` and a top-10
    table, and written to `./temp/<folder_name>/<YYYY-MM-DD>.jpg`.

    The module-level `fig`/`ax` pair is created on the first call and reused
    afterwards; the axes are cleared at the end of every call.

    Parameters:
        day: date of the frame; compared with `report_df.Date`.
        shape_df, report_df: shapes and report data to join.
        column_name: report column to colour the shapes by.
        vmin, vmax, cmap, norm: colour scale settings passed to the plot call.
        annotation_text: text placed at figure pixel (20, 950).
        folder_name: sub-folder of `./temp` receiving the frame.
        frame_title: optional figure title.
        ax_xlim, ax_ylim: axis limits; a falsy value leaves the limit untouched.
        annotation_table_*: header names, number format and position of the
            top-10 table.
        shape_df_index, report_df_index: join columns of the two dataframes.
    """
    global fig, ax
    if not fig:
        fig,ax = get_fig_axes(cmap, norm)

    if ax_xlim:
        x_low, x_high = ax_xlim[0], ax_xlim[1]
        ax.set_xlim(x_low, x_high)

    if ax_ylim:
        y_low, y_high = ax_ylim[0], ax_ylim[1]
        ax.set_ylim(y_low, y_high)

    ax.set_axis_off()

    if frame_title:
        fig.suptitle(frame_title, fontsize = 36)

    # Attach the day's values to the shapes; shapes without data keep a missing value.
    day_values = report_df[report_df.Date == day].set_index(report_df_index)[column_name]
    frame_df = shape_df.set_index(shape_df_index).join(day_values).reset_index()

    # First a white base layer with borders, then the coloured layer on top.
    frame_df.plot(ax=ax, color='white', edgecolor='black', linewidth=1)
    frame_df.plot(ax=ax, column=column_name, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap, norm=norm, alpha=0.9)

    ax.annotate(annotation_text, xy=(20,950), xycoords='figure pixels', fontsize=24)
    draw_top_10(frame_df,
                shape_df_index,
                column_name,
                ax,
                name_name = annotation_table_column_name,
                data_name = annotation_table_column_data,
                as_int = annotation_table_data_as_int,
                x0 = annotation_table_x0,
                y0 = annotation_table_y0)

    frame_path = f'./temp/{folder_name}/{day.date().strftime("%Y-%m-%d")}.jpg'
    fig.savefig(frame_path, dpi=72, facecolor=sea_color)
    # Reset the shared axes so the next frame starts from an empty map.
    ax.clear()


def make_video(name, clean_data = True):
    """Assemble the frames in `./temp/<name>` into `./assets/video/<name>.mp4`.

    Parameters:
        name: frame folder name below `./temp`; also the video file name.
        clean_data: remove the frame folder after the video is written.
    """
    frames_dir = os.path.abspath(f'./temp/{name}')

    # `os.listdir` returns entries in arbitrary order. Frames are saved as
    # `YYYY-MM-DD.jpg`, so sorting the names puts them in chronological order.
    images = sorted(os.listdir(frames_dir))

    with imageio.get_writer(f'./assets/video/{name}.mp4', mode='I', fps=6) as writer:
        for image_name in images:
            image = imageio.imread(os.path.join(frames_dir, image_name))
            writer.append_data(image)

    if clean_data:
        shutil.rmtree(f'./temp/{name}')
            

def generate_frames(shape, column, suffix, idx, total):    
    """Render one frame per day for a (shape, column, suffix) variant.

    The frames are rendered by `process_day` in a pool of `pool_size` worker
    processes and written to `./temp/<folder_name>`.

    Parameters:
        shape: key of `data_selector` ('world', 'europe' or 'russia').
        column: base report column, e.g. 'Confirmed' or 'Deaths_Change'.
        suffix: '' for raw values, '100k' for the per-100k column or 'Norm'
            for the normalised column.
        idx, total: position of this variant and number of variants; only
            used in the progress bar caption.

    Returns:
        str: name of the folder (below `./temp`) holding the frames.
    """
    shape_df, report_df, start, ax_xlim, ax_ylim, table_offset = data_selector[shape]
    folder_name = (f'{shape}_{column}_{suffix}' if suffix else f'{shape}_{column}').lower()
    column_name = column
    as_int = False

    if suffix == '100k':
        table_column_data = 'Cases per 100k'
        column_name = column_name + '_per_100k'
    elif suffix == 'Norm':
        table_column_data = 'Fraction from max'
        column_name = column_name + '_Norm'
    else:
        table_column_data = 'Cases'
        as_int = True

    if shape == 'russia':
        table_column_name = 'Regions'
        shape_index = 'name_ru'
    else:
        table_column_name = 'Countries'
        shape_index = 'ADMIN'

    frame_title = f'{table_column_name} {column.lower()} {table_column_data.lower()}'

    # Upper bound of the colour scale.
    if suffix == 'Norm':
        vmax = 1
    elif column in ('Confirmed_Change', 'Deaths_Change'):
        # Daily changes: 95th percentile of the per-name maxima.
        # The original called `numpy.quantile`, a NameError because numpy is
        # imported as `np`. The pandas form below also skips names without
        # any data instead of turning the whole scale into NaN.
        vmax = report_df.groupby('Name')[column_name].max().quantile(.95)
    else:
        # Cumulative values: 95th percentile of the values on the final day.
        vmax = report_df.loc[report_df.Date == fin, column_name].quantile(.95)

    # Only the raw world-wide confirmed counts use a logarithmic scale.
    if shape == 'world' and column == 'Confirmed' and suffix == '':
        vmin = 1
        norm = colors.LogNorm(vmin, vmax)
    else:
        vmin = 0
        norm = colors.Normalize(vmin, vmax)

    cmap = cmap_deaths if column in ('Deaths', 'Deaths_Change') else cmap_general

    os.makedirs(f'./temp/{folder_name}', exist_ok=True)

    with Pool(pool_size) as frames_pool:
        frames = list()

        # NOTE(review): the range stops one day before `fin`, so `fin` itself is
        # not rendered - confirm whether that is intended.
        for i in range ((fin - start).days):
            day = start + step*i
            annotation_text = day.date().strftime('%Y-%m-%d')

            frames.append(frames_pool.apply_async(
                process_day,
                [
                    day,
                    shape_df,
                    report_df,
                    column_name,
                    vmin,
                    vmax,
                    cmap,
                    norm,
                    annotation_text,
                    folder_name,
                    frame_title,
                    ax_xlim,
                    ax_ylim,
                    table_column_name,
                    table_column_data,
                    as_int,
                    table_offset[0],
                    table_offset[1],
                    shape_index
                ]))

        # `wait()` does not re-raise worker exceptions, so count the failed
        # frames and report them instead of silently producing a shorter video.
        failed = 0
        for frame in tqdm(frames, desc=f'({idx}/{total}) Frames for {shape} - {column_name}'): 
            frame.wait()
            if not frame.successful():
                failed += 1

        if failed:
            tqdm.write(f'WARNING: {failed} of {len(frames)} frames failed to render for {folder_name}')

    return folder_name

Make videos

In [None]:
if __name__ == '__main__':
    # The minimal run renders a reduced set of maps, metrics and value kinds.
    if args[0].minimal:
        shapes = ['world', 'europe']
        columns = ['Confirmed', 'Deaths']
        suffixes = ['', '100k']
    else:
        shapes = ['world', 'europe', 'russia']
        columns = ['Confirmed', 'Deaths', 'Confirmed_Change', 'Deaths_Change']
        suffixes = ['', '100k', 'Norm']

    all_variants = list(itertools.product(shapes, columns, suffixes))

    # A single-process pool encodes each finished frame set into a video while
    # the frames of the next variant are being rendered.
    with Pool(1) as videos_pool:
        videos = list()

        for idx, (shape, column, suffix) in enumerate(all_variants, start=1):
            folder = generate_frames(shape, column, suffix, idx, len(all_variants))
            videos.append(videos_pool.apply_async(make_video, [folder]))

        for video in tqdm(videos, desc="Videos rendered"):
            video.wait()