# Excess mortality in Russia during the covid19 pandemics
## Analysis and figures for the Significance-2021 paper

In [1]:
%matplotlib notebook

import numpy as np
import pandas as pd
import pylab as plt
import seaborn as sns
import matplotlib

from matplotlib.patches import Polygon

In [2]:
months_eng = ['January', 'February', 'March', 'April', 'May', 'June',
              'July', 'August', 'September', 'October', 'November', 'December']

months_eng_short = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

months_rus = ['–Ø–Ω–≤–∞—Ä—å', '–§–µ–≤—Ä–∞–ª—å', '–ú–∞—Ä—Ç', '–ê–ø—Ä–µ–ª—å', '–ú–∞–π', '–ò—é–Ω—å',
              '–ò—é–ª—å', '–ê–≤–≥—É—Å—Ç', '–°–µ–Ω—Ç—è–±—Ä—å', '–û–∫—Ç—è–±—Ä—å', '–ù–æ—è–±—Ä—å', '–î–µ–∫–∞–±—Ä—å']

months_rus_short = ['—è–Ω–≤', '—Ñ–µ–≤', '–º–∞—Ä', '–∞–ø—Ä', '–º–∞–π', '–∏—é–Ω',
                    '–∏—é–ª', '–∞–≤–≥', '—Å–µ–Ω', '–æ–∫—Ç', '–Ω–æ—è', '–¥–µ–∫']

## Names, locations, etc. of Russian regions

In [3]:
def rename(regions):
    regions[regions=='H–∏–∂–µ–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'] = '–ù–∏–∂–µ–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'

    # in EMISS files
    regions[regions=='–ì–æ—Ä–æ–¥ –ú–æ—Å–∫–≤–∞ —Å—Ç–æ–ª–∏—Ü–∞ –†–æ—Å—Å–∏–π—Å–∫–æ–π –§–µ–¥–µ—Ä–∞—Ü–∏–∏ –≥–æ—Ä–æ–¥ —Ñ–µ–¥–µ—Ä–∞–ª—å–Ω–æ–≥–æ –∑–Ω–∞—á–µ–Ω–∏—è'] = '–ú–æ—Å–∫–≤–∞'
    regions[regions=='–ì–æ—Ä–æ–¥ –°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥ –≥–æ—Ä–æ–¥ —Ñ–µ–¥–µ—Ä–∞–ª—å–Ω–æ–≥–æ –∑–Ω–∞—á–µ–Ω–∏—è'] = '–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥'
    regions[regions=='–ì–æ—Ä–æ–¥ —Ñ–µ–¥–µ—Ä–∞–ª—å–Ω–æ–≥–æ –∑–Ω–∞—á–µ–Ω–∏—è –°–µ–≤–∞—Å—Ç–æ–ø–æ–ª—å'] = '–°–µ–≤–∞—Å—Ç–æ–ø–æ–ª—å'
    regions[regions=='–ù–µ–Ω–µ—Ü–∫–∏–π –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–π –æ–∫—Ä—É–≥ (–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å)'] = '–ù–µ–Ω–µ—Ü–∫–∏–π –ê–û'
    regions[regions=='–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å (–∫—Ä–æ–º–µ –ù–µ–Ω–µ—Ü–∫–æ–≥–æ –∞–≤—Ç–æ–Ω–æ–º–Ω–æ–≥–æ –æ–∫—Ä—É–≥–∞)'] = '–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û'
    regions[regions=='–•–∞–Ω—Ç—ã-–ú–∞–Ω—Å–∏–π—Å–∫–∏–π –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–π –æ–∫—Ä—É–≥ - –Æ–≥—Ä–∞ (–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å)'] = '–•–∞–Ω—Ç—ã-–ú–∞–Ω—Å–∏–π—Å–∫–∏–π –ê–û'
    regions[regions=='–Ø–º–∞–ª–æ-–ù–µ–Ω–µ—Ü–∫–∏–π –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–π –æ–∫—Ä—É–≥ (–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å)'] = '–Ø–º–∞–ª–æ-H–µ–Ω–µ—Ü–∫–∏–π –ê–û'
    regions[regions=='–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å (–∫—Ä–æ–º–µ –•–∞–Ω—Ç—ã-–ú–∞–Ω—Å–∏–π—Å–∫–æ–≥–æ –∞–≤—Ç–æ–Ω–æ–º–Ω–æ–≥–æ –æ–∫—Ä—É–≥–∞-–Æ–≥—Ä—ã –∏ –Ø–º–∞–ª–æ-–ù–µ–Ω–µ—Ü–∫–æ–≥–æ –∞–≤—Ç–æ–Ω–æ–º–Ω–æ–≥–æ –æ–∫—Ä—É–≥–∞)'] = '–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û'
    regions[regions=='–ß—É–∫–æ—Ç—Å–∫–∏–π –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–π –æ–∫—Ä—É–≥'] = '–ß—É–∫–æ—Ç—Å–∫–∏–π –ê–û'
    regions[regions=='–ï–≤—Ä–µ–π—Å–∫–∞—è –∞–≤—Ç–æ–Ω–æ–º–Ω–∞—è –æ–±–ª–∞—Å—Ç—å'] = '–ï–≤—Ä–µ–π—Å–∫–∞—è –ê–û'
    regions[regions=='–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ê–¥—ã–≥–µ—è (–ê–¥—ã–≥–µ—è)'] = '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ê–¥—ã–≥–µ—è'
    regions[regions=='–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω (–¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω)'] = '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω' 
    regions[regions=='–ß—É–≤–∞—à—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞ - –ß—É–≤–∞—à–∏—è'] = '–ß—É–≤–∞—à—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞'
    regions[regions=='–†–µ—Å–ø—É–±–ª–∏–∫–∞ –°–µ–≤–µ—Ä–Ω–∞—è –û—Å–µ—Ç–∏—è-–ê–ª–∞–Ω–∏—è'] = '–°–µ–≤–µ—Ä–Ω–∞—è –û—Å–µ—Ç–∏—è'
    regions[regions=='–†–µ—Å–ø—É–±–ª–∏–∫–∞ –°–∞—Ö–∞ (–Ø–∫—É—Ç–∏—è)'] = '–Ø–∫—É—Ç–∏—è'
    regions[regions=='–ö–∞–±–∞—Ä–¥–∏–Ω–æ-–ë–∞–ª–∫–∞—Ä—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞'] = '–ö–∞–±–∞—Ä–¥–∏–Ω–æ-–ë–∞–ª–∫–∞—Ä–∏—è'
    regions[regions=='–ö–∞—Ä–∞—á–∞–µ–≤–æ-–ß–µ—Ä–∫–µ—Å—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞'] = '–ö–∞—Ä–∞—á–∞–µ–≤–æ-–ß–µ—Ä–∫–µ—Å–∏—è'
    regions[regions=='–ö–µ–º–µ—Ä–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å - –ö—É–∑–±–∞—Å—Å'] = '–ö–µ–º–µ—Ä–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'
    
    # in addition in Rosstat files
    regions[regions=='–≥.–ú–æ—Å–∫–≤–∞'] = '–ú–æ—Å–∫–≤–∞'
    regions[regions=='–≥.–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥'] = '–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥'
    regions[regions=='–≥.–°–µ–≤–∞—Å—Ç–æ–ø–æ–ª—å'] = '–°–µ–≤–∞—Å—Ç–æ–ø–æ–ª—å'
    regions[regions=='H–µ–Ω–µ—Ü–∫–∏–π –∞–≤—Ç.–æ–∫—Ä—É–≥'] = '–ù–µ–Ω–µ—Ü–∫–∏–π –ê–û'
    regions[regions=='–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –∞–≤—Ç–æ–Ω–æ–º–∏–∏'] = '–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û'
    regions[regions=='–•–∞–Ω—Ç—ã-–ú–∞–Ω—Å–∏–π—Å–∫–∏–π –∞–≤—Ç.–æ–∫—Ä—É–≥-–Æ–≥—Ä–∞'] = '–•–∞–Ω—Ç—ã-–ú–∞–Ω—Å–∏–π—Å–∫–∏–π –ê–û'
    regions[regions=='–Ø–º–∞–ª–æ-H–µ–Ω–µ—Ü–∫–∏–π –∞–≤—Ç.–æ–∫—Ä—É–≥'] = '–Ø–º–∞–ª–æ-H–µ–Ω–µ—Ü–∫–∏–π –ê–û'
    regions[regions=='–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –∞–≤—Ç–æ–Ω–æ–º–∏–∏'] = '–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û'
    regions[regions=='–ß—É–∫–æ—Ç—Å–∫–∏–π –∞–≤—Ç.–æ–∫—Ä—É–≥'] = '–ß—É–∫–æ—Ç—Å–∫–∏–π –ê–û'
    regions[regions=='–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω(–¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω)'] = '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω'
    regions[regions=='–ß—É–≤–∞—à—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞(–ß—É–≤–∞—à–∏—è)'] = '–ß—É–≤–∞—à—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞'
    regions[regions=='–†–µ—Å–ø—É–±–ª–∏–∫–∞ –°–µ–≤–µ—Ä–Ω–∞—è –û—Å–µ—Ç–∏—è- –ê–ª–∞–Ω–∏—è'] = '–°–µ–≤–µ—Ä–Ω–∞—è –û—Å–µ—Ç–∏—è'
    
    # in addition in Stopcoronavirus files
    regions[regions=='–ù–µ–Ω–µ—Ü–∫–∏–π –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–π –æ–∫—Ä—É–≥'] = '–ù–µ–Ω–µ—Ü–∫–∏–π –ê–û'
    regions[regions=='–†–µ—Å–ø—É–±–ª–∏–∫–∞ –°–µ–≤–µ—Ä–Ω–∞—è –û—Å–µ—Ç–∏—è ‚Äî –ê–ª–∞–Ω–∏—è'] = '–°–µ–≤–µ—Ä–Ω–∞—è –û—Å–µ—Ç–∏—è'
    regions[regions=='–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ß—É–≤–∞—à–∏—è'] = '–ß—É–≤–∞—à—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞'
    regions[regions=='–ß—É–∫–æ—Ç—Å–∫–∏–π –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–π –æ–∫—Ä—É–≥'] = '–ß—É–∫–æ—Ç—Å–∫–∏–π –ê–û'
    regions[regions=='–Ø–º–∞–ª–æ-–ù–µ–Ω–µ—Ü–∫–∏–π –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–π –æ–∫—Ä—É–≥'] = '–Ø–º–∞–ª–æ-H–µ–Ω–µ—Ü–∫–∏–π –ê–û'
    if '–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û' not in regions:
        regions[regions=='–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'] = '–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û'
        regions[regions=='–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'] = '–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û'
        
    return regions

In [4]:
regions_table = {'–ê–ª—Ç–∞–π—Å–∫–∏–π –∫—Ä–∞–π':[3,12], '–ê–º—É—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[2,15],
       '–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û':[1,8], '–ê—Å—Ç—Ä–∞—Ö–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[7,7],
       '–ë–µ–ª–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[6,4], '–ë—Ä—è–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[4,2], '–í–ª–∞–¥–∏–º–∏—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[3,5],
       '–í–æ–ª–≥–æ–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[6,6], '–í–æ–ª–æ–≥–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[1,5],
       '–í–æ—Ä–æ–Ω–µ–∂—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[6,5], '–ï–≤—Ä–µ–π—Å–∫–∞—è –ê–û':[3,15], '–ó–∞–±–∞–π–∫–∞–ª—å—Å–∫–∏–π –∫—Ä–∞–π':[1,15],
       '–ò–≤–∞–Ω–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[2,5], '–ò—Ä–∫—É—Ç—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[1,14], '–ö–∞–±–∞—Ä–¥–∏–Ω–æ-–ë–∞–ª–∫–∞—Ä–∏—è':[9,4],
       '–ö–∞–ª–∏–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[1,0], '–ö–∞–ª—É–∂—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[3,2], '–ö–∞–º—á–∞—Ç—Å–∫–∏–π –∫—Ä–∞–π':[1,17],
       '–ö–∞—Ä–∞—á–∞–µ–≤–æ-–ß–µ—Ä–∫–µ—Å–∏—è':[8,4], '–ö–µ–º–µ—Ä–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[1,13], '–ö–∏—Ä–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[2,8],
       '–ö–æ—Å—Ç—Ä–æ–º—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[2,6], '–ö—Ä–∞—Å–Ω–æ–¥–∞—Ä—Å–∫–∏–π –∫—Ä–∞–π':[7,4], '–ö—Ä–∞—Å–Ω–æ—è—Ä—Å–∫–∏–π –∫—Ä–∞–π':[0,13],
       '–ö—É—Ä–≥–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[2,11], '–ö—É—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[5,3], '–õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[1,3],
       '–õ–∏–ø–µ—Ü–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[5,4], '–ú–∞–≥–∞–¥–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[1,16], '–ú–æ—Å–∫–≤–∞':[3,3],
       '–ú–æ—Å–∫–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[3,4], '–ú—É—Ä–º–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[0,4], '–ù–µ–Ω–µ—Ü–∫–∏–π –ê–û':[0,9],
       '–ù–∏–∂–µ–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[3,6], '–ù–æ–≤–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[1,4],
       '–ù–æ–≤–æ—Å–∏–±–∏—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[2,12], '–û–º—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[3,11], '–û—Ä–µ–Ω–±—É—Ä–≥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[5,8],
       '–û—Ä–ª–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[4,3], '–ü–µ–Ω–∑–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[5,6], '–ü–µ—Ä–º—Å–∫–∏–π –∫—Ä–∞–π':[2,9],
       '–ü—Ä–∏–º–æ—Ä—Å–∫–∏–π –∫—Ä–∞–π':[3,16], '–ü—Å–∫–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[2,2], '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ê–¥—ã–≥–µ—è':[8,3],
       '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ê–ª—Ç–∞–π':[4,12], '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ë–∞—à–∫–æ—Ä—Ç–æ—Å—Ç–∞–Ω':[4,9],
       '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ë—É—Ä—è—Ç–∏—è':[2,14], '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –î–∞–≥–µ—Å—Ç–∞–Ω':[8,7],
       '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ò–Ω–≥—É—à–µ—Ç–∏—è':[9,6], '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ö–∞–ª–º—ã–∫–∏—è':[7,6],
       '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ö–∞—Ä–µ–ª–∏—è':[0,3], '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ö–æ–º–∏':[1,9], '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ö—Ä—ã–º':[7,3],
       '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ú–∞—Ä–∏–π –≠–ª':[2,7], '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ú–æ—Ä–¥–æ–≤–∏—è':[4,6],
       '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω':[3,8], '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢—ã–≤–∞':[3,13], '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –•–∞–∫–∞—Å–∏—è':[2,13],
       '–†–æ—Å—Ç–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[7,5], '–†—è–∑–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[4,5], '–°–∞–º–∞—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[4,8],
       '–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥':[1,2], '–°–∞—Ä–∞—Ç–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[5,7], '–°–∞—Ö–∞–ª–∏–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[3,17],
       '–°–≤–µ—Ä–¥–ª–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[2,10], '–°–µ–≤–∞—Å—Ç–æ–ø–æ–ª—å':[7,2], '–°–µ–≤–µ—Ä–Ω–∞—è –û—Å–µ—Ç–∏—è':[9,5],
       '–°–º–æ–ª–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[3,1], '–°—Ç–∞–≤—Ä–æ–ø–æ–ª—å—Å–∫–∏–π –∫—Ä–∞–π':[8,5], '–¢–∞–º–±–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[5,5],
       '–¢–≤–µ—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[2,3], '–¢–æ–º—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[1,12], '–¢—É–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[4,4],
       '–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û':[1,11], '–£–¥–º—É—Ä—Ç—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞':[3,9],
       '–£–ª—å—è–Ω–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[4,7], '–•–∞–±–∞—Ä–æ–≤—Å–∫–∏–π –∫—Ä–∞–π':[2,16], '–•–∞–Ω—Ç—ã-–ú–∞–Ω—Å–∏–π—Å–∫–∏–π –ê–û':[1,10],
       '–ß–µ–ª—è–±–∏–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[3,10], '–ß–µ—á–µ–Ω—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞':[8,6],
       '–ß—É–≤–∞—à—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞':[3,7], '–ß—É–∫–æ—Ç—Å–∫–∏–π –ê–û':[0,17], '–Ø–∫—É—Ç–∏—è':[3,14],
       '–Ø–º–∞–ª–æ-H–µ–Ω–µ—Ü–∫–∏–π –ê–û':[0,10], '–Ø—Ä–æ—Å–ª–∞–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':[2,4]}

regions_short = ['–†–æ—Å—Å–∏–π—Å–∫–∞—è –§–µ–¥–µ—Ä–∞—Ü–∏—è', '–ë–µ–ª–≥–æ—Ä–æ–¥—Å–∫–∞—è', '–ë—Ä—è–Ω—Å–∫–∞—è',
       '–í–ª–∞–¥–∏–º–∏—Ä—Å–∫–∞—è', '–í–æ—Ä–æ–Ω–µ–∂—Å–∫–∞—è',
       '–ò–≤–∞–Ω–æ–≤—Å–∫–∞—è', '–ö–∞–ª—É–∂—Å–∫–∞—è', '–ö–æ—Å—Ç—Ä–æ–º—Å–∫–∞—è',
       '–ö—É—Ä—Å–∫–∞—è', '–õ–∏–ø–µ—Ü–∫–∞—è', '–ú–æ—Å–∫–æ–≤—Å–∫–∞—è',
       '–û—Ä–ª–æ–≤—Å–∫–∞—è', '–†—è–∑–∞–Ω—Å–∫–∞—è', '–°–º–æ–ª–µ–Ω—Å–∫–∞—è',
       '–¢–∞–º–±–æ–≤—Å–∫–∞—è', '–¢–≤–µ—Ä—Å–∫–∞—è', '–¢—É–ª—å—Å–∫–∞—è',
       '–Ø—Ä–æ—Å–ª–∞–≤—Å–∫–∞—è', '–ú–æ—Å–∫–≤–∞', '–ö–∞—Ä–µ–ª–∏—è',
       '–ö–æ–º–∏', '–ù–µ–Ω–µ—Ü–∫–∏–π –ê–û', '–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è',
       '–í–æ–ª–æ–≥–æ–¥—Å–∫–∞—è', '–ö–∞–ª–∏–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–∞—è',
       '–õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–∞—è', '–ú—É—Ä–º–∞–Ω—Å–∫–∞—è',
       '–ù–æ–≤–≥–æ—Ä–æ–¥—Å–∫–∞—è', '–ü—Å–∫–æ–≤—Å–∫–∞—è', '–ü–µ—Ç–µ—Ä–±—É—Ä–≥',
       '–ê–¥—ã–≥–µ—è', '–ö–∞–ª–º—ã–∫–∏—è', '–ö—Ä—ã–º',
       '–ö—Ä–∞—Å–Ω–æ–¥–∞—Ä—Å–∫–∏–π', '–ê—Å—Ç—Ä–∞—Ö–∞–Ω—Å–∫–∞—è',
       '–í–æ–ª–≥–æ–≥—Ä–∞–¥—Å–∫–∞—è', '–†–æ—Å—Ç–æ–≤—Å–∫–∞—è', '–°–µ–≤–∞—Å—Ç–æ–ø–æ–ª—å',
       '–î–∞–≥–µ—Å—Ç–∞–Ω', '–ò–Ω–≥—É—à–µ—Ç–∏—è',
       '–ö–∞–±-–ë–∞–ª–∫–∞—Ä–∏—è', '–ö–∞—Ä-–ß–µ—Ä–∫–µ—Å–∏—è', '–°–µ–≤ –û—Å–µ—Ç–∏—è',
       '–ß–µ—á–µ–Ω—Å–∫–∞—è', '–°—Ç–∞–≤—Ä–æ–ø–æ–ª—å—Å–∫–∏–π',
       '–ë–∞—à–∫–æ—Ä—Ç–æ—Å—Ç–∞–Ω', '–ú–∞—Ä–∏–π –≠–ª',
       '–ú–æ—Ä–¥–æ–≤–∏—è', '–¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω',
       '–£–¥–º—É—Ä—Ç–∏—è', '–ß—É–≤–∞—à–∏—è', '–ü–µ—Ä–º—Å–∫–∏–π',
       '–ö–∏—Ä–æ–≤—Å–∫–∞—è', '–ù–∏–∂–µ–≥–æ—Ä–æ–¥—Å–∫–∞—è',
       '–û—Ä–µ–Ω–±—É—Ä–≥—Å–∫–∞—è', '–ü–µ–Ω–∑–µ–Ω—Å–∫–∞—è', '–°–∞–º–∞—Ä—Å–∫–∞—è',
       '–°–∞—Ä–∞—Ç–æ–≤—Å–∫–∞—è', '–£–ª—å—è–Ω–æ–≤—Å–∫–∞—è', '–ö—É—Ä–≥–∞–Ω—Å–∫–∞—è',
       '–°–≤–µ—Ä–¥–ª–æ–≤—Å–∫–∞—è', '–•–∞–Ω—Ç—ã-–ú–∞–Ω—Å –ê–û', '–Ø–º–∞–ª–æ-H–µ–Ω –ê–û',
       '–¢—é–º–µ–Ω—Å–∫–∞—è', '–ß–µ–ª—è–±–∏–Ω—Å–∫–∞—è',
       '–ê–ª—Ç–∞–π', '–¢—ã–≤–∞', '–•–∞–∫–∞—Å–∏—è',
       '–ê–ª—Ç–∞–π—Å–∫–∏–π', '–ö—Ä–∞—Å–Ω–æ—è—Ä—Å–∫–∏–π', '–ò—Ä–∫—É—Ç—Å–∫–∞—è',
       '–ö–µ–º–µ—Ä–æ–≤—Å–∫–∞—è', '–ù–æ–≤–æ—Å–∏–±–∏—Ä—Å–∫–∞—è', '–û–º—Å–∫–∞—è',
       '–¢–æ–º—Å–∫–∞—è', '–ë—É—Ä—è—Ç–∏—è', '–Ø–∫—É—Ç–∏—è',
       '–ó–∞–±–∞–π–∫–∞–ª—å—Å–∫–∏–π', '–ö–∞–º—á–∞—Ç—Å–∫–∏–π', '–ü—Ä–∏–º–æ—Ä—Å–∫–∏–π',
       '–•–∞–±–∞—Ä–æ–≤—Å–∫–∏–π', '–ê–º—É—Ä—Å–∫–∞—è', '–ú–∞–≥–∞–¥–∞–Ω—Å–∫–∞—è',
       '–°–∞—Ö–∞–ª–∏–Ω—Å–∫–∞—è', '–ï–≤—Ä–µ–π—Å–∫–∞—è –ê–û', '–ß—É–∫–æ—Ç—Å–∫–∏–π –ê–û']

In [5]:
coordinates={}
coordinates['–ë–µ–ª–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'               ] = [145,575]
coordinates['–ë—Ä—è–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                   ] = [140,510]
coordinates['–í–ª–∞–¥–∏–º–∏—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'               ] = [245,530]
coordinates['–í–æ—Ä–æ–Ω–µ–∂—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                ] = [170,590]
coordinates['–ò–≤–∞–Ω–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                 ] = [265,515]
coordinates['–ö–∞–ª—É–∂—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                  ] = [170,505]
coordinates['–ö–æ—Å—Ç—Ä–æ–º—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                ] = [300,500]
coordinates['–ö—É—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                    ] = [140,545]
coordinates['–õ–∏–ø–µ—Ü–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                   ] = [180,560]
coordinates['–ú–æ—Å–∫–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                 ] = [214,500]
coordinates['–û—Ä–ª–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                  ] = [165,535]
coordinates['–†—è–∑–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                  ] = [220,550]
coordinates['–°–º–æ–ª–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                 ] = [160,475]
coordinates['–¢–∞–º–±–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                 ] = [210,580]
coordinates['–¢–≤–µ—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                   ] = [190,455]
coordinates['–¢—É–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                   ] = [185,530]
coordinates['–Ø—Ä–æ—Å–ª–∞–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                ] = [260,480]
coordinates['–ú–æ—Å–∫–≤–∞'                             ] = [60, 460]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ö–∞—Ä–µ–ª–∏—è'                 ] = [290,360]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ö–æ–º–∏'                    ] = [440,470]
coordinates['–ù–µ–Ω–µ—Ü–∫–∏–π –ê–û'                        ] = [480,400]
coordinates['–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û'       ] = [340,420]
coordinates['–í–æ–ª–æ–≥–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                ] = [290,460]
coordinates['–ö–∞–ª–∏–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'            ] = [80,380]
coordinates['–õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'              ] = [240,410]
coordinates['–ú—É—Ä–º–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                 ] = [360,310]
coordinates['–ù–æ–≤–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'               ] = [210,425]
coordinates['–ü—Å–∫–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                  ] = [170,410]
coordinates['–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥'                    ] = [200,370]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ê–¥—ã–≥–µ—è'                  ] = [90,690]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ö–∞–ª–º—ã–∫–∏—è'                ] = [170,705]
coordinates['–ö—Ä–∞—Å–Ω–æ–¥–∞—Ä—Å–∫–∏–π –∫—Ä–∞–π'                 ] = [92,660]
coordinates['–ê—Å—Ç—Ä–∞—Ö–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'               ] = [205,710]
coordinates['–í–æ–ª–≥–æ–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'              ] = [195,645]
coordinates['–†–æ—Å—Ç–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                 ] = [140,650]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –î–∞–≥–µ—Å—Ç–∞–Ω'                ] = [146,792]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ò–Ω–≥—É—à–µ—Ç–∏—è'               ] = [127,760]
coordinates['–ö–∞–±–∞—Ä–¥–∏–Ω–æ-–ë–∞–ª–∫–∞—Ä–∏—è'                 ] = [110,735]
coordinates['–ö–∞—Ä–∞—á–∞–µ–≤–æ-–ß–µ—Ä–∫–µ—Å–∏—è'                 ] = [95,720]
coordinates['–°–µ–≤–µ—Ä–Ω–∞—è –û—Å–µ—Ç–∏—è'                    ] = [115,750]
coordinates['–ß–µ—á–µ–Ω—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞'               ] = [140,760]
coordinates['–°—Ç–∞–≤—Ä–æ–ø–æ–ª—å—Å–∫–∏–π –∫—Ä–∞–π'                ] = [125,710]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ë–∞—à–∫–æ—Ä—Ç–æ—Å—Ç–∞–Ω'            ] = [365,650]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ú–∞—Ä–∏–π –≠–ª'                ] = [320,560]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ú–æ—Ä–¥–æ–≤–∏—è'                ] = [250,570]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω'               ] = [320,600]
coordinates['–£–¥–º—É—Ä—Ç—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞'              ] = [360,580]
coordinates['–ß—É–≤–∞—à—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞'               ] = [290,575]
coordinates['–ü–µ—Ä–º—Å–∫–∏–π –∫—Ä–∞–π'                      ] = [410,560]
coordinates['–ö–∏—Ä–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                  ] = [350,540]
coordinates['–ù–∏–∂–µ–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'              ] = [270,550]
coordinates['–û—Ä–µ–Ω–±—É—Ä–≥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'               ] = [320,660]
coordinates['–ü–µ–Ω–∑–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                 ] = [250,600]
coordinates['–°–∞–º–∞—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                  ] = [300,630]
coordinates['–°–∞—Ä–∞—Ç–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                ] = [250,640]
coordinates['–£–ª—å—è–Ω–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                ] = [275,600]
coordinates['–ö—É—Ä–≥–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                 ] = [460,660]
coordinates['–°–≤–µ—Ä–¥–ª–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'               ] = [460,600]
coordinates['–•–∞–Ω—Ç—ã-–ú–∞–Ω—Å–∏–π—Å–∫–∏–π –ê–û'                ] = [550,550]
coordinates['–Ø–º–∞–ª–æ-H–µ–Ω–µ—Ü–∫–∏–π –ê–û'                  ] = [630,510]
coordinates['–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å –±–µ–∑ –ê–û'           ] = [510,640]
coordinates['–ß–µ–ª—è–±–∏–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                ] = [420,660]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ê–ª—Ç–∞–π'                   ] = [700,800]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ë—É—Ä—è—Ç–∏—è'                 ] = [980,720]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢—ã–≤–∞'                    ] = [810,790]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –•–∞–∫–∞—Å–∏—è'                 ] = [750,755]
coordinates['–ê–ª—Ç–∞–π—Å–∫–∏–π –∫—Ä–∞–π'                     ] = [650,770]
coordinates['–ó–∞–±–∞–π–∫–∞–ª—å—Å–∫–∏–π –∫—Ä–∞–π'                 ] = [1060,710]
coordinates['–ö—Ä–∞—Å–Ω–æ—è—Ä—Å–∫–∏–π –∫—Ä–∞–π'                  ] = [790,520]
coordinates['–ò—Ä–∫—É—Ç—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                  ] = [900,670]
coordinates['–ö–µ–º–µ—Ä–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                ] = [710,725]
coordinates['–ù–æ–≤–æ—Å–∏–±–∏—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'              ] = [620,710]
coordinates['–û–º—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                     ] = [560,680]
coordinates['–¢–æ–º—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                    ] = [660,650]
coordinates['–Ø–∫—É—Ç–∏—è'                             ] = [1030,430]
coordinates['–ö–∞–º—á–∞—Ç—Å–∫–∏–π –∫—Ä–∞–π'                    ] = [1390,360]
coordinates['–ü—Ä–∏–º–æ—Ä—Å–∫–∏–π –∫—Ä–∞–π'                    ] = [1340,730]
coordinates['–•–∞–±–∞—Ä–æ–≤—Å–∫–∏–π –∫—Ä–∞–π'                   ] = [1200,510]
coordinates['–ê–º—É—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                   ] = [1160,630]
coordinates['–ú–∞–≥–∞–¥–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                ] = [1250,320]
coordinates['–°–∞—Ö–∞–ª–∏–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'                ] = [1355,575]
coordinates['–ï–≤—Ä–µ–π—Å–∫–∞—è –ê–û'                       ] = [1265,690]
coordinates['–ß—É–∫–æ—Ç—Å–∫–∏–π –ê–û'                       ] = [1260,150]
coordinates['–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ö—Ä—ã–º'                    ] = [35,620]
coordinates['–°–µ–≤–∞—Å—Ç–æ–ø–æ–ª—å'                        ] = [20,625]

In [6]:
regionsToEnglish = {'–ú–æ—Å–∫–≤–∞':'Moscow', 
                    '–ú–æ—Å–∫–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Moscow Oblast',
                    '–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥':'St. Petersburg',       
                    '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω':'Tatarstan',
                    '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ë–∞—à–∫–æ—Ä—Ç–æ—Å—Ç–∞–Ω':'Bashkortostan',
                    '–ö—Ä–∞—Å–Ω–æ–¥–∞—Ä—Å–∫–∏–π –∫—Ä–∞–π':'Krasnodar Krai',
                    '–°–≤–µ—Ä–¥–ª–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Sverdlovsk Oblast',
                    '–°–∞–º–∞—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Samara Oblast',
                    '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –î–∞–≥–µ—Å—Ç–∞–Ω':'Dagestan',
                    '–ß–µ–ª—è–±–∏–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Chelyabinsk Oblast',
                    '–ù–∏–∂–µ–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Nizhny Novgorod Oblast',
                    '–†–æ—Å—Ç–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Rostov Oblast',
                    '–í–æ–ª–≥–æ–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Volgograd Oblast',
                    '–õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Leningrad Oblast',
                    '–ü–µ–Ω–∑–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Penza Oblast',
                    '–û—Ä–µ–Ω–±—É—Ä–≥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Orenburg Oblast',
                    '–ù–æ–≤–æ—Å–∏–±–∏—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Novosibirsk Oblast',
                    '–ü–µ—Ä–º—Å–∫–∏–π –∫—Ä–∞–π':'Perm Krai',
                   }

regionsToEnglishShort = {
                    '–ú–æ—Å–∫–≤–∞':'Moscow', 
                    '–ú–æ—Å–∫–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Moscow Oblast',
                    '–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥':'St. Petersburg',       
                    '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω':'Tatarstan',
                    '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ë–∞—à–∫–æ—Ä—Ç–æ—Å—Ç–∞–Ω':'Bashkortostan',
                    '–ö—Ä–∞—Å–Ω–æ–¥–∞—Ä—Å–∫–∏–π –∫—Ä–∞–π':'Krasnodar Krai',
                    '–°–≤–µ—Ä–¥–ª–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Sverdlovsk O.',
                    '–°–∞–º–∞—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Samara O.',
                    '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –î–∞–≥–µ—Å—Ç–∞–Ω':'Dagestan',
                    '–ß–µ–ª—è–±–∏–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Chelyabinsk O.',
                    '–ù–∏–∂–µ–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'N. Novgorod O.',
                    '–†–æ—Å—Ç–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Rostov O.',
                    '–í–æ–ª–≥–æ–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Volgograd O.',
                    '–õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Leningrad O.',
                    '–ü–µ–Ω–∑–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Penza O.',
                    '–û—Ä–µ–Ω–±—É—Ä–≥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Orenburg O.',
                    '–ù–æ–≤–æ—Å–∏–±–∏—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å':'Novosibirsk O.',
                    '–ü–µ—Ä–º—Å–∫–∏–π –∫—Ä–∞–π':'Perm Krai',
                        }

## Load data

Data sources:

* https://www.fedstat.ru/indicator/33556

* https://www.fedstat.ru/indicator/31556

* https://rosstat.gov.ru/storage/mediabank/ldMWepjj/edn11-2020.htm

* https://docs.google.com/spreadsheets/d/1nCxvNcuZGNswsf97mliLikmUIsOrOGZtL-VI7xfN-Zw (scraped by Sergey Shpilkin)

In [7]:
table = pd.read_excel('russian-data/data.xls')

regions = table.values[3:,0]
regions = np.array([r.strip(' *') for r in regions])

deaths = table.values[3:,1:].astype(float)

# fill in fields that are empty for some years for unclear reasons
deaths[regions=='–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å (–∫—Ä–æ–º–µ –ù–µ–Ω–µ—Ü–∫–æ–≥–æ –∞–≤—Ç–æ–Ω–æ–º–Ω–æ–≥–æ –æ–∫—Ä—É–≥–∞)',:] = \
    deaths[regions=='–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å',:] - \
    deaths[regions=='–ù–µ–Ω–µ—Ü–∫–∏–π –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–π –æ–∫—Ä—É–≥ (–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å)',:]

deaths[regions=='–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å (–∫—Ä–æ–º–µ –•–∞–Ω—Ç—ã-–ú–∞–Ω—Å–∏–π—Å–∫–æ–≥–æ –∞–≤—Ç–æ–Ω–æ–º–Ω–æ–≥–æ –æ–∫—Ä—É–≥–∞-–Æ–≥—Ä—ã –∏ –Ø–º–∞–ª–æ-–ù–µ–Ω–µ—Ü–∫–æ–≥–æ –∞–≤—Ç–æ–Ω–æ–º–Ω–æ–≥–æ –æ–∫—Ä—É–≥–∞)',:] = \
    deaths[regions=='–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å',:] - \
    deaths[regions=='–•–∞–Ω—Ç—ã-–ú–∞–Ω—Å–∏–π—Å–∫–∏–π –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–π –æ–∫—Ä—É–≥ - –Æ–≥—Ä–∞ (–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å)',:] - \
    deaths[regions=='–Ø–º–∞–ª–æ-–ù–µ–Ω–µ—Ü–∫–∏–π –∞–≤—Ç–æ–Ω–æ–º–Ω—ã–π –æ–∫—Ä—É–≥ (–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å)',:]

kickout = np.isnan(deaths[:,-1])
kickout |= np.array(['—Ñ–µ–¥–µ—Ä–∞–ª—å–Ω—ã–π –æ–∫—Ä—É–≥' in r for r in regions])
kickout |= np.isin(regions, ['–ê—Ä—Ö–∞–Ω–≥–µ–ª—å—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å', '–¢—é–º–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å'])

regions = regions[~kickout]
deaths = deaths[~kickout]

regions = rename(regions)

assert(all(deaths[0] == np.nansum(deaths[1:],axis=0)))

In [8]:
deaths_year = np.zeros((regions.size, 15, 12)) * np.nan
for y in range(14):
    deaths_year[:,y,:] = deaths[:, y*12:(y+1)*12]
deaths_year[:,14,:deaths.shape[1]-14*12] = deaths[:, 14*12:]

deaths = deaths_year

In [9]:
# Append last months

d = pd.read_excel('russian-data/edn10_2020.xlsx', sheet_name='t1_1')
r = d.values[:,0][4:]
d = d.values[:,5][4:]
r = np.array([a.strip() for a in r])
r = rename(r)

for i,reg in enumerate(regions):
    deaths[i,-1,9] = d[r==reg][0]
    
d = pd.read_excel('russian-data/edn11_2020.xlsx', sheet_name='t1_1')
r = d.values[:,0][4:]
d = d.values[:,5][4:]
r = np.array([a.strip() for a in r])
r = rename(r)

for i,reg in enumerate(regions):
    deaths[i,-1,10] = d[r==reg][0]

In [10]:
print(deaths[0,-1])

[164075. 143179. 152740. 150873. 172914. 162758. 181479. 157181. 170717.
 205456. 219872.     nan]


In [11]:
covid_deaths = np.zeros((regions.size, 12, 4))

files = ['edn04_2020.xlsx', 'edn05_2020.xlsx', 'edn06_2020.xlsx',
         'edn07_2020.xlsx', 'edn08_2020.xlsx', 'edn09_2020.xlsx',
         'edn10_2020.xlsx', 'edn11_2020.xlsx']
sheets = ['5.1', '5_1', '5_1', 't5_1', '5.1', '5.1', '5.1', 't5_1']
fromrow = [6,5,6,6,6,6,5,5]

for month, (file, sheet, row) in enumerate(zip(files, sheets, fromrow)):
    d = pd.read_excel('russian-data/'+file, sheet_name=sheet)
    r = d.values[:,0][row:-1]
    d = d.values[:,[2,3,5,6]][row:-1,:]
    r = np.array([a.strip() for a in r])
    r = rename(r)
    for i,reg in enumerate(regions):
        covid_deaths[i,3+month,:] = d[r==reg][0]

covid_deaths[np.isnan(covid_deaths)] = 0
    
assert(np.all(covid_deaths[0] == np.sum(covid_deaths[1:,:,:], axis=0)))

print(np.sum(covid_deaths,axis=0), '\n')
print(np.sum(covid_deaths[0],axis=0), '\n')
print(np.sum(covid_deaths[0]))

[[    0.     0.     0.     0.]
 [    0.     0.     0.     0.]
 [    0.     0.     0.     0.]
 [ 2700.   796.   870.  1284.]
 [11852.  3354.  3218.  6914.]
 [11650.  2984.  2968.  7068.]
 [10126.  2042.  2474.  6100.]
 [ 6872.  1164.  2368.  4942.]
 [ 9158.  1718.  2856.  6626.]
 [26154.  4052.  3588. 14872.]
 [39252.  7968.  4356. 19714.]
 [    0.     0.     0.     0.]] 

[58882. 12039. 11349. 33760.] 

116030.0


In [12]:
table = pd.read_excel('russian-data/data_population.xls')

r = table.values[2:,0]
r = np.array([a.strip(' *') for a in r])
r = rename(r)
pop = table.values[2:,2].astype(float)

population = np.zeros(regions.size)
for i,reg in enumerate(regions):
    population[i] = pop[r==reg][0]

In [13]:
table = pd.read_csv('russian-data/StopCoronaRF - died.csv')
off = table.values[:,table.columns=='01.12.2020'].flatten()
r = table.values[:,0]
r = rename(r)

official_deaths = np.zeros(regions.size)
official_deaths_monthly = np.zeros((regions.size, 12))
for i,reg in enumerate(regions):
    if reg != '–†–æ—Å—Å–∏–π—Å–∫–∞—è –§–µ–¥–µ—Ä–∞—Ü–∏—è':
        official_deaths[i] = off[r==reg][0]
        for m in range(12):
            if m==11:
                nextm = 1
                nexty = 2021
            else:
                nextm = m+2
                nexty = 2020
            if f'01.{nextm:02d}.{nexty}' in table.columns and f'01.{m+1:02d}.2020' in table.columns:
                official_deaths_monthly[i,m] = \
                    table.values[:,table.columns==f'01.{nextm:02d}.{nexty}'].flatten()[r==reg][0] - \
                    table.values[:,table.columns==f'01.{m+1:02d}.2020'].flatten()[r==reg][0]
            else:
                official_deaths_monthly[i,m] = np.nan
official_deaths[regions=='–†–æ—Å—Å–∏–π—Å–∫–∞—è –§–µ–¥–µ—Ä–∞—Ü–∏—è'] = np.sum(official_deaths)
official_deaths_monthly[regions=='–†–æ—Å—Å–∏–π—Å–∫–∞—è –§–µ–¥–µ—Ä–∞—Ü–∏—è',:] = np.sum(official_deaths_monthly,axis=0)

print(official_deaths[0])
print(official_deaths_monthly[0])

40464.0
[   nan    nan    nan  1145.  3686.  4681.  4522.  3241.  3592.  7344.
 12229. 17091.]


## Compute excess deaths

In [14]:
# Compared to 2019
deaths_model = deaths[:,-2,:]
excess = deaths[:,-1,:] - deaths_model
total = np.nansum(excess[:,3:], axis=1)
print(total[0])

# Compared to the 2017-19 avearge
deaths_model = np.mean(deaths[:,-4:-1,:], axis=1)
excess = deaths[:,-1,:] - deaths_model
total = np.nansum(excess[:,3:], axis=1)
print(total[0])

# Compared to the 2015-19 average
deaths_model = np.mean(deaths[:,-6:-1,:], axis=1)
excess = deaths[:,-1,:] - deaths_model
total = np.nansum(excess[:,3:], axis=1)
print(total[0])

242598.0
230763.33333333334
212433.4


In [15]:
from scipy import stats

yearly = np.mean(deaths,axis=2)
for r in range(yearly.shape[0]):
    b = stats.linregress(np.arange(2015, 2020), yearly[r,-6:-1])
    yearly[r,-1] = b.intercept + b.slope * 2020
    
monthly = np.nanmedian(deaths[:,:-1,:] - yearly[:,:-1,np.newaxis], axis=1)

deaths_model = monthly + yearly[:,-1][:,np.newaxis]

excess = deaths[:,-1,:] - deaths_model
total = np.nansum(excess[:,3:], axis=1)

ratio = excess / deaths_model
maxratio = np.nanmax(ratio,axis=1)
fullratio = total / np.sum(deaths_model[:,~np.isnan(ratio[0])][:,3:], axis=1)
yearratio = total / np.sum(deaths_model, axis=1)

excess_permln = excess / population[:,np.newaxis] * 1e+6
total_permln  = total / population * 1e+6

print(np.round(excess[0,:]))
print(f'{total[0]:.0f}')
print(' '.join([f'{r*100:.0f}%' for r in ratio[0]]))
print(' '.join([f'{r*100:.0f}%' for r in excess[0]/np.sum(deaths_model[0])]))
print(' '.join([f'{r:.1f}' for r in excess[0]/official_deaths_monthly[0]]))
print(f'{np.sum(total[1:]):.0f}')
print(f'{total_permln[0]:.0f}')
print(f'{fullratio[0]*100:.1f}%')
print(f'{yearratio[0]*100:.1f}%')

print(f'{np.nansum(excess[0,:]):.0f}')

[  511.  1536. -3902.  4771. 21070. 18950. 34728. 14410. 33590. 56120.
 80461.    nan]
264100
0% 1% -2% 3% 14% 13% 24% 10% 24% 38% 58% nan%
0% 0% -0% 0% 1% 1% 2% 1% 2% 3% 5% nan%
nan nan nan 4.2 5.7 4.0 7.7 4.4 9.4 7.6 6.6 nan
266709
1799
22.8%
15.0%
262245


In [16]:
r=0

X = np.concatenate((np.ones((5,1)), np.arange(2015,2020)[:,np.newaxis]), axis=1)
y = yearly[r,-6:-1][:,np.newaxis]
b = np.linalg.inv(X.T @ X) @ X.T @ y
yhat = X @ b
sigma2 = np.sum((y-yhat)**2) / (5-2)
x = np.array([[1], [2020]])
yhat = b.T @ x
print(yhat[0][0])

predictive_var = sigma2 * (1 + x.T @ np.linalg.inv(X.T @ X) @ x)
predictive_std = np.sqrt(predictive_var)[0][0]

print(predictive_std)

a = 3
b = np.where(np.isnan(deaths[r,-1,:]))[0][0]
print(predictive_std*(b-a))
print(predictive_std*(b-a)*np.array([-1.96,1.96]) + total[0])

147164.28328810912
2048.718950118371
16389.75160094697
[231975.77852881 296223.60480452]


## Comparison betwen countries

https://www.nytimes.com/interactive/2020/04/21/world/coronavirus-missing-deaths.html  
https://www.economist.com/graphic-detail/2020/07/15/tracking-covid-19-excess-deaths-across-countries  
https://www.ft.com/content/a2901ce8-5eb7-4633-b89c-cbdf5b386938

Iran:   
https://www.medrxiv.org/content/10.1101/2020.12.07.20245621v1.full

Daily data: https://en.wikipedia.org/wiki/COVID-19_pandemic_by_country_and_territory#Total_cases_and_deaths  
Mortality rate: https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_by_mortality_rate

In [17]:
# Now is Jan 1, 2021

data = [['United States üá∫üá∏', 349397, 'Ec', 'Dec 12', 375065, 290791, 0, 328.2],
        ['Brazil üáßüá∑',        194976, 'Ec','Nov 21', 184202, 168974, 0, 209.5],
        ['India üáÆüá≥',         148994, '', '', np.nan, np.nan, 0, 1353], 
        ['Mexico üá≤üáΩ',        125807, 'Ec', 'Nov 21', 251805, 101357, 0, 126.2],
        ['Italy üáÆüáπ',          74159, 'Ec', 'Oct 27', 63645, 36963, 0, 60.4],
        ['United Kingdom üá¨üáß', 74125, 'Ec', 'Dec 18', 82146, 53368, 0, 66.7], # COVID DEATHS FROM OURWORLDINDATA
        ['France üá´üá∑',         64632, 'Ec', 'Dec 1', 55186, 52094, 0, 67.0],
        ['Russia üá∑üá∫',         57555, 'DK', 'Nov 30', total[0], official_deaths[0], 0, 146.8],
        ['Iran üáÆüá∑',           55337, 'Ghafari et al.', 'Sep 21', 58900, 24301, 0, 81.9], #7931+19460+40851
        ['Spain üá™üá∏',          50837, 'Ec', 'Dec 29', 69702, 49974, 0, 46.7],
        ['Argentina üá¶üá∑',      43245, '', '', np.nan, np.nan, 0, 44.5],
        ['Colombia üá®üá¥',       43213, 'NYT', 'Oct 4', 27000, 28787, 0, 49.7],
        ['Peru üáµüá™',           37680, 'Ec', 'Dec 31', 86156, 37650, 0, 32.0],
        ['Germany üá©üá™',        34222, 'Ec', 'Nov 24', 17290, 14808, 0, 83.02],
        ['Poland üáµüá±',         28956, 'FT', 'Oct', 16546, 5783, 0, 38.0],
        ['South Africa üáøüá¶',   28469, 'Ec', 'Dec 29', 58315, 27541, 0, 57.8],
        ['Indonesia üáÆüá©',      22329, '', '', np.nan, np.nan, 0, 267.7],
        ['Turkey üáπüá∑',         20881, '', '', np.nan, np.nan, 0, 82],
        ['Belgium üáßüá™',        19528, 'Ec', 'Dec 15', 19836, 18299, 0, 11.5],
        ['Ukraine üá∫üá¶',        18680, 'DK', 'Oct 31', 13168, 7515, 0, 42.0],
        ['Chile üá®üá±',          16608, 'Ec', 'Dec 29', 14075, 16444, 0, 18.7],
        ['Romania üá∑üá¥',        15841, '', '', np.nan, np.nan, 0, 19.4],
        ['Canada üá®üá¶',         15606, '', '', np.nan, np.nan, 0, 37.6],
        ['Ecuador üá™üá®',        14032, 'Ec', 'Sep 30', 34067, 11355, 0, 17.08],
        ['Iraq üáÆüá∂',           12824, '', '', np.nan, np.nan, 0, 38.4],
        ['Czech Republic üá®üáø', 11711, '', '', np.nan, np.nan, 0, 10.7],
        ['Netherlands üá≥üá±',    11432, 'Ec', 'Dec 25', 15988, 11005, 0, 17.3],
        ['Pakistan üáµüá∞',       10176, '', '', np.nan, np.nan, 0, 212.2],
        ['Hungary üá≠üá∫',         9667, '', '', np.nan, np.nan, 0, 9.8],
        ['Philippines üáµüá≠',     9248, '', '', np.nan, np.nan, 0, 106.7],
        ['Bolivia üáßüá¥',         9165, 'NYT', 'Sep', 25600,7965, 0, 11.4],
       ]

df = pd.DataFrame(data, columns=['Country', 'Covid19 deaths, now', 'Source',
                                 'Mortality data until', 'Excess deaths', 
                                 'Covid19 deaths', 'Underreporting (excess deaths / covid19 deaths)', 
                                 'Population (mln)'])

df['Underreporting (excess deaths / covid19 deaths)'] = df['Excess deaths'] / df['Covid19 deaths']
df['Excess deaths per million'] = df['Excess deaths'] / df['Population (mln)']
df.index += 1

df['Estimated deaths'] = np.maximum(df['Underreporting (excess deaths / covid19 deaths)'],1) * df['Covid19 deaths, now']
df['Estimated deaths per 100,000'] = df['Estimated deaths'] / df['Population (mln)'] / 10

df.style.format({'Underreporting (excess deaths / covid19 deaths)': lambda x: f'{x:.1f}' if ~np.isnan(x) else '', 
                 'Excess deaths':  lambda x: f'{1000*(x/1000).round():,.0f}' if ~np.isnan(x) else '',
                 'Covid19 deaths': lambda x: f'{x:,.0f}' if ~np.isnan(x) else '',
                 'Excess deaths per million': 
                                   lambda x: f'{10*(x/10).round():,.0f}' if ~np.isnan(x) else '',
                 'Population (mln)': '{:.1f}',
                 'Estimated deaths': 
                                   lambda x: f'{1000*(x/1000).round():,.0f}' if ~np.isnan(x) else '',
                 'Estimated deaths per 100,000': 
                                   lambda x: f'{np.round(x):,.0f}' if ~np.isnan(x) else '',
                 'Covid19 deaths, now': '{:,.0f}'}).set_caption(
    'Excess mortality estimates').hide_columns(
    ['Population (mln)','Excess deaths per million','Mortality data until','Excess deaths','Covid19 deaths']).set_table_styles(
    [dict(selector='th', props=[('max-width', '180px')])])

Unnamed: 0,Country,"Covid19 deaths, now",Source,Underreporting (excess deaths / covid19 deaths),Estimated deaths,"Estimated deaths per 100,000"
1,United States üá∫üá∏,349397,Ec,1.3,451000.0,137.0
2,Brazil üáßüá∑,194976,Ec,1.1,213000.0,101.0
3,India üáÆüá≥,148994,,,,
4,Mexico üá≤üáΩ,125807,Ec,2.5,313000.0,248.0
5,Italy üáÆüáπ,74159,Ec,1.7,128000.0,211.0
6,United Kingdom üá¨üáß,74125,Ec,1.5,114000.0,171.0
7,France üá´üá∑,64632,Ec,1.1,68000.0,102.0
8,Russia üá∑üá∫,57555,DK,6.5,376000.0,256.0
9,Iran üáÆüá∑,55337,Ghafari et al.,2.4,134000.0,164.0
10,Spain üá™üá∏,50837,Ec,1.4,71000.0,152.0


# Figures for the paper

In [18]:
fig = plt.figure(figsize=(8*.65, 4.5*.65))

r=0
plt.plot(deaths[r, -6:-2, :].T, linewidth=.75, color='#aaaaaa', label='')
plt.plot(deaths[r, -2, :].T, linewidth=.75, color='#aaaaaa', label='2015‚Äì2019')
plt.plot(deaths_model[r,:], 'k', linewidth=1.5, label='Projection based on 2006‚Äì2019')
plt.plot(deaths[r, -1, :], 'r.-', linewidth=1.5, label='2020')
plt.legend(frameon=False)

a = 3
b = np.where(np.isnan(deaths[r,-1,:]))[0][0]
poly = np.zeros(((b-a)*2, 2))
poly[:,0] = np.concatenate((np.arange(a,b), np.arange(b-1,a-1,-1)))
poly[:,1] = np.concatenate((deaths[r,-1,a:b], deaths_model[r,a:b][::-1]))
poly = Polygon(poly, facecolor='r', edgecolor='r', alpha=.4, zorder=5)
plt.gca().add_patch(poly)

plt.text(6.5,90000,f'{np.round(total[0]/100)*100:,.0f}\nexcess deaths\n(April‚ÄìNovember)',
         color='r', ha='center')

plt.ylabel('Number of deaths per month')
plt.gca().get_yaxis().set_major_formatter(
    matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
plt.xticks(np.arange(12), months_eng_short)
plt.ylim([0,225000])

sns.despine()
plt.tight_layout()

plt.savefig('img/fig1.png', dpi=200)
plt.savefig('img/fig1.pdf')

<IPython.core.display.Javascript object>

In [19]:
r = total/(official_deaths+.0001)
r[official_deaths==0] = 0
ind = np.argsort(r)
for i in ind[::-1]:
    if r[i]>20:
        print(f'{regions[i]:25} - {r[i]:.1f}')

–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ë–∞—à–∫–æ—Ä—Ç–æ—Å—Ç–∞–Ω   - 114.8
–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω      - 68.5
–°–∞—Ö–∞–ª–∏–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å       - 67.5
–ß–µ—á–µ–Ω—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞      - 33.8
–ë–µ–ª–≥–æ—Ä–æ–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å      - 33.6
–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ú–æ—Ä–¥–æ–≤–∏—è       - 28.1
–ß–µ–ª—è–±–∏–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å       - 27.8
–õ–∏–ø–µ—Ü–∫–∞—è –æ–±–ª–∞—Å—Ç—å          - 27.3
–Ø—Ä–æ—Å–ª–∞–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å       - 27.0
–†—è–∑–∞–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å         - 26.1
–õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å     - 25.0
–¢–∞–º–±–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å        - 24.9
–û—Ä–µ–Ω–±—É—Ä–≥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å      - 23.8
–°–∞–º–∞—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å         - 21.0
–°–∞—Ä–∞—Ç–æ–≤—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å       - 20.6


In [20]:
r = yearratio
ind = np.argsort(r)
for i in ind[::-1]:
    if r[i]>0.2:
        print(f'{regions[i]:25} - {r[i]:.2f}')

–ß–µ—á–µ–Ω—Å–∫–∞—è –†–µ—Å–ø—É–±–ª–∏–∫–∞      - 0.37
–†–µ—Å–ø—É–±–ª–∏–∫–∞ –î–∞–≥–µ—Å—Ç–∞–Ω       - 0.30
–Ø–º–∞–ª–æ-H–µ–Ω–µ—Ü–∫–∏–π –ê–û         - 0.27
–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ò–Ω–≥—É—à–µ—Ç–∏—è      - 0.27
–†–µ—Å–ø—É–±–ª–∏–∫–∞ –¢–∞—Ç–∞—Ä—Å—Ç–∞–Ω      - 0.22
–°–∞–º–∞—Ä—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å         - 0.22
–û—Ä–µ–Ω–±—É—Ä–≥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å      - 0.21
–ü–µ–Ω–∑–µ–Ω—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å        - 0.20
–û–º—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å            - 0.20
–õ–∏–ø–µ—Ü–∫–∞—è –æ–±–ª–∞—Å—Ç—å          - 0.20


In [21]:
r = 0

plt.figure(figsize=(8/2,4.5/2))
plt.plot(np.arange(2006,2020), yearly[r][:-1],'k.')

plt.plot(np.arange(2006,2010), yearly[0][:4], 'k-', zorder=-1, lw=.5)
plt.plot(np.arange(2011,2015), yearly[0][5:-6], 'k-', zorder=-1, lw=.5)
plt.plot(np.arange(2015,2020), yearly[0][-6:-1], 'k-', zorder=-1, lw=.5)

plt.plot([2009,2011], [yearly[0][3],yearly[0][5]], 'k:', zorder=-1, lw=.75)

excl = ['–†–æ—Å—Å–∏–π—Å–∫–∞—è –§–µ–¥–µ—Ä–∞—Ü–∏—è', '–†–µ—Å–ø—É–±–ª–∏–∫–∞ –ö—Ä—ã–º', '–°–µ–≤–∞—Å—Ç–æ–ø–æ–ª—å']
plt.plot(np.arange(2014,2020), 
         np.nansum(yearly[[r not in excl for r in regions],:], axis=0)[-7:-1], 
         'k:', zorder=-1, lw=.75)

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

xs = np.linspace(2015,2020,1000).reshape(-1,1)
model = make_pipeline(PolynomialFeatures(1), LinearRegression())
ys = model.fit(np.arange(2015,2020).reshape(-1,1), yearly[r][-6:-1]).predict(xs)
plt.plot(xs[-1], ys[-1], 'r.', lw=.5)
plt.plot(xs, ys, 'r', lw=.5, zorder=-1)

X = np.concatenate((np.ones((5,1)), np.arange(2015,2020)[:,np.newaxis]), axis=1)
y = yearly[r,-6:-1][:,np.newaxis]
b = np.linalg.inv(X.T @ X) @ X.T @ y
yhat = X @ b
sigma2 = np.sum((y-yhat)**2) / (5-2)
x = np.array([[1], [2020]])
yhat = b.T @ x
predictive_var = sigma2 * (1 + x.T @ np.linalg.inv(X.T @ X) @ x)
predictive_std = np.sqrt(predictive_var)[0][0]

plt.plot([xs[-1],xs[-1]], [ys[-1]-predictive_std, ys[-1]+predictive_std], 'r', lw=.5)

plt.xticks([2005,2010,2015,2020])
plt.gca().get_yaxis().set_major_formatter(
    matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
plt.ylabel('Average monthly deaths')
sns.despine()
plt.tight_layout()

plt.text(2009.5,172000,'2010\nheat wave', fontsize=9)
plt.text(2017,155000,'with\nCrimea', fontsize=9)
plt.text(2014,145000,'without\nCrimea', fontsize=9)
plt.text(2020.2,150000,'projection', fontsize=9, color='r', rotation=90)

plt.savefig('img/box.png', dpi=200)
plt.savefig('img/box.pdf')

<IPython.core.display.Javascript object>

In [22]:
fig = plt.figure(figsize=(8,4.5))
ax1 = plt.axes([.2,.085,.8,.39])
ax2 = plt.axes([0,.5,.5,.5])

<IPython.core.display.Javascript object>

In [23]:
import matplotlib.image as mpimg
rusmap = mpimg.imread('russian-data/map.png')

plt.sca(ax2)
plt.imshow(rusmap)

peak = np.zeros(regions.size)
for i in range(peak.size):
    f = np.where(ratio[i]>0.25)[0]
    if f.size>0:
        peak[i] = f[0]
        
print(f'Minimum max-ratio across all regions: {np.min(np.nanmax(ratio,axis=1)):.2f}')
        
mycolors = matplotlib.colors.ListedColormap(np.array(plt.get_cmap('tab10').colors)[[0,1,2,4,5,6,3,7,8,9]])

for r in range(1,regions.size):
    area = total[r]/50
    if peak[r]>0:
        plt.scatter(coordinates[regions[r]][0],
                coordinates[regions[r]][1],
                s = area, c = peak[r]-4, edgecolor='none', 
                vmin=0, vmax=10, alpha=.8, cmap=mycolors)
    else:
        plt.scatter(coordinates[regions[r]][0],
                coordinates[regions[r]][1],
                s = area, c = '#aaaaaa', edgecolor='none', alpha=.8)
        
plt.plot([coordinates['–ú–æ—Å–∫–≤–∞'][0],214],
         [coordinates['–ú–æ—Å–∫–≤–∞'][1],505], 
         linewidth=.75, color=plt.get_cmap('tab10')(0))

plt.text(100, 50, 'First wave:', color='k', fontsize=8)
plt.text(150,  120, 'May', color=mycolors(0), fontsize=8)
plt.text(150,  180, 'Jun', color=mycolors(1), fontsize=8)
plt.text(150,  240, 'Jul', color=mycolors(2), fontsize=8)
plt.text(300, 120, 'Aug', color=mycolors(3), fontsize=8)
plt.text(300, 180, 'Sep', color=mycolors(4), fontsize=8)
plt.text(300, 240, 'Oct', color=mycolors(5), fontsize=8)
plt.text(450, 120, 'Nov', color=mycolors(6), fontsize=8)
# plt.text(450, 180, 'Dec', color=mycolors(7), fontsize=8)

plt.scatter(.45, .95, transform=plt.gca().transAxes, s=10000/50,
            c='#aaaaaa', edgecolor='none', alpha=.8)
plt.scatter(.56, .95, transform=plt.gca().transAxes, s=5000/50,
            c='#aaaaaa', edgecolor='none', alpha=.8)
plt.scatter(.65, .95, transform=plt.gca().transAxes, s=1000/50,
            c='#aaaaaa', edgecolor='none', alpha=.8)
plt.text(.45, .85, '10,000', transform=plt.gca().transAxes, c='#aaaaaa', 
         fontsize=7, ha='center')
plt.text(.56, .85, '5,000', transform=plt.gca().transAxes, c='#aaaaaa', 
         fontsize=7, ha='center')
plt.text(.65, .85, '1,000', transform=plt.gca().transAxes, c='#aaaaaa', 
         fontsize=7, ha='center')
    
plt.xticks([])
plt.yticks([])
sns.despine(ax=ax2, left=True, bottom=True)

Minimum max-ratio across all regions: 0.27


In [24]:
n=15
ind = np.argsort(total[1:])[::-1][:n]
y1=np.sum(covid_deaths[1:,:,0], axis=1)[ind]
y2=np.sum(covid_deaths[1:,:,1], axis=1)[ind]
y3=np.sum(covid_deaths[1:,:,2], axis=1)[ind]
y4=np.sum(covid_deaths[1:,:,3], axis=1)[ind]
y5=total[1:][ind]

plt.sca(ax1)
plt.barh(-np.arange(ind.size), y1, color='black', label='–°ovid-19 confirmed')
plt.barh(-np.arange(ind.size), y2, left=y1, color='#777777', label='–°ovid-19 suspected')
plt.barh(-np.arange(ind.size), y3, left=y1+y2, color='#999999', label='–°ovid-19 contributed')
plt.barh(-np.arange(ind.size), y4, left=y1+y2+y3, color='#bbbbbb', label='–°ovid-19 not contributed')
plt.scatter(y5, -np.arange(ind.size), s=8, color='red', label='Excess mortality',
                clip_on=False, zorder=4)
plt.scatter(official_deaths[1:][ind], -np.arange(ind.size), s=8, color='orange', 
                label='Daily reported\nCovid-19 deaths', clip_on=False, zorder=3)

for i,r in enumerate(ind):
    plt.text(y5[i]+200, -i-.4, f'{np.round(y5[i]/100)*100:,.0f}', color='red', fontsize=8)
    plt.text(np.max(y5)+4000, -i-.4, f'{y5[i]/official_deaths[1:][ind][i]:.1f}', 
             color='k', fontsize=8, ha='right').set_clip_on(True)
    plt.text(np.max(y5)+6500, -i-.4, f'{np.round(total_permln[1:][ind][i]/10):.0f}', 
             color='k', fontsize=8, ha='right').set_clip_on(True)
    plt.text(np.max(y5)+9500, -i-.4, f'{yearratio[1:][ind][i]*100:.0f}%', 
             color='k', fontsize=8, ha='right').set_clip_on(True)

plt.text(np.max(y5)+3000, 1, 'Under-\nreporting', fontsize=8, ha='center')
plt.text(np.max(y5)+6000, 1, 'Deaths\nper 100k', fontsize=8, ha='center')
plt.text(np.max(y5)+9000, 1, 'Yearly\nincrease', fontsize=8, ha='center')

plt.xlim([0,np.max(y5)+11000])
plt.xticks(np.arange(0,25001,2500), fontsize=8)
plt.gca().get_xaxis().set_major_formatter(
    matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
plt.ylim([-ind.size,1])
plt.yticks(-np.arange(ind.size), [regionsToEnglish[r] for r in regions[1:][ind]], fontsize=8)
plt.legend(loc=[.4,.1], fontsize=8)
plt.xlabel('Number of deaths (April‚ÄìNovember)', fontsize=8)

sns.despine(ax=ax1)

In [25]:
for i,r in enumerate(ind+1):
    ax = plt.axes([.52+(i%5)*.099, 1-(1+np.floor(i/5))*.15, .08, .1])
                  
    plt.plot(deaths[r, -6:-1, :].T, color='#aaaaaa', linewidth=.5, clip_on=False)
    plt.plot(deaths_model[r,:], 'k', linewidth=1, clip_on=False)
    plt.plot(deaths[r, -1, :], 'r.-', linewidth=1, markersize=3, clip_on=False)
    
    a = 3
    b = np.where(np.isnan(deaths[r,-1,:]))[0][0]
    poly = np.zeros(((b-a)*2, 2))
    poly[:,0] = np.concatenate((np.arange(a,b), np.arange(b-1,a-1,-1)))
    poly[:,1] = np.concatenate((deaths[r,-1,a:b], deaths_model[r,a:b][::-1]))
    poly = Polygon(poly, facecolor='r', edgecolor='r', alpha=.4, zorder=5)
    plt.gca().add_patch(poly)
    
    plt.xticks([])
    ym = np.mean(deaths_model[r,])
    plt.ylim([0, ym*1.75])
    plt.xlim([-0.5,11.5])    
    if i>0:
        plt.yticks([])
    else:
        plt.yticks([0,ym/2,ym,ym*1.5],['0','50%','100%','150%'], fontsize=8)
    
    toll = np.round(total[r]/100)*100
    plt.text(.05, .1, f'{toll:,.0f}', color='r', fontsize=8,
                 transform=plt.gca().transAxes)
    
    plt.title(regionsToEnglishShort[regions[r]], fontsize=7)
    sns.despine(ax=ax, left=i>0)

In [26]:
fig.text(0, 1,'(a)',fontsize=10,color='k',va='top')
fig.text(.47,1,'(b)',fontsize=10,color='k',va='top')
fig.text(0,.5,'(c)',fontsize=10,color='k',va='top')

plt.savefig('img/regions.png', dpi=200)
plt.savefig('img/regions.pdf', dpi=300)

In [27]:
fig = plt.figure(figsize=(8, 3.5))

n = 20

plt.barh(-np.arange(n), df[:n]['Covid19 deaths, now'], label='Daily reported –°ovid-19 deaths')
delta = df[:n]['Estimated deaths'] - df[:n]['Covid19 deaths, now']
delta[delta<0] = np.nan
plt.barh(-np.arange(n), delta, left=df[:n]['Covid19 deaths, now']+2000, color='#d52323',
         label='Estimated excess deaths')
         
plt.yticks(-np.arange(n), [a[:-2] for a in df[:n]['Country']])
plt.xlabel('Number of deaths as of 1 January 2021')

for i in np.where(delta==0)[0]:
    plt.barh(-i, 2000, left=np.array(df[:n]['Covid19 deaths, now'])[i]-2000, color='k')
for i in np.where(np.isnan(delta))[0]:
    plt.text(np.array(df[:n]['Covid19 deaths, now'])[i] + 5000, -i-.3, 'n/a', color='r', fontsize=9)
for i in range(n):
    if np.isnan(df['Estimated deaths'].values[i]):
        continue
    plt.text(df['Estimated deaths'].values[i] + 5000, -i-.3, 
             f"{df['Underreporting (excess deaths / covid19 deaths)'].values[i]:.1f} [{df['Source'].values[i]}]",
             color='k', fontsize=9)
    plt.text(540000, -i-.3, 
             f"{df['Estimated deaths per 100,000'].values[i]:.0f}",
             color='#d52323', ha='right')

plt.gca().tick_params(axis='y', length=0)

plt.gca().annotate('Undercount:\nexcess deaths /\ndaily reported\n–°ovid-19 deaths\n[Source]',
                   xy=(400000, -7.5), xycoords='data',
                   xytext=(410000, -14), textcoords='data',
                   arrowprops=dict(arrowstyle="->", connectionstyle="arc3"),
                   fontsize=9)

plt.text(550000,-n/2+.7,'Estimated excess deaths per 100,000', rotation=90, va='center')

plt.ylim([-n+.3,.7])
plt.xlim([0,480000])
plt.xticks(np.arange(0,400001,100000))
plt.gca().get_xaxis().set_major_formatter(
    matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))

sns.despine()
plt.tight_layout()

plt.legend(loc=(.4,.05), fontsize=9)

plt.savefig('img/countries.png', dpi=200)
plt.savefig('img/countries.pdf')

<IPython.core.display.Javascript object>