It is found that a few countries, that performed well in 2020 and 2021, are not doing well in 2022 (have more deaths in 2022, and even worse than many other countries that used to performed very poor. Can you demonstrate this situation based on Excess Deaths in the 3 years (2020, 2021, 2022)? 

In [109]:
import random
import pandas as pd
import numpy as np
import math
import json
import matplotlib.pyplot as plt
from pandas import Timestamp
from datetime import datetime
from time import time
from os import getcwd
from os.path import join
%matplotlib inline


## Load Data

In [110]:
path = join(getcwd().rstrip('src'),
            'data/world_mortality.csv').replace('\\', '/')
DF = pd.read_csv(path)
# DF = pd.read_csv('~/AI/DATA/BigData/DeathBirthRate/world_mortality2015-20220214.csv')
DF.rename(columns={'country_name': 'country'}, inplace=True)
print(DF.head(10))


  iso3c  country  year  time time_unit  deaths
0   ALB  Albania  2015     1   monthly  2490.0
1   ALB  Albania  2015     2   monthly  2139.0
2   ALB  Albania  2015     3   monthly  2051.0
3   ALB  Albania  2015     4   monthly  1906.0
4   ALB  Albania  2015     5   monthly  1709.0
5   ALB  Albania  2015     6   monthly  1561.0
6   ALB  Albania  2015     7   monthly  2008.0
7   ALB  Albania  2015     8   monthly  1687.0
8   ALB  Albania  2015     9   monthly  1569.0
9   ALB  Albania  2015    10   monthly  1560.0


## Get progress last week

In [111]:
DF_Regular = DF[DF.year < 2020]
DF_2020 = DF[(DF.year == 2020)]
DF_2021 = DF[(DF.year == 2021)]
DF_2022 = DF[(DF.year == 2022)]

### 2015-2019 Data

In [112]:
Regular = DF_Regular.groupby('country').mean().reset_index()
Regular['AverageAnnualUnitCount'] = round(Regular.time * 2 - 1, 0)
Regular.head()

Unnamed: 0,country,year,time,deaths,AverageAnnualUnitCount
0,Albania,2017.0,6.5,1829.65,12.0
1,Algeria,2018.5,6.5,14940.804167,12.0
2,Andorra,2017.0,6.5,25.85,12.0
3,Antigua and Barbuda,2017.0,6.5,47.55,12.0
4,Argentina,2017.0,6.5,28449.05,12.0


In [113]:
import copy as copy
tmp = copy.copy(DF)
time_unit_dict = {'monthly': 12, 'weekly': 52}
tmp['AnnualUnitCount'] = [time_unit_dict[x] for x in tmp.time_unit]
tmp = tmp.groupby('country').mean().reset_index()[
    ['country', 'AnnualUnitCount']]
tmp.head()


Unnamed: 0,country,AnnualUnitCount
0,Albania,12.0
1,Algeria,12.0
2,Andorra,12.0
3,Antigua and Barbuda,12.0
4,Argentina,12.0


In [114]:
Regular = pd.merge(Regular, tmp, on='country')
Regular['deaths'] = Regular.deaths * Regular.AnnualUnitCount
Regular = Regular[['country', 'deaths', 'AverageAnnualUnitCount']]
Regular.head()


Unnamed: 0,country,deaths,AverageAnnualUnitCount
0,Albania,21955.8,12.0
1,Algeria,179289.65,12.0
2,Andorra,310.2,12.0
3,Antigua and Barbuda,570.6,12.0
4,Argentina,341388.6,12.0


### 2020 Data

In [115]:
DF_2020 = DF_2020.groupby('country').mean().reset_index()
DF_2020['AverageAnnualUnitCount'] = round(DF_2020.time * 2 - 1, 0)
DF_2020 = pd.merge(DF_2020, tmp, on='country')
DF_2020['deaths'] = DF_2020.deaths * DF_2020.AnnualUnitCount
DF_2020 = DF_2020[['country', 'deaths', 'AverageAnnualUnitCount']]
DF_2020.head()


Unnamed: 0,country,deaths,AverageAnnualUnitCount
0,Albania,27605.0,12.0
1,Algeria,235628.0,12.0
2,Andorra,419.0,12.0
3,Antigua and Barbuda,574.0,12.0
4,Argentina,376221.0,12.0


### 2021 Data

In [116]:
DF_2021 = DF_2021.groupby('country').mean().reset_index()
DF_2021['AverageAnnualUnitCount'] = round(DF_2021.time * 2 - 1, 0)
DF_2021 = pd.merge(DF_2021, tmp, on='country')
DF_2021['deaths'] = DF_2021.deaths * DF_2021.AnnualUnitCount
DF_2021 = DF_2021[['country', 'deaths', 'AverageAnnualUnitCount']]
DF_2021.head()


Unnamed: 0,country,deaths,AverageAnnualUnitCount
0,Albania,30580.0,12.0
1,Antigua and Barbuda,649.0,12.0
2,Armenia,34638.0,12.0
3,Aruba,964.0,12.0
4,Australia,170960.0,52.0


### 2022 Data

In [117]:
DF_2022 = DF_2022.groupby('country').mean().reset_index()
DF_2022['AverageAnnualUnitCount'] = round(DF_2022.time * 2 - 1, 0)
DF_2022 = pd.merge(DF_2022, tmp, on='country')
DF_2022['deaths'] = DF_2022.deaths * DF_2022.AnnualUnitCount
DF_2022 = DF_2022[['country', 'deaths', 'AverageAnnualUnitCount']]
DF_2022.head()


Unnamed: 0,country,deaths,AverageAnnualUnitCount
0,Albania,25708.0,6.0
1,Armenia,27462.666667,9.0
2,Australia,190732.533333,30.0
3,Austria,89276.571429,42.0
4,Azerbaijan,62637.0,8.0


### Merge Data

#### Death Count Year

In [118]:
newDF = pd.concat([Regular, DF_2020[['deaths']], DF_2021[['deaths']], DF_2022[['deaths']]], axis=1)
newDF.columns = ['country', 'RegularDeaths', 'RegularAverageAnnualUnitCount', '2020Deaths', '2021Deaths', '2022Deaths']
newDF = newDF.reindex(['country', 'RegularAverageAnnualUnitCount', 'RegularDeaths',
              '2020Deaths', '2021Deaths', '2022Deaths'], axis='columns')
newDF.head()

Unnamed: 0,country,RegularAverageAnnualUnitCount,RegularDeaths,2020Deaths,2021Deaths,2022Deaths
0,Albania,12.0,21955.8,27605.0,30580.0,25708.0
1,Algeria,12.0,179289.65,235628.0,649.0,27462.666667
2,Andorra,12.0,310.2,419.0,34638.0,190732.533333
3,Antigua and Barbuda,12.0,570.6,574.0,964.0,89276.571429
4,Argentina,12.0,341388.6,376221.0,170960.0,62637.0


#### Excess Death Per Year

In [119]:
EXD2020 = newDF['2020Deaths'] - newDF['RegularDeaths']
EXD2020.rename('EXD2020', inplace=True)
EXD2021 = newDF['2021Deaths'] - newDF['RegularDeaths']
EXD2021.rename('EXD2021', inplace=True)
EXD2022 = newDF['2022Deaths'] - newDF['RegularDeaths']
EXD2022.rename('EXD2022', inplace=True)
newDF = pd.concat([newDF, EXD2020, EXD2021, EXD2022], axis=1)
newDF.rename(columns={'RegularAverageAnnualUnitCount': 'RAAUC'}, inplace=True)
newDF.head()


Unnamed: 0,country,RAAUC,RegularDeaths,2020Deaths,2021Deaths,2022Deaths,EXD2020,EXD2021,EXD2022
0,Albania,12.0,21955.8,27605.0,30580.0,25708.0,5649.2,8624.2,3752.2
1,Algeria,12.0,179289.65,235628.0,649.0,27462.666667,56338.35,-178640.65,-151826.983333
2,Andorra,12.0,310.2,419.0,34638.0,190732.533333,108.8,34327.8,190422.333333
3,Antigua and Barbuda,12.0,570.6,574.0,964.0,89276.571429,3.4,393.4,88705.971429
4,Argentina,12.0,341388.6,376221.0,170960.0,62637.0,34832.4,-170428.6,-278751.6


#### Merge with OWID Data

In [120]:
path = join(getcwd().rstrip('src'),
            'data/owid-covid-data.csv').replace('\\', '/')
data = pd.read_csv(path)
data = data[['location', 'population']]
data.rename(columns={'location': 'country'}, inplace=True)
data.groupby('country').count().sort_values(by='population', ascending=False)
data.drop_duplicates(subset=['country'], inplace=True)
data.reset_index(drop=True, inplace=True)
data.head()


Unnamed: 0,country,population
0,Afghanistan,40099460.0
1,Africa,1392394000.0
2,Albania,2854710.0
3,Algeria,44177970.0
4,Andorra,79034.0


In [121]:
con1 = list(data.country)
con2 = list(newDF.country)
pop = []
for c in con2:
    if c in con1:
        pop.append(data[data.country == c].population.values[0])
    else:
        pop.append(0)
pop = pd.Series(pop, name='population')
newDF = pd.concat([newDF, pop], axis=1)
newDF.head()


Unnamed: 0,country,RAAUC,RegularDeaths,2020Deaths,2021Deaths,2022Deaths,EXD2020,EXD2021,EXD2022,population
0,Albania,12.0,21955.8,27605.0,30580.0,25708.0,5649.2,8624.2,3752.2,2854710.0
1,Algeria,12.0,179289.65,235628.0,649.0,27462.666667,56338.35,-178640.65,-151826.983333,44177969.0
2,Andorra,12.0,310.2,419.0,34638.0,190732.533333,108.8,34327.8,190422.333333,79034.0
3,Antigua and Barbuda,12.0,570.6,574.0,964.0,89276.571429,3.4,393.4,88705.971429,93220.0
4,Argentina,12.0,341388.6,376221.0,170960.0,62637.0,34832.4,-170428.6,-278751.6,45276780.0


#### Excess Death in Percentage

In [122]:
rdp = newDF.RegularDeaths / newDF.population
rdp.name = 'RDPM' # RDPM = Regular Deaths Per Million
irdp2020 = newDF.EXD2020 / newDF.population
irdp2020.name = 'IRDPM2020' # IRDPM = Irregular Deaths Per Million in 2020
irdp2021 = newDF.EXD2021 / newDF.population
irdp2021.name = 'IRDPM2021' # IRDPM = Irregular Deaths Per Million in 2021
irdp2022 = newDF.EXD2022 / newDF.population
irdp2022.name = 'IRDPM2022' # IRDPM = Irregular Deaths Per Million in 2022
edp2020 = newDF['2020Deaths'] / newDF.population
edp2020.name = 'EDPM2020' # EDPM = Expected Deaths Per Million in 2020
edp2021 = newDF['2021Deaths'] / newDF.population
edp2021.name = 'EDPM2021' # EDPM = Expected Deaths Per Million in 2021
edp2022 = newDF['2022Deaths'] / newDF.population
edp2022.name = 'EDPM2022' # EDPM = Expected Deaths Per Million in 2022
newDF = pd.concat([newDF, rdp, irdp2020, irdp2021, irdp2022, edp2020, edp2021, edp2022], axis=1)
newDF.rename(columns={'2020Deaths': 'Deaths2020'}, inplace=True)
newDF.rename(columns={'2021Deaths': 'Deaths2021'}, inplace=True)
newDF.rename(columns={'2022Deaths': 'Deaths2022'}, inplace=True)
newDF.head()


Unnamed: 0,country,RAAUC,RegularDeaths,Deaths2020,Deaths2021,Deaths2022,EXD2020,EXD2021,EXD2022,population,RDPM,IRDPM2020,IRDPM2021,IRDPM2022,EDPM2020,EDPM2021,EDPM2022
0,Albania,12.0,21955.8,27605.0,30580.0,25708.0,5649.2,8624.2,3752.2,2854710.0,0.007691,0.001979,0.003021,0.001314,0.00967,0.010712,0.009005
1,Algeria,12.0,179289.65,235628.0,649.0,27462.666667,56338.35,-178640.65,-151826.983333,44177969.0,0.004058,0.001275,-0.004044,-0.003437,0.005334,1.5e-05,0.000622
2,Andorra,12.0,310.2,419.0,34638.0,190732.533333,108.8,34327.8,190422.333333,79034.0,0.003925,0.001377,0.434342,2.409372,0.005302,0.438267,2.413297
3,Antigua and Barbuda,12.0,570.6,574.0,964.0,89276.571429,3.4,393.4,88705.971429,93220.0,0.006121,3.6e-05,0.00422,0.951577,0.006157,0.010341,0.957698
4,Argentina,12.0,341388.6,376221.0,170960.0,62637.0,34832.4,-170428.6,-278751.6,45276780.0,0.00754,0.000769,-0.003764,-0.006157,0.008309,0.003776,0.001383


In [123]:
newDF.to_csv(join(getcwd().rstrip('src'),
                  'data/bd_w12_hw/COVID_small_flat_table.csv').replace('\\', '/'), sep=';', index=False)


## Demonstrate excess death changes in the range of 2020 to 2022