It is found that a few countries, that performed well in 2020 and 2021, are not doing well in 2022 (have more deaths in 2022, and even worse than many other countries that used to performed very poor. Can you demonstrate this situation based on Excess Deaths in the 3 years (2020, 2021, 2022)? 

In [81]:
import random
import pandas as pd
import numpy as np
import math
import json
import matplotlib.pyplot as plt
from pandas import Timestamp
from datetime import datetime
from time import time
from os import getcwd
from os.path import join
%matplotlib inline


## Load Data

In [82]:
path = join(getcwd().rstrip('src'),
            'data/world_mortality.csv').replace('\\', '/')
DF = pd.read_csv(path)
# DF = pd.read_csv('~/AI/DATA/BigData/DeathBirthRate/world_mortality2015-20220214.csv')
DF.rename(columns={'country_name': 'country'}, inplace=True)
print(DF.head(10))


  iso3c  country  year  time time_unit  deaths
0   ALB  Albania  2015     1   monthly  2490.0
1   ALB  Albania  2015     2   monthly  2139.0
2   ALB  Albania  2015     3   monthly  2051.0
3   ALB  Albania  2015     4   monthly  1906.0
4   ALB  Albania  2015     5   monthly  1709.0
5   ALB  Albania  2015     6   monthly  1561.0
6   ALB  Albania  2015     7   monthly  2008.0
7   ALB  Albania  2015     8   monthly  1687.0
8   ALB  Albania  2015     9   monthly  1569.0
9   ALB  Albania  2015    10   monthly  1560.0


## Get progress last week

In [83]:
DF_Regular = DF[DF.year < 2020]
DF_2020 = DF[(DF.year == 2020)]
DF_2021 = DF[(DF.year == 2021)]
DF_2022 = DF[(DF.year == 2022)]

### 2015-2019 Data

In [84]:
Regular = DF_Regular.groupby('country').mean().reset_index()
Regular['AverageAnnualUnitCount'] = round(Regular.time * 2 - 1, 0)
Regular.head()

Unnamed: 0,country,year,time,deaths,AverageAnnualUnitCount
0,Albania,2017.0,6.5,1829.65,12.0
1,Algeria,2018.5,6.5,14940.804167,12.0
2,Andorra,2017.0,6.5,25.85,12.0
3,Antigua and Barbuda,2017.0,6.5,47.55,12.0
4,Argentina,2017.0,6.5,28449.05,12.0


In [85]:
import copy as copy
tmp = copy.copy(DF)
time_unit_dict = {'monthly': 12, 'weekly': 52}
tmp['AnnualUnitCount'] = [time_unit_dict[x] for x in tmp.time_unit]
tmp = tmp.groupby('country').mean().reset_index()[
    ['country', 'AnnualUnitCount']]
tmp.head()


Unnamed: 0,country,AnnualUnitCount
0,Albania,12.0
1,Algeria,12.0
2,Andorra,12.0
3,Antigua and Barbuda,12.0
4,Argentina,12.0


In [86]:
Regular = pd.merge(Regular, tmp, on='country')
Regular['deaths'] = Regular.deaths * Regular.AnnualUnitCount
Regular = Regular[['country', 'deaths', 'AverageAnnualUnitCount']]
Regular.head()


Unnamed: 0,country,deaths,AverageAnnualUnitCount
0,Albania,21955.8,12.0
1,Algeria,179289.65,12.0
2,Andorra,310.2,12.0
3,Antigua and Barbuda,570.6,12.0
4,Argentina,341388.6,12.0


### 2020 Data

In [87]:
DF_2020 = DF_2020.groupby('country').mean().reset_index()
DF_2020['AverageAnnualUnitCount'] = round(DF_2020.time * 2 - 1, 0)
DF_2020 = pd.merge(DF_2020, tmp, on='country')
DF_2020['deaths'] = DF_2020.deaths * DF_2020.AnnualUnitCount
DF_2020 = DF_2020[['country', 'deaths', 'AverageAnnualUnitCount']]
DF_2020.head()


Unnamed: 0,country,deaths,AverageAnnualUnitCount
0,Albania,27605.0,12.0
1,Algeria,235628.0,12.0
2,Andorra,419.0,12.0
3,Antigua and Barbuda,574.0,12.0
4,Argentina,376221.0,12.0


### 2021 Data

In [88]:
DF_2021 = DF_2021.groupby('country').mean().reset_index()
DF_2021['AverageAnnualUnitCount'] = round(DF_2021.time * 2 - 1, 0)
DF_2021 = pd.merge(DF_2021, tmp, on='country')
DF_2021['deaths'] = DF_2021.deaths * DF_2021.AnnualUnitCount
DF_2021 = DF_2021[['country', 'deaths', 'AverageAnnualUnitCount']]
DF_2021.head()


Unnamed: 0,country,deaths,AverageAnnualUnitCount
0,Albania,30580.0,12.0
1,Antigua and Barbuda,649.0,12.0
2,Armenia,34638.0,12.0
3,Aruba,964.0,12.0
4,Australia,170960.0,52.0


### 2022 Data

In [89]:
DF_2022 = DF_2022.groupby('country').mean().reset_index()
DF_2022['AverageAnnualUnitCount'] = round(DF_2022.time * 2 - 1, 0)
DF_2022 = pd.merge(DF_2022, tmp, on='country')
DF_2022['deaths'] = DF_2022.deaths * DF_2022.AnnualUnitCount
DF_2022 = DF_2022[['country', 'deaths', 'AverageAnnualUnitCount']]
DF_2022.head()


Unnamed: 0,country,deaths,AverageAnnualUnitCount
0,Albania,25708.0,6.0
1,Armenia,27462.666667,9.0
2,Australia,190732.533333,30.0
3,Austria,89276.571429,42.0
4,Azerbaijan,62637.0,8.0


### Merge Data

In [90]:
newDF = pd.concat([Regular, DF_2020[['deaths']], DF_2021[['deaths']], DF_2022[['deaths']]], axis=1)
newDF.columns = ['country', 'RegularDeaths', 'RegularAverageAnnualUnitCount', '2020Deaths', '2021Deaths', '2022Deaths']
newDF.reindex(columns=['country', 'RegularAverageAnnualUnitCount', 'RegularDeaths',
              '2020Deaths', '2021Deaths', '2022Deaths'])
newDF.head()

Unnamed: 0,country,RegularDeaths,RegularAverageAnnualUnitCount,2020Deaths,2021Deaths,2022Deaths
0,Albania,21955.8,12.0,27605.0,30580.0,25708.0
1,Algeria,179289.65,12.0,235628.0,649.0,27462.666667
2,Andorra,310.2,12.0,419.0,34638.0,190732.533333
3,Antigua and Barbuda,570.6,12.0,574.0,964.0,89276.571429
4,Argentina,341388.6,12.0,376221.0,170960.0,62637.0
