# War fatalities in Russia in 2022 estimated via excess male mortality
## Different model specifications

In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression

In [2]:
df = pd.read_csv('../deaths-by-age-gender-region-year/deaths-by-age-gender-region-year-1990-2022.csv.gz')

df

Unnamed: 0,Region,Year,Age,Gender,Deaths
0,Алтайский край,1990,0-4 лет,f,264
1,Алтайский край,1990,0-4 лет,m,429
2,Алтайский край,1990,10-14 лет,f,28
3,Алтайский край,1990,10-14 лет,m,76
4,Алтайский край,1990,15-19 лет,f,63
...,...,...,...,...,...
112995,Ярославская область,2022,85 и более,m,807
112996,Ярославская область,2022,Всего,f,10264
112997,Ярославская область,2022,Всего,m,9202
112998,Ярославская область,2022,Неизвестно,f,1


In [3]:
# Load HIV deaths

df_hiv = pd.read_excel('../deaths-hiv/deaths-hiv.xlsx', skiprows=1, index_col=0)

hiv_m = df_hiv.values[1:, 1::3].astype(int)
hiv_f = df_hiv.values[1:, 2::3].astype(int)

# Getting values for 0 year old by subtracting the sum over 1+ from the total
hiv_m[0] -= hiv_m[1:].sum(axis=0)
hiv_f[0] -= hiv_f[1:].sum(axis=0)

# Summing over 5-year bands
hiv_m = hiv_m[:-2].reshape(-1, 5, hiv_m.shape[-1]).sum(axis=1)
hiv_f = hiv_f[:-2].reshape(-1, 5, hiv_f.shape[-1]).sum(axis=1)

hiv_years = np.arange(2006, 2023)

In [4]:
region = 'Российская Федерация'

agegroups = ['15-19 лет', '20-24 лет', '25-29 лет',
       '30-34 лет', '35-39 лет', '40-44 лет', '45-49 лет', 
       '50-54 лет', '55-59 лет']

LinReg = LinearRegression()

excess = np.zeros((len(agegroups), 2, 2, 3))
uncert = np.zeros_like(excess)

for h, hiv in enumerate([False, True]):
    for i, age in enumerate(agegroups):
        # Get the data
        male = df[
            (df.Region == region) & 
            (df.Age == age) & 
            (df.Gender == 'm') & 
            (df.Year >= 2006)
        ][['Year', 'Deaths']].values

        female = df[
            (df.Region == region) & 
            (df.Age == age) & 
            (df.Gender == 'f') & 
            (df.Year >= 2006)
        ][['Year', 'Deaths']].values

        if hiv:
            male[:,1] -= hiv_m[3 + i, hiv_years >= 2006]
            female[:,1] -= hiv_f[3 + i, hiv_years >= 2006]

        ratio = male[:,1] / female[:,1]
        x = np.arange(2006, 2023)

        for j, fit_length in enumerate([1, 5, 10]):
            beg = fit_length + 3

            # Perform linear fit and extrapolate        
            LinReg.fit(x[-beg:-3].reshape(-1,1), ratio[-beg:-3].reshape(-1,1))
            yhat_sklearn = LinReg.predict(x[-beg:].reshape(-1,1))
            excess[i, h, 0, j] = male[-1,1] - yhat_sklearn[-1][0] * female[-1,1]
            
            # Compute the uncertainty
            if fit_length > 1:
                X = np.concatenate((x[-beg:].reshape(-1,1), np.ones((beg,1))), axis=1)
                y = ratio[-beg:].reshape(-1,1)
                beta = np.linalg.pinv(X[-beg:-3].T @ X[-beg:-3]) @ X[-beg:-3].T @ y[-beg:-3]
                yhat = X[-beg:] @ beta
                assert(np.allclose(yhat, yhat_sklearn))

                sigma2 = np.sum((y[-beg:-3] - yhat[-beg:-3])**2) / (y.size - 2) # sigma^2 = MSE/(n-p)
                S = np.linalg.pinv(X[-beg:-3].T @ X[-beg:-3])
                predictive_var = sigma2 * X[-1:] @ S @ X[-1:].T + sigma2
                predictive_std = np.sqrt(predictive_var[0][0]) 
                uncert[i, h, 0, j] = predictive_std * female[-1,1]

            # Perform exponential fit and extrapolate
            LinReg.fit(x[-beg:-3].reshape(-1,1), np.log(ratio[-beg:-3] - 1).reshape(-1,1))
            yhat_sklearn = LinReg.predict(x[-beg:].reshape(-1,1))
            yhat_sklearn = np.exp(yhat_sklearn) + 1
            excess[i, h, 1, j] = male[-1,1] - yhat_sklearn[-1][0] * female[-1,1]
            
            # Compute the uncertainty
            if fit_length > 1:
                X = np.concatenate((x[-beg:].reshape(-1,1), np.ones((beg,1))), axis=1)
                y = np.log(ratio[-beg:] - 1).reshape(-1,1)
                beta = np.linalg.pinv(X[-beg:-3].T @ X[-beg:-3]) @ X[-beg:-3].T @ y[-beg:-3]
                yhat = X[-beg:] @ beta
                assert(np.allclose(yhat, np.log(yhat_sklearn - 1)))

                sigma2 = np.sum((y[-beg:-3] - yhat[-beg:-3])**2) / (y.size - 2) # sigma^2 = MSE/(n-p)
                S = np.linalg.pinv(X[-beg:-3].T @ X[-beg:-3])
                predictive_var = sigma2 * X[-1:] @ S @ X[-1:].T + sigma2
                predictive_std = np.sqrt(predictive_var[0][0]) 
                predictive_std = np.max((
                    np.exp(yhat + predictive_std) - np.exp(yhat), 
                    np.exp(yhat) - np.exp(yhat - predictive_std)
                ))
                uncert[i, h, 1, j] = predictive_std * female[-1,1]

print('                               RAW                 |              HIV subtracted') 
print('                 1lin  5lin 10lin  1exp  5exp 10exp  1lin  5lin 10lin  1exp  5exp 10exp') 
print('---------------------------------------------------------------------------------------')

for i in range(len(excess)):
    print(f'{agegroups[i]}:      ', end='')
    for h in range(2):
        for e in range(2):
            for j in range(3):
                print(f'{excess[i,h,e,j]:5.0f} ', end='')
    print(f'\n              +-', end='')
    for h in range(2):
        for e in range(2):
            for j in range(3):
                print(f' {uncert[i,h,e,j]:4.0f} ', end='')
    print('\n')
print('---------------------------------------------------------------------------------------')
print('Total (15--49): ', end='')
for h in range(2):
    for e in range(2):
        for j in range(3):
            print(f'{np.sum(excess[:-2,h,e,j]):5.0f} ', end='')
print(f'\n              +-', end='')
for h in range(2):
    for e in range(2):
        for j in range(3):
            print(f' {np.sqrt(np.sum(uncert[:-2,h,e,j]**2)):4.0f} ', end='')

                               RAW                 |              HIV subtracted
                 1lin  5lin 10lin  1exp  5exp 10exp  1lin  5lin 10lin  1exp  5exp 10exp
---------------------------------------------------------------------------------------
15-19 лет:        329   353   292   329   352   293   331   337   273   331   337   275 
              +-    0   117   129     0   123   132     0   123   132     0   129   135 

20-24 лет:       2610  3069  2681  2610  3022  2681  2438  2868  2406  2438  2828  2416 
              +-    0   266   355     0   289   394     0   329   366     0   354   396 

25-29 лет:       3259  3961  3675  3259  3877  3619  2913  3438  2984  2913  3389  2979 
              +-    0   224   250     0   235   288     0   137   258     0   139   276 

30-34 лет:       3738  4264  5645  3738  4256  5375  3501  3395  4245  3501  3394  4200 
              +-    0   362   555     0   372   642     0   174   419     0   178   444 

35-39 лет:       3437  6047