In [97]:
def compute_confidence_interval(cases, deaths, confidence):
    """Return a normal-approximation confidence interval for the death rate.

    The outcomes are treated as ``cases`` Bernoulli trials of which ``deaths``
    are successes. With ``p = deaths / cases`` the interval is
    ``p +/- z * sqrt(p * (1 - p)) / sqrt(cases)``, where ``z`` is the
    standard-normal quantile at ``(1 + confidence) / 2``.

    The rate is computed in closed form, so the cost does not grow with
    ``cases`` and non-integer counts are accepted.

    Args:
        cases: Number of observed cases.
        deaths: Number of deaths among those cases.
        confidence: Two-sided confidence level, e.g. ``0.95``.

    Returns:
        Tuple ``(low, high)``. The bounds are not clamped to ``[0, 1]``;
        ``(nan, nan)`` is returned when ``cases`` is not positive.
    """
    # No observations: the rate is undefined, so report an undefined interval
    # instead of dividing by zero.
    if cases <= 0:
        return (np.nan, np.nan)

    # Out-of-range counts (deaths < 0 or deaths > cases) saturate at 0 or 1.
    mean = min(max(deaths / cases, 0.0), 1.0)
    # Population standard deviation of a 0/1 sample with success rate `mean`.
    std = np.sqrt(mean * (1.0 - mean))
    z_value = norm.ppf((1 + confidence) / 2)
    half_width = z_value * std / np.sqrt(cases)
    return (mean - half_width, mean + half_width)

def compute_adjusted_cases(cases, deaths, mortality_baseline):
    """Estimate the number of cases implied by a baseline mortality rate.

    When both ``mortality_baseline`` and ``deaths`` are positive, the estimate
    is ``deaths / mortality_baseline``; otherwise the reported ``cases`` value
    is used unchanged. The result is truncated to an ``int`` either way.
    """
    can_extrapolate = mortality_baseline > 0 and deaths > 0
    estimate = deaths / mortality_baseline if can_extrapolate else cases
    return int(estimate)

In [98]:
import pandas as pd
import numpy as np

from scipy.stats import norm

# Read the four per-country CSV snapshots into one DataFrame each.
df_it, df_nl, df_es, df_kr = (
    pd.read_csv(csv_name)
    for csv_name in (
        "Covid2019IT_20200403.csv",
        "Covid2019NL_20200403.csv",
        "Covid2019ES_20200403.csv",
        "Covid2019KR_20200403.csv",
    )
)

In [99]:
# Attach a 95% confidence interval for the observed mortality to every country
# table. Each row's interval is computed once, and both bounds are floored at 0
# because the normal approximation can dip below zero when deaths are few.
for country_df in (df_it, df_nl, df_es, df_kr):
    # result_type='expand' turns each (low, high) tuple into columns 0 and 1.
    interval_bounds = country_df.apply(
        lambda row: compute_confidence_interval(row['Cases'], row['Deaths'], 0.95),
        axis=1,
        result_type='expand',
    )
    country_df['Mortality_low'] = interval_bounds[0].map(lambda bound: max(0, bound))
    country_df['Mortality_high'] = interval_bounds[1].map(lambda bound: max(0, bound))

In [100]:
# Use the df_kr mortality column as the baseline for the other three tables.
# The assignment aligns on the row index, so row i of each table receives
# row i of df_kr['Mortality'].
for target_df in (df_it, df_nl, df_es):
    target_df['Mortality_baseline'] = df_kr['Mortality']

Assuming that in Korea testing was perfect and that the true mortality doesn't depend on the country, let's estimate the actual number of cases from the number of deaths. The method is really shaky in the younger age groups (not many deaths).

In [101]:
# Per age group, derive the case count implied by the baseline mortality
# (falls back to the reported cases inside compute_adjusted_cases).
for country_df in (df_it, df_nl, df_es):
    country_df['Adjusted_cases'] = country_df.apply(
        lambda row: compute_adjusted_cases(
            row['Cases'], row['Deaths'], row['Mortality_baseline']
        ),
        axis=1,
    )

In [102]:
# Display up to the first 10 rows of df_it, including the derived columns.
df_it.head(10)

Unnamed: 0,Age,Cases,Deaths,Mortality,Overall,Cases per 1000,Mortality_low,Mortality_high,Mortality_baseline,Adjusted_cases
0,Above 80,17759,4923,0.2772,4465708,3.9767,0.270628,0.283795,0.1831,26886
1,70–79,17464,3456,0.1979,5935048,2.9425,0.191984,0.203802,0.0703,49160
2,60–69,16395,1162,0.0709,7391126,2.2182,0.066947,0.074803,0.0172,67558
3,50–59,18678,369,0.0198,9453168,1.9758,0.01776,0.021752,0.0055,67090
4,40–49,12084,89,0.0074,9225165,1.3099,0.005841,0.00889,0.0008,111250
5,30–39,6523,20,0.0031,7100743,0.9186,0.001724,0.004408,0.001,20000
6,20–29,3830,2,0.0005,6135226,0.6243,0.0,0.001246,0.0,3830
7,10–19,766,0,0.0,5740332,0.1334,0.0,0.0,0.0,766
8,0–9,589,0,0.0,5103576,0.1154,0.0,0.0,0.0,589


In [103]:
# Display up to the first 10 rows of df_nl, including the derived columns.
df_nl.head(10)

Unnamed: 0,Age,Cases,Deaths,Mortality,Overall,Cases per 1000,Mortality_low,Mortality_high,Mortality_baseline,Adjusted_cases
0,Above 80,2284,457,0.2001,819669,2.7865,0.18368,0.216495,0.1831,2495
1,70–79,2210,238,0.1077,1526904,1.4474,0.094768,0.120616,0.0703,3385
2,60–69,1670,66,0.0395,2109482,0.7917,0.030177,0.048865,0.0172,3837
3,50–59,1818,10,0.0055,2520370,0.7213,0.002101,0.0089,0.0055,1818
4,40–49,1128,0,0.0,2201959,0.5123,0.0,0.0,0.0008,1128
5,30–39,916,0,0.0,2075858,0.4413,0.0,0.0,0.001,916
6,20–29,677,0,0.0,2106722,0.3214,0.0,0.0,0.0,677
7,10–19,90,0,0.0,1973468,0.0456,0.0,0.0,0.0,90
8,0–9,44,0,0.0,1762690,0.025,0.0,0.0,0.0,44


In [104]:
# Display up to the first 10 rows of df_es, including the derived columns.
df_es.head(10)

Unnamed: 0,Age,Cases,Deaths,Mortality,Overall,Cases per 1000,Mortality_low,Mortality_high,Mortality_baseline,Adjusted_cases
0,Above 80,8893,1965,0.221,2901252,3.0652,0.212337,0.229583,0.1831,10731
1,70–79,9295,914,0.0983,3921750,2.3701,0.092279,0.104386,0.0703,13001
2,60–69,9396,295,0.0314,5200462,1.8068,0.02787,0.034922,0.0172,17151
3,50–59,10108,99,0.0098,6944643,1.4555,0.007874,0.011714,0.0055,18000
4,40–49,8493,43,0.0051,7935505,1.0703,0.003554,0.006572,0.0008,53750
5,30–39,5656,13,0.0023,6158281,0.9184,0.00105,0.003546,0.001,13000
6,20–29,3016,6,0.002,4652133,0.6483,0.000399,0.00358,0.0,3016
7,10–19,393,1,0.0025,4682339,0.0839,0.0,0.007525,0.0,393
8,0–9,234,1,0.0043,4340417,0.0539,0.0,0.012632,0.0,234


In [105]:
# Display up to the first 10 rows of df_kr (the baseline table).
df_kr.head(10)

Unnamed: 0,Age,Cases,Deaths,Mortality,Overall,Cases per 1000,Mortality_low,Mortality_high
0,Above 80,437,80,0.1831,1749770,0.2497,0.146808,0.219324
1,70–79,640,45,0.0703,3444643,0.1858,0.050504,0.090121
2,60–69,1218,21,0.0172,6135717,0.1985,0.009931,0.024552
3,50–59,1812,10,0.0055,8442921,0.2146,0.002108,0.00893
4,40–49,1297,1,0.0008,8330006,0.1557,0.0,0.002282
5,30–39,1002,1,0.001,7196849,0.1392,0.0,0.002953
6,20–29,2630,0,0.0,6797905,0.3869,0.0,0.0
7,10–19,513,0,0.0,4886624,0.105,0.0,0.0
8,0–9,112,0,0.0,4240885,0.0264,0.0,0.0


Assuming that in Korea testing was perfect in the 80+ group, that the death rate is the same in all countries, and that the infection rate is the same in every age group, we can extrapolate the 80+ infection ratio to the whole population (it seems that for old people the disease is much more severe and will always be symptomatic).

In [107]:
# Share of the population estimated to be infected, taken from row 0 of each
# table (the "Above 80" age group): adjusted cases divided by group size.
true_ratio_it = df_it.loc[0, 'Adjusted_cases'] / df_it.loc[0, 'Overall']
true_ratio_nl = df_nl.loc[0, 'Adjusted_cases'] / df_nl.loc[0, 'Overall']
true_ratio_es = df_es.loc[0, 'Adjusted_cases'] / df_es.loc[0, 'Overall']

In [108]:
# Three totals for df_it: reported cases, the whole population scaled by
# true_ratio_it, and the sum of the per-age-group adjusted cases.
sum(df_it['Cases']), int(sum(df_it['Overall']) * true_ratio_it), sum(df_it['Adjusted_cases'])

(94088, 364544, 347129)

In [109]:
# Three totals for df_nl: reported cases, the whole population scaled by
# true_ratio_nl, and the sum of the per-age-group adjusted cases.
sum(df_nl['Cases']), int(sum(df_nl['Overall']) * true_ratio_nl), sum(df_nl['Adjusted_cases'])

(10837, 52042, 14390)

In [110]:
# Three totals for df_es: reported cases, the whole population scaled by
# true_ratio_es, and the sum of the per-age-group adjusted cases.
sum(df_es['Cases']), int(sum(df_es['Overall']) * true_ratio_es), sum(df_es['Adjusted_cases'])

(55484, 172867, 129276)