```
PLEASE NOTE THAT THE BELOW HAS NOT BEEN THOROUGHLY CLEANED; IT REFLECTS THE CALCULATIONS MADE TO INFORM THE MAIN MODEL FUNCTION
```

In [None]:
import pandas as pd
import numpy as np
from collections import OrderedDict

## Helper function

In [None]:
# Regroup various age-group representations into our internal one, and vice versa
def regroup_by_age(
    inp, # first dimension is ages, others dont matter.
    fromAgeSplits, toAgeSplits, maxAge=100., maxAgeWeight = 5.):
    """Redistribute per-age-bin values from one age binning onto another.

    Every source bin's value is shared between the target bins in proportion
    to the number of years of age the two bins have in common, i.e. the value
    is treated as spread evenly over the years covered by its source bin.

    Parameters
    ----------
    inp : array-like
        Values per source age bin. The first axis indexes the source bins;
        any further axes are carried through unchanged. Entries beyond the
        first ``len(fromAgeSplits) + 1`` along the first axis are not used.
    fromAgeSplits, toAgeSplits : array-like
        Interior bin edges (years) of the source / target binning. ``0`` is
        prepended and ``maxAge`` appended to both, so ``k`` splits describe
        ``k + 1`` bins.
    maxAge : float
        Upper edge of the oldest bin of both binnings.
    maxAgeWeight : float
        Effective width (years) assigned to the oldest target bin whenever a
        source bin overlaps it, replacing the geometric overlap. It only
        matters when a source bin straddles the last two target bins; a
        source bin lying entirely in the oldest target bin still ends up
        with weight 1 after normalisation.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(toAgeSplits) + 1,) + inp.shape[1:]``.

    Raises
    ------
    ValueError
        If a source bin overlaps no target bin (zero-width, unsorted or
        beyond ``maxAge``); its value could not be placed anywhere and the
        normalisation would otherwise divide by zero and yield NaN.
    IndexError
        If ``inp`` has fewer entries along its first axis than there are
        source bins.
    """
    inp = np.asarray(inp)  # also accept plain lists / pandas objects

    # Outer edges: 0 at the young end, maxAge at the old end.
    from_edges = np.concatenate(
        [[0.], np.asarray(fromAgeSplits, dtype=float).ravel(), [maxAge]])
    to_edges = np.concatenate(
        [[0.], np.asarray(toAgeSplits, dtype=float).ravel(), [maxAge]])

    # weights[i, j] = years shared by source bin i and target bin j.
    lower = np.maximum(from_edges[:-1, None], to_edges[None, :-1])
    upper = np.minimum(from_edges[1:, None], to_edges[None, 1:])
    weights = np.clip(upper - lower, 0., None)

    # Define the relative number of ages if we have to distribute between
    # the second to last and last age groups.
    reaches_oldest = weights[:, -1] > 0
    weights[reaches_oldest, -1] = maxAgeWeight

    totals = weights.sum(axis=1)
    if np.any(totals == 0):
        raise ValueError(
            "source age bin(s) {} do not overlap any target bin".format(
                np.flatnonzero(totals == 0).tolist()))

    out = np.zeros((len(to_edges) - 1,) + inp.shape[1:])
    trailing_axes = (1,) * (inp.ndim - 1)
    for from_ind, (row, total) in enumerate(zip(weights, totals)):
        # Normalised share of this source bin going to each target bin,
        # shaped so that it broadcasts against the trailing axes of inp.
        share = (row / total).reshape((-1,) + trailing_axes)
        out += share * inp[from_ind]

    return out

# CHESS -  COVID-19 Hospitalisation in England Surveillance System

Aggregate data available on request

In [None]:
# Read the CHESS aggregate report (one row per reporting unit and admission date).
df_UK_CHESSagg = pd.read_csv("~/covidwarwick/data/CHESSAggregateReport.csv")

# Columns 5..65 are the ones summed; every one of them uses the same 'sum' reducer.
chessValueColumns = df_UK_CHESSagg.columns[5:66]
aggDict = dict.fromkeys(chessValueColumns, 'sum')

# Aggregate to national level: collapse all rows sharing a DateOfAdmission.
df_UK_CHESS = (
    df_UK_CHESSagg
    .groupby("DateOfAdmission")
    .agg(aggDict)
)

In [None]:
# Inspect the national-level aggregate (one row per DateOfAdmission value).
df_UK_CHESS

In [None]:
# Bit of data cleaning: per the original author's note the dates in this
# extract are wrong, and the first row holds the sum of every column.
# NOTE(review): the original remark "proper reporting started ..." was left
# unfinished - the start date still needs to be filled in.
# Keep only that first row, as a Series indexed by column name.
df_UK_chess_cumulative_05Apr = df_UK_CHESS.iloc[0]

In [None]:
# Inspect the single row selected above (index = column names of df_UK_CHESS).
df_UK_chess_cumulative_05Apr

In [None]:
# Layout of the cumulative CHESS row: nDataTypes consecutive groups of
# nAgeGroups per-age columns each.
nDataTypes = 6
nAgeGroups = 10 # Note that age ranges are <1, 1-4, 5-14, 15-24 ... 75-84 and 85+, let's make matrices

In [None]:
# Slice the cumulative row into one per-age vector per data type, keyed by the
# column name with its age suffix stripped.
# NOTE(review): the first group is read from columns 1..nAgeGroups while the
# second starts at column nAgeGroups, so that column is read by both groups -
# confirm against the real column order that this is intended.
allVectors = OrderedDict()
for d1 in range(nDataTypes):
    if d1 == 0:
        # First group: starts at column 1, whose name ends in "_1_4".
        firstColumn, ageSuffix = 1, "_1_4"
    else:
        # Later groups: start on a multiple of nAgeGroups, name ends in "_LessThan_1".
        firstColumn, ageSuffix = d1*nAgeGroups, "_LessThan_1"
    groupColumns = range(firstColumn, firstColumn + nAgeGroups)
    groupName = df_UK_chess_cumulative_05Apr.index[firstColumn][:-len(ageSuffix)]
    allVectors[groupName] = df_UK_chess_cumulative_05Apr.values[groupColumns]
    

In [None]:
# List the data-type names recovered from the column names.
allVectors.keys()

In [None]:
# Per-age vector of new hospital admissions with acute respiratory infection
# (taken from the column group of that name).
totalSympAdmitted_byAge = allVectors["NewHospitalAdmissionsWithAcuteRespiratoryInfectionAdmittedDuringThePast24Hours"]
totalSympAdmitted_byAge

In [None]:
# Per-age vector of admitted patients tested for COVID-19.
totalTested_byAge = allVectors["AllAdmittedPatientsTestedForCOVID19"]
totalTested_byAge 

In [None]:
# Per-age vector of admitted patients with new lab-confirmed COVID-19.
totalPositive_byAge = allVectors['AllAdmittedPatientsWithNewLabConfirmedCOVID19']
totalPositive_byAge 

In [None]:
# Element-wise ratio of lab-confirmed to tested patients, per age group.
totalPositive_byAge/totalTested_byAge

In [None]:
# Per-age vector of new ICU/HDU admissions with acute respiratory infection.
icuriskSymp_byAge = allVectors['NewICU_HDUAdmissionsWithAcuteRespiratoryInfection']

In [None]:
# Per-age vector of new lab-confirmed COVID-19 patients on ICU/HDU.
icuriskCovid_byAge = allVectors['NewLabConfirmedCOVID19PatientsOnICU_HDU']

In [None]:
# Inspect the COVID-19 ICU/HDU vector.
icuriskCovid_byAge

In [None]:
# Inspect the acute-respiratory-infection ICU/HDU vector.
icuriskSymp_byAge

In [None]:
# Sanity check: number of entries in the vector (nAgeGroups are sliced per data type).
len(icuriskSymp_byAge)
            
    


In [None]:
# Preview the interior age-bin edges used as fromAgeSplits for the CHESS vectors:
# 1, 5, 15, 25, then every 10 years from 45 to 85.
np.concatenate([np.array([1,5,15,25]),np.arange(45,85+1,10)])

In [None]:
# Both CHESS vectors share the same source binning (edges 1, 5, 15, 25, 45, ..., 85)
# and are mapped onto bins with edges every 10 years from 10 to 80.
chessAgeSplits = np.concatenate([np.array([1,5,15,25]),np.arange(45,85+1,10)])
decadeAgeSplits = np.arange(10,80+1,10)

# Admissions with acute respiratory infection, regrouped.
totalSympAdmitted_byAge_regroup = regroup_by_age(
    totalSympAdmitted_byAge, fromAgeSplits=chessAgeSplits, toAgeSplits=decadeAgeSplits)

# Lab-confirmed COVID-19 admissions, regrouped the same way.
totalCOVIDAdmitted_byAge_regroup = regroup_by_age(
    totalPositive_byAge, fromAgeSplits=chessAgeSplits, toAgeSplits=decadeAgeSplits)

In [None]:
# Population per age group, hard-coded in thousands (values attributed to the
# Imperial College covid19model age data) and scaled to head counts.
# There are 9 entries - presumably one per 10-year band, matching the regrouped vectors.
agePopulationTotal = 1000.*np.array([
    8044.056, 7642.473, 8558.707,
    9295.024, 8604.251, 9173.465,
    7286.777, 5830.635, 3450.616,
])
#agePopulationTotal = 1000.*pd.read_csv("https://raw.githubusercontent.com/ImperialCollegeLondon/covid19model/master/data/ages.csv").iloc[3].values[2:]
# Share of the total population falling in each age group.
agePopulationRatio = agePopulationTotal/agePopulationTotal.sum()


In [None]:


# Admissions per head of population in each age group ...
sympAdmissionsPerCapita = totalSympAdmitted_byAge_regroup/agePopulationTotal
# ... expressed relative to the mean across age groups, minus one
# (0 = average risk, negative = below average).
relativeAdmissionRisk_given_symptoms_by_age = (
    sympAdmissionsPerCapita/np.mean(sympAdmissionsPerCapita) - 1
)

relativeAdmissionRisk_given_symptoms_by_age

In [None]:
# Inspect the regrouped symptomatic admissions.
totalSympAdmitted_byAge_regroup

In [None]:
# Lab-confirmed COVID-19 admissions per head of population, per age group.
totalCOVIDAdmitted_byAge_regroup/agePopulationTotal

In [None]:
# Ratio of two hand-copied values - presumably two of the per-capita admission
# rates printed in the previous cell; TODO confirm which age groups they are.
1.61840668e-03/3.99559366e-04

In [None]:
# Lab-confirmed COVID-19 admissions per head of population in each age group ...
covidAdmissionsPerCapita = totalCOVIDAdmitted_byAge_regroup/agePopulationTotal
# ... expressed relative to the mean across age groups, minus one.
relativeAdmissionRisk_given_COVID_by_age = (
    covidAdmissionsPerCapita/np.mean(covidAdmissionsPerCapita) - 1
)


relativeAdmissionRisk_given_COVID_by_age

In [None]:
# From earlier version, very similar, seems stable, we can keep using it!
# This overwrites the value computed from the current data with a frozen copy
# of an earlier result (9 entries, one per age group).
relativeAdmissionRisk_given_COVID_by_age = np.array([-0.94886625, -0.96332087, -0.86528671, -0.79828999, -0.61535305,
       -0.35214767,  0.12567034,  0.85809052,  3.55950368])

In [None]:
# Above two are quite similar, which makes one confident that testing in hospitals itself is not too biased!
# For now we can trust these computed parameters

# NHS England COVID deaths by age group

In [None]:
# NHS daily deaths report (about 24 hours behind, with ~5-7 days of `unreliable` data)
# TODO manually update link and column numbers (maybe not consistent across days, cannot yet automate)
# NOTE: this call used to pass `skip_rows = range(17)`. read_excel has no such
# parameter (the real one is `skiprows`), so current pandas rejects it with a
# TypeError. It is removed rather than renamed because the rest of the chain
# (nrows = 22, .iloc[14:], set_index("Age group")) relies on the leading rows
# NOT being skipped - presumably older pandas silently ignored the unknown
# keyword; verify against the downloaded sheet.
df_UK_NHS_daily_COVID_deaths = pd.read_excel(
    "https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/2020/04/COVID-19-total-announced-deaths-6-April-2020.xlsx",
    sheet_name = "COVID19 total deaths by age",
    index_col=0,
    usecols = "B,E:AN",
    nrows = 22
).iloc[14:].transpose().set_index("Age group").rename_axis(index = "Date", columns = "AgeGroup")
# After the transpose each row is one date and each column one age group.

df_UK_NHS_daily_COVID_deaths.index = pd.to_datetime(df_UK_NHS_daily_COVID_deaths.index, format="%Y-%m-%d")

df_UK_NHS_daily_COVID_deaths

In [None]:
# Persist the cleaned table without its first two columns.
leadingColumns = df_UK_NHS_daily_COVID_deaths.columns[:2]
deathsByAgeOnly = df_UK_NHS_daily_COVID_deaths.drop(columns=leadingColumns)
deathsByAgeOnly.to_hdf("data/all_data.h5", key = "clean_NHS_England_daily_COVID_deaths_by_age")

In [None]:
# Sum over all dates (axis 0) to get totals per column, then skip the first two
# columns - the same two that are dropped before saving to HDF.
totalDeaths_byAge = df_UK_NHS_daily_COVID_deaths.sum(0).values[2:]
totalDeaths_byAge

In [None]:
# Preview the source bin edges for the deaths data: 20, 40, 60, 80.
np.arange(20,80+1,20)

In [None]:
# Preview the target bin edges: 10, 20, ..., 80.
np.arange(10,80+1,10)

In [None]:
# Map deaths from bins with edges every 20 years onto bins with edges every 10 years.
deathAgeSplits = np.arange(20,80+1,20)
deathTargetSplits = np.arange(10,80+1,10)
totalDeaths_byAge_regroup = regroup_by_age(
    totalDeaths_byAge, fromAgeSplits=deathAgeSplits, toAgeSplits=deathTargetSplits)

totalDeaths_byAge_regroup

In [None]:
# Splitting each source bin evenly between its two halves is not a very good
# assumption; deaths should rather grow with age inside a bin. So each of the
# first four source totals is split 25% / 75% between its lower and upper
# half, which is probably more realistic. The last value is kept as a single
# bin. All totals are hard-coded.
lowerHalfShare, upperHalfShare = 0.25, 0.75
linearSplitDeaths = []
for sourceBinDeaths in (5, 38, 353, 1946):
    linearSplitDeaths.append(sourceBinDeaths*lowerHalfShare)
    linearSplitDeaths.append(sourceBinDeaths*upperHalfShare)
linearSplitDeaths.append(2555.3)
totalDeaths_byAge_regroupLinear = np.array(linearSplitDeaths)
totalDeaths_byAge_regroupLinear

In [None]:
# Ratio of two hand-copied values - presumably the largest and smallest
# per-capita death rates; TODO confirm where they were copied from.
1.47973579e-03/6.21576976e-07

In [None]:
# Deaths (linear split) per head of population in each age group ...
deathsPerCapita = totalDeaths_byAge_regroupLinear/agePopulationTotal
# ... expressed relative to the mean across age groups, minus one.
relativeDeathRisk_given_COVID_by_age = (
    deathsPerCapita/np.mean(deathsPerCapita) - 1
)


relativeDeathRisk_given_COVID_by_age

In [None]:
# Overwrite the computed value with hard-coded numbers (9 entries, one per age
# group) - presumably a frozen copy of an earlier run; TODO confirm.
relativeDeathRisk_given_COVID_by_age = np.array([-0.99873039, -0.99599102, -0.99093115, -0.97494866, -0.91620111,
       -0.7642025 , -0.4545135 ,  1.04514869,  5.05036963])

In [None]:
# Show the overlap-based regrouping truncated to whole numbers.
totalDeaths_byAge_regroup.astype(int)

In [None]:

# Deaths (linear split) divided by lab-confirmed COVID-19 admissions, per age group.
# NOTE(review): numerator and denominator come from different sources (NHS
# deaths report vs. CHESS) - confirm they cover comparable populations and dates.
caseFatalityRatioHospital_given_COVID_by_age = totalDeaths_byAge_regroupLinear/totalCOVIDAdmitted_byAge_regroup
caseFatalityRatioHospital_given_COVID_by_age

In [None]:
# Overwrite the computed ratio with hard-coded numbers (9 entries, one per age
# group) - presumably a frozen copy of an earlier run; TODO confirm.
caseFatalityRatioHospital_given_COVID_by_age = np.array([0.00856164, 0.03768844, 0.02321319, 0.04282494, 0.07512237,
       0.12550367, 0.167096  , 0.37953452, 0.45757006])

In [None]:
# Inspect the regrouped lab-confirmed COVID-19 admissions.
totalCOVIDAdmitted_byAge_regroup

In [None]:
# Overall hospitalised case fatality ratio: total deaths over total
# lab-confirmed admissions, summed across all age groups.
# This uses the overlap-based regrouping, not the linear split.
np.sum(totalDeaths_byAge_regroup)/np.sum(totalCOVIDAdmitted_byAge_regroup)

# A&E attendances by age group

NHS data 2018-19, Table 6 of https://digital.nhs.uk/data-and-information/publications/statistical/hospital-accident--emergency-activity/2018-19, https://files.digital.nhs.uk/06/C0AC02/AE1819_National_Data_Tables_v4.xlsx

In [None]:
# A&E attendance counts, hard-coded. Each inner tuple lists the published
# counts that are added together to form one of the 9 age groups
# (presumably the same 10-year bands as agePopulationTotal - TODO confirm).
aeAttendanceBands = (
    (619842, 1607748, 1091497),
    (1147661, 222246, 203356, 228898, 287171, 322682),
    (1641251, 1648698),
    (1534060, 1298664),
    (1108324, 1185519),
    (1210073, 1101959),
    (940500, 879026),
    (970384, 869129),
    (846931, 674704, 471264, 256260),
)
ae_attendances_by_age = np.array([sum(band) for band in aeAttendanceBands])

In [None]:
# A&E attendances per head of population, per age group.
riskOfAEAttandance = ae_attendances_by_age/agePopulationTotal
riskOfAEAttandance

In [None]:
# Overwrite the computed value with hard-coded numbers (9 entries, one per age
# group) - presumably a frozen copy of the result above; TODO confirm.
riskOfAEAttandance = np.array([0.41261361, 0.31560648, 0.3843979 , 0.30475704, 0.26659415,
       0.25203475, 0.24970244, 0.31549102, 0.65181376])

In [None]:
# Inspect the final per-age A&E attendance risk.
riskOfAEAttandance