In [None]:
# This workbook accompanies the CURIAL Validation manuscript

# This workbook produces summary tables for each study cohort - including demographics,
# alongside supplementary tables with summary stats for blood tests, vital signs and blood gases

# Reports level of data completeness

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import (YEARLY, DateFormatter,rrulewrapper, RRuleLocator, drange)
# Render floats in displayed tables with thousands separators and two decimal places
pd.options.display.float_format = '{:,.2f}'.format

In [None]:
#A function to harmonise differently reported ethnic groups/ethnicity
def ethnicityHarmoniser(df):
    """Map site-specific ethnicity codes and labels onto seven harmonised groups.

    The groups are White, Unknown, South Asian, Other, Black, Mixed and Chinese.
    Values that are not listed below pass through unchanged.

    Parameters
    ----------
    df : object with a pandas-style ``replace`` method
        The callers in this notebook pass a cohort's ``Ethnicity`` column.

    Returns
    -------
    The result of ``df.replace(...)`` with every listed raw value swapped for
    its harmonised group.
    """
    rawValuesByGroup = {
        "White": [
            "A", "B",
            "C", "C3", "CP", "CA", "CB", "CY", "CC", "CW", "CK", "CS", "CN",
            "CR", "CQ", "CF", "C2", "CH", "CU", "CHI",
            "White - British",
            "White- British",
            "White- Any other white background",
            "White- Irish",
            "White - Any Other White Background",
        ],
        "Unknown": [
            "Z", "Z9", "ZR",
            "Other- Not stated",
            "Other - Not Stated",
            "Other - Not Known",
            "Other- Not known",
            "1", "WHT", "62", "19", "87",
        ],
        "South Asian": [
            "J", "H", "K",
            "Asian or Asian British - Pakistani",
            "Asian or Asian British - Indian",
            "Asian or Asian British -Indian",
            "Asian or Asian British - Bangladeshi",
        ],
        "Other": [
            "L", "LK", "LJ", "LA", "LH", "LE", "LD", "LG",
            "S", "SE", "SC", "SD", "SA",
            "Other- Any other ethnic group",
            "Asian - Any Other Asian Background",
            "Other - Any Other Ethnic Group",
            "Asian or Asian British -Any other Asian background",
        ],
        "Black": [
            "N", "NK", "M",
            "P", "PE", "PD", "PC", "PA",
            "Black- Any other black background",
            "Black or Black British - Caribbean",
        ],
        "Mixed": [
            "G", "GF", "GB",
            "D", "CX", "GE", "GD", "GA",
            "F", "E",
            "Mixed - White and Black African",
            "Mixed - White and Black Caribbean",
        ],
        "Chinese": [
            "R",
            "Other-Chinese",
        ],
    }
    # Flatten to the {raw value: group} form that replace() expects.
    lookup = {
        rawValue: group
        for group, rawValues in rawValuesByGroup.items()
        for rawValue in rawValues
    }
    return df.replace(lookup)

In [None]:
#Load training population (pre-pandemic controls)
# ArrivalDateTime is named explicitly: parse_dates=True only asks pandas to parse
# the index, which would leave the column as text and make the date filter below
# a string comparison.
# NOTE(review): assumes ArrivalDateTime is stored in a format pandas parses
# unambiguously (e.g. ISO 8601) - confirm against the CSV.
OUHprepandemicControls = pd.read_csv('OUHPrePandemicControls.csv', parse_dates=['ArrivalDateTime'])

#Apply inclusion criteria: arrival before 2019-12-01 and age 18 or over
# .copy() makes the filtered frame independent, so the column assignment below
# does not trigger SettingWithCopyWarning.
OUHprepandemicControls = OUHprepandemicControls.loc[
    (OUHprepandemicControls.ArrivalDateTime < '2019-12-01') & (OUHprepandemicControls.Age >= 18)
].copy()

#Process ethnicity values to common format between all datasets
OUHprepandemicControls['Ethnicity'] = ethnicityHarmoniser(OUHprepandemicControls['Ethnicity'])

In [None]:
#Define training cases cohort
# ArrivalDateTime is named explicitly: parse_dates=True only asks pandas to parse
# the index, which would leave the column as text and make the date filter below
# a string comparison.
# NOTE(review): assumes ArrivalDateTime is stored in a format pandas parses
# unambiguously (e.g. ISO 8601) - confirm against the CSV.
OUHw1cases = pd.read_csv('OUHw1ConfirmedCases.csv', parse_dates=['ArrivalDateTime'])

#Apply inclusion criteria: arrival before 2020-07-01 and a positive Covid-19 result
# .copy() makes the filtered frame independent, so the column assignment below
# does not trigger SettingWithCopyWarning.
OUHw1cases = OUHw1cases.loc[
    (OUHw1cases.ArrivalDateTime < '2020-07-01') & (OUHw1cases['Covid-19 Positive'] == 1.0)
].copy()

#Process ethnicity values to common format between all datasets
OUHw1cases['Ethnicity'] = ethnicityHarmoniser(OUHw1cases['Ethnicity'])

In [None]:
#Define OUH w2 validation population
# ArrivalDateTime is named explicitly: parse_dates=True only asks pandas to parse
# the index, which would leave the column as text and make the date filter below
# a string comparison.
# NOTE(review): assumes ArrivalDateTime is stored in a format pandas parses
# unambiguously (e.g. ISO 8601) - confirm against the CSV.
OUHw2 = pd.read_csv('OUHWave2Attendances.csv', parse_dates=['ArrivalDateTime'])

#Define second wave: arrivals on or after 2020-10-01, with no upper date bound
# NOTE(review): this comment used to read "from 10th Oct to end of dataset 6 March",
# but the filter starts on 1 October - confirm which start date is intended.
# .copy() makes the filtered frame independent, so the column assignment below
# does not trigger SettingWithCopyWarning.
OUHw2 = OUHw2.loc[OUHw2.ArrivalDateTime >= '2020-10-01'].copy()

#Process ethnicity values to common format between all datasets
OUHw2['Ethnicity'] = ethnicityHarmoniser(OUHw2['Ethnicity'])

In [None]:
#Define UHB validation Population
# ArrivalDateTime is named explicitly: parse_dates=True only asks pandas to parse
# the index, which would leave the column as text and make the date filter below
# a string comparison.
# NOTE(review): assumes ArrivalDateTime is stored in a format pandas parses
# unambiguously (e.g. ISO 8601) - confirm against the CSV.
UHBPopulation = pd.read_csv('UHBValidation.csv', parse_dates=['ArrivalDateTime'])

#Define population as admitted population (both admitted + ED provided), and from pandemic period
# With parsed datetimes, '>' keeps arrivals strictly after 2019-12-01 00:00:00.
UHBPopulation = UHBPopulation[UHBPopulation.ArrivalDateTime > '2019-12-01']
UHBPopulation = UHBPopulation[(UHBPopulation.Admission == 1.0) | (UHBPopulation.ICU == 1.0)]

# NOTE(review): unlike the other cohorts in this notebook, Ethnicity is not passed
# through ethnicityHarmoniser here - confirm the UHB file already uses the
# harmonised group names.

In [None]:
#Define and import Portsmouth Population
PortsmouthPopulation = pd.read_csv('PUHValidation.csv', parse_dates=True)

#Set inclusion criteria - population is admitted population and excluding invalid results
PortsmouthPopulation = PortsmouthPopulation[~(PortsmouthPopulation['Covid-19 Positive'].isna())]
# The admission mask is taken from PortsmouthPopulation itself. It previously read
# `df.Admission`, but `df` is not defined at this point in a top-to-bottom run
# (and on a stale kernel would refer to a different cohort).
# .copy() makes the filtered frame independent, so the column assignment below
# does not trigger SettingWithCopyWarning.
PortsmouthPopulation = PortsmouthPopulation[
    (PortsmouthPopulation.Admission == 1.0) | (PortsmouthPopulation.ICU == 1.0)
].copy()

#Process ethnicity values to common format between all datasets
PortsmouthPopulation['Ethnicity'] = ethnicityHarmoniser(PortsmouthPopulation['Ethnicity'])

In [None]:
#Define and import Bedfordshire Hospitals NHS Trust Population
BedfordshirePopulation = pd.read_csv('BHValidation.csv', parse_dates=True)

#Set inclusion criteria - population is admitted population and excluding invalid results
BedfordshirePopulation = BedfordshirePopulation[(BedfordshirePopulation.Admission == 1.0) | (BedfordshirePopulation.ICU == 1.0)]
# .copy() makes the filtered frame independent, so the column assignment below
# does not trigger SettingWithCopyWarning.
BedfordshirePopulation = BedfordshirePopulation[~(BedfordshirePopulation['Covid-19 Positive'].isna())].copy()

#Process ethnicity values to common format between all datasets
BedfordshirePopulation['Ethnicity'] = ethnicityHarmoniser(BedfordshirePopulation['Ethnicity'])

In [None]:
#Define OUH w2 LFD-validation population
#File contains only tested participants
OUHw2LFD = pd.read_csv('OUHWave2WithLFDs.csv', parse_dates=True)

#Enforce inclusion criteria: keep rows whose lateral flow result is 'Positive' or
#'Negative' (any other value is dropped), then keep admitted patients only
OUHw2LFD = OUHw2LFD[OUHw2LFD.Lateral_flow_result.isin(['Positive', 'Negative'])]
# .copy() makes the filtered frame independent, so the column assignment below
# does not trigger SettingWithCopyWarning.
OUHw2LFD = OUHw2LFD[OUHw2LFD.Admission == 1].copy()

#Process ethnicity values to common format between all datasets
OUHw2LFD['Ethnicity'] = ethnicityHarmoniser(OUHw2LFD['Ethnicity'])

In [None]:
#Curial_Rapide Prospective Validation Study
# The date/time columns are parsed on load. infer_datetime_format is no longer
# passed: it is deprecated since pandas 2.0, where format inference is the default.
CRValidation = pd.read_csv(
    "CURIAL-OLOServiceEvaluation.csv",
    parse_dates=['ArrivalDateTime', 'OLOMachineDateTime', 'VitalsDateTime', 'PCRDateTime',
                 'LFDDateTime', 'LabDateTime', 'DTADateTime', 'DischargeDateTime',
                 'ArrivalDate', 'ArrivalTime'],
)

#Process ethnicity values to common format between all datasets
CRValidation['Ethnicity'] = ethnicityHarmoniser(CRValidation['Ethnicity'])

In [None]:
#Create Summary Table: one row per reported characteristic, no columns yet
summarytable = pd.DataFrame(index=[
    "n Total",
    "n Covid-19",
    "Sex M (%)",
    "Sex F (%)",
    "Age (IQR)",
    "LFD (% positive)",
    *("Ethnicity: " + group
      for group in ("White", "Not stated", "South Asian", "Chinese", "Black", "Other", "Mixed")),
])

In [None]:
#Add column to summarytable for the pre-pandemic training cohort
df = OUHprepandemicControls
dfSummaryStats = df['Age'].describe().round(2).map(str)
genderCounts = df['Gender'].value_counts()
ethnicityCounts = df['Ethnicity'].value_counts()
genderSummaryStats = genderCounts.map(str)
ethnicityNumbers = ethnicityCounts.map(str)

#Each demographic cell is "count (percentage of cohort)"
genderCells = [
    genderSummaryStats[sex] + " (" + str(((genderCounts[sex] / len(df)) * 100).round(2)) + ")"
    for sex in ('M', 'F')
]
#A group with no patients in this cohort is reported as "0 (0)" instead of raising KeyError
ethnicityCells = [
    ethnicityNumbers[group] + " (" + str(((ethnicityCounts[group] / len(df)) * 100).round(2)) + ")"
    if group in ethnicityCounts.index else "0 (0)"
    for group in ('White', 'Unknown', 'South Asian', 'Chinese', 'Black', 'Other', 'Mixed')
]

summarytable['Training: OUH pre-pandemic cohort'] = [
    len(df),
    df['Covid-19 Positive'].sum(),
    *genderCells,
    dfSummaryStats['50%'] + " (" + dfSummaryStats['25%'] + "-" + dfSummaryStats['75%'] + ")",
    np.nan,  #LFD row left empty for this cohort (np.NaN alias was removed in NumPy 2.0)
    *ethnicityCells,
]

In [None]:
#Add column to summarytable for the wave 1 cases training cohort
df = OUHw1cases
dfSummaryStats = df['Age'].describe().round(2).map(str)
genderCounts = df['Gender'].value_counts()
ethnicityCounts = df['Ethnicity'].value_counts()
genderSummaryStats = genderCounts.map(str)
ethnicityNumbers = ethnicityCounts.map(str)

#Each demographic cell is "count (percentage of cohort)"
genderCells = [
    genderSummaryStats[sex] + " (" + str(((genderCounts[sex] / len(df)) * 100).round(2)) + ")"
    for sex in ('M', 'F')
]
#A group with no patients in this cohort is reported as "0 (0)" instead of raising KeyError.
#The Chinese row was previously hard-coded to "0 (0)"; it is now computed from the data
#and only falls back to "0 (0)" when the group is genuinely absent.
ethnicityCells = [
    ethnicityNumbers[group] + " (" + str(((ethnicityCounts[group] / len(df)) * 100).round(2)) + ")"
    if group in ethnicityCounts.index else "0 (0)"
    for group in ('White', 'Unknown', 'South Asian', 'Chinese', 'Black', 'Other', 'Mixed')
]

summarytable['Training: OUH cases cohort'] = [
    len(df),
    df['Covid-19 Positive'].sum(),
    *genderCells,
    dfSummaryStats['50%'] + " (" + dfSummaryStats['25%'] + "-" + dfSummaryStats['75%'] + ")",
    np.nan,  #LFD row left empty for this cohort (np.NaN alias was removed in NumPy 2.0)
    *ethnicityCells,
]

In [None]:
#Add column to summarytable for the full OUH wave 2 validation cohort
df = OUHw2
dfSummaryStats = df['Age'].describe().round(2).map(str)
genderCounts = df['Gender'].value_counts()
ethnicityCounts = df['Ethnicity'].value_counts()
genderSummaryStats = genderCounts.map(str)
ethnicityNumbers = ethnicityCounts.map(str)

#Each demographic cell is "count (percentage of cohort)"
genderCells = [
    genderSummaryStats[sex] + " (" + str(((genderCounts[sex] / len(df)) * 100).round(2)) + ")"
    for sex in ('M', 'F')
]
#A group with no patients in this cohort is reported as "0 (0)" instead of raising KeyError
ethnicityCells = [
    ethnicityNumbers[group] + " (" + str(((ethnicityCounts[group] / len(df)) * 100).round(2)) + ")"
    if group in ethnicityCounts.index else "0 (0)"
    for group in ('White', 'Unknown', 'South Asian', 'Chinese', 'Black', 'Other', 'Mixed')
]

summarytable['Validation: OUH w2 Validation'] = [
    len(df),
    df['Covid-19 Positive'].sum(),
    *genderCells,
    dfSummaryStats['50%'] + " (" + dfSummaryStats['25%'] + "-" + dfSummaryStats['75%'] + ")",
    np.nan,  #LFD row left empty for this cohort (np.NaN alias was removed in NumPy 2.0)
    *ethnicityCells,
]

In [None]:
#Add column to summarytable for Validation: Portsmouth
df = PortsmouthPopulation

dfSummaryStats = df['Age'].describe().round(2).map(str)
genderCounts = df['Gender'].value_counts()
ethnicityCounts = df['Ethnicity'].value_counts()
genderSummaryStats = genderCounts.map(str)
ethnicityNumbers = ethnicityCounts.map(str)

#Each demographic cell is "count (percentage of cohort)"
# NOTE(review): Gender is indexed with 0 and 1 here rather than 'M'/'F'. If Gender holds
# integer codes these are label lookups, and the code-to-sex mapping must match the
# "Sex M (%)" / "Sex F (%)" row order. If it holds strings they are positional lookups
# into a frequency-sorted Series, which would put the more common sex in the
# "Sex M (%)" row - confirm against the data.
genderCells = [
    genderSummaryStats[sexKey] + " (" + str(((genderCounts[sexKey] / len(df)) * 100).round(2)) + ")"
    for sexKey in (0, 1)
]
#NB: A group with no patients in this cohort (e.g. n=0 Chinese) is reported as "0 (0)"
#instead of raising KeyError
ethnicityCells = [
    ethnicityNumbers[group] + " (" + str(((ethnicityCounts[group] / len(df)) * 100).round(2)) + ")"
    if group in ethnicityCounts.index else "0 (0)"
    for group in ('White', 'Unknown', 'South Asian', 'Chinese', 'Black', 'Other', 'Mixed')
]

summarytable['Validation: Portsmouth University Hospitals'] = [
    len(df),
    df['Covid-19 Positive'].sum(),
    *genderCells,
    dfSummaryStats['50%'] + " (" + dfSummaryStats['25%'] + "-" + dfSummaryStats['75%'] + ")",
    np.nan,  #LFD row left empty for this cohort (np.NaN alias was removed in NumPy 2.0)
    *ethnicityCells,
]

In [None]:
#Add column to summarytable for Validation: UHB
df = UHBPopulation

dfSummaryStats = df['Age'].describe().round(2).map(str)
genderCounts = df['Gender'].value_counts()
ethnicityCounts = df['Ethnicity'].value_counts()
genderSummaryStats = genderCounts.map(str)
ethnicityNumbers = ethnicityCounts.map(str)

#Each demographic cell is "count (percentage of cohort)"
genderCells = [
    genderSummaryStats[sex] + " (" + str(((genderCounts[sex] / len(df)) * 100).round(2)) + ")"
    for sex in ('M', 'F')
]
#A group with no patients in this cohort is reported as "0 (0)" instead of raising KeyError
# NOTE(review): this lookup expects the harmonised group names; values stored under any
# other label are not counted in these rows - confirm UHB Ethnicity is already harmonised.
ethnicityCells = [
    ethnicityNumbers[group] + " (" + str(((ethnicityCounts[group] / len(df)) * 100).round(2)) + ")"
    if group in ethnicityCounts.index else "0 (0)"
    for group in ('White', 'Unknown', 'South Asian', 'Chinese', 'Black', 'Other', 'Mixed')
]

summarytable['Validation: UHB'] = [
    len(df),
    df['Covid-19 Positive'].sum(),
    *genderCells,
    dfSummaryStats['50%'] + " (" + dfSummaryStats['25%'] + "-" + dfSummaryStats['75%'] + ")",
    np.nan,  #LFD row left empty for this cohort (np.NaN alias was removed in NumPy 2.0)
    *ethnicityCells,
]

In [None]:
#Add column to summarytable for Validation: Bedford
df = BedfordshirePopulation

dfSummaryStats = df['Age'].describe().round(2).map(str)
genderCounts = df['Gender'].value_counts()
ethnicityCounts = df['Ethnicity'].value_counts()
genderSummaryStats = genderCounts.map(str)
ethnicityNumbers = ethnicityCounts.map(str)

#Each demographic cell is "count (percentage of cohort)"
# NOTE(review): Gender is indexed with 0 and 1 here rather than 'M'/'F'. If Gender holds
# integer codes these are label lookups, and the code-to-sex mapping must match the
# "Sex M (%)" / "Sex F (%)" row order. If it holds strings they are positional lookups
# into a frequency-sorted Series, which would put the more common sex in the
# "Sex M (%)" row - confirm against the data.
genderCells = [
    genderSummaryStats[sexKey] + " (" + str(((genderCounts[sexKey] / len(df)) * 100).round(2)) + ")"
    for sexKey in (0, 1)
]
#NB: A group with no patients in this cohort (e.g. n=0 Chinese) is reported as "0 (0)".
#The Chinese row was previously hard-coded to "0 (0)"; it is now computed from the data
#and only falls back to "0 (0)" when the group is genuinely absent.
ethnicityCells = [
    ethnicityNumbers[group] + " (" + str(((ethnicityCounts[group] / len(df)) * 100).round(2)) + ")"
    if group in ethnicityCounts.index else "0 (0)"
    for group in ('White', 'Unknown', 'South Asian', 'Chinese', 'Black', 'Other', 'Mixed')
]

summarytable['Validation: Bedfordshire Hospitals'] = [
    len(df),
    df['Covid-19 Positive'].sum(),
    *genderCells,
    dfSummaryStats['50%'] + " (" + dfSummaryStats['25%'] + "-" + dfSummaryStats['75%'] + ")",
    np.nan,  #LFD row left empty for this cohort (np.NaN alias was removed in NumPy 2.0)
    *ethnicityCells,
]

In [None]:
#Add column to summarytable for Validation-LFD: OUH
df = OUHw2LFD
dfSummaryStats = df['Age'].describe().round(2).map(str)
genderCounts = df['Gender'].value_counts()
ethnicityCounts = df['Ethnicity'].value_counts()
genderSummaryStats = genderCounts.map(str)
ethnicityNumbers = ethnicityCounts.map(str)
lfdPositiveCount = df['Lateral_flow_result'].value_counts()['Positive']

#Each demographic cell is "count (percentage of cohort)"
genderCells = [
    genderSummaryStats[sex] + " (" + str(((genderCounts[sex] / len(df)) * 100).round(2)) + ")"
    for sex in ('M', 'F')
]
#A group with no patients in this cohort is reported as "0 (0)" instead of raising KeyError
ethnicityCells = [
    ethnicityNumbers[group] + " (" + str(((ethnicityCounts[group] / len(df)) * 100).round(2)) + ")"
    if group in ethnicityCounts.index else "0 (0)"
    for group in ('White', 'Unknown', 'South Asian', 'Chinese', 'Black', 'Other', 'Mixed')
]

summarytable['Validation-LFD: OUH LFD Comparison'] = [
    len(df),
    df['Covid-19 Positive'].sum(),
    *genderCells,
    dfSummaryStats['50%'] + " (" + dfSummaryStats['25%'] + "-" + dfSummaryStats['75%'] + ")",
    str(lfdPositiveCount) + " (" + str(((lfdPositiveCount / len(df)) * 100).round(2)) + ")",
    *ethnicityCells,
]

In [None]:
#Add column to summary table for C-R/OLO service evaluation
#This file uses 'Sex' ('man'/'woman'), 'PCRResult' and 'LFDResult' columns
df = CRValidation
dfSummaryStats = df['Age'].describe().round(2).map(str)
genderCounts = df['Sex'].value_counts()
ethnicityCounts = df['Ethnicity'].value_counts()
genderSummaryStats = genderCounts.map(str)
ethnicityNumbers = ethnicityCounts.map(str)
lfdPositiveCount = df['LFDResult'].value_counts()['POSITIVE']

#Each demographic cell is "count (percentage of cohort)"
genderCells = [
    genderSummaryStats[sex] + " (" + str(((genderCounts[sex] / len(df)) * 100).round(2)) + ")"
    for sex in ('man', 'woman')
]
#A group with no patients in this cohort is reported as "0 (0)" instead of raising KeyError
ethnicityCells = [
    ethnicityNumbers[group] + " (" + str(((ethnicityCounts[group] / len(df)) * 100).round(2)) + ")"
    if group in ethnicityCounts.index else "0 (0)"
    for group in ('White', 'Unknown', 'South Asian', 'Chinese', 'Black', 'Other', 'Mixed')
]

summarytable['Real-World Validation: CURIAL-Rapide'] = [
    len(df),
    df['PCRResult'].value_counts()['Positive'],
    *genderCells,
    dfSummaryStats['50%'] + " (" + dfSummaryStats['25%'] + "-" + dfSummaryStats['75%'] + ")",
    str(lfdPositiveCount) + " (" + str(((lfdPositiveCount / len(df)) * 100).round(2)) + ")",
    *ethnicityCells,
]

In [None]:
#Display the cohort summary table
summarytable

In [None]:
########################################
# Now create a summary table with each of the key features #

In [None]:
#Generate ordered indices for tables, taking column names and order from the UHB dataset
#Bloods: every column from 'Blood_Test HAEMOGLOBIN' through 'Blood_Test CRP' inclusive
bloodsIndex = UHBPopulation.loc[:, 'Blood_Test HAEMOGLOBIN':'Blood_Test CRP'].columns
#Vitals and gases: every column whose name starts with the given prefix
vitalsIndex = UHBPopulation.loc[:, UHBPopulation.columns.str.startswith('Vital_Sign')].columns
gasIndex = UHBPopulation.loc[:, UHBPopulation.columns.str.startswith('Blood_Gas')].columns

In [None]:
#Create empty summary tables, one row per feature in index order
BloodParams, VitalsParams, GasParams = (
    pd.DataFrame(index=featureNames) for featureNames in (bloodsIndex, vitalsIndex, gasIndex)
)

#Now make the matching empty missing-data tables (bloods and vitals only)
BloodsCompleteness, VitalsCompleteness = (
    pd.DataFrame(index=featureNames) for featureNames in (bloodsIndex, vitalsIndex)
)

In [None]:
#Add in OUH pre-pandemic training summary stats
#Keep arrivals before 2019-12-01 only
OUHprepandemicControls = OUHprepandemicControls.loc[OUHprepandemicControls['ArrivalDateTime'] < '2019-12-01']

#describe() per feature group: bloods and gases rounded to 2 dp, vitals to 1 dp
summaryBloodsMetrics, summaryVitalsMetrics, summaryGasMetrics = (
    OUHprepandemicControls[featureIndex].describe().T.round(decimals)
    for featureIndex, decimals in ((bloodsIndex, 2), (vitalsIndex, 1), (gasIndex, 2))
)

#Write "median (lower quartile-upper quartile)" into each parameter table
for paramsTable, metrics in ((BloodParams, summaryBloodsMetrics),
                             (VitalsParams, summaryVitalsMetrics),
                             (GasParams, summaryGasMetrics)):
    paramsTable['OUH Training: Prepandemic median (IQR)'] = (
        metrics['50%'].map(str) + " (" + metrics['25%'].map(str) + "-" + metrics['75%'].map(str) + ")"
    )

In [None]:
#Add in OUH pandemic training summary stats
#Keep Covid-19 positive rows only
OUHw1cases = OUHw1cases.loc[OUHw1cases['Covid-19 Positive'] == 1.0]

#describe() per feature group: bloods and gases rounded to 2 dp, vitals to 1 dp
summaryBloodsMetrics, summaryVitalsMetrics, summaryGasMetrics = (
    OUHw1cases[featureIndex].describe().T.round(decimals)
    for featureIndex, decimals in ((bloodsIndex, 2), (vitalsIndex, 1), (gasIndex, 2))
)

#Write "median (lower quartile-upper quartile)" into each parameter table
for paramsTable, metrics in ((BloodParams, summaryBloodsMetrics),
                             (VitalsParams, summaryVitalsMetrics),
                             (GasParams, summaryGasMetrics)):
    paramsTable['OUH Training: Cases (w1) median (IQR)'] = (
        metrics['50%'].map(str) + " (" + metrics['25%'].map(str) + "-" + metrics['75%'].map(str) + ")"
    )

In [None]:
#Add in OUH w2 Summary Stats
#describe() per feature group: bloods and gases rounded to 2 dp, vitals to 1 dp
summaryBloodsMetrics, summaryVitalsMetrics, summaryGasMetrics = (
    OUHw2[featureIndex].describe().T.round(decimals)
    for featureIndex, decimals in ((bloodsIndex, 2), (vitalsIndex, 1), (gasIndex, 2))
)

#Write "median (lower quartile-upper quartile)" into each parameter table
for paramsTable, metrics in ((BloodParams, summaryBloodsMetrics),
                             (VitalsParams, summaryVitalsMetrics),
                             (GasParams, summaryGasMetrics)):
    paramsTable['OUHw2 median (IQR)'] = (
        metrics['50%'].map(str) + " (" + metrics['25%'].map(str) + "-" + metrics['75%'].map(str) + ")"
    )

#Completeness per feature: "non-missing/total rows (percent%)"
completenessFor = OUHw2
for completenessTable, featureIndex in ((BloodsCompleteness, bloodsIndex),
                                        (VitalsCompleteness, vitalsIndex)):
    observed = completenessFor[featureIndex].count()
    nRows = completenessFor[featureIndex].shape[0]
    completenessTable['OUHw2Validation'] = (
        observed.map(str) + "/" + str(nRows)
        + " (" + ((observed.values / nRows) * 100).round(1).astype(str) + "%)"
    )

In [None]:
#Add in UHB Summary Stats
#describe() per feature group: bloods and gases rounded to 2 dp, vitals to 1 dp
summaryBloodsMetrics, summaryVitalsMetrics, summaryGasMetrics = (
    UHBPopulation[featureIndex].describe().T.round(decimals)
    for featureIndex, decimals in ((bloodsIndex, 2), (vitalsIndex, 1), (gasIndex, 2))
)

#Write "median (lower quartile-upper quartile)" into each parameter table
for paramsTable, metrics in ((BloodParams, summaryBloodsMetrics),
                             (VitalsParams, summaryVitalsMetrics),
                             (GasParams, summaryGasMetrics)):
    paramsTable['UHB median (IQR)'] = (
        metrics['50%'].map(str) + " (" + metrics['25%'].map(str) + "-" + metrics['75%'].map(str) + ")"
    )

#Completeness per feature: "non-missing/total rows (percent%)"
completenessFor = UHBPopulation
for completenessTable, featureIndex in ((BloodsCompleteness, bloodsIndex),
                                        (VitalsCompleteness, vitalsIndex)):
    observed = completenessFor[featureIndex].count()
    nRows = completenessFor[featureIndex].shape[0]
    completenessTable['UHB'] = (
        observed.map(str) + "/" + str(nRows)
        + " (" + ((observed.values / nRows) * 100).round(1).astype(str) + "%)"
    )

In [None]:
#Add in Portsmouth Summary Stats
#Blood-gas metrics are not summarised for Portsmouth (that step is disabled),
#so GasParams gets no Portsmouth column here
#describe() per feature group: bloods rounded to 2 dp, vitals to 1 dp
summaryBloodsMetrics, summaryVitalsMetrics = (
    PortsmouthPopulation[featureIndex].describe().T.round(decimals)
    for featureIndex, decimals in ((bloodsIndex, 2), (vitalsIndex, 1))
)

#Write "median (lower quartile-upper quartile)" into each parameter table
for paramsTable, metrics in ((BloodParams, summaryBloodsMetrics),
                             (VitalsParams, summaryVitalsMetrics)):
    paramsTable['Portsmouth median (IQR)'] = (
        metrics['50%'].map(str) + " (" + metrics['25%'].map(str) + "-" + metrics['75%'].map(str) + ")"
    )

#Completeness per feature: "non-missing/total rows (percent%)"
completenessFor = PortsmouthPopulation
for completenessTable, featureIndex in ((BloodsCompleteness, bloodsIndex),
                                        (VitalsCompleteness, vitalsIndex)):
    observed = completenessFor[featureIndex].count()
    nRows = completenessFor[featureIndex].shape[0]
    completenessTable['PUH'] = (
        observed.map(str) + "/" + str(nRows)
        + " (" + ((observed.values / nRows) * 100).round(1).astype(str) + "%)"
    )

In [None]:
#Add in Bedford Summary Stats
#describe() per feature group: bloods and gases rounded to 2 dp, vitals to 1 dp
summaryBloodsMetrics, summaryVitalsMetrics, summaryGasMetrics = (
    BedfordshirePopulation[featureIndex].describe().T.round(decimals)
    for featureIndex, decimals in ((bloodsIndex, 2), (vitalsIndex, 1), (gasIndex, 2))
)

#Write "median (lower quartile-upper quartile)" into each parameter table
for paramsTable, metrics in ((BloodParams, summaryBloodsMetrics),
                             (VitalsParams, summaryVitalsMetrics),
                             (GasParams, summaryGasMetrics)):
    paramsTable['Bedfordshire median (IQR)'] = (
        metrics['50%'].map(str) + " (" + metrics['25%'].map(str) + "-" + metrics['75%'].map(str) + ")"
    )

#Completeness per feature: "non-missing/total rows (percent%)"
completenessFor = BedfordshirePopulation
for completenessTable, featureIndex in ((BloodsCompleteness, bloodsIndex),
                                        (VitalsCompleteness, vitalsIndex)):
    observed = completenessFor[featureIndex].count()
    nRows = completenessFor[featureIndex].shape[0]
    completenessTable['Bedford'] = (
        observed.map(str) + "/" + str(nRows)
        + " (" + ((observed.values / nRows) * 100).round(1).astype(str) + "%)"
    )

In [None]:
#Add in OUH LFT Summary Stats
#describe() per feature group: bloods and gases rounded to 2 dp, vitals to 1 dp
summaryBloodsMetrics, summaryVitalsMetrics, summaryGasMetrics = (
    OUHw2LFD[featureIndex].describe().T.round(decimals)
    for featureIndex, decimals in ((bloodsIndex, 2), (vitalsIndex, 1), (gasIndex, 2))
)

#Write "median (lower quartile-upper quartile)" into each parameter table
for paramsTable, metrics in ((BloodParams, summaryBloodsMetrics),
                             (VitalsParams, summaryVitalsMetrics),
                             (GasParams, summaryGasMetrics)):
    paramsTable['OUHw2 LFT median (IQR)'] = (
        metrics['50%'].map(str) + " (" + metrics['25%'].map(str) + "-" + metrics['75%'].map(str) + ")"
    )

#Completeness per feature: "non-missing/total rows (percent%)"
completenessFor = OUHw2LFD
for completenessTable, featureIndex in ((BloodsCompleteness, bloodsIndex),
                                        (VitalsCompleteness, vitalsIndex)):
    observed = completenessFor[featureIndex].count()
    nRows = completenessFor[featureIndex].shape[0]
    completenessTable['OUHw2 LFT'] = (
        observed.map(str) + "/" + str(nRows)
        + " (" + ((observed.values / nRows) * 100).round(1).astype(str) + "%)"
    )

In [None]:
#Add in CURIAL-Rapide Summary Stats
#Identify the bloodsIndex columns that are absent from the C-R data (described by the
#original author as the non-FBC bloods), and create an empty, zero-row df with those columns
missingcols = bloodsIndex[~bloodsIndex.isin(CRValidation.columns)]
missingColsDf = pd.DataFrame(columns=missingcols)

#Concatenating with the zero-row frame adds the missing columns (not rows) to the C-R data,
#so that fullBloods[bloodsIndex] can be selected below and the full table rows line up
# NOTE(review): the dtype pandas gives these added columns, and therefore whether
# describe() reports them, may depend on the pandas version - confirm the C-R
# column of BloodParams shows the expected rows.
fullBloods = pd.concat([CRValidation, missingColsDf])

#Bloods are summarised from the padded frame, vitals from the C-R data directly;
#no blood-gas column is added to GasParams in this cell
summaryBloodsMetrics = fullBloods[bloodsIndex].describe().T.round(2)
summaryVitalsMetrics = CRValidation[vitalsIndex].describe().T.round(1)
BloodParams['CURIAL-Rapide median (IQR)'] = summaryBloodsMetrics['50%'].map(str) + " (" + summaryBloodsMetrics['25%'].map(str) + "-"+ summaryBloodsMetrics['75%'].map(str) + ")"
VitalsParams['CURIAL-Rapide median (IQR)'] = summaryVitalsMetrics['50%'].map(str) + " (" + summaryVitalsMetrics['25%'].map(str) + "-"+ summaryVitalsMetrics['75%'].map(str) + ")"

#Completeness per feature: "non-missing/total rows (percent%)"
completenessFor = CRValidation
BloodsCompleteness['C-R Evaluation'] = fullBloods[bloodsIndex].count().map(str)+ "/" + str(fullBloods[bloodsIndex].shape[0]) + " (" +((fullBloods[bloodsIndex].count().values / fullBloods[bloodsIndex].shape[0])*100).round(1).astype(str) + "%)"
VitalsCompleteness['C-R Evaluation'] = completenessFor[vitalsIndex].count().map(str)+ "/" + str(completenessFor[vitalsIndex].shape[0]) + " (" +((completenessFor[vitalsIndex].count().values / completenessFor[vitalsIndex].shape[0])*100).round(1).astype(str) + "%)"

In [None]:
#Display the blood test summary table (uncomment the last line to export it to CSV)
BloodParams
#BloodParams.to_csv("SI S2 Blood Params.csv")

In [None]:
#Display the blood test completeness table (uncomment the last line to export it to CSV)
BloodsCompleteness
#BloodsCompleteness.to_csv("SI Bloods Completeness.csv")

In [None]:
#Display the vital signs completeness table (uncomment the last line to export it to CSV)
VitalsCompleteness
#VitalsCompleteness.to_csv("SI Vitals Completeness.csv")

In [None]:
#Display the vital signs summary table (uncomment the last line to export it to CSV)
VitalsParams
#VitalsParams.to_csv("SI S2 Vitals Params.csv")

In [None]:
#Display the blood gas summary table (uncomment the last line to export it to CSV)
GasParams
#GasParams.to_csv("SI S2 Gas Params.csv")

In [None]:
#summarytable.round(1).to_csv("04 Summary Table 2.csv")

In [None]:
#Comparing ages between populations (Kruskal-Wallis H-test)
from scipy import stats

#Portsmouth vs Bham
# nan_policy='omit' ignores missing ages; the default ('propagate') returns nan
# for the whole test if either cohort contains a missing Age.
stats.kruskal(PortsmouthPopulation.Age, UHBPopulation.Age, nan_policy='omit')

In [None]:
#Bedford vs Bham
# nan_policy='omit' ignores missing ages; the default ('propagate') returns nan
# for the whole test if either cohort contains a missing Age.
stats.kruskal(UHBPopulation.Age, BedfordshirePopulation.Age, nan_policy='omit')

In [None]:
#Portsmouth vs Bedford
# nan_policy='omit' ignores missing ages; the default ('propagate') returns nan
# for the whole test if either cohort contains a missing Age.
stats.kruskal(PortsmouthPopulation.Age, BedfordshirePopulation.Age, nan_policy='omit')