In [4]:
import pandas as pd
import numpy as np
from datetime import date, time
from IPython.display import display
pd.options.display.max_columns = None
import os
import re

## Functions

In [5]:
#function for creating lists of SIDs
def listify(cell):
    if pd.isna(cell):
        return []
    else:
        cell = str(cell)
        cell = cell.replace(" ", "")
        listy = list(cell.split(","))
        return listy

In [6]:
#function for correcting double counts for same concept
def no_double(component, *args):
    component = pd.DataFrame(component)
    count = 1
    for data in args:
        if not isinstance(data, pd.Series):
            for column in data:
                name = 'arg' + str(count)
                column_series = pd.Series(column)
                component[name] = column_series
                count += 1
        else:
            component = pd.concat((component, data), axis=1)
    
    component['sum'] = component.iloc[:, 1:].sum(axis=1)

    updated_component = []
    
    for row in component.iterrows():
        if row[1][0] >0 and row[1][-1] >0:
            updated_component.append(0)
        elif row[1][0] == 1 and row[1][-1] == 0:
            updated_component.append(1)
        else:
            updated_component.append(0)
    
    return updated_component

In [223]:
#function for converting operative severity to numbers
def op_severity(severity):
    updated_severity = []
    
    for item in severity:
        item = str(item)
        item = item.lower()
        if 'complex' in item:
            updated_severity.append(5)
        elif 'xmajor' in item:
            updated_severity.append(4)
        elif 'major' in item:
            updated_severity.append(3)
        elif 'intermediate' in item:
            updated_severity.append(2)
        elif 'minor' in item:
            updated_severity.append(1)
        else:
            updated_severity.append(3)
            print('Unknown operation severity code : '+item)
            
    return updated_severity

In [39]:
#function for calculating CCI
def cci(data):
    CCI = []
    for row in data.iterrows():
        score = 0
        if row[1]['cardiac'] == 1:
            score += 1
        if row[1]['kidney'] == 1:
            score += 2
        if row[1]['liver'] == 1:
            score += 2
        if row[1]['neuro'] == 1:
            score += 2
        if row[1]['pulm'] == 1:
            score += 1
        if row[1]['dementia'] == 1:
            score += 1
        if row[1]['diabetes'] == 1:
            score += 1
        if row[1]['comp_diabetes'] == 1:
            score += 2
        if row[1]['malignancy'] == 1:
            score += 2
        if row[1]['metastasis'] == 1:
            score += 6
        if row[1]['pvd'] == 1:
            score += 1
        if row[1]['rheum'] == 1:
            score += 1
        if row[1]['pulm'] == 1:
            score += 1
        CCI.append(score)
    
    return CCI

In [129]:
#functions for identifying post-op crit care for patients admitted to level 0 & 1
def crit_care(p):
    updated_crit_care = []
    for row in p.iterrows():
        if row[1][30] == 1:
            updated_crit_care.append(0)
        elif row[1][57] > 1:
            updated_crit_care.append('Not applicable')
        elif (row[1][37] + row[1][37]) >0:
            updated_crit_care.append(1)
        else:
            updated_crit_care.append(0)
    return updated_crit_care

def crit_care_7d(p, ts):
    updated_crit_care = []
    for row in p.iterrows():
        if row[1][30] == 1: #cancelled
            updated_crit_care.append(0)
        elif row[1][57] > 1: #level 2-3 post-op
            updated_crit_care.append('Not applicable')
        elif (row[1][-8]) >0: #LOS in levels 2-3 >0
            SID = row[0]
            date = row[1][29]
            date = date.replace(hour=0, minute=0)
            date_position = ts.index.get_loc(date)
            date_7 = date_position + 7
            check = ts.iloc[date_position:date_7, [25, 29]]
            check = check['adm_7d_lvl2'] + check['adm_7d_lvl3']
            count = 0
            for row in check:
                if SID in row:
                    updated_crit_care.append(1)
                    count += 1
                    break
            if count == 0:
                updated_crit_care.append(0)
        else:
            updated_crit_care.append(0)
    return updated_crit_care

In [159]:
#function for adding dates of death from encounter end
def died_date(died, date, encounter_end):
    checker = pd.concat((died, date, encounter_end), axis=1)
    updated_date = []
    
    for row in checker.iterrows():
        if row[1]['died'] == 0:
            updated_date.append(0)
        if row[1]['died'] == 1:
            if row[1]['died_date'] == 0:
                updated_date.append(pd.to_datetime(row[1]['encounter_end'], errors='coerce'))
        if row[1]['died'] == 1:
            if row[1]['died_date'] != 0:
                updated_date.append(pd.to_datetime(row[1]['died_date'], errors='coerce'))
    return updated_date

In [184]:
#function for calculating stepdowns
def stepdowns(proposed, actual, dataset):
    stepdown_list = []
    for row in dataset.iterrows():
        if row[1]['cancelled'] == 1:
            stepdown_list.append('Not applicable')
        elif row[1]['proposed_lvl'] not in proposed:
            stepdown_list.append('Not applicable')
        elif row[1]['actual_lvl'] in actual:
            stepdown_list.append(1)
        else:
            stepdown_list.append(0)
    
    return stepdown_list

def stepdowns_nobed(proposed, actual, dataset):
    stepdown_list = []
    for row in dataset.iterrows():
        if row[1]['cancelled'] == 1:
            stepdown_list.append('Not applicable')
        elif row[1]['difference_nobed'] == 0:
            stepdown_list.append(0)
        elif row[1]['proposed_lvl'] not in proposed:
            stepdown_list.append('Not applicable')
        elif row[1]['actual_lvl'] in actual:
            stepdown_list.append(1)
        else:
            stepdown_list.append(0)
    
    return stepdown_list

In [229]:
#function for loading data for each hospital
def data_load(p, ts):

    #loading patient_crf
    patient = pd.read_excel("C:\\Users\\chris\\Documents\\Job Documents\\Portfolio Evidence\\Research\\Post-Operative Critical Care Beds Project\\Results Data\\Cleaned CRFs\\"+p, sheet_name=None)
    try:
        patient = pd.DataFrame(data=patient['Sheet1'])
    except:
        print('The sheet isnt called Sheet1, rename this')
        
    patient.columns = patient.iloc[2]
    patient = patient.iloc[3:]
    patient.columns.name = None
    
    patient = patient.iloc[:len(patient['sid_number'].explode().dropna())]
    
    if patient.columns[0] == 'hospital_number':
        patient = patient.iloc[:, 1:]
        
    patient.fillna(value=0, inplace=True)
    
    if 'rhem' in patient.columns:
        patient = patient.rename(columns={'rhem': 'rheum'})
    
    patient['diabetes'] = no_double(patient['diabetes'], patient['comp_diabetes'])
    patient['malignancy'] = no_double(patient['malignancy'], patient['metastasis'])
    patient['clin_j'] = no_double(patient['clin_j'], (patient['sop'], patient['cpet'], patient['risk_score']))
    patient['op_severity'] = op_severity(patient['op_severity'])
    patient['died_date'] = died_date(patient['died'], patient['died_date'], patient['encounter_end'])
    patient['CCI'] = cci(patient)
    patient['los_23'] = patient['los_2'] + patient['los_3']
    patient['1stepdowns'] = stepdowns([1], [0], patient)
    patient['1stepdowns_nobed'] = stepdowns_nobed([1], [0], patient)
    patient['23to0stepdowns'] = stepdowns([2, 3], [0], patient)
    patient['23to0stepdowns_nobed'] = stepdowns_nobed([2, 3], [0], patient)
    patient['23to1stepdowns'] = stepdowns([2, 3], [1], patient)
    patient['23to1stepdowns_nobed'] = stepdowns_nobed([2, 3], [1], patient)
   
    patient['risk_score'] = [1 if (row[1]['mortality_score'] + row[1]['morbidity_score']) > 0 else 0 for row in patient.iterrows()]
    
    if 'scheduled_date' in patient.columns:
        merged = []
        for row in patient.iterrows():
            row_data = row[1]
            time_merge = pd.Timestamp.combine(row_data['scheduled_date'], row_data['scheduled_start'])
            merged.append(time_merge)
        merged_series = pd.DataFrame(merged)
        merged_series = merged_series.set_index(patient['sid_number'])
        patient['scheduled_start'] = merged_series
        patient = patient.drop('scheduled_date', axis=1)
        
    patient['kts'] = pd.to_datetime(patient['kts'], errors='coerce')
    patient['encounter_start'] = pd.to_datetime(patient['encounter_start'], errors='coerce')
    patient['encounter_end'] = pd.to_datetime(patient['encounter_end'], errors='coerce')
        
    patient = patient.set_index('sid_number')
    patient.index.name = None
        
    code_num = str(patient.index[0])
    code = code_num[:5]

    if len(patient.columns) != 70:
        print("The Patient CRF for site "+code+" is the incorrect shape")
        patient.info()
    if list(patient.columns) != ['age', 'gender_male', 'gender_female', 'gender_non_binary', 'asian', 'black', 'mixed', 'white', 'other', 'cardiac', 'kidney', 'liver', 'neuro', 'pulm', 'dementia', 'diabetes', 'comp_diabetes', 'htn', 'malignancy', 'metastasis', 'pvd', 'rheum', 'surgical_speciality', 'proposed_procedure', 'cancer_surgery', 'ncepod_class', 'op_severity', 'scheduled_start', 'previous_cancellation', 'kts', 'cancelled', 'critical_care_7_days', 'encounter_start', 'encounter_end', 'los_encounter', 'los_0', 'los_1', 'los_2', 'los_3', 'died', 'died_date', 'cancel_nobed', 'cancel_other', 'cancel_other_text', 'sop', 'cpet', 'clin_j', 'risk_score', 'score_text', 'mortality_score', 'morbidity_score', 'other_reason', 'refer_other_text', 'over_8', '1-7_days', 'otd', 'proposed_lvl', 'actual_lvl', 'difference_nobed', 'difference_other', 'difference_other_text', 'difference_unknown', 'CCI', 'los_23', '1stepdowns', '1stepdowns_nobed', '23to0stepdowns', '23to0stepdowns_nobed', '23to1stepdowns', '23to1stepdowns_nobed']:
        print("The Patient CRF for site "+code+" has the wrong column index")
        intended = ['age', 'gender_male', 'gender_female', 'gender_non_binary', 'asian', 'black', 'mixed', 'white', 'other', 'cardiac', 'kidney', 'liver', 'neuro', 'pulm', 'dementia', 'diabetes', 'comp_diabetes', 'htn', 'malignancy', 'metastasis', 'pvd', 'rheum', 'surgical_speciality', 'proposed_procedure', 'cancer_surgery', 'ncepod_class', 'op_severity', 'scheduled_start', 'previous_cancellation', 'kts', 'cancelled', 'critical_care_7_days', 'encounter_start', 'encounter_end', 'los_encounter', 'los_0', 'los_1', 'los_2', 'los_3', 'died', 'died_date', 'cancel_nobed', 'cancel_other', 'cancel_other_text', 'sop', 'cpet', 'clin_j', 'risk_score', 'score_text', 'mortality_score', 'morbidity_score', 'other_reason', 'refer_other_text', 'over_8', '1-7_days', 'otd', 'proposed_lvl', 'actual_lvl', 'difference_nobed', 'difference_other', 'difference_other_text', 'difference_unknown', 'CCI', 'los_23', '1stepdowns', '1stepdowns_nobed', '23to0stepdowns', '23to0stepdowns_nobed', '23to1stepdowns', '23to1stepdowns_nobed']
        actual = list(patient.columns)
        print([item for item in actual if item not in intended])
        
    print('Successfully loaded patient CRF for: '+code)
        
    #loading time_series
    time_series = pd.read_excel("C:\\Users\\chris\\Documents\\Job Documents\\Portfolio Evidence\\Research\\Post-Operative Critical Care Beds Project\\Results Data\\Cleaned CRFs\\"+ts, sheet_name=None)
    try:
        time_series['Anonymised Data'] = time_series['Sheet1']
    except:
        pass
    try:
        time_series['Anonymised Data'] = time_series['Sheet 1']
    except:
        pass
    
    time_series = pd.DataFrame(data=time_series['Anonymised Data'])
    time_series.columns = time_series.iloc[0]
    time_series = time_series.iloc[1:94, 0:31]
    
    time_series['date'] = pd.to_datetime(time_series['date'], errors='coerce')
    time_series = time_series.set_index('date')
    time_series.index.name = None
    
    time_series = time_series.applymap(listify)
    
    if list(time_series.columns) != ['total_op', 'total_day', 'total_elec', 'total_emer', 'c_otd_cancel', 'c_otd_cancel_nobed', 'c_ref_lvl1', 'c_plan_adm_lvl1', 'c_adm_lvl1', 'c_ref_lvl2', 'c_plan_adm_lvl2', 'c_adm_lvl2', 'c_adm_7d_lvl2', 'c_ref_lvl3', 'c_plan_adm_lvl3', 'c_adm_lvl3', 'c_adm_7d_lvl3', 'otd_cancel', 'otd_cancel_nobed', 'ref_lvl1', 'plan_adm_lvl1', 'adm_lvl1', 'ref_lvl2', 'plan_adm_lvl2', 'adm_lvl2', 'adm_7d_lvl2', 'ref_lvl3', 'plan_adm_lvl3', 'adm_lvl3', 'adm_7d_lvl3']:
        print("The time series CRF for site "+code+" has the wrong column index")
        intended = ['total_op', 'total_day', 'total_elec', 'total_emer', 'c_otd_cancel', 'c_otd_cancel_nobed', 'c_ref_lvl1', 'c_plan_adm_lvl1', 'c_adm_lvl1', 'c_ref_lvl2', 'c_plan_adm_lvl2', 'c_adm_lvl2', 'c_adm_7d_lvl2', 'c_ref_lvl3', 'c_plan_adm_lvl3', 'c_adm_lvl3', 'c_adm_7d_lvl3', 'otd_cancel', 'otd_cancel_nobed', 'ref_lvl1', 'plan_adm_lvl1', 'adm_lvl1', 'ref_lvl2', 'plan_adm_lvl2', 'adm_lvl2', 'adm_7d_lvl2', 'ref_lvl3', 'plan_adm_lvl3', 'adm_lvl3', 'adm_7d_lvl3']
        actual = list(time_series.columns)
        print([item for item in actual if item not in intended])
        
    #updating critical care within 7 days column using time series and patient CRFs
    patient['level0-1_crit_care'] = crit_care(patient)
    patient['level0-1_crit_care_7d'] = crit_care_7d(patient, time_series)
    
    print('Successfully loaded time-series CRF for: '+code)
    
    return [code, patient, time_series]

In [189]:
#function for deriving variables for the main analysis from the patient CRF
def pat_values(dataset):
    row_data = pd.DataFrame()
    
    #value extraction
    level0_1 = len(dataset[(dataset['actual_lvl'] < 2) & (dataset['cancelled'] == 0)])
    level0_1_crit_care = len(dataset[dataset['level0-1_crit_care'] == 1])
    level0_1_crit_care_7d = len(dataset[dataset['level0-1_crit_care_7d'] == 1])
    no_cancellations = dataset[dataset['cancelled'] == 0]
    no_cancellations_lvl_1 = dataset[(dataset['cancelled'] == 0) & (dataset['los_1'] > 0)]
    no_cancellations_lvl_23 = dataset[(dataset['cancelled'] == 0) & (dataset['los_23'] > 0)]
    died = dataset[(dataset['died'] == 1) & (dataset['cancelled'] == 0)]
    died_6m = []
    for item in died.iterrows():
        dd = item[1]['died_date']
        k = item[1]['kts']
        try:
            if (dd - k) < pd.Timedelta(182.5, unit='d'):
                died_6m.append(1)
        except:
            print(dd)
            print(k)         
    risk_score = dataset[dataset['risk_score'] == 1]
    mortality = risk_score[risk_score['mortality_score'] > 0]
    morbidity = risk_score[risk_score['morbidity_score'] > 0]
    level_1_ref = dataset[dataset['proposed_lvl'] == 1]
    level_23_ref = dataset[dataset['proposed_lvl'] > 1]
    level_1_ref_nocancel = no_cancellations[no_cancellations['proposed_lvl'] == 1]
    level_23_ref_nocancel = no_cancellations[no_cancellations['proposed_lvl'] > 1]
    
    #variable creation
    row_data['avg_age'] = [dataset['age'].mean()]
    row_data['%female'] = dataset['gender_female'].mean()
    row_data['%white'] = dataset['white'].mean()
    row_data['%BAME'] = 1 - dataset['white'].mean()
    row_data['%asian'] = dataset['asian'].mean()
    row_data['%black'] = dataset['black'].mean()
    row_data['avg_CCI'] = dataset['CCI'].mean()
    row_data['%cancer'] = dataset['cancer_surgery'].mean()
    row_data['avg_ncepod'] = dataset['ncepod_class'].mean()
    row_data['avg_severity'] = dataset['op_severity'].mean()
    row_data['%cancelled'] = dataset['cancelled'].mean()
    row_data['%cancelled_nobed'] = dataset['cancel_nobed'].mean()
    try:
        row_data['%level0-1_crit_care'] = level0_1_crit_care / level0_1
    except ZeroDivisionError:
        row_data['%level0-1_crit_care'] = 0
    try:
        row_data['%level0-1_crit_care_7d'] = level0_1_crit_care_7d / level0_1
    except ZeroDivisionError:
        row_data['%level0-1_crit_care_7d'] = 0
    row_data['LOS_mean'] = no_cancellations['los_encounter'].mean()
    row_data['LOS_median'] = no_cancellations['los_encounter'].median()
    row_data['LOS1_mean'] = no_cancellations_lvl_1['los_1'].mean()
    row_data['LOS1_median'] = no_cancellations_lvl_1['los_1'].median()
    row_data['LOS23_mean'] = no_cancellations_lvl_23['los_23'].mean()
    row_data['LOS23_median'] = no_cancellations_lvl_23['los_23'].median()
    row_data['%died'] = no_cancellations['died'].mean()
    row_data['%died6m'] = len(died_6m) / len(no_cancellations)
    row_data['%clin_j'] = dataset['clin_j'].mean()
    row_data['%cpet'] = dataset['cpet'].mean()
    row_data['%risk_score'] = dataset['risk_score'].mean()
    row_data['pred_mortality'] = mortality['mortality_score'].mean()
    row_data['pred_morbidity'] = morbidity['morbidity_score'].mean()
    row_data['%sop'] = dataset['sop'].mean()
    row_data['%ref_over8'] = dataset['over_8'].mean()
    row_data['%ref_1_7'] = dataset['1-7_days'].mean()
    row_data['%ref_otd'] = dataset['otd'].mean()
    row_data['%_1_ref_over8'] = level_1_ref['over_8'].mean()
    row_data['%_1_ref_1_7'] = level_1_ref['1-7_days'].mean()
    row_data['%_1_ref_otd'] = level_1_ref['otd'].mean()
    row_data['%_23_ref_over8'] = level_23_ref['over_8'].mean()
    row_data['%_23_ref_1_7'] = level_23_ref['1-7_days'].mean()
    row_data['%_23_ref_otd'] = level_23_ref['otd'].mean()
    row_data['%1stepdowns'] = level_1_ref_nocancel['1stepdowns'].mean()
    row_data['%1stepdowns_nobed'] = level_1_ref_nocancel['1stepdowns_nobed'].mean()
    row_data['%23to0stepdowns'] = level_23_ref_nocancel['23to0stepdowns'].mean()
    row_data['%23to0stepdowns_nobed'] = level_23_ref_nocancel['23to0stepdowns_nobed'].mean()
    row_data['%23to1stepdowns'] = level_23_ref_nocancel['23to1stepdowns'].mean()
    row_data['%23to1stepdowns_nobed'] = level_23_ref_nocancel['23to1stepdowns_nobed'].mean()
    
    return row_data

In [116]:
#function for deriving variables for the main analysis from the time series
def ts_values(dataset):
    row_data = pd.DataFrame()
    
    #value extraction
    total_op = dataset.iloc[91, 0]
    total_op = float(total_op[0])
    total_day = dataset.iloc[91, 1]
    total_day = float(total_day[0])
    surg_adm = total_op - total_day
    surg_adm_1000 = surg_adm / 1000
    total_elec = dataset.iloc[91, 2]
    total_elec = float(total_elec[0])
    total_emer = dataset.iloc[91, 3]
    total_emer = float(total_emer[0])
    otd_cancel = dataset.iloc[91, 4]
    otd_cancel = float(otd_cancel[0])
    
    otd_cancel_nobed = len(dataset['otd_cancel_nobed'].explode().dropna())
    ref_lvl1 = len(dataset['ref_lvl1'].explode().dropna())
    plan_adm_lvl1 = len(dataset['plan_adm_lvl1'].explode().dropna())
    adm_lvl1 = len(dataset['adm_lvl1'].explode().dropna())
    ref_lvl2 = len(dataset['ref_lvl2'].explode().dropna())
    plan_adm_lvl2 = len(dataset['plan_adm_lvl2'].explode().dropna())
    adm_lvl2 = len(dataset['adm_lvl2'].explode().dropna())
    adm_7d_lvl2 = len(dataset['adm_7d_lvl2'].explode().dropna())
    ref_lvl3 = len(dataset['ref_lvl3'].explode().dropna())
    plan_adm_lvl3 = len(dataset['plan_adm_lvl3'].explode().dropna())
    adm_lvl3 = len(dataset['adm_lvl3'].explode().dropna())
    adm_7d_lvl3 = len(dataset['adm_7d_lvl3'].explode().dropna())
    
    #variable creation
    row_data['total_op'] = [total_op]
    row_data['%day'] = total_day / total_op
    row_data['surg_adm'] = surg_adm
    row_data['%emerg'] = total_emer / (total_emer + total_elec)
    
    row_data['cancel_per_1000_total'] = otd_cancel / (total_op / 1000)
    row_data['cancel_nobed_per_1000'] = otd_cancel_nobed / surg_adm_1000
    
    row_data['ref_lvl1_per_1000'] = ref_lvl1 / surg_adm_1000
    row_data['plan_adm_lvl1_per_1000'] = plan_adm_lvl1 / surg_adm_1000
    row_data['adm_lvl1_per_1000'] = adm_lvl1 / surg_adm_1000
    try:
        row_data['%emerg_lvl1'] = (adm_lvl1 - plan_adm_lvl1) / adm_lvl1
    except ZeroDivisionError:
        row_data['%emerg_lvl1'] = 0
    
    row_data['ref_lvl23_per_1000'] = (ref_lvl2 + ref_lvl3) / surg_adm_1000
    row_data['plan_adm_lvl23_per_1000'] = (plan_adm_lvl2 + plan_adm_lvl3) / surg_adm_1000
    row_data['adm_lvl23_per_1000'] = (adm_lvl2 + adm_lvl3) / surg_adm_1000
    
    try:
        row_data['%emerg_lvl23'] = ((adm_lvl2 + adm_lvl3) - (plan_adm_lvl2 + plan_adm_lvl3)) / (adm_lvl2 + adm_lvl3)
    except ZeroDivisionError:
        row_data['%emerg_lvl23'] = 0
        
    return row_data

In [None]:
#function to create a dataframe of all of the patients followed up with relevant information
def patients(dataset):
    

## Main analysis

In [238]:
# load survey dataframe
main = pd.read_excel("C:\\Users\\chris\\Documents\\Job Documents\\Portfolio Evidence\\Research\\Post-Operative Critical Care Beds Project\\Results Data\\survey_dataframe.xlsx", sheet_name=None)
main = pd.DataFrame(data=main['Sheet1'])
main = main.set_index('hospital_code')
main.sort_index(axis=0, inplace=True)
main.index.name = None

#drop all without hospital code
main = main[main.index.notnull()]

display(main)

Unnamed: 0,hospital_name,type,icb,level_1,num_level_1,level_2,level_3,em_dept,services,mdt,risk_score,procedure_based,clinical_judgement,ref_anaes,ref_surg,ref_nurse,ref_spr,no_vetting,vet_anaes,vet_icu,vet_surg,vet_nurse,alo_anaes,alo_icu,alo_surg,alo_nurse,bed_nurse,nurse_bed_esc,cover_cons,cover_spr,cover_sho,clin_anaes,clin_icu,clin_surg,discharge_anaes,discharge_icu,discharge_surg,periph_vaso,central_vaso,cpap,bipap,stay_limit,all_spec,single_spec,level_1_ring,level_1_cap,level_1_esc,level_2_ring,level_2_cap,level_2_esc,level_3_ring,level_3_cap,level_3_esc,level_23_ring,level_23_cap,level_23_esc,totalringcap
7A1A4,Wrexham Maelor Hospital,2,Betsi Cadwalader University HB,1,1,0,1,1,"5, 10, 11, 13, 15, 16",0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,1.5,2.0,1,0,0,1,1,1,0,0,1,1,0,1,1,1,1.0,0.0,0,3,3,0,0,0,0,12,12,0,12,12,0
R0A02,Manchester Royal Infirmary,1,NHS Greater Manchester ICB,1,1,1,1,1,"2, 5, 8, 10, 11, 13, 14, 15, 16",0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,2.0,3.0,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1.0,0.0,9,9,9,0,20,20,0,28,28,0,48,48,9
RAPD3,North Middlesex Hospital,2,NHS North Central London ICB,0,0,1,1,1,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,11,11,0,12,12,0,23,23,0
RFRPA,Rotherham Hospital,2,NHS South Yorkshire ICB,0,0,1,1,1,"5, 11, 13",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,8,8,0,5,5,0,13,13,0
RJ1AK,William Harvey Hospital,2,NHS Kent and Medway ICB,0,0,0,1,1,"6, 10, 11, 13,15",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,18,18,0,18,18,0
RJC02,Warwick Hospital,2,NHS Coventry and Warwickshire ICB,0,0,1,1,1,"5, 13",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,2,2,0,5,5,0,7,7,0
RMC01,Royal Bolton Hospital,2,NHS Greater Manchester ICB,0,0,1,1,1,"13, 15",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,10,10,0,8,8,0,18,18,0
RQM01,Chelsea and Westminster Hospital,1,NHS North West London ICB,1,1,0,1,1,"1, 3, 5, 10, 11, 13",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,1.0,0,1,0,1,0,1,1,0,1,0,0,1,0,1,1.0,0.0,0,0,4,0,0,0,0,14,22,0,14,22,0
RWDDA,Lincoln County Hospital,2,NHS Lincolnshire ICB,1,1,0,1,1,"5, 11, 13, 15",0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,2.67,3.67,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1.0,0.0,0,8,10,0,0,0,0,11,15,0,11,15,0
RXR20,Royal Blackburn Teaching Hospital,1,NHS Lancashire and South Cumbria ICB,1,1,1,1,1,"8, 11, 13, 16",0,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,3.0,4.0,0,1,0,0,1,1,0,1,1,1,1,1,0,0,1.0,0.0,0,4,4,0,6,6,0,18,22,0,24,28,0


In [239]:
#iterate through CRF directory
p_crf = []
ts_crf = []

for file in os.listdir("C:\\Users\\chris\\Documents\\Job Documents\\Portfolio Evidence\\Research\\Post-Operative Critical Care Beds Project\\Results Data\\Cleaned CRFs"):
    if "_p.xlsx" in file:
        p_crf.append(str(file))
    if "_ts.xlsx" in file:
        ts_crf.append(file)
        
#create list of pairs to iterate over and derive file paths and check for unpaired datasets
for index, file in enumerate(p_crf):
    truncated = file.replace("_p.xlsx", "")
    if truncated+"_ts.xlsx" in ts_crf:
        p_crf[index] = [file, truncated+"_ts.xlsx"]
        ts_crf.remove(truncated+"_ts.xlsx")

paired = [item for item in p_crf if len(item) == 2]
p_crf = [item for item in p_crf if item not in paired]

print("The following files have no pairs")
print(p_crf)
print(ts_crf)

The following files have no pairs
[]
[]


In [240]:
#derive values from each pair in directory and add to dataframes
loaded_data = pd.DataFrame()
for pair in paired:
    hosp_data = data_load(pair[0], pair[1])
    hosp_code = hosp_data[0]
    pat_data = pat_values(hosp_data[1])
    print('Successfully generated patient values for: '+hosp_code)
    ts_data = ts_values(hosp_data[2])
    print('Successfully generated time-series values for: '+hosp_code)
    data_line = pd.concat((ts_data, pat_data), axis=1)
    data_line.rename(index={0:hosp_code},inplace=True)
    loaded_data = pd.concat((loaded_data, data_line), axis=0)

loaded_data.sort_index(axis=0, inplace=True)
#display(loaded_data)
main = pd.concat((main, loaded_data), axis=1)
display(main)

Successfully loaded patient CRF for: RXR20
Successfully loaded time-series CRF for: RXR20
Successfully generated patient values for: RXR20
Successfully generated time-series values for: RXR20
Successfully loaded patient CRF for: RMC01
Successfully loaded time-series CRF for: RMC01
Successfully generated patient values for: RMC01
Successfully generated time-series values for: RMC01
Successfully loaded patient CRF for: RQM01
Successfully loaded time-series CRF for: RQM01
Successfully generated patient values for: RQM01
Successfully generated time-series values for: RQM01
Successfully loaded patient CRF for: RWDDA
Successfully loaded time-series CRF for: RWDDA
Successfully generated patient values for: RWDDA
Successfully generated time-series values for: RWDDA
Successfully loaded patient CRF for: R0A02
Successfully loaded time-series CRF for: R0A02
Successfully generated patient values for: R0A02
Successfully generated time-series values for: R0A02
Successfully loaded patient CRF for: RAP

Unnamed: 0,hospital_name,type,icb,level_1,num_level_1,level_2,level_3,em_dept,services,mdt,risk_score,procedure_based,clinical_judgement,ref_anaes,ref_surg,ref_nurse,ref_spr,no_vetting,vet_anaes,vet_icu,vet_surg,vet_nurse,alo_anaes,alo_icu,alo_surg,alo_nurse,bed_nurse,nurse_bed_esc,cover_cons,cover_spr,cover_sho,clin_anaes,clin_icu,clin_surg,discharge_anaes,discharge_icu,discharge_surg,periph_vaso,central_vaso,cpap,bipap,stay_limit,all_spec,single_spec,level_1_ring,level_1_cap,level_1_esc,level_2_ring,level_2_cap,level_2_esc,level_3_ring,level_3_cap,level_3_esc,level_23_ring,level_23_cap,level_23_esc,totalringcap,total_op,%day,surg_adm,%emerg,cancel_per_1000_total,cancel_nobed_per_1000,ref_lvl1_per_1000,plan_adm_lvl1_per_1000,adm_lvl1_per_1000,%emerg_lvl1,ref_lvl23_per_1000,plan_adm_lvl23_per_1000,adm_lvl23_per_1000,%emerg_lvl23,avg_age,%female,%white,%BAME,%asian,%black,avg_CCI,%cancer,avg_ncepod,avg_severity,%cancelled,%cancelled_nobed,%level0-1_crit_care,%level0-1_crit_care_7d,LOS_mean,LOS_median,LOS1_mean,LOS1_median,LOS23_mean,LOS23_median,%died,%died6m,%clin_j,%cpet,%risk_score,pred_mortality,pred_morbidity,%sop,%ref_over8,%ref_1_7,%ref_otd,%_1_ref_over8,%_1_ref_1_7,%_1_ref_otd,%_23_ref_over8,%_23_ref_1_7,%_23_ref_otd,%1stepdowns,%1stepdowns_nobed,%23to0stepdowns,%23to0stepdowns_nobed,%23to1stepdowns,%23to1stepdowns_nobed
7A1A4,Wrexham Maelor Hospital,2,Betsi Cadwalader University HB,1,1,0,1,1,"5, 10, 11, 13, 15, 16",0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,1.5,2.0,1,0,0,1,1,1,0,0,1,1,0,1,1,1,1.0,0.0,0,3,3,0,0,0,0,12,12,0,12,12,0,2530.0,0.587747,1043.0,0.233202,35.968379,0.0,35.474593,32.598274,33.557047,0.028571,0.958773,0.0,2.876318,1.0,67.5,0.473684,1.0,0.0,0.0,0.0,3.421053,0.605263,3.710526,3.894737,0.0,0.0,0.0,0.0,1.131579,1.0,1.026316,1.0,1.833333,1.0,0.026316,0.026316,0.921053,0.0,0.0,,,0.052632,0.526316,0.263158,0.210526,0.540541,0.27027,0.189189,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
R0A02,Manchester Royal Infirmary,1,NHS Greater Manchester ICB,1,1,1,1,1,"2, 5, 8, 10, 11, 13, 14, 15, 16",0,0,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,2.0,3.0,0,1,0,1,1,1,1,0,0,1,1,1,0,0,1.0,0.0,9,9,9,0,20,20,0,28,28,0,48,48,9,2692.0,0.251857,2014.0,0.452956,70.208024,5.958292,68.520357,64.548163,73.982125,0.127517,42.701092,30.287984,38.232373,0.207792,64.598214,0.379464,0.772321,0.227679,0.044643,0.03125,4.571429,0.620536,3.15625,4.3125,0.053571,0.053571,0.105634,0.049296,10.990566,7.0,3.310345,3.0,5.662651,3.0,0.033019,0.014151,0.214286,0.236607,0.191964,0.044412,0.117742,0.678571,0.678571,0.241071,0.098214,0.630435,0.268116,0.101449,0.755814,0.197674,0.093023,0.0,0.0,0.0,0.0,0.141026,0.012821
RAPD3,North Middlesex Hospital,2,NHS North Central London ICB,0,0,1,1,1,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,11,11,0,12,12,0,23,23,0,2585.0,0.642166,925.0,0.157801,149.323017,0.0,0.0,0.0,0.0,0.0,9.72973,1.081081,30.27027,0.964286,69.0,0.666667,0.444444,0.555556,0.333333,0.222222,4.111111,0.222222,3.555556,5.0,0.0,0.0,0.0,0.0,22.555556,8.0,,,3.0,3.0,0.0,0.0,0.777778,0.0,0.111111,0.57,0.63,0.111111,0.111111,0.111111,0.777778,,,,0.111111,0.111111,0.777778,,,0.777778,0.0,0.0,0.0
RFRPA,Rotherham Hospital,2,NHS South Yorkshire ICB,0,0,1,1,1,"5, 11, 13",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,8,8,0,5,5,0,13,13,0,2494.0,0.492783,1265.0,0.200428,96.631917,6.324111,0.0,0.0,0.0,0.0,23.715415,11.067194,38.735178,0.714286,67.275862,0.448276,0.965517,0.034483,0.0,0.0,3.137931,0.344828,3.655172,3.724138,0.448276,0.241379,0.0,0.0,6.9375,5.5,,,1.6,1.0,0.125,0.125,0.310345,0.206897,0.344828,0.0313,0.1,0.068966,0.827586,0.137931,0.034483,,,,0.827586,0.137931,0.034483,,,0.0625,0.0,0.0,0.0
RJ1AK,William Harvey Hospital,2,NHS Kent and Medway ICB,0,0,0,1,1,"6, 10, 11, 13,15",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,18,18,0,18,18,0,2510.0,0.476494,1314.0,0.57367,49.800797,0.0,0.0,0.0,0.0,0.0,42.61796,13.69863,47.945205,0.714286,69.218182,0.309091,0.872727,0.127273,0.0,0.0,3.618182,0.745455,3.945455,3.781818,0.0,0.0,0.485714,0.0,12.763636,6.0,1.0,1.0,3.567568,3.0,0.0,0.0,0.690909,0.036364,0.0,,,0.272727,0.709091,0.290909,0.0,,,,0.709091,0.290909,0.0,,,0.636364,0.0,0.0,0.0
RJC02,Warwick Hospital,2,NHS Coventry and Warwickshire ICB,0,0,1,1,1,"5, 13",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,2,2,0,5,5,0,7,7,0,4021.0,0.54663,1823.0,0.087514,53.220592,0.0,0.0,0.0,0.0,0.0,2.742732,2.194185,2.194185,0.0,69.6,0.4,0.8,0.2,0.2,0.0,2.6,0.4,3.4,3.4,0.0,0.0,1.0,1.0,14.4,2.0,,,14.0,1.0,0.0,0.0,0.8,0.0,0.0,,,0.2,0.0,0.4,0.6,,,,0.0,0.4,0.6,,,0.2,0.0,0.0,0.0
RMC01,Royal Bolton Hospital,2,NHS Greater Manchester ICB,0,0,1,1,1,"13, 15",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,10,10,0,8,8,0,18,18,0,3331.0,0.627439,1241.0,0.425806,165.716001,0.0,0.0,0.0,0.0,0.0,27.39726,5.640612,30.620467,0.815789,63.088235,0.558824,0.911765,0.088235,0.058824,0.0,3.5,0.235294,4.0,3.205882,0.117647,0.0,0.0,0.0,10.533333,4.0,,,7.142857,7.0,0.033333,0.033333,0.588235,0.0,0.411765,1.434407,13.453333,0.0,0.588235,0.352941,0.058824,,,,0.588235,0.352941,0.058824,,,0.766667,0.0,0.0,0.0
RQM01,Chelsea and Westminster Hospital,1,NHS North West London ICB,1,1,0,1,1,"1, 3, 5, 10, 11, 13",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,1.0,0,1,0,1,0,1,1,0,1,0,0,1,0,1,1.0,0.0,0,0,4,0,0,0,0,14,22,0,14,22,0,3111.0,0.497911,1562.0,0.15802,9.000321,0.0,0.0,0.0,0.0,0.0,17.285531,17.285531,35.211268,0.509091,64.555556,0.407407,0.518519,0.481481,0.074074,0.148148,3.222222,0.555556,3.703704,4.703704,0.0,0.0,0.0,0.0,11.925926,8.0,,,1.851852,2.0,0.0,0.0,0.296296,0.0,0.259259,0.190833,0.471667,0.555556,0.296296,0.555556,0.148148,,,,0.296296,0.555556,0.148148,,,0.0,0.0,0.0,0.0
RWDDA,Lincoln County Hospital,2,NHS Lincolnshire ICB,1,1,0,1,1,"5, 11, 13, 15",0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,2.67,3.67,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1.0,0.0,0,8,10,0,0,0,0,11,15,0,11,15,0,2085.0,0.294964,1470.0,0.494484,130.935252,4.081633,49.659864,38.095238,57.823129,0.341176,11.564626,4.761905,36.054422,0.867925,66.829545,0.409091,0.965909,0.034091,0.034091,0.0,3.25,0.545455,3.056818,3.909091,0.159091,0.056818,0.063492,0.031746,6.297297,4.0,3.238806,2.0,1.928571,1.0,0.040541,0.040541,0.988636,0.0,0.0,,,0.0,0.443182,0.306818,0.261364,0.450704,0.267606,0.28169,0.411765,0.470588,0.176471,0.063492,0.0,0.090909,0.0,0.181818,0.0
RXR20,Royal Blackburn Teaching Hospital,1,NHS Lancashire and South Cumbria ICB,1,1,1,1,1,"8, 11, 13, 16",0,1,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,3.0,4.0,0,1,0,0,1,1,0,1,1,1,1,1,0,0,1.0,0.0,0,4,4,0,6,6,0,18,22,0,24,28,0,6768.0,0.509013,3323.0,0.094379,83.628842,0.0,21.667168,19.560638,25.880229,0.244186,22.8709,18.356906,49.352994,0.628049,65.328859,0.42953,0.791946,0.208054,0.073826,0.006711,3.758389,0.630872,3.852349,4.409396,0.053691,0.0,0.025974,0.025974,13.375887,8.0,3.436782,3.0,4.03125,3.0,0.078014,0.078014,0.409396,0.33557,0.42953,0.038409,0.204138,0.006711,0.510067,0.167785,0.322148,0.561644,0.123288,0.315068,0.460526,0.210526,0.328947,0.0,0.0,0.0,0.0,0.148649,0.0


In [237]:
#test cell
#hosp_data = data_load(paired[5][0], paired[5][1])
#pat_data = pat_values(hosp_data[1])
#display(pat_data)
#ts_data = ts_values(hosp_data[2])
#display(ts_data)

In [None]:
#Analysis
#Code to summarise the key statistics from time series and patient CRF and save them in a dataframe where the index is hospital
#Code to summarise patient characteristics by saving all patients into 1 dataframe, add hospital details for each patient
#Look at characteristics of the patients referred to level 1 versus other levels
#Analyse the impact of particular structures of enhanced care (e.g. nurse referral) on what type of patients get admitted, referral to capacity ratio etc
#Penalisation via LASSO for analysis
#Look into fragility index for conclusions
#Calculate referral to resource ratio
#Patient dataset should look at the characteristics of those referred to level 1 vs level 2/3 ?lower CCI/acuity
#Quantify the number of patients that were referred to level 1 that were cancelled due to a lack of bed, compare to level 2/3 ?were any level 1 patients cancelled due to a lack of bed
#Compare cancellations due to a lack of bed between the time-series and patient CRFs and assess concordance