# Load Libraries

In [None]:
import pandas as pd
import openpyxl
import csv
import tqdm
import ast
import datetime
import math
import numpy as np
import os
import shutil
import gc
from datetime import datetime
import pytz
import sys
import glob

# Set base directory

In [None]:
HOME_DIR =  "/data/pathogen_ncd"

# Helper Functions

In [None]:
# Simple function specific to ICD10 codes to convert how UKB stores ICD codes 
# (no period) to the normal convention, with a period.
def fix_icd10(curr_icd):
    """Insert the conventional period into a period-less ICD10 code.

    Codes longer than three characters get a period after the third
    character (e.g. ``"E119"`` -> ``"E11.9"``); codes of three characters
    or fewer are returned unchanged.

    Args:
        curr_icd: ICD10 code as a string without a period.

    Returns:
        The code in the usual dotted notation.
    """
    # Three characters or fewer: category-only code, nothing to insert.
    if len(curr_icd) <= 3:
        return curr_icd

    category, detail = curr_icd[:3], curr_icd[3:]
    return f"{category}.{detail}"
    
# A bit more complicated function specific to ICD9 to convert how UKB stores ICD 
# codes (no period) to the normal convention, with a period.
def fix_icd9(curr_icd):
    """Convert a period-less ICD9 code to the conventional dotted notation.

    The category part (the characters before the period) depends on the
    kind of code:

    * numeric codes: three digits, e.g. ``"4019"``  -> ``"401.9"``
    * V codes: ``V`` + two digits, e.g. ``"V1011"`` -> ``"V10.11"``
    * E codes: ``E`` + three digits, e.g. ``"E8490"`` -> ``"E849.0"``

    Any period already present in the input is removed first, so codes that
    are already dotted are re-normalised rather than double-dotted.

    Args:
        curr_icd: ICD9 code; it is converted with ``str()`` before use, so
            integer-typed codes are accepted.

    Returns:
        The dotted code as a string. Codes that consist of the category
        only are returned without a trailing period.
    """
    raw_icd = str(curr_icd)
    bare_icd = raw_icd.replace('.', '')

    # Inputs of three characters or fewer cannot carry a sub-classification;
    # report them (as before) and return them without a period.
    if len(raw_icd) <= 3:
        new_icd = bare_icd
        print(f"Non-standard ICD9 code found: {curr_icd}  --> Setting to {new_icd}")
        return new_icd

    # Only E codes have a four-character category (letter + three digits).
    # V codes are letter + two digits, i.e. the same width as numeric codes.
    # https://en.wikipedia.org/wiki/List_of_ICD-9_codes_E_and_V_codes:_external_causes_of_injury_and_supplemental_classification
    category_len = 4 if bare_icd.startswith('E') else 3

    # Category-only code (e.g. "E849"): there is nothing to put after the
    # period, so do not emit a dangling one.
    if len(bare_icd) <= category_len:
        return bare_icd

    return f"{bare_icd[:category_len]}.{bare_icd[category_len:]}"

# Convert a float (possibly fractional) year to a 'YYYY-MM-DD' date string.
def fl_to_dt(in_float):
    """Convert a (possibly fractional) year into a ``YYYY-MM-DD`` string.

    The integer part of ``in_float`` is taken as the calendar year and the
    fractional part is converted to days using a 365.25-day year, counted
    from 1 January of that year.

    Args:
        in_float: Year as a number, e.g. ``2000.5``.

    Returns:
        The corresponding date formatted as ``'%Y-%m-%d'``.
    """
    # Import locally under private aliases: this file runs both
    # `import datetime` and, later, `from datetime import datetime`, so the
    # module-level name `datetime` is the class and `datetime.datetime` /
    # `datetime.timedelta` would raise AttributeError.
    from datetime import datetime as _datetime, timedelta as _timedelta

    yr = int(in_float)
    frac = in_float - yr

    # Using 365.25 for leap years
    tot_days = 365.25 * frac
    date = _datetime(yr, 1, 1) + _timedelta(days=tot_days)

    # Convert to YYYY-MM-DD format
    return date.strftime('%Y-%m-%d')

# Processing Raw Data

In [None]:
# PCP: gp_clin.tsv [3.8 GB]

# Rest of diags we used in our original analysis [636 MB]
# Hospital inpatient
# Death registry
# Cancer registry
# Self-report both cancer and non-cancer

## PCP/GP data

In [None]:
# Using data from Denaxas et al. (PMID: 35308936) to convert PCP/GP
# read codes over to ICD10 codes, which can then translate into 
# Phecodes

# Directories (under HOME_DIR) for the raw UKB extracts and for the processed output.
UKB_RAW_DIR = f'{HOME_DIR}/phecode/ukb/ukb_raw'
OUT_DIR = f'{HOME_DIR}/phecode/ukb/ukb_proc'
# All three raw tables are tab-separated. gp_clin.tsv is read in full here,
# so this step is the memory-heavy one.
dat = pd.read_csv(f'{UKB_RAW_DIR}/base_data.tsv', sep = '\t')
dd  = pd.read_csv(f'{UKB_RAW_DIR}/data_dict.tsv', sep = '\t')
gp  = pd.read_csv(f'{UKB_RAW_DIR}/gp_clin.tsv', sep = '\t')

# Row count of the full GP table; 118,422,458 on the original run.
print(len(gp))

# Read v2 / CTV3 -> Phecode lookup tables from Spiros' GitHub
# (https://github.com/spiros/ukbiobank-read-to-phecode/tree/master).
# The authors confirmed that these are the final versions of the files
# used for their paper (PMC8861677).
UKB_GP_DICT_DIR = f"{HOME_DIR}/phecode/misc/ukb_gp_to_icd10_to_phecode"
v2 = pd.read_csv(f'{UKB_GP_DICT_DIR}/read2_to_phecode.csv', sep = ',')
v3 = pd.read_csv(f'{UKB_GP_DICT_DIR}/ctv3_to_phecode.csv', sep = ',')


# Extract the participants with titer data so the diagnosis data can be
# limited to them, which makes it much more manageable.
# pat_ls holds the unique 'eid' values found in the covariate file.
cov_dat = f'{HOME_DIR}/procd/cov_dat.csv'
covs = pd.read_csv(cov_dat, sep = ',')
pat_ls = covs['eid'].unique().tolist()

# Keep only the demographic rows whose 'eid' is in pat_ls.
demo = pd.read_csv(f'{UKB_RAW_DIR}/demo_data_procd.tsv', sep = '\t')
demo = demo.loc[demo['eid'].isin(pat_ls), :]

# 9,427 on the original run, although 9,429 were expected. Most likely two
# additional people withdrew, so their data can no longer be retrieved.
print(len(demo))

In [None]:
# Keep only GP rows for participants with titer data; every later step
# then runs on a much smaller table.
gp = gp[gp['eid'].isin(pat_ls)]

# Give the lookup tables explicit column names so the merge keys line up
# with the 'read_2' / 'read_3' columns of the GP table.
v2.columns = ['read_2', 'read2_icd10', 'read2_phecode']
v3.columns = ['read_3', 'read3_icd10', 'read3_phecode']

# Attach Spiros' ICD10 codes and Phecodes for both Read versions.
gp = (
    gp.merge(v2, how='left', on='read_2')
      .merge(v3, how='left', on='read_3')
)

# Rows that received an ICD10 from BOTH Read v2 and CTV3.
# 0 on the original run, i.e. the two sources never overlap (a good thing).
print(len(gp[gp['read3_icd10'].notna() & gp['read2_icd10'].notna()]))

# Most mapped rows come from CTV3, which is reassuring: it is the more
# recent coding system, adopted as more doctors shifted to EHRs.

# Rows mapped via CTV3: 86,037
print(len(gp[gp['read3_icd10'].notna()]))

# Rows mapped via Read v2: 28,671
print(len(gp[gp['read2_icd10'].notna()]))

# Final ICD10: take the Read v2 mapping when present, otherwise the CTV3 one.
gp['fin_icd'] = np.where(gp['read2_icd10'].notna(), gp['read2_icd10'], gp['read3_icd10'])

# Total rows after the merges: 2,274,376
print(len(gp))

# Drop rows that never received an ICD10 code (filtered out by Spiros'
# mapping) and keep only the columns needed downstream, as an independent copy.
clean_gp = gp.loc[gp['fin_icd'].notna(), ['eid', 'event_dt', 'fin_icd']].copy(deep=True)

# Rows with a usable ICD10 code: 114,708
print(len(clean_gp))

In [None]:
# Persist the cleaned GP diagnoses as a tab-separated file without the
# DataFrame index; the file name suggests it feeds the Phecode mapping step.
clean_gp.to_csv(f'{OUT_DIR}/gp/gp_prepped_for_phecode.tsv', sep = '\t', index = False)

## Rename a bunch of the columns from the UKB ID

In [None]:
# New this time:
# p20001 : self-report cancer code
# p20002 : self-report non-cancer diagnosis code
# p20006 : Interpolated yr self-report cancer diagnosed
# p20008 : Interpolated yr self-report non-cancer diagnosis code diagnosed

add_info_dict = {   'p40000_i0' :  'death_date_inst_0_ent_0',
                    'p40000_i1' : 'death_date_inst_1_ent_0',


                    'p40001_i0' : 'death_primary_cause_icd10_inst_0_ent_0',
                    'p40001_i1' : 'death_primary_cause_icd10_inst_1_ent_0',


                    'p40002_i0_a1' : 'death_secondary_cause_icd10_inst_0_ent_1',
                    'p40002_i0_a2' : 'death_secondary_cause_icd10_inst_0_ent_2',
                    'p40002_i0_a3' : 'death_secondary_cause_icd10_inst_0_ent_3',
                    'p40002_i0_a4' : 'death_secondary_cause_icd10_inst_0_ent_4',
                    'p40002_i0_a5' : 'death_secondary_cause_icd10_inst_0_ent_5',
                    'p40002_i0_a6' : 'death_secondary_cause_icd10_inst_0_ent_6',
                    'p40002_i0_a7' : 'death_secondary_cause_icd10_inst_0_ent_7',
                    'p40002_i0_a8' : 'death_secondary_cause_icd10_inst_0_ent_8',
                    'p40002_i0_a9' : 'death_secondary_cause_icd10_inst_0_ent_9',
                    'p40002_i0_a10' :  'death_secondary_cause_icd10_inst_0_ent_10',
                    'p40002_i0_a11' :  'death_secondary_cause_icd10_inst_0_ent_11',
                    'p40002_i0_a12' :  'death_secondary_cause_icd10_inst_0_ent_12',
                    'p40002_i0_a13' :  'death_secondary_cause_icd10_inst_0_ent_13',
                    'p40002_i0_a14' :  'death_secondary_cause_icd10_inst_0_ent_14',
                    'p40002_i1_a1' : 'death_secondary_cause_icd10_inst_1_ent_1',
                    'p40002_i1_a2' : 'death_secondary_cause_icd10_inst_1_ent_2',
                    'p40002_i1_a3' : 'death_secondary_cause_icd10_inst_1_ent_3',
                    'p40002_i1_a4' : 'death_secondary_cause_icd10_inst_1_ent_4',
                    'p40002_i1_a5' : 'death_secondary_cause_icd10_inst_1_ent_5',
                    'p40002_i1_a6' : 'death_secondary_cause_icd10_inst_1_ent_6',
                    'p40002_i1_a7' : 'death_secondary_cause_icd10_inst_1_ent_7',
                    'p40002_i1_a8' : 'death_secondary_cause_icd10_inst_1_ent_8',
                    'p40002_i1_a9' : 'death_secondary_cause_icd10_inst_1_ent_9',
                    'p40002_i1_a10' :  'death_secondary_cause_icd10_inst_1_ent_10',
                    'p40002_i1_a11' :  'death_secondary_cause_icd10_inst_1_ent_11',
                    'p40002_i1_a12' :  'death_secondary_cause_icd10_inst_1_ent_12',
                    'p40002_i1_a13' :  'death_secondary_cause_icd10_inst_1_ent_13',
                    'p40002_i1_a14' :  'death_secondary_cause_icd10_inst_1_ent_14',


                    'p40005_i0' : 'cancer_diag_date_inst_0_ent_0',
                    'p40005_i1' : 'cancer_diag_date_inst_1_ent_0',
                    'p40005_i2' : 'cancer_diag_date_inst_2_ent_0',
                    'p40005_i3' : 'cancer_diag_date_inst_3_ent_0',
                    'p40005_i4' : 'cancer_diag_date_inst_4_ent_0',
                    'p40005_i5' : 'cancer_diag_date_inst_5_ent_0',
                    'p40005_i6' : 'cancer_diag_date_inst_6_ent_0',
                    'p40005_i7' : 'cancer_diag_date_inst_7_ent_0',
                    'p40005_i8' : 'cancer_diag_date_inst_8_ent_0',
                    'p40005_i9' : 'cancer_diag_date_inst_9_ent_0',
                    'p40005_i10' :  'cancer_diag_date_inst_10_ent_0',
                    'p40005_i11' :  'cancer_diag_date_inst_11_ent_0',
                    'p40005_i12' :  'cancer_diag_date_inst_12_ent_0',
                    'p40005_i13' :  'cancer_diag_date_inst_13_ent_0',
                    'p40005_i14' :  'cancer_diag_date_inst_14_ent_0',
                    'p40005_i15' :  'cancer_diag_date_inst_15_ent_0',
                    'p40005_i16' :  'cancer_diag_date_inst_16_ent_0',
                    'p40005_i17' :  'cancer_diag_date_inst_17_ent_0',
                    'p40005_i18' : 'cancer_diag_date_inst_18_ent_0',
                    'p40005_i19' : 'cancer_diag_date_inst_19_ent_0',
                    'p40005_i20' : 'cancer_diag_date_inst_20_ent_0',
                    'p40005_i21' : 'cancer_diag_date_inst_21_ent_0',   

                    'p40006_i0' : 'cancer_icd10_inst_0_ent_0',
                    'p40006_i1' : 'cancer_icd10_inst_1_ent_0',
                    'p40006_i2' : 'cancer_icd10_inst_2_ent_0',
                    'p40006_i3' : 'cancer_icd10_inst_3_ent_0',
                    'p40006_i4' : 'cancer_icd10_inst_4_ent_0',
                    'p40006_i5' : 'cancer_icd10_inst_5_ent_0',
                    'p40006_i6' : 'cancer_icd10_inst_6_ent_0',
                    'p40006_i7' : 'cancer_icd10_inst_7_ent_0',
                    'p40006_i8' : 'cancer_icd10_inst_8_ent_0',
                    'p40006_i9' : 'cancer_icd10_inst_9_ent_0',
                    'p40006_i10' :  'cancer_icd10_inst_10_ent_0',
                    'p40006_i11' :  'cancer_icd10_inst_11_ent_0',
                    'p40006_i12' :  'cancer_icd10_inst_12_ent_0',
                    'p40006_i13' :  'cancer_icd10_inst_13_ent_0',
                    'p40006_i14' :  'cancer_icd10_inst_14_ent_0',
                    'p40006_i15' :  'cancer_icd10_inst_15_ent_0',
                    'p40006_i16' :  'cancer_icd10_inst_16_ent_0',
                    'p40006_i17' : 'cancer_icd10_inst_17_ent_0',
                    'p40006_i18' : 'cancer_icd10_inst_18_ent_0',
                    'p40006_i19' : 'cancer_icd10_inst_19_ent_0',
                    'p40006_i20' : 'cancer_icd10_inst_20_ent_0',
                    'p40006_i21' : 'cancer_icd10_inst_21_ent_0',    

                    'p40007_i0' : 'death_age_inst_0_ent_0',
                    'p40007_i1' : 'death_age_inst_1_ent_0',


                    'p40008_i0' : 'cancer_diag_age_inst_0_ent_0',
                    'p40008_i1' : 'cancer_diag_age_inst_1_ent_0',
                    'p40008_i2' : 'cancer_diag_age_inst_2_ent_0',
                    'p40008_i3' : 'cancer_diag_age_inst_3_ent_0',
                    'p40008_i4' : 'cancer_diag_age_inst_4_ent_0',
                    'p40008_i5' : 'cancer_diag_age_inst_5_ent_0',
                    'p40008_i6' : 'cancer_diag_age_inst_6_ent_0',
                    'p40008_i7' : 'cancer_diag_age_inst_7_ent_0',
                    'p40008_i8' : 'cancer_diag_age_inst_8_ent_0',
                    'p40008_i9' : 'cancer_diag_age_inst_9_ent_0',
                    'p40008_i10' :  'cancer_diag_age_inst_10_ent_0',
                    'p40008_i11' :  'cancer_diag_age_inst_11_ent_0',
                    'p40008_i12' :  'cancer_diag_age_inst_12_ent_0',
                    'p40008_i13' :  'cancer_diag_age_inst_13_ent_0',
                    'p40008_i14' :  'cancer_diag_age_inst_14_ent_0',
                    'p40008_i15' :  'cancer_diag_age_inst_15_ent_0',
                    'p40008_i16' :  'cancer_diag_age_inst_16_ent_0',
                    'p40008_i17' : 'cancer_diag_age_inst_17_ent_0',
                    'p40008_i18' : 'cancer_diag_age_inst_18_ent_0',
                    'p40008_i19' : 'cancer_diag_age_inst_19_ent_0',
                    'p40008_i20' : 'cancer_diag_age_inst_20_ent_0',
                    'p40008_i21' : 'cancer_diag_age_inst_21_ent_0',     

                    'p40013_i0' : 'cancer_icd9_inst_0_ent_0',
                    'p40013_i1' : 'cancer_icd9_inst_1_ent_0',
                    'p40013_i2' : 'cancer_icd9_inst_2_ent_0',
                    'p40013_i3' : 'cancer_icd9_inst_3_ent_0',
                    'p40013_i4' : 'cancer_icd9_inst_4_ent_0',
                    'p40013_i5' : 'cancer_icd9_inst_5_ent_0',
                    'p40013_i6' : 'cancer_icd9_inst_6_ent_0',
                    'p40013_i7' : 'cancer_icd9_inst_7_ent_0',
                    'p40013_i8' : 'cancer_icd9_inst_8_ent_0',
                    'p40013_i9' : 'cancer_icd9_inst_9_ent_0',
                    'p40013_i10' :  'cancer_icd9_inst_10_ent_0',
                    'p40013_i11' :  'cancer_icd9_inst_11_ent_0',
                    'p40013_i12' :  'cancer_icd9_inst_12_ent_0',
                    'p40013_i13' :  'cancer_icd9_inst_13_ent_0',
                    'p40013_i14' :  'cancer_icd9_inst_14_ent_0',


                    'p41201' : 'hospital_external_cause_icd10_inst_0_ent_0',


                    'p41202' : 'hospital_main_diag_icd10_inst_0_ent_0',


                    'p41203' : 'hospital_main_diag_icd9_inst_0_ent_0',


                    'p41204' : 'hospital_second_diag_icd10_inst_0_ent_0',


                    'p41205' : 'hospital_second_diag_icd9_inst_0_ent_0',

                    'p41262_a0' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_0',
                    'p41262_a1' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_1',
                    'p41262_a2' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_2',
                    'p41262_a3' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_3',
                    'p41262_a4' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_4',
                    'p41262_a5' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_5',
                    'p41262_a6' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_6',
                    'p41262_a7' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_7',
                    'p41262_a8' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_8',
                    'p41262_a9' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_9',
                    'p41262_a10' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_10',
                    'p41262_a11' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_11',
                    'p41262_a12' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_12',
                    'p41262_a13' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_13',
                    'p41262_a14' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_14',
                    'p41262_a15' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_15',
                    'p41262_a16' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_16',
                    'p41262_a17' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_17',
                    'p41262_a18' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_18',
                    'p41262_a19' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_19',
                    'p41262_a20' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_20',
                    'p41262_a21' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_21',
                    'p41262_a22' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_22',
                    'p41262_a23' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_23',
                    'p41262_a24' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_24',
                    'p41262_a25' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_25',
                    'p41262_a26' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_26',
                    'p41262_a27' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_27',
                    'p41262_a28' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_28',
                    'p41262_a29' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_29',
                    'p41262_a30' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_30',
                    'p41262_a31' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_31',
                    'p41262_a32' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_32',
                    'p41262_a33' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_33',
                    'p41262_a34' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_34',
                    'p41262_a35' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_35',
                    'p41262_a36' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_36',
                    'p41262_a37' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_37',
                    'p41262_a38' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_38',
                    'p41262_a39' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_39',
                    'p41262_a40' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_40',
                    'p41262_a41' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_41',
                    'p41262_a42' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_42',
                    'p41262_a43' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_43',
                    'p41262_a44' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_44',
                    'p41262_a45' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_45',
                    'p41262_a46' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_46',
                    'p41262_a47' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_47',
                    'p41262_a48' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_48',
                    'p41262_a49' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_49',
                    'p41262_a50' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_50',
                    'p41262_a51' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_51',
                    'p41262_a52' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_52',
                    'p41262_a53' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_53',
                    'p41262_a54' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_54',
                    'p41262_a55' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_55',
                    'p41262_a56' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_56',
                    'p41262_a57' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_57',
                    'p41262_a58' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_58',
                    'p41262_a59' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_59',
                    'p41262_a60' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_60',
                    'p41262_a61' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_61',
                    'p41262_a62' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_62',
                    'p41262_a63' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_63',
                    'p41262_a64' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_64',
                    'p41262_a65' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_65',
                    'p41262_a66' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_66',
                    'p41262_a67' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_67',
                    'p41262_a68' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_68',
                    'p41262_a69' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_69',
                    'p41262_a70' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_70',
                    'p41262_a71' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_71',
                    'p41262_a72' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_72',
                    'p41262_a73' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_73',
                    'p41262_a74' :  'hospital_date_of_first_main_diag_icd10_inst_0_ent_74',
                    'p41262_a75' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_75',
                    'p41262_a76' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_76',
                    'p41262_a77' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_77',
                    'p41262_a78' : 'hospital_date_of_first_main_diag_icd10_inst_0_ent_78',


                    'p41263_a0' : 'hospital_date_of_first_main_diag_icd9_inst_0_ent_0', 
                    'p41263_a1' : 'hospital_date_of_first_main_diag_icd9_inst_0_ent_1', 
                    'p41263_a2' : 'hospital_date_of_first_main_diag_icd9_inst_0_ent_2', 
                    'p41263_a3' : 'hospital_date_of_first_main_diag_icd9_inst_0_ent_3', 
                    'p41263_a4' : 'hospital_date_of_first_main_diag_icd9_inst_0_ent_4', 
                    'p41263_a5' : 'hospital_date_of_first_main_diag_icd9_inst_0_ent_5', 
                    'p41263_a6' : 'hospital_date_of_first_main_diag_icd9_inst_0_ent_6', 
                    'p41263_a7' : 'hospital_date_of_first_main_diag_icd9_inst_0_ent_7', 
                    'p41263_a8' : 'hospital_date_of_first_main_diag_icd9_inst_0_ent_8', 
                    'p41263_a9' : 'hospital_date_of_first_main_diag_icd9_inst_0_ent_9', 
                    'p41263_a10' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_10',
                    'p41263_a11' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_11',
                    'p41263_a12' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_12',
                    'p41263_a13' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_13',
                    'p41263_a14' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_14',
                    'p41263_a15' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_15',
                    'p41263_a16' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_16',
                    'p41263_a17' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_17',
                    'p41263_a18' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_18',
                    'p41263_a19' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_19',
                    'p41263_a20' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_20',
                    'p41263_a21' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_21',
                    'p41263_a22' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_22',
                    'p41263_a23' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_23',
                    'p41263_a24' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_24',
                    'p41263_a25' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_25',
                    'p41263_a26' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_26',
                    'p41263_a27' :  'hospital_date_of_first_main_diag_icd9_inst_0_ent_27',


                    'p41270' : 'hospital_diags_icd10_inst_0_ent_0',


                    'p41271' : 'hospital_diags_icd9_inst_0_ent_0',


                    'p41280_a0' : 'hospital_date_of_first_diags_icd10_inst_0_ent_0', 
                    'p41280_a1' : 'hospital_date_of_first_diags_icd10_inst_0_ent_1', 
                    'p41280_a2' : 'hospital_date_of_first_diags_icd10_inst_0_ent_2', 
                    'p41280_a3' : 'hospital_date_of_first_diags_icd10_inst_0_ent_3', 
                    'p41280_a4' : 'hospital_date_of_first_diags_icd10_inst_0_ent_4', 
                    'p41280_a5' : 'hospital_date_of_first_diags_icd10_inst_0_ent_5', 
                    'p41280_a6' : 'hospital_date_of_first_diags_icd10_inst_0_ent_6', 
                    'p41280_a7' : 'hospital_date_of_first_diags_icd10_inst_0_ent_7', 
                    'p41280_a8' : 'hospital_date_of_first_diags_icd10_inst_0_ent_8', 
                    'p41280_a9' : 'hospital_date_of_first_diags_icd10_inst_0_ent_9', 
                    'p41280_a10' :  'hospital_date_of_first_diags_icd10_inst_0_ent_10', 
                    'p41280_a11' :  'hospital_date_of_first_diags_icd10_inst_0_ent_11', 
                    'p41280_a12' :  'hospital_date_of_first_diags_icd10_inst_0_ent_12', 
                    'p41280_a13' :  'hospital_date_of_first_diags_icd10_inst_0_ent_13', 
                    'p41280_a14' :  'hospital_date_of_first_diags_icd10_inst_0_ent_14', 
                    'p41280_a15' :  'hospital_date_of_first_diags_icd10_inst_0_ent_15', 
                    'p41280_a16' :  'hospital_date_of_first_diags_icd10_inst_0_ent_16', 
                    'p41280_a17' :  'hospital_date_of_first_diags_icd10_inst_0_ent_17', 
                    'p41280_a18' :  'hospital_date_of_first_diags_icd10_inst_0_ent_18', 
                    'p41280_a19' :  'hospital_date_of_first_diags_icd10_inst_0_ent_19', 
                    'p41280_a20' :  'hospital_date_of_first_diags_icd10_inst_0_ent_20', 
                    'p41280_a21' :  'hospital_date_of_first_diags_icd10_inst_0_ent_21', 
                    'p41280_a22' :  'hospital_date_of_first_diags_icd10_inst_0_ent_22', 
                    'p41280_a23' :  'hospital_date_of_first_diags_icd10_inst_0_ent_23', 
                    'p41280_a24' :  'hospital_date_of_first_diags_icd10_inst_0_ent_24', 
                    'p41280_a25' :  'hospital_date_of_first_diags_icd10_inst_0_ent_25', 
                    'p41280_a26' :  'hospital_date_of_first_diags_icd10_inst_0_ent_26', 
                    'p41280_a27' :  'hospital_date_of_first_diags_icd10_inst_0_ent_27', 
                    'p41280_a28' :  'hospital_date_of_first_diags_icd10_inst_0_ent_28', 
                    'p41280_a29' :  'hospital_date_of_first_diags_icd10_inst_0_ent_29', 
                    'p41280_a30' :  'hospital_date_of_first_diags_icd10_inst_0_ent_30', 
                    'p41280_a31' :  'hospital_date_of_first_diags_icd10_inst_0_ent_31', 
                    'p41280_a32' :  'hospital_date_of_first_diags_icd10_inst_0_ent_32', 
                    'p41280_a33' :  'hospital_date_of_first_diags_icd10_inst_0_ent_33', 
                    'p41280_a34' :  'hospital_date_of_first_diags_icd10_inst_0_ent_34', 
                    'p41280_a35' :  'hospital_date_of_first_diags_icd10_inst_0_ent_35', 
                    'p41280_a36' :  'hospital_date_of_first_diags_icd10_inst_0_ent_36', 
                    'p41280_a37' :  'hospital_date_of_first_diags_icd10_inst_0_ent_37', 
                    'p41280_a38' :  'hospital_date_of_first_diags_icd10_inst_0_ent_38', 
                    'p41280_a39' :  'hospital_date_of_first_diags_icd10_inst_0_ent_39', 
                    'p41280_a40' :  'hospital_date_of_first_diags_icd10_inst_0_ent_40', 
                    'p41280_a41' :  'hospital_date_of_first_diags_icd10_inst_0_ent_41', 
                    'p41280_a42' :  'hospital_date_of_first_diags_icd10_inst_0_ent_42', 
                    'p41280_a43' :  'hospital_date_of_first_diags_icd10_inst_0_ent_43', 
                    'p41280_a44' :  'hospital_date_of_first_diags_icd10_inst_0_ent_44', 
                    'p41280_a45' :  'hospital_date_of_first_diags_icd10_inst_0_ent_45', 
                    'p41280_a46' :  'hospital_date_of_first_diags_icd10_inst_0_ent_46', 
                    'p41280_a47' :  'hospital_date_of_first_diags_icd10_inst_0_ent_47', 
                    'p41280_a48' :  'hospital_date_of_first_diags_icd10_inst_0_ent_48', 
                    'p41280_a49' :  'hospital_date_of_first_diags_icd10_inst_0_ent_49', 
                    'p41280_a50' :  'hospital_date_of_first_diags_icd10_inst_0_ent_50', 
                    'p41280_a51' :  'hospital_date_of_first_diags_icd10_inst_0_ent_51', 
                    'p41280_a52' :  'hospital_date_of_first_diags_icd10_inst_0_ent_52', 
                    'p41280_a53' :  'hospital_date_of_first_diags_icd10_inst_0_ent_53', 
                    'p41280_a54' :  'hospital_date_of_first_diags_icd10_inst_0_ent_54', 
                    'p41280_a55' :  'hospital_date_of_first_diags_icd10_inst_0_ent_55', 
                    'p41280_a56' :  'hospital_date_of_first_diags_icd10_inst_0_ent_56', 
                    'p41280_a57' :  'hospital_date_of_first_diags_icd10_inst_0_ent_57', 
                    'p41280_a58' :  'hospital_date_of_first_diags_icd10_inst_0_ent_58', 
                    'p41280_a59' :  'hospital_date_of_first_diags_icd10_inst_0_ent_59', 
                    'p41280_a60' :  'hospital_date_of_first_diags_icd10_inst_0_ent_60', 
                    'p41280_a61' :  'hospital_date_of_first_diags_icd10_inst_0_ent_61', 
                    'p41280_a62' :  'hospital_date_of_first_diags_icd10_inst_0_ent_62', 
                    'p41280_a63' :  'hospital_date_of_first_diags_icd10_inst_0_ent_63', 
                    'p41280_a64' :  'hospital_date_of_first_diags_icd10_inst_0_ent_64', 
                    'p41280_a65' :  'hospital_date_of_first_diags_icd10_inst_0_ent_65', 
                    'p41280_a66' :  'hospital_date_of_first_diags_icd10_inst_0_ent_66', 
                    'p41280_a67' :  'hospital_date_of_first_diags_icd10_inst_0_ent_67', 
                    'p41280_a68' :  'hospital_date_of_first_diags_icd10_inst_0_ent_68', 
                    'p41280_a69' :  'hospital_date_of_first_diags_icd10_inst_0_ent_69', 
                    'p41280_a70' :  'hospital_date_of_first_diags_icd10_inst_0_ent_70', 
                    'p41280_a71' :  'hospital_date_of_first_diags_icd10_inst_0_ent_71', 
                    'p41280_a72' :  'hospital_date_of_first_diags_icd10_inst_0_ent_72', 
                    'p41280_a73' :  'hospital_date_of_first_diags_icd10_inst_0_ent_73', 
                    'p41280_a74' :  'hospital_date_of_first_diags_icd10_inst_0_ent_74', 
                    'p41280_a75' :  'hospital_date_of_first_diags_icd10_inst_0_ent_75', 
                    'p41280_a76' :  'hospital_date_of_first_diags_icd10_inst_0_ent_76', 
                    'p41280_a77' :  'hospital_date_of_first_diags_icd10_inst_0_ent_77', 
                    'p41280_a78' :  'hospital_date_of_first_diags_icd10_inst_0_ent_78', 
                    'p41280_a79' :  'hospital_date_of_first_diags_icd10_inst_0_ent_79', 
                    'p41280_a80' :  'hospital_date_of_first_diags_icd10_inst_0_ent_80', 
                    'p41280_a81' :  'hospital_date_of_first_diags_icd10_inst_0_ent_81', 
                    'p41280_a82' :  'hospital_date_of_first_diags_icd10_inst_0_ent_82', 
                    'p41280_a83' :  'hospital_date_of_first_diags_icd10_inst_0_ent_83', 
                    'p41280_a84' :  'hospital_date_of_first_diags_icd10_inst_0_ent_84', 
                    'p41280_a85' :  'hospital_date_of_first_diags_icd10_inst_0_ent_85', 
                    'p41280_a86' :  'hospital_date_of_first_diags_icd10_inst_0_ent_86', 
                    'p41280_a87' :  'hospital_date_of_first_diags_icd10_inst_0_ent_87', 
                    'p41280_a88' :  'hospital_date_of_first_diags_icd10_inst_0_ent_88', 
                    'p41280_a89' :  'hospital_date_of_first_diags_icd10_inst_0_ent_89', 
                    'p41280_a90' :  'hospital_date_of_first_diags_icd10_inst_0_ent_90', 
                    'p41280_a91' :  'hospital_date_of_first_diags_icd10_inst_0_ent_91', 
                    'p41280_a92' :  'hospital_date_of_first_diags_icd10_inst_0_ent_92', 
                    'p41280_a93' :  'hospital_date_of_first_diags_icd10_inst_0_ent_93', 
                    'p41280_a94' :  'hospital_date_of_first_diags_icd10_inst_0_ent_94', 
                    'p41280_a95' :  'hospital_date_of_first_diags_icd10_inst_0_ent_95', 
                    'p41280_a96' :  'hospital_date_of_first_diags_icd10_inst_0_ent_96', 
                    'p41280_a97' :  'hospital_date_of_first_diags_icd10_inst_0_ent_97', 
                    'p41280_a98' :  'hospital_date_of_first_diags_icd10_inst_0_ent_98', 
                    'p41280_a99' :  'hospital_date_of_first_diags_icd10_inst_0_ent_99', 
                    'p41280_a100' :  'hospital_date_of_first_diags_icd10_inst_0_ent_100',
                    'p41280_a101' :  'hospital_date_of_first_diags_icd10_inst_0_ent_101',
                    'p41280_a102' :  'hospital_date_of_first_diags_icd10_inst_0_ent_102',
                    'p41280_a103' :  'hospital_date_of_first_diags_icd10_inst_0_ent_103',
                    'p41280_a104' :  'hospital_date_of_first_diags_icd10_inst_0_ent_104',
                    'p41280_a105' :  'hospital_date_of_first_diags_icd10_inst_0_ent_105',
                    'p41280_a106' :  'hospital_date_of_first_diags_icd10_inst_0_ent_106',
                    'p41280_a107' :  'hospital_date_of_first_diags_icd10_inst_0_ent_107',
                    'p41280_a108' :  'hospital_date_of_first_diags_icd10_inst_0_ent_108',
                    'p41280_a109' :  'hospital_date_of_first_diags_icd10_inst_0_ent_109',
                    'p41280_a110' :  'hospital_date_of_first_diags_icd10_inst_0_ent_110',
                    'p41280_a111' :  'hospital_date_of_first_diags_icd10_inst_0_ent_111',
                    'p41280_a112' :  'hospital_date_of_first_diags_icd10_inst_0_ent_112',
                    'p41280_a113' :  'hospital_date_of_first_diags_icd10_inst_0_ent_113',
                    'p41280_a114' :  'hospital_date_of_first_diags_icd10_inst_0_ent_114',
                    'p41280_a115' :  'hospital_date_of_first_diags_icd10_inst_0_ent_115',
                    'p41280_a116' :  'hospital_date_of_first_diags_icd10_inst_0_ent_116',
                    'p41280_a117' :  'hospital_date_of_first_diags_icd10_inst_0_ent_117',
                    'p41280_a118' :  'hospital_date_of_first_diags_icd10_inst_0_ent_118',
                    'p41280_a119' :  'hospital_date_of_first_diags_icd10_inst_0_ent_119',
                    'p41280_a120' :  'hospital_date_of_first_diags_icd10_inst_0_ent_120',
                    'p41280_a121' :  'hospital_date_of_first_diags_icd10_inst_0_ent_121',
                    'p41280_a122' :  'hospital_date_of_first_diags_icd10_inst_0_ent_122',
                    'p41280_a123' :  'hospital_date_of_first_diags_icd10_inst_0_ent_123',
                    'p41280_a124' :  'hospital_date_of_first_diags_icd10_inst_0_ent_124',
                    'p41280_a125' :  'hospital_date_of_first_diags_icd10_inst_0_ent_125',
                    'p41280_a126' :  'hospital_date_of_first_diags_icd10_inst_0_ent_126',
                    'p41280_a127' :  'hospital_date_of_first_diags_icd10_inst_0_ent_127',
                    'p41280_a128' :  'hospital_date_of_first_diags_icd10_inst_0_ent_128',
                    'p41280_a129' :  'hospital_date_of_first_diags_icd10_inst_0_ent_129',
                    'p41280_a130' :  'hospital_date_of_first_diags_icd10_inst_0_ent_130',
                    'p41280_a131' :  'hospital_date_of_first_diags_icd10_inst_0_ent_131',
                    'p41280_a132' :  'hospital_date_of_first_diags_icd10_inst_0_ent_132',
                    'p41280_a133' :  'hospital_date_of_first_diags_icd10_inst_0_ent_133',
                    'p41280_a134' :  'hospital_date_of_first_diags_icd10_inst_0_ent_134',
                    'p41280_a135' :  'hospital_date_of_first_diags_icd10_inst_0_ent_135',
                    'p41280_a136' :  'hospital_date_of_first_diags_icd10_inst_0_ent_136',
                    'p41280_a137' :  'hospital_date_of_first_diags_icd10_inst_0_ent_137',
                    'p41280_a138' :  'hospital_date_of_first_diags_icd10_inst_0_ent_138',
                    'p41280_a139' :  'hospital_date_of_first_diags_icd10_inst_0_ent_139',
                    'p41280_a140' :  'hospital_date_of_first_diags_icd10_inst_0_ent_140',
                    'p41280_a141' :  'hospital_date_of_first_diags_icd10_inst_0_ent_141',
                    'p41280_a142' :  'hospital_date_of_first_diags_icd10_inst_0_ent_142',
                    'p41280_a143' :  'hospital_date_of_first_diags_icd10_inst_0_ent_143',
                    'p41280_a144' :  'hospital_date_of_first_diags_icd10_inst_0_ent_144',
                    'p41280_a145' :  'hospital_date_of_first_diags_icd10_inst_0_ent_145',
                    'p41280_a146' :  'hospital_date_of_first_diags_icd10_inst_0_ent_146',
                    'p41280_a147' :  'hospital_date_of_first_diags_icd10_inst_0_ent_147',
                    'p41280_a148' :  'hospital_date_of_first_diags_icd10_inst_0_ent_148',
                    'p41280_a149' :  'hospital_date_of_first_diags_icd10_inst_0_ent_149',
                    'p41280_a150' :  'hospital_date_of_first_diags_icd10_inst_0_ent_150',
                    'p41280_a151' :  'hospital_date_of_first_diags_icd10_inst_0_ent_151',
                    'p41280_a152' :  'hospital_date_of_first_diags_icd10_inst_0_ent_152',
                    'p41280_a153' :  'hospital_date_of_first_diags_icd10_inst_0_ent_153',
                    'p41280_a154' :  'hospital_date_of_first_diags_icd10_inst_0_ent_154',
                    'p41280_a155' :  'hospital_date_of_first_diags_icd10_inst_0_ent_155',
                    'p41280_a156' :  'hospital_date_of_first_diags_icd10_inst_0_ent_156',
                    'p41280_a157' :  'hospital_date_of_first_diags_icd10_inst_0_ent_157',
                    'p41280_a158' :  'hospital_date_of_first_diags_icd10_inst_0_ent_158',
                    'p41280_a159' :  'hospital_date_of_first_diags_icd10_inst_0_ent_159',
                    'p41280_a160' :  'hospital_date_of_first_diags_icd10_inst_0_ent_160',
                    'p41280_a161' :  'hospital_date_of_first_diags_icd10_inst_0_ent_161',
                    'p41280_a162' :  'hospital_date_of_first_diags_icd10_inst_0_ent_162',
                    'p41280_a163' :  'hospital_date_of_first_diags_icd10_inst_0_ent_163',
                    'p41280_a164' :  'hospital_date_of_first_diags_icd10_inst_0_ent_164',
                    'p41280_a165' :  'hospital_date_of_first_diags_icd10_inst_0_ent_165',
                    'p41280_a166' :  'hospital_date_of_first_diags_icd10_inst_0_ent_166',
                    'p41280_a167' :  'hospital_date_of_first_diags_icd10_inst_0_ent_167',
                    'p41280_a168' :  'hospital_date_of_first_diags_icd10_inst_0_ent_168',
                    'p41280_a169' :  'hospital_date_of_first_diags_icd10_inst_0_ent_169',
                    'p41280_a170' :  'hospital_date_of_first_diags_icd10_inst_0_ent_170',
                    'p41280_a171' :  'hospital_date_of_first_diags_icd10_inst_0_ent_171',
                    'p41280_a172' :  'hospital_date_of_first_diags_icd10_inst_0_ent_172',
                    'p41280_a173' :  'hospital_date_of_first_diags_icd10_inst_0_ent_173',
                    'p41280_a174' :  'hospital_date_of_first_diags_icd10_inst_0_ent_174',
                    'p41280_a175' :  'hospital_date_of_first_diags_icd10_inst_0_ent_175',
                    'p41280_a176' :  'hospital_date_of_first_diags_icd10_inst_0_ent_176',
                    'p41280_a177' :  'hospital_date_of_first_diags_icd10_inst_0_ent_177',
                    'p41280_a178' :  'hospital_date_of_first_diags_icd10_inst_0_ent_178',
                    'p41280_a179' :  'hospital_date_of_first_diags_icd10_inst_0_ent_179',
                    'p41280_a180' :  'hospital_date_of_first_diags_icd10_inst_0_ent_180',
                    'p41280_a181' :  'hospital_date_of_first_diags_icd10_inst_0_ent_181',
                    'p41280_a182' :  'hospital_date_of_first_diags_icd10_inst_0_ent_182',
                    'p41280_a183' :  'hospital_date_of_first_diags_icd10_inst_0_ent_183',
                    'p41280_a184' :  'hospital_date_of_first_diags_icd10_inst_0_ent_184',
                    'p41280_a185' :  'hospital_date_of_first_diags_icd10_inst_0_ent_185',
                    'p41280_a186' :  'hospital_date_of_first_diags_icd10_inst_0_ent_186',
                    'p41280_a187' :  'hospital_date_of_first_diags_icd10_inst_0_ent_187',
                    'p41280_a188' :  'hospital_date_of_first_diags_icd10_inst_0_ent_188',
                    'p41280_a189' :  'hospital_date_of_first_diags_icd10_inst_0_ent_189',
                    'p41280_a190' :  'hospital_date_of_first_diags_icd10_inst_0_ent_190',
                    'p41280_a191' :  'hospital_date_of_first_diags_icd10_inst_0_ent_191',
                    'p41280_a192' :  'hospital_date_of_first_diags_icd10_inst_0_ent_192',
                    'p41280_a193' :  'hospital_date_of_first_diags_icd10_inst_0_ent_193',
                    'p41280_a194' :  'hospital_date_of_first_diags_icd10_inst_0_ent_194',
                    'p41280_a195' :  'hospital_date_of_first_diags_icd10_inst_0_ent_195',
                    'p41280_a196' :  'hospital_date_of_first_diags_icd10_inst_0_ent_196',
                    'p41280_a197' :  'hospital_date_of_first_diags_icd10_inst_0_ent_197',
                    'p41280_a198' :  'hospital_date_of_first_diags_icd10_inst_0_ent_198',
                    'p41280_a199' :  'hospital_date_of_first_diags_icd10_inst_0_ent_199',
                    'p41280_a200' :  'hospital_date_of_first_diags_icd10_inst_0_ent_200',
                    'p41280_a201' :  'hospital_date_of_first_diags_icd10_inst_0_ent_201',
                    'p41280_a202' :  'hospital_date_of_first_diags_icd10_inst_0_ent_202',
                    'p41280_a203' :  'hospital_date_of_first_diags_icd10_inst_0_ent_203',
                    'p41280_a204' :  'hospital_date_of_first_diags_icd10_inst_0_ent_204',
                    'p41280_a205' :  'hospital_date_of_first_diags_icd10_inst_0_ent_205',
                    'p41280_a206' :  'hospital_date_of_first_diags_icd10_inst_0_ent_206',
                    'p41280_a207' :  'hospital_date_of_first_diags_icd10_inst_0_ent_207',
                    'p41280_a208' :  'hospital_date_of_first_diags_icd10_inst_0_ent_208',
                    'p41280_a209' :  'hospital_date_of_first_diags_icd10_inst_0_ent_209',
                    'p41280_a210' :  'hospital_date_of_first_diags_icd10_inst_0_ent_210',
                    'p41280_a211' :  'hospital_date_of_first_diags_icd10_inst_0_ent_211',
                    'p41280_a212' :  'hospital_date_of_first_diags_icd10_inst_0_ent_212',
                    'p41280_a213' :  'hospital_date_of_first_diags_icd10_inst_0_ent_213',
                    'p41280_a214' :  'hospital_date_of_first_diags_icd10_inst_0_ent_214',
                    'p41280_a215' :  'hospital_date_of_first_diags_icd10_inst_0_ent_215',
                    'p41280_a216' :  'hospital_date_of_first_diags_icd10_inst_0_ent_216',
                    'p41280_a217' :  'hospital_date_of_first_diags_icd10_inst_0_ent_217',
                    'p41280_a218' :  'hospital_date_of_first_diags_icd10_inst_0_ent_218',
                    'p41280_a219' :  'hospital_date_of_first_diags_icd10_inst_0_ent_219',
                    'p41280_a220' :  'hospital_date_of_first_diags_icd10_inst_0_ent_220',
                    'p41280_a221' :  'hospital_date_of_first_diags_icd10_inst_0_ent_221',
                    'p41280_a222' :  'hospital_date_of_first_diags_icd10_inst_0_ent_222',
                    'p41280_a223' : 'hospital_date_of_first_diags_icd10_inst_0_ent_223',
                    'p41280_a224' : 'hospital_date_of_first_diags_icd10_inst_0_ent_224',
                    'p41280_a225' : 'hospital_date_of_first_diags_icd10_inst_0_ent_225',   
                    'p41280_a226' : 'hospital_date_of_first_diags_icd10_inst_0_ent_226',
                    'p41280_a227' : 'hospital_date_of_first_diags_icd10_inst_0_ent_227',
                    'p41280_a228' : 'hospital_date_of_first_diags_icd10_inst_0_ent_228',
                    'p41280_a229' : 'hospital_date_of_first_diags_icd10_inst_0_ent_229',
                    'p41280_a230' : 'hospital_date_of_first_diags_icd10_inst_0_ent_230',
                    'p41280_a231' : 'hospital_date_of_first_diags_icd10_inst_0_ent_231',
                    'p41280_a232' : 'hospital_date_of_first_diags_icd10_inst_0_ent_232',
                    'p41280_a233' : 'hospital_date_of_first_diags_icd10_inst_0_ent_233',
                    'p41280_a234' : 'hospital_date_of_first_diags_icd10_inst_0_ent_234',
                    'p41280_a235' : 'hospital_date_of_first_diags_icd10_inst_0_ent_235',
                    'p41280_a236' : 'hospital_date_of_first_diags_icd10_inst_0_ent_236',
                    'p41280_a237' : 'hospital_date_of_first_diags_icd10_inst_0_ent_237',
                    'p41280_a238' : 'hospital_date_of_first_diags_icd10_inst_0_ent_238',
                    'p41280_a239' : 'hospital_date_of_first_diags_icd10_inst_0_ent_239',
                    'p41280_a240' : 'hospital_date_of_first_diags_icd10_inst_0_ent_240',
                    'p41280_a241' : 'hospital_date_of_first_diags_icd10_inst_0_ent_241',
                    'p41280_a242' : 'hospital_date_of_first_diags_icd10_inst_0_ent_242',


                    'p41281_a0' : 'hospital_date_of_first_diags_icd9_inst_0_ent_0',
                    'p41281_a1' : 'hospital_date_of_first_diags_icd9_inst_0_ent_1',
                    'p41281_a2' : 'hospital_date_of_first_diags_icd9_inst_0_ent_2',
                    'p41281_a3' : 'hospital_date_of_first_diags_icd9_inst_0_ent_3',
                    'p41281_a4' : 'hospital_date_of_first_diags_icd9_inst_0_ent_4',
                    'p41281_a5' : 'hospital_date_of_first_diags_icd9_inst_0_ent_5',
                    'p41281_a6' : 'hospital_date_of_first_diags_icd9_inst_0_ent_6',
                    'p41281_a7' : 'hospital_date_of_first_diags_icd9_inst_0_ent_7',
                    'p41281_a8' : 'hospital_date_of_first_diags_icd9_inst_0_ent_8',
                    'p41281_a9' : 'hospital_date_of_first_diags_icd9_inst_0_ent_9',
                    'p41281_a10' :  'hospital_date_of_first_diags_icd9_inst_0_ent_10', 
                    'p41281_a11' :  'hospital_date_of_first_diags_icd9_inst_0_ent_11', 
                    'p41281_a12' :  'hospital_date_of_first_diags_icd9_inst_0_ent_12', 
                    'p41281_a13' :  'hospital_date_of_first_diags_icd9_inst_0_ent_13', 
                    'p41281_a14' :  'hospital_date_of_first_diags_icd9_inst_0_ent_14', 
                    'p41281_a15' :  'hospital_date_of_first_diags_icd9_inst_0_ent_15', 
                    'p41281_a16' :  'hospital_date_of_first_diags_icd9_inst_0_ent_16', 
                    'p41281_a17' :  'hospital_date_of_first_diags_icd9_inst_0_ent_17', 
                    'p41281_a18' :  'hospital_date_of_first_diags_icd9_inst_0_ent_18', 
                    'p41281_a19' :  'hospital_date_of_first_diags_icd9_inst_0_ent_19', 
                    'p41281_a20' :  'hospital_date_of_first_diags_icd9_inst_0_ent_20', 
                    'p41281_a21' :  'hospital_date_of_first_diags_icd9_inst_0_ent_21', 
                    'p41281_a22' :  'hospital_date_of_first_diags_icd9_inst_0_ent_22', 
                    'p41281_a23' :  'hospital_date_of_first_diags_icd9_inst_0_ent_23', 
                    'p41281_a24' :  'hospital_date_of_first_diags_icd9_inst_0_ent_24', 
                    'p41281_a25' :  'hospital_date_of_first_diags_icd9_inst_0_ent_25', 
                    'p41281_a26' :  'hospital_date_of_first_diags_icd9_inst_0_ent_26', 
                    'p41281_a27' :  'hospital_date_of_first_diags_icd9_inst_0_ent_27', 
                    'p41281_a28' :  'hospital_date_of_first_diags_icd9_inst_0_ent_28', 
                    'p41281_a29' :  'hospital_date_of_first_diags_icd9_inst_0_ent_29', 
                    'p41281_a30' :  'hospital_date_of_first_diags_icd9_inst_0_ent_30', 
                    'p41281_a31' :  'hospital_date_of_first_diags_icd9_inst_0_ent_31', 
                    'p41281_a32' :  'hospital_date_of_first_diags_icd9_inst_0_ent_32', 
                    'p41281_a33' :  'hospital_date_of_first_diags_icd9_inst_0_ent_33', 
                    'p41281_a34' :  'hospital_date_of_first_diags_icd9_inst_0_ent_34', 
                    'p41281_a35' :  'hospital_date_of_first_diags_icd9_inst_0_ent_35', 
                    'p41281_a36' :  'hospital_date_of_first_diags_icd9_inst_0_ent_36', 
                    'p41281_a37' :  'hospital_date_of_first_diags_icd9_inst_0_ent_37', 
                    'p41281_a38' :  'hospital_date_of_first_diags_icd9_inst_0_ent_38', 
                    'p41281_a39' :  'hospital_date_of_first_diags_icd9_inst_0_ent_39', 
                    'p41281_a40' :  'hospital_date_of_first_diags_icd9_inst_0_ent_40', 
                    'p41281_a41' :  'hospital_date_of_first_diags_icd9_inst_0_ent_41', 
                    'p41281_a42' :  'hospital_date_of_first_diags_icd9_inst_0_ent_42', 
                    'p41281_a43' :  'hospital_date_of_first_diags_icd9_inst_0_ent_43', 
                    'p41281_a44' :  'hospital_date_of_first_diags_icd9_inst_0_ent_44', 
                    'p41281_a45' :  'hospital_date_of_first_diags_icd9_inst_0_ent_45', 
                    'p41281_a46' :  'hospital_date_of_first_diags_icd9_inst_0_ent_46', 
                 
                 
                    'p20001_i0_a0'  :  'sr_cancer_code_inst_0_ent_0', 
                    'p20001_i0_a1'  :  'sr_cancer_code_inst_0_ent_1', 
                    'p20001_i0_a2'  :  'sr_cancer_code_inst_0_ent_2', 
                    'p20001_i0_a3'  :  'sr_cancer_code_inst_0_ent_3', 
                    'p20001_i0_a4'  :  'sr_cancer_code_inst_0_ent_4', 
                    'p20001_i0_a5'  :  'sr_cancer_code_inst_0_ent_5', 
                    'p20001_i1_a0'  :  'sr_cancer_code_inst_1_ent_0', 
                    'p20001_i1_a1'  :  'sr_cancer_code_inst_1_ent_1', 
                    'p20001_i1_a2'  :  'sr_cancer_code_inst_1_ent_2', 
                    'p20001_i1_a3'  :  'sr_cancer_code_inst_1_ent_3', 
                    'p20001_i1_a4'  :  'sr_cancer_code_inst_1_ent_4', 
                    'p20001_i1_a5'  :  'sr_cancer_code_inst_1_ent_5', 
                    'p20001_i2_a0'  :  'sr_cancer_code_inst_2_ent_0', 
                    'p20001_i2_a1'  :  'sr_cancer_code_inst_2_ent_1', 
                    'p20001_i2_a2'  :  'sr_cancer_code_inst_2_ent_2', 
                    'p20001_i2_a3'  :  'sr_cancer_code_inst_2_ent_3', 
                    'p20001_i2_a4'  :  'sr_cancer_code_inst_2_ent_4', 
                    'p20001_i2_a5'  :  'sr_cancer_code_inst_2_ent_5', 
                    'p20001_i3_a0'  :  'sr_cancer_code_inst_3_ent_0', 
                    'p20001_i3_a1'  :  'sr_cancer_code_inst_3_ent_1', 
                    'p20001_i3_a2'  :  'sr_cancer_code_inst_3_ent_2', 
                    'p20001_i3_a3'  :  'sr_cancer_code_inst_3_ent_3', 
                    'p20001_i3_a4'  :  'sr_cancer_code_inst_3_ent_4', 
                    'p20001_i3_a5'  :  'sr_cancer_code_inst_3_ent_5', 
                    'p20002_i0_a0'  :  'sr_noncancer_code_inst_0_ent_0', 
                    'p20002_i0_a1'  :  'sr_noncancer_code_inst_0_ent_1', 
                    'p20002_i0_a10'  :  'sr_noncancer_code_inst_0_ent_10', 
                    'p20002_i0_a11'  :  'sr_noncancer_code_inst_0_ent_11', 
                    'p20002_i0_a12'  :  'sr_noncancer_code_inst_0_ent_12', 
                    'p20002_i0_a13'  :  'sr_noncancer_code_inst_0_ent_13', 
                    'p20002_i0_a14'  :  'sr_noncancer_code_inst_0_ent_14', 
                    'p20002_i0_a15'  :  'sr_noncancer_code_inst_0_ent_15', 
                    'p20002_i0_a16'  :  'sr_noncancer_code_inst_0_ent_16', 
                    'p20002_i0_a17'  :  'sr_noncancer_code_inst_0_ent_17', 
                    'p20002_i0_a18'  :  'sr_noncancer_code_inst_0_ent_18', 
                    'p20002_i0_a19'  :  'sr_noncancer_code_inst_0_ent_19', 
                    'p20002_i0_a2'  :  'sr_noncancer_code_inst_0_ent_2', 
                    'p20002_i0_a20'  :  'sr_noncancer_code_inst_0_ent_20', 
                    'p20002_i0_a21'  :  'sr_noncancer_code_inst_0_ent_21', 
                    'p20002_i0_a22'  :  'sr_noncancer_code_inst_0_ent_22', 
                    'p20002_i0_a23'  :  'sr_noncancer_code_inst_0_ent_23', 
                    'p20002_i0_a24'  :  'sr_noncancer_code_inst_0_ent_24', 
                    'p20002_i0_a25'  :  'sr_noncancer_code_inst_0_ent_25', 
                    'p20002_i0_a26'  :  'sr_noncancer_code_inst_0_ent_26', 
                    'p20002_i0_a27'  :  'sr_noncancer_code_inst_0_ent_27', 
                    'p20002_i0_a28'  :  'sr_noncancer_code_inst_0_ent_28', 
                    'p20002_i0_a29'  :  'sr_noncancer_code_inst_0_ent_29', 
                    'p20002_i0_a3'  :  'sr_noncancer_code_inst_0_ent_3', 
                    'p20002_i0_a30'  :  'sr_noncancer_code_inst_0_ent_30', 
                    'p20002_i0_a31'  :  'sr_noncancer_code_inst_0_ent_31', 
                    'p20002_i0_a32'  :  'sr_noncancer_code_inst_0_ent_32', 
                    'p20002_i0_a33'  :  'sr_noncancer_code_inst_0_ent_33', 
                    'p20002_i0_a4'  :  'sr_noncancer_code_inst_0_ent_4', 
                    'p20002_i0_a5'  :  'sr_noncancer_code_inst_0_ent_5', 
                    'p20002_i0_a6'  :  'sr_noncancer_code_inst_0_ent_6', 
                    'p20002_i0_a7'  :  'sr_noncancer_code_inst_0_ent_7', 
                    'p20002_i0_a8'  :  'sr_noncancer_code_inst_0_ent_8', 
                    'p20002_i0_a9'  :  'sr_noncancer_code_inst_0_ent_9', 
                    'p20002_i1_a0'  :  'sr_noncancer_code_inst_1_ent_0', 
                    'p20002_i1_a1'  :  'sr_noncancer_code_inst_1_ent_1', 
                    'p20002_i1_a10'  :  'sr_noncancer_code_inst_1_ent_10', 
                    'p20002_i1_a11'  :  'sr_noncancer_code_inst_1_ent_11', 
                    'p20002_i1_a12'  :  'sr_noncancer_code_inst_1_ent_12', 
                    'p20002_i1_a13'  :  'sr_noncancer_code_inst_1_ent_13', 
                    'p20002_i1_a14'  :  'sr_noncancer_code_inst_1_ent_14', 
                    'p20002_i1_a15'  :  'sr_noncancer_code_inst_1_ent_15', 
                    'p20002_i1_a16'  :  'sr_noncancer_code_inst_1_ent_16', 
                    'p20002_i1_a17'  :  'sr_noncancer_code_inst_1_ent_17', 
                    'p20002_i1_a18'  :  'sr_noncancer_code_inst_1_ent_18', 
                    'p20002_i1_a19'  :  'sr_noncancer_code_inst_1_ent_19', 
                    'p20002_i1_a2'  :  'sr_noncancer_code_inst_1_ent_2', 
                    'p20002_i1_a20'  :  'sr_noncancer_code_inst_1_ent_20', 
                    'p20002_i1_a21'  :  'sr_noncancer_code_inst_1_ent_21', 
                    'p20002_i1_a22'  :  'sr_noncancer_code_inst_1_ent_22', 
                    'p20002_i1_a23'  :  'sr_noncancer_code_inst_1_ent_23', 
                    'p20002_i1_a24'  :  'sr_noncancer_code_inst_1_ent_24', 
                    'p20002_i1_a25'  :  'sr_noncancer_code_inst_1_ent_25', 
                    'p20002_i1_a26'  :  'sr_noncancer_code_inst_1_ent_26', 
                    'p20002_i1_a27'  :  'sr_noncancer_code_inst_1_ent_27', 
                    'p20002_i1_a28'  :  'sr_noncancer_code_inst_1_ent_28', 
                    'p20002_i1_a29'  :  'sr_noncancer_code_inst_1_ent_29', 
                    'p20002_i1_a3'  :  'sr_noncancer_code_inst_1_ent_3', 
                    'p20002_i1_a30'  :  'sr_noncancer_code_inst_1_ent_30', 
                    'p20002_i1_a31'  :  'sr_noncancer_code_inst_1_ent_31', 
                    'p20002_i1_a32'  :  'sr_noncancer_code_inst_1_ent_32', 
                    'p20002_i1_a33'  :  'sr_noncancer_code_inst_1_ent_33', 
                    'p20002_i1_a4'  :  'sr_noncancer_code_inst_1_ent_4', 
                    'p20002_i1_a5'  :  'sr_noncancer_code_inst_1_ent_5', 
                    'p20002_i1_a6'  :  'sr_noncancer_code_inst_1_ent_6', 
                    'p20002_i1_a7'  :  'sr_noncancer_code_inst_1_ent_7', 
                    'p20002_i1_a8'  :  'sr_noncancer_code_inst_1_ent_8', 
                    'p20002_i1_a9'  :  'sr_noncancer_code_inst_1_ent_9', 
                    'p20002_i2_a0'  :  'sr_noncancer_code_inst_2_ent_0', 
                    'p20002_i2_a1'  :  'sr_noncancer_code_inst_2_ent_1', 
                    'p20002_i2_a10'  :  'sr_noncancer_code_inst_2_ent_10', 
                    'p20002_i2_a11'  :  'sr_noncancer_code_inst_2_ent_11', 
                    'p20002_i2_a12'  :  'sr_noncancer_code_inst_2_ent_12', 
                    'p20002_i2_a13'  :  'sr_noncancer_code_inst_2_ent_13', 
                    'p20002_i2_a14'  :  'sr_noncancer_code_inst_2_ent_14', 
                    'p20002_i2_a15'  :  'sr_noncancer_code_inst_2_ent_15', 
                    'p20002_i2_a16'  :  'sr_noncancer_code_inst_2_ent_16', 
                    'p20002_i2_a17'  :  'sr_noncancer_code_inst_2_ent_17', 
                    'p20002_i2_a18'  :  'sr_noncancer_code_inst_2_ent_18', 
                    'p20002_i2_a19'  :  'sr_noncancer_code_inst_2_ent_19', 
                    'p20002_i2_a2'  :  'sr_noncancer_code_inst_2_ent_2', 
                    'p20002_i2_a20'  :  'sr_noncancer_code_inst_2_ent_20', 
                    'p20002_i2_a21'  :  'sr_noncancer_code_inst_2_ent_21', 
                    'p20002_i2_a22'  :  'sr_noncancer_code_inst_2_ent_22', 
                    'p20002_i2_a23'  :  'sr_noncancer_code_inst_2_ent_23', 
                    'p20002_i2_a24'  :  'sr_noncancer_code_inst_2_ent_24', 
                    'p20002_i2_a25'  :  'sr_noncancer_code_inst_2_ent_25', 
                    'p20002_i2_a26'  :  'sr_noncancer_code_inst_2_ent_26', 
                    'p20002_i2_a27'  :  'sr_noncancer_code_inst_2_ent_27', 
                    'p20002_i2_a28'  :  'sr_noncancer_code_inst_2_ent_28', 
                    'p20002_i2_a29'  :  'sr_noncancer_code_inst_2_ent_29', 
                    'p20002_i2_a3'  :  'sr_noncancer_code_inst_2_ent_3', 
                    'p20002_i2_a30'  :  'sr_noncancer_code_inst_2_ent_30', 
                    'p20002_i2_a31'  :  'sr_noncancer_code_inst_2_ent_31', 
                    'p20002_i2_a32'  :  'sr_noncancer_code_inst_2_ent_32', 
                    'p20002_i2_a33'  :  'sr_noncancer_code_inst_2_ent_33', 
                    'p20002_i2_a4'  :  'sr_noncancer_code_inst_2_ent_4', 
                    'p20002_i2_a5'  :  'sr_noncancer_code_inst_2_ent_5', 
                    'p20002_i2_a6'  :  'sr_noncancer_code_inst_2_ent_6', 
                    'p20002_i2_a7'  :  'sr_noncancer_code_inst_2_ent_7', 
                    'p20002_i2_a8'  :  'sr_noncancer_code_inst_2_ent_8', 
                    'p20002_i2_a9'  :  'sr_noncancer_code_inst_2_ent_9', 
                    'p20002_i3_a0'  :  'sr_noncancer_code_inst_3_ent_0', 
                    'p20002_i3_a1'  :  'sr_noncancer_code_inst_3_ent_1', 
                    'p20002_i3_a10'  :  'sr_noncancer_code_inst_3_ent_10', 
                    'p20002_i3_a11'  :  'sr_noncancer_code_inst_3_ent_11', 
                    'p20002_i3_a12'  :  'sr_noncancer_code_inst_3_ent_12', 
                    'p20002_i3_a13'  :  'sr_noncancer_code_inst_3_ent_13', 
                    'p20002_i3_a14'  :  'sr_noncancer_code_inst_3_ent_14', 
                    'p20002_i3_a15'  :  'sr_noncancer_code_inst_3_ent_15', 
                    'p20002_i3_a16'  :  'sr_noncancer_code_inst_3_ent_16', 
                    'p20002_i3_a17'  :  'sr_noncancer_code_inst_3_ent_17', 
                    'p20002_i3_a18'  :  'sr_noncancer_code_inst_3_ent_18', 
                    'p20002_i3_a19'  :  'sr_noncancer_code_inst_3_ent_19', 
                    'p20002_i3_a2'  :  'sr_noncancer_code_inst_3_ent_2', 
                    'p20002_i3_a20'  :  'sr_noncancer_code_inst_3_ent_20', 
                    'p20002_i3_a21'  :  'sr_noncancer_code_inst_3_ent_21', 
                    'p20002_i3_a22'  :  'sr_noncancer_code_inst_3_ent_22', 
                    'p20002_i3_a23'  :  'sr_noncancer_code_inst_3_ent_23', 
                    'p20002_i3_a24'  :  'sr_noncancer_code_inst_3_ent_24', 
                    'p20002_i3_a25'  :  'sr_noncancer_code_inst_3_ent_25', 
                    'p20002_i3_a26'  :  'sr_noncancer_code_inst_3_ent_26', 
                    'p20002_i3_a27'  :  'sr_noncancer_code_inst_3_ent_27', 
                    'p20002_i3_a28'  :  'sr_noncancer_code_inst_3_ent_28', 
                    'p20002_i3_a29'  :  'sr_noncancer_code_inst_3_ent_29', 
                    'p20002_i3_a3'  :  'sr_noncancer_code_inst_3_ent_3', 
                    'p20002_i3_a30'  :  'sr_noncancer_code_inst_3_ent_30', 
                    'p20002_i3_a31'  :  'sr_noncancer_code_inst_3_ent_31', 
                    'p20002_i3_a32'  :  'sr_noncancer_code_inst_3_ent_32', 
                    'p20002_i3_a33'  :  'sr_noncancer_code_inst_3_ent_33', 
                    'p20002_i3_a4'  :  'sr_noncancer_code_inst_3_ent_4', 
                    'p20002_i3_a5'  :  'sr_noncancer_code_inst_3_ent_5', 
                    'p20002_i3_a6'  :  'sr_noncancer_code_inst_3_ent_6', 
                    'p20002_i3_a7'  :  'sr_noncancer_code_inst_3_ent_7', 
                    'p20002_i3_a8'  :  'sr_noncancer_code_inst_3_ent_8', 
                    'p20002_i3_a9'  :  'sr_noncancer_code_inst_3_ent_9', 
                    'p20006_i0_a0'  :  'sr_cancer_date_inst_0_ent_0', 
                    'p20006_i0_a1'  :  'sr_cancer_date_inst_0_ent_1', 
                    'p20006_i0_a2'  :  'sr_cancer_date_inst_0_ent_2', 
                    'p20006_i0_a3'  :  'sr_cancer_date_inst_0_ent_3', 
                    'p20006_i0_a4'  :  'sr_cancer_date_inst_0_ent_4', 
                    'p20006_i0_a5'  :  'sr_cancer_date_inst_0_ent_5', 
                    'p20006_i1_a0'  :  'sr_cancer_date_inst_1_ent_0', 
                    'p20006_i1_a1'  :  'sr_cancer_date_inst_1_ent_1', 
                    'p20006_i1_a2'  :  'sr_cancer_date_inst_1_ent_2', 
                    'p20006_i1_a3'  :  'sr_cancer_date_inst_1_ent_3', 
                    'p20006_i1_a4'  :  'sr_cancer_date_inst_1_ent_4', 
                    'p20006_i1_a5'  :  'sr_cancer_date_inst_1_ent_5', 
                    'p20006_i2_a0'  :  'sr_cancer_date_inst_2_ent_0', 
                    'p20006_i2_a1'  :  'sr_cancer_date_inst_2_ent_1', 
                    'p20006_i2_a2'  :  'sr_cancer_date_inst_2_ent_2', 
                    'p20006_i2_a3'  :  'sr_cancer_date_inst_2_ent_3', 
                    'p20006_i2_a4'  :  'sr_cancer_date_inst_2_ent_4', 
                    'p20006_i2_a5'  :  'sr_cancer_date_inst_2_ent_5', 
                    'p20006_i3_a0'  :  'sr_cancer_date_inst_3_ent_0', 
                    'p20006_i3_a1'  :  'sr_cancer_date_inst_3_ent_1', 
                    'p20006_i3_a2'  :  'sr_cancer_date_inst_3_ent_2', 
                    'p20006_i3_a3'  :  'sr_cancer_date_inst_3_ent_3', 
                    'p20006_i3_a4'  :  'sr_cancer_date_inst_3_ent_4', 
                    'p20006_i3_a5'  :  'sr_cancer_date_inst_3_ent_5', 
                    'p20008_i0_a0'  :  'sr_noncancer_date_inst_0_ent_0', 
                    'p20008_i0_a1'  :  'sr_noncancer_date_inst_0_ent_1', 
                    'p20008_i0_a10'  :  'sr_noncancer_date_inst_0_ent_10', 
                    'p20008_i0_a11'  :  'sr_noncancer_date_inst_0_ent_11', 
                    'p20008_i0_a12'  :  'sr_noncancer_date_inst_0_ent_12', 
                    'p20008_i0_a13'  :  'sr_noncancer_date_inst_0_ent_13', 
                    'p20008_i0_a14'  :  'sr_noncancer_date_inst_0_ent_14', 
                    'p20008_i0_a15'  :  'sr_noncancer_date_inst_0_ent_15', 
                    'p20008_i0_a16'  :  'sr_noncancer_date_inst_0_ent_16', 
                    'p20008_i0_a17'  :  'sr_noncancer_date_inst_0_ent_17', 
                    'p20008_i0_a18'  :  'sr_noncancer_date_inst_0_ent_18', 
                    'p20008_i0_a19'  :  'sr_noncancer_date_inst_0_ent_19', 
                    'p20008_i0_a2'  :  'sr_noncancer_date_inst_0_ent_2', 
                    'p20008_i0_a20'  :  'sr_noncancer_date_inst_0_ent_20', 
                    'p20008_i0_a21'  :  'sr_noncancer_date_inst_0_ent_21', 
                    'p20008_i0_a22'  :  'sr_noncancer_date_inst_0_ent_22', 
                    'p20008_i0_a23'  :  'sr_noncancer_date_inst_0_ent_23', 
                    'p20008_i0_a24'  :  'sr_noncancer_date_inst_0_ent_24', 
                    'p20008_i0_a25'  :  'sr_noncancer_date_inst_0_ent_25', 
                    'p20008_i0_a26'  :  'sr_noncancer_date_inst_0_ent_26', 
                    'p20008_i0_a27'  :  'sr_noncancer_date_inst_0_ent_27', 
                    'p20008_i0_a28'  :  'sr_noncancer_date_inst_0_ent_28', 
                    'p20008_i0_a29'  :  'sr_noncancer_date_inst_0_ent_29', 
                    'p20008_i0_a3'  :  'sr_noncancer_date_inst_0_ent_3', 
                    'p20008_i0_a30'  :  'sr_noncancer_date_inst_0_ent_30', 
                    'p20008_i0_a31'  :  'sr_noncancer_date_inst_0_ent_31', 
                    'p20008_i0_a32'  :  'sr_noncancer_date_inst_0_ent_32', 
                    'p20008_i0_a33'  :  'sr_noncancer_date_inst_0_ent_33', 
                    'p20008_i0_a4'  :  'sr_noncancer_date_inst_0_ent_4', 
                    'p20008_i0_a5'  :  'sr_noncancer_date_inst_0_ent_5', 
                    'p20008_i0_a6'  :  'sr_noncancer_date_inst_0_ent_6', 
                    'p20008_i0_a7'  :  'sr_noncancer_date_inst_0_ent_7', 
                    'p20008_i0_a8'  :  'sr_noncancer_date_inst_0_ent_8', 
                    'p20008_i0_a9'  :  'sr_noncancer_date_inst_0_ent_9', 
                    'p20008_i1_a0'  :  'sr_noncancer_date_inst_1_ent_0', 
                    'p20008_i1_a1'  :  'sr_noncancer_date_inst_1_ent_1', 
                    'p20008_i1_a10'  :  'sr_noncancer_date_inst_1_ent_10', 
                    'p20008_i1_a11'  :  'sr_noncancer_date_inst_1_ent_11', 
                    'p20008_i1_a12'  :  'sr_noncancer_date_inst_1_ent_12', 
                    'p20008_i1_a13'  :  'sr_noncancer_date_inst_1_ent_13', 
                    'p20008_i1_a14'  :  'sr_noncancer_date_inst_1_ent_14', 
                    'p20008_i1_a15'  :  'sr_noncancer_date_inst_1_ent_15', 
                    'p20008_i1_a16'  :  'sr_noncancer_date_inst_1_ent_16', 
                    'p20008_i1_a17'  :  'sr_noncancer_date_inst_1_ent_17', 
                    'p20008_i1_a18'  :  'sr_noncancer_date_inst_1_ent_18', 
                    'p20008_i1_a19'  :  'sr_noncancer_date_inst_1_ent_19', 
                    'p20008_i1_a2'  :  'sr_noncancer_date_inst_1_ent_2', 
                    'p20008_i1_a20'  :  'sr_noncancer_date_inst_1_ent_20', 
                    'p20008_i1_a21'  :  'sr_noncancer_date_inst_1_ent_21', 
                    'p20008_i1_a22'  :  'sr_noncancer_date_inst_1_ent_22', 
                    'p20008_i1_a23'  :  'sr_noncancer_date_inst_1_ent_23', 
                    'p20008_i1_a24'  :  'sr_noncancer_date_inst_1_ent_24', 
                    'p20008_i1_a25'  :  'sr_noncancer_date_inst_1_ent_25', 
                    'p20008_i1_a26'  :  'sr_noncancer_date_inst_1_ent_26', 
                    'p20008_i1_a27'  :  'sr_noncancer_date_inst_1_ent_27', 
                    'p20008_i1_a28'  :  'sr_noncancer_date_inst_1_ent_28', 
                    'p20008_i1_a29'  :  'sr_noncancer_date_inst_1_ent_29', 
                    'p20008_i1_a3'  :  'sr_noncancer_date_inst_1_ent_3', 
                    'p20008_i1_a30'  :  'sr_noncancer_date_inst_1_ent_30', 
                    'p20008_i1_a31'  :  'sr_noncancer_date_inst_1_ent_31', 
                    'p20008_i1_a32'  :  'sr_noncancer_date_inst_1_ent_32', 
                    'p20008_i1_a33'  :  'sr_noncancer_date_inst_1_ent_33', 
                    'p20008_i1_a4'  :  'sr_noncancer_date_inst_1_ent_4', 
                    'p20008_i1_a5'  :  'sr_noncancer_date_inst_1_ent_5', 
                    'p20008_i1_a6'  :  'sr_noncancer_date_inst_1_ent_6', 
                    'p20008_i1_a7'  :  'sr_noncancer_date_inst_1_ent_7', 
                    'p20008_i1_a8'  :  'sr_noncancer_date_inst_1_ent_8', 
                    'p20008_i1_a9'  :  'sr_noncancer_date_inst_1_ent_9', 
                    'p20008_i2_a0'  :  'sr_noncancer_date_inst_2_ent_0', 
                    'p20008_i2_a1'  :  'sr_noncancer_date_inst_2_ent_1', 
                    'p20008_i2_a10'  :  'sr_noncancer_date_inst_2_ent_10', 
                    'p20008_i2_a11'  :  'sr_noncancer_date_inst_2_ent_11', 
                    'p20008_i2_a12'  :  'sr_noncancer_date_inst_2_ent_12', 
                    'p20008_i2_a13'  :  'sr_noncancer_date_inst_2_ent_13', 
                    'p20008_i2_a14'  :  'sr_noncancer_date_inst_2_ent_14', 
                    'p20008_i2_a15'  :  'sr_noncancer_date_inst_2_ent_15', 
                    'p20008_i2_a16'  :  'sr_noncancer_date_inst_2_ent_16', 
                    'p20008_i2_a17'  :  'sr_noncancer_date_inst_2_ent_17', 
                    'p20008_i2_a18'  :  'sr_noncancer_date_inst_2_ent_18', 
                    'p20008_i2_a19'  :  'sr_noncancer_date_inst_2_ent_19', 
                    'p20008_i2_a2'  :  'sr_noncancer_date_inst_2_ent_2', 
                    'p20008_i2_a20'  :  'sr_noncancer_date_inst_2_ent_20', 
                    'p20008_i2_a21'  :  'sr_noncancer_date_inst_2_ent_21', 
                    'p20008_i2_a22'  :  'sr_noncancer_date_inst_2_ent_22', 
                    'p20008_i2_a23'  :  'sr_noncancer_date_inst_2_ent_23', 
                    'p20008_i2_a24'  :  'sr_noncancer_date_inst_2_ent_24', 
                    'p20008_i2_a25'  :  'sr_noncancer_date_inst_2_ent_25', 
                    'p20008_i2_a26'  :  'sr_noncancer_date_inst_2_ent_26', 
                    'p20008_i2_a27'  :  'sr_noncancer_date_inst_2_ent_27', 
                    'p20008_i2_a28'  :  'sr_noncancer_date_inst_2_ent_28', 
                    'p20008_i2_a29'  :  'sr_noncancer_date_inst_2_ent_29', 
                    'p20008_i2_a3'  :  'sr_noncancer_date_inst_2_ent_3', 
                    'p20008_i2_a30'  :  'sr_noncancer_date_inst_2_ent_30', 
                    'p20008_i2_a31'  :  'sr_noncancer_date_inst_2_ent_31', 
                    'p20008_i2_a32'  :  'sr_noncancer_date_inst_2_ent_32', 
                    'p20008_i2_a33'  :  'sr_noncancer_date_inst_2_ent_33', 
                    'p20008_i2_a4'  :  'sr_noncancer_date_inst_2_ent_4', 
                    'p20008_i2_a5'  :  'sr_noncancer_date_inst_2_ent_5', 
                    'p20008_i2_a6'  :  'sr_noncancer_date_inst_2_ent_6', 
                    'p20008_i2_a7'  :  'sr_noncancer_date_inst_2_ent_7', 
                    'p20008_i2_a8'  :  'sr_noncancer_date_inst_2_ent_8', 
                    'p20008_i2_a9'  :  'sr_noncancer_date_inst_2_ent_9', 
                    'p20008_i3_a0'  :  'sr_noncancer_date_inst_3_ent_0', 
                    'p20008_i3_a1'  :  'sr_noncancer_date_inst_3_ent_1', 
                    'p20008_i3_a10'  :  'sr_noncancer_date_inst_3_ent_10', 
                    'p20008_i3_a11'  :  'sr_noncancer_date_inst_3_ent_11', 
                    'p20008_i3_a12'  :  'sr_noncancer_date_inst_3_ent_12', 
                    'p20008_i3_a13'  :  'sr_noncancer_date_inst_3_ent_13', 
                    'p20008_i3_a14'  :  'sr_noncancer_date_inst_3_ent_14', 
                    'p20008_i3_a15'  :  'sr_noncancer_date_inst_3_ent_15', 
                    'p20008_i3_a16'  :  'sr_noncancer_date_inst_3_ent_16', 
                    'p20008_i3_a17'  :  'sr_noncancer_date_inst_3_ent_17', 
                    'p20008_i3_a18'  :  'sr_noncancer_date_inst_3_ent_18', 
                    'p20008_i3_a19'  :  'sr_noncancer_date_inst_3_ent_19', 
                    'p20008_i3_a2'  :  'sr_noncancer_date_inst_3_ent_2', 
                    'p20008_i3_a20'  :  'sr_noncancer_date_inst_3_ent_20', 
                    'p20008_i3_a21'  :  'sr_noncancer_date_inst_3_ent_21', 
                    'p20008_i3_a22'  :  'sr_noncancer_date_inst_3_ent_22', 
                    'p20008_i3_a23'  :  'sr_noncancer_date_inst_3_ent_23', 
                    'p20008_i3_a24'  :  'sr_noncancer_date_inst_3_ent_24', 
                    'p20008_i3_a25'  :  'sr_noncancer_date_inst_3_ent_25', 
                    'p20008_i3_a26'  :  'sr_noncancer_date_inst_3_ent_26', 
                    'p20008_i3_a27'  :  'sr_noncancer_date_inst_3_ent_27', 
                    'p20008_i3_a28'  :  'sr_noncancer_date_inst_3_ent_28', 
                    'p20008_i3_a29'  :  'sr_noncancer_date_inst_3_ent_29', 
                    'p20008_i3_a3'  :  'sr_noncancer_date_inst_3_ent_3', 
                    'p20008_i3_a30'  :  'sr_noncancer_date_inst_3_ent_30', 
                    'p20008_i3_a31'  :  'sr_noncancer_date_inst_3_ent_31', 
                    'p20008_i3_a32'  :  'sr_noncancer_date_inst_3_ent_32', 
                    'p20008_i3_a33'  :  'sr_noncancer_date_inst_3_ent_33', 
                    'p20008_i3_a4'  :  'sr_noncancer_date_inst_3_ent_4', 
                    'p20008_i3_a5'  :  'sr_noncancer_date_inst_3_ent_5', 
                    'p20008_i3_a6'  :  'sr_noncancer_date_inst_3_ent_6', 
                    'p20008_i3_a7'  :  'sr_noncancer_date_inst_3_ent_7', 
                    'p20008_i3_a8'  :  'sr_noncancer_date_inst_3_ent_8', 
                    'p20008_i3_a9'  :  'sr_noncancer_date_inst_3_ent_9', 

            }

# Lay the field-id -> readable-name mapping out as a two-column lookup table:
# 'id' holds the dictionary key, 'descr' the dictionary value.
add_info = (
    pd.DataFrame.from_dict(add_info_dict, orient='index')
    .reset_index()
)
add_info.columns = ['id', 'descr']

# Relabel the data columns using the same mapping.
dat = dat.rename(columns=add_info_dict)

In [None]:
# Limit to just people with titer data (which should speed up all downstream
# calculations)
dat = dat.loc[dat['eid'].isin(pat_ls), :]

# 9429
print(len(pat_ls))

# (9427, 859)
print(dat.shape)

# So it looks like 2 people have no diagnoses from any non-gp sources because
# pat_ls does have the 9,429 people with titers

### Split diagnoses up into their source

In [None]:
# Split all diagnoses up into their source. Each frame keeps 'eid' plus the
# columns whose label identifies that source.
_is_eid = dat.columns == 'eid'

death = dat.loc[:, _is_eid | dat.columns.str.contains('death')]

# 'cancer' is also a substring of the self-report labels ('sr_cancer_*',
# 'sr_noncancer_*'), so a substring match would drag those columns in and
# inflate the "has any cancer data" row count. Anchor on the prefix instead:
# every cancer-registry column read below starts with 'cancer'.
can = dat.loc[:, _is_eid | dat.columns.str.startswith('cancer')]

hosp = dat.loc[:, _is_eid | dat.columns.str.contains('hospital')]
sr = dat.loc[:, _is_eid | dat.columns.str.contains('sr_non')]

## Death Registry

In [None]:
# ICD10 view of the death data: 'eid' plus every column whose label contains
# 'icd10', 'date' or 'age' (the pattern is a regex alternation).
death_10 = death.loc[:, (death.columns == 'eid')
                        | death.columns.str.contains('icd10|date|age')]

# There actually isn't any ICD9 death data (it's not available in this encoding),
# so this view ends up holding only 'eid' and the date/age columns.
death_9 = death.loc[:, (death.columns == 'eid')
                       | death.columns.str.contains('icd9|date|age')]

In [None]:
# Sanity check: for everyone with a second death-date entry, the first and
# second entries should hold the same date. The expression counts mismatches.

# 0
sum(
    death_10.loc[death_10['death_date_inst_1_ent_0'].notna(), 'death_date_inst_0_ent_0']
    .ne(death_10.loc[death_10['death_date_inst_1_ent_0'].notna(), 'death_date_inst_1_ent_0'])
)

In [None]:
# Key the ICD10 death table by participant id.
death_10 = death_10.set_index('eid')

# 9427
print(f"Num rows in death: {death_10.shape[0]}")

# Discard participants whose remaining columns are all missing.
death_10 = death_10.dropna(how='all')

# 842
print(f"Num rows left in death: {death_10.shape[0]}")

### Loop through people with Ab titers collecting death info

In [None]:
# Build two tables from the death data, one participant (eid) at a time:
#   death_sum   - one row per person, causes joined into ', '-separated strings
#   death_clean - one row per (person, cause code), ready for phecode mapping
# Cause codes go through fix_icd10() so the dotted formatting comes from the
# same helper the cancer and hospital sections use.
peep_list = death_10.index.tolist()

death_sum_ls = []
death_clean_row_ls = []
for curr_p in tqdm.tqdm(peep_list):

    # Keep only the fields this person actually has a value for.
    curr_data = death_10.loc[curr_p, :].dropna()

    prim_ls = [fix_icd10(curr_icd) for curr_icd in
               curr_data[curr_data.index.str.contains('primary')].tolist()]
    sec_ls = [fix_icd10(curr_icd) for curr_icd in
              curr_data[curr_data.index.str.contains('secondary')].tolist()]

    # Sentinels when the first-entry date / age is absent: "" and -99.
    dod = curr_data.get('death_date_inst_0_ent_0', "")
    age = curr_data.get('death_age_inst_0_ent_0', -99)

    death_sum_ls.append([curr_p, dod, age, ', '.join(prim_ls), ', '.join(sec_ls),
                         'ICD10', 'ICD10'])

    death_clean_row_ls.extend(
        [curr_p, dod, age, curr_icd, 'primary', 'ICD10', 'ICD10'] for curr_icd in prim_ls
    )
    death_clean_row_ls.extend(
        [curr_p, dod, age, curr_icd, 'secondary', 'ICD10', 'ICD10'] for curr_icd in sec_ls
    )


# Passing the labels to the constructor keeps this working when no rows were
# collected (assigning .columns to an empty frame raises a length mismatch).
death_sum = pd.DataFrame(
    death_sum_ls,
    columns=['eid', 'date_of_death', 'age_at_death', 'primary_cause',
             'secondary_cause', 'primary_encoding', 'secondary_encoding'],
)

death_clean = pd.DataFrame(
    death_clean_row_ls,
    columns=['eid', 'date_of_death', 'age_at_death', 'code',
             'cause_level', 'primary_encoding', 'secondary_encoding'],
)

In [None]:
# Write both death tables as tab-separated files, leaving out the row index.
death_sum.to_csv(
    f'{OUT_DIR}/death/death_summary.tsv',
    index=False,
    sep='\t',
)
death_clean.to_csv(
    f'{OUT_DIR}/death/death_prepped_for_phecode.tsv',
    index=False,
    sep='\t',
)

## Cancer Registry

In [None]:
# Key the cancer table by participant id for the per-person loop that follows.
can = can.set_index('eid')

# 9427
print(f"Num rows in cancer: {can.shape[0]}")

# Discard participants whose remaining columns are all missing.
can = can.dropna(how='all')

# 7820
print(f"Num rows left in cancer: {can.shape[0]}")

### Loop through people with Ab titers collecting cancer info

In [None]:
# Build one row per cancer diagnosis: every 'cancer_diag_date_<instance>' a
# person has is paired with the age and ICD code sharing the same <instance>.
peep_list = can.index.tolist()

# (encoding label, column prefix, formatter) in priority order: when an
# instance carries both encodings only the ICD10 code is recorded.
_can_encodings = (('ICD10', 'cancer_icd10_', fix_icd10),
                  ('ICD9', 'cancer_icd9_', fix_icd9))

can_clean_ls = []
for curr_p in tqdm.tqdm(peep_list):

    # Keep only the fields this person actually has a value for.
    curr_data = can.loc[curr_p, :].dropna()

    curr_date_cols = curr_data.index[curr_data.index.str.startswith('cancer_diag_date_')].to_list()
    for curr_date_col in curr_date_cols:

        curr_instance = curr_date_col.replace('cancer_diag_date_', '')
        curr_diag_date = curr_data[curr_date_col]

        # Missing values were dropped above, so a date without an age would
        # raise KeyError on plain indexing; record the age as NaN instead.
        curr_diag_age = curr_data.get(f"cancer_diag_age_{curr_instance}", np.nan)

        for curr_encoding, curr_prefix, curr_fixer in _can_encodings:
            icd_col = f"{curr_prefix}{curr_instance}"
            if icd_col in curr_data.index:
                curr_icd = curr_fixer(str(curr_data[icd_col]))
                can_clean_ls.append([curr_p, curr_diag_date, curr_diag_age,
                                     curr_icd, curr_encoding])
                break
        else:
            # Dated diagnosis with no code in either encoding.
            print(f'{curr_p}: {curr_instance} not in 9 or 10 data')

# Passing the labels to the constructor keeps this working when no rows were
# collected (assigning .columns to an empty frame raises a length mismatch).
can_clean = pd.DataFrame(
    can_clean_ls,
    columns=['eid', 'date_of_diag', 'age_at_diag', 'diag_code', 'diag_encoding'],
)

In [None]:
# Write the cancer table as a tab-separated file, leaving out the row index.
can_clean.to_csv(
    f'{OUT_DIR}/cancer/cancer_prepped_for_phecode.tsv',
    index=False,
    sep='\t',
)

## Hospital Inpatient

In [None]:
# Key the hospital table by participant id for the per-person loop that follows.
hosp = hosp.set_index('eid')

# 9427
print(f"Num rows in Hospital: {hosp.shape[0]}")

# Discard participants whose remaining columns are all missing.
hosp = hosp.dropna(how='all')

# 8424
print(f"Num rows left in hospital: {hosp.shape[0]}")

### Loop through people with Ab titers collecting hospital info

In [None]:
# Build one row per hospital diagnosis. For each encoding the diagnoses arrive
# as a single cell holding a Python-list literal, and the date of the i-th
# diagnosis sits in the '..._inst_0_ent_<i>' date column for that encoding.
peep_list = hosp.index.tolist()


# Only interested in 41270 (corr date: 41280) and 41271 (corr date: 41281)
hosp_cols_of_interest_tup = ('hospital_diags_icd10', 'hospital_date_of_first_diags_icd10',
                              'hospital_diags_icd9', 'hospital_date_of_first_diags_icd9')

# (column tag, output label, formatter) for each encoding handled below.
_hosp_encodings = (('icd10', 'ICD10', fix_icd10),
                   ('icd9', 'ICD9', fix_icd9))

hosp_clean_ls = []
for curr_p in tqdm.tqdm(peep_list):

    # Keep only the fields this person actually has a value for.
    curr_data = hosp.loc[curr_p, :].dropna()
    curr_data = curr_data.loc[curr_data.index.str.startswith(hosp_cols_of_interest_tup)]

    # Each encoding is handled independently: a person with no ICD10 diagnoses
    # must still have their ICD9 diagnoses collected (previously the ICD10
    # 'continue' skipped the whole person).
    for curr_tag, curr_encoding, curr_fixer in _hosp_encodings:

        diag_ls = curr_data.loc[curr_data.index.str.contains(f'hospital_diags_{curr_tag}')].tolist()

        # If no diags found for this encoding/vocab
        if not diag_ls:
            continue

        # The cell is the text of a list; parse it into the actual codes.
        diag_ls = ast.literal_eval(diag_ls[0])

        for curr_ind, curr_diag in enumerate(diag_ls):
            # Plain indexing on purpose: a diagnosis without its date column
            # raises KeyError rather than being written out undated.
            date_col = f'hospital_date_of_first_diags_{curr_tag}_inst_0_ent_{curr_ind}'
            curr_date = curr_data[date_col]

            curr_diag = curr_fixer(str(curr_diag))
            hosp_clean_ls.append([curr_p, curr_date, curr_diag, curr_encoding])

# Passing the labels to the constructor keeps this working when no rows were
# collected (assigning .columns to an empty frame raises a length mismatch).
hosp_clean = pd.DataFrame(
    hosp_clean_ls,
    columns=['eid', 'date_of_diag', 'diag_code', 'diag_encoding'],
)

In [None]:
# Write the hospital table as a tab-separated file, leaving out the row index.
hosp_clean.to_csv(
    f'{OUT_DIR}/hosp/hosp_prepped_for_phecode.tsv',
    index=False,
    sep='\t',
)

## Self-report

In [None]:
# Key the self-report table by participant id for the per-person loop that follows.
sr = sr.set_index('eid')

# 9427
# (label corrected: this count is for the self-report table, not death)
print(f"Num rows in self-report: {len(sr)}")

# Drop any rows where people have NAs for each column
sr = sr.dropna(axis = 0, how = 'all')

# 7358
print(f"Num rows left in self-report: {len(sr)}")

### Convert self-report from UKB encoding to ICD10

In [None]:
# When UKB built its first-occurrence fields it drew only on the non-cancer
# self-report codes, not the cancer ones, so only that encoding is loaded.
sr_non_map = pd.read_csv(f'{UKB_RAW_DIR}/sr_noncancer_encoding.tsv', sep='\t')
sr_to_icd10_3char = pd.read_csv(f'{UKB_RAW_DIR}/sr_code_to_icd10_3char.tsv', sep='\t')


# The two-column lookup is attached twice, once per candidate join key, so the
# results can be compared side by side. Notes from the original run:
# So I guess it's based on the coding column which does make sense
# 140 doing it on coding
# 249 doing it on node_id
sr_to_icd10_3char.columns = ['coding', '3char_ICD10_on_code']
sr_non_map = pd.merge(sr_non_map, sr_to_icd10_3char, on='coding', how='left')

# Relabel the same lookup in place and attach it again, this time via node_id.
sr_to_icd10_3char.columns = ['node_id', '3char_ICD10_on_node']
sr_non_map = pd.merge(sr_non_map, sr_to_icd10_3char, on='node_id', how='left')

### Loop through people with Ab titers collecting self-report info

In [None]:
# Build one row per usable self-reported (non-cancer) condition: the UKB code
# is translated to a 3-character ICD10 code and its fractional-year date is
# converted with fl_to_dt().
peep_list = sr.index.tolist()

_sr_code_prefix = 'sr_noncancer_code_'
_sr_date_prefix = 'sr_noncancer_date_'

# coding -> 3-char ICD10, built once instead of scanning sr_non_map per code.
# Keeping the first row per coding matches the earlier "first match" pick.
_sr_code_to_icd = (sr_non_map.drop_duplicates(subset='coding')
                   .set_index('coding')['3char_ICD10_on_code']
                   .to_dict())
_sr_all_cols = set(sr.columns)

sr_clean_row_ls = []
for curr_p in tqdm.tqdm(peep_list):

    # Keep only the fields this person actually has a value for.
    curr_data = sr.loc[curr_p, :].dropna()

    codes = curr_data[curr_data.index.str.startswith(_sr_code_prefix)]

    for curr_code_col, curr_code in codes.items():

        # Codes with no mapping row (None) or an empty mapping (NaN) are skipped.
        # Previously an unmapped code left an empty list that slipped through.
        curr_icd = _sr_code_to_icd.get(int(curr_code))
        if pd.isnull(curr_icd):
            continue

        # Pair each code with the date column carrying the same suffix rather
        # than by position: positions drift as soon as one code lacks a date.
        # Assumes code and date columns share the 'inst_X_ent_Y' suffix; the
        # check below fails loudly if that naming does not hold.
        curr_date_col = _sr_date_prefix + curr_code_col[len(_sr_code_prefix):]
        if curr_date_col not in _sr_all_cols:
            raise KeyError(f"No date column '{curr_date_col}' to pair with '{curr_code_col}'")
        if curr_date_col not in curr_data.index:
            # This code has no recorded date for this person.
            continue
        curr_date = curr_data[curr_date_col]

        # Exclude record if year < 1930 or -1 or -3 per UKB doc
        # https://biobank.ndph.ox.ac.uk/showcase/ukb/docs/first_occurrences_outcomes.pdf
        # (the -1 / -3 special values are themselves below 1930)
        if curr_date < 1930:
            continue

        curr_date_str = fl_to_dt(curr_date)

        sr_clean_row_ls.append([curr_p, curr_icd, curr_date_str, 'ICD10'])

# Passing the labels to the constructor keeps this working when no rows were
# collected (assigning .columns to an empty frame raises a length mismatch).
sr_clean = pd.DataFrame(
    sr_clean_row_ls,
    columns=['eid', 'curr_icd', 'date', 'encoding'],
)

In [None]:
# Write the self-report table as a tab-separated file, leaving out the row index.
sr_clean.to_csv(
    f'{OUT_DIR}/sr/sr_prepped_for_phecode.tsv',
    index=False,
    sep='\t',
)