# Curate list of all codes used to define disease groups

In [1]:
import pandas as pd
import numpy as np
from simpledbf import Dbf5
import sys
import pylab as plt
import seaborn as sns

# own codes
sys.path.insert(1,'../')
import phenotypes as pheno_info

/scratch/c.c21013066/data/ukbiobank/codings


In [2]:
data_path = '/scratch/c.c21013066/data/ukbiobank'

In [3]:
# table of codes for disease subset
diseases = ['ParkinsonDisease','AllCauseParkinsonism','AlzheimerDisease','AllCauseDementia','Dystonia','Osteoarthritis','Depression']
selfes = [1,1,999,0,999,3,4]
icd10s = [11,13,20,21,17,45,37]
icd9s = [0,2,3,6,8,10,9]
tables = []
codes = ['UK Biobank Self Report','ICD 10','ICD 9','Read V2','Read CTV3']
for disease,sel,icd10,icd9 in zip(diseases,selfes,icd10s,icd9s):
    print(disease,sel,icd10,icd9)
    
    try:
        selfcodes = pheno_info.DIAGNOSESSELF[sel]
        selfcodes = pd.DataFrame(selfcodes).rename(columns={'name':'Code Type','codings':'Code'}).drop(columns='source')
        selfcodes['Code Type'] = 'UK Biobank Self Report'
    except:
        selfcodes = pd.DataFrame([['UK Biobank Self Report','']],columns=['Code Type','Code'])
    try:
        icd10codes = pheno_info.DIAGNOSESICD10[icd10]
        icd10codes = pd.DataFrame(icd10codes).rename(columns={'name':'Code Type','codings':'Code'}).drop(columns='source')
        icd10codes['Code Type'] = 'ICD 10'
    except:
        icd10codes = pd.DataFrame([['ICD 10','']],columns=['Code Type','Code'])
    try:
        icd9codes = pheno_info.DIAGNOSESICD9[icd9]
        icd9codes = pd.DataFrame(icd9codes).rename(columns={'name':'Code Type','codings':'Code'}).drop(columns='source')
        icd9codes['Code Type'] = 'ICD 9'
    except:
        icd9codes = pd.DataFrame([['ICD 9','']],columns=['Code Type','Code'])

    gp_codesr2 = pd.read_csv(f'{data_path}/codings/readv2ICD10_gp_{disease}.csv')
    gp_codesr2 = gp_codesr2.rename(columns={'READ_CODE':'Code','Unnamed: 0':'Code Type'}).drop(columns=gp_codesr2.columns[2:])
    gp_codesr2['Code Type'] = 'Read V2'
    if gp_codesr2.empty:
        print(gp_codesr2)
        gp_codesr2 = gp_codesr2.append({'Code Type': 'Read V2','Code':''},ignore_index=True)
        print(gp_codesr2)
    gp_codesr3 = pd.read_csv(f'{data_path}/codings/readv3ICD10_gp_{disease}.csv')
    gp_codesr3 = gp_codesr3.rename(columns={'READ_CODE':'Code','Unnamed: 0':'Code Type'}).drop(columns=gp_codesr3.columns[2:])
    gp_codesr3['Code Type'] = 'Read CTV3'
    own_codes = pd.concat([selfcodes,icd10codes,icd9codes,gp_codesr2,gp_codesr3])
    table = own_codes.groupby('Code Type')['Code'].apply(lambda x: ', '.join(x)).to_frame().T
    table['disease'] = disease
    tables.append(table[np.hstack([codes,'disease'])])
codes_table = pd.concat(tables)

ParkinsonDisease 1 11 0
AllCauseParkinsonism 1 13 2
AlzheimerDisease 999 20 3
AllCauseDementia 0 21 6
Dystonia 999 17 8
Empty DataFrame
Columns: [Code Type, Code]
Index: []
  Code Type Code
0   Read V2     
Osteoarthritis 3 45 10
Depression 4 37 9


In [5]:
with pd.option_context("max_colwidth", 2000):
    print(codes_table.set_index('disease').to_latex())
codes_table.set_index('disease').to_csv(f'{data_path}/codings/disease_codes.csv')

\begin{tabular}{llllll}
\toprule
Code Type & UK Biobank Self Report &                                                                                                                                                                                     ICD 10 &                                                 ICD 9 &                                                                                                                                                                                                                                                                                                                                                                                          Read V2 &                                                                                                                                                                                                                                                                                                           

In [6]:
# table of codes for disease subset
diseases = ['Depression','RBD','UrinaryIncontinence','ErectileDysfunction','Constipation',
           'Anxiety','OrthostaticHypotension','Hyposmia']
selfes = [4,999,5,6,7,8,999,999]
icd10s = [37,19,46,47,36,38,48,40]
icd9s = [9,999,11,12,13,14,15,16]
tables = []
codes = ['UK Biobank Self Report','ICD 10','ICD 9','Read V2','Read CTV3']
for disease,sel,icd10,icd9 in zip(diseases,selfes,icd10s,icd9s):
    print(disease,sel,icd10,icd9)
    
    try:
        selfcodes = pheno_info.DIAGNOSESSELF[sel]
        selfcodes = pd.DataFrame(selfcodes).rename(columns={'name':'Code Type','codings':'Code'}).drop(columns='source')
        selfcodes['Code Type'] = 'UK Biobank Self Report'
    except:
        selfcodes = pd.DataFrame([['UK Biobank Self Report','']],columns=['Code Type','Code'])
    try:
        icd10codes = pheno_info.DIAGNOSESICD10[icd10]
        icd10codes = pd.DataFrame(icd10codes).rename(columns={'name':'Code Type','codings':'Code'}).drop(columns='source')
        icd10codes['Code Type'] = 'ICD 10'
    except:
        icd10codes = pd.DataFrame([['ICD 10','']],columns=['Code Type','Code'])
    try:
        icd9codes = pheno_info.DIAGNOSESICD9[icd9]
        icd9codes = pd.DataFrame(icd9codes).rename(columns={'name':'Code Type','codings':'Code'}).drop(columns='source')
        icd9codes['Code Type'] = 'ICD 9'
    except:
        icd9codes = pd.DataFrame([['ICD 9','']],columns=['Code Type','Code'])

    gp_codesr2 = pd.read_csv(f'{data_path}/codings/readv2ICD10_gp_{disease}.csv')
    gp_codesr2 = gp_codesr2.rename(columns={'READ_CODE':'Code','Unnamed: 0':'Code Type'}).drop(columns=gp_codesr2.columns[2:])
    gp_codesr2['Code Type'] = 'Read V2'
    if gp_codesr2.empty:
        print(gp_codesr2)
        gp_codesr2 = gp_codesr2.append({'Code Type': 'Read V2','Code':''},ignore_index=True)
        print(gp_codesr2)
    gp_codesr3 = pd.read_csv(f'{data_path}/codings/readv3ICD10_gp_{disease}.csv')
    gp_codesr3 = gp_codesr3.rename(columns={'READ_CODE':'Code','Unnamed: 0':'Code Type'}).drop(columns=gp_codesr3.columns[2:])
    gp_codesr3['Code Type'] = 'Read CTV3'
    own_codes = pd.concat([selfcodes,icd10codes,icd9codes,gp_codesr2,gp_codesr3])
    table = own_codes.groupby('Code Type')['Code'].apply(lambda x: ', '.join(x)).to_frame().T
    table['disease'] = disease
    tables.append(table[np.hstack([codes,'disease'])])
codes_table = pd.concat(tables)

Depression 4 37 9
RBD 999 19 999
UrinaryIncontinence 5 46 11
ErectileDysfunction 6 47 12
Constipation 7 36 13
Anxiety 8 38 14
OrthostaticHypotension 999 48 15
Hyposmia 999 40 16


In [8]:
with pd.option_context("max_colwidth", 2000):
    print(codes_table.set_index('disease').to_latex())
codes_table.set_index('disease').to_csv(f'{data_path}/codings/prodromal_codes.csv')

\begin{tabular}{llllll}
\toprule
Code Type & UK Biobank Self Report &                                                    ICD 10 &                         ICD 9 &                                                                      Read V2 &                                                                                                                                                                                                                                                                                                                                                                                                             Read CTV3 \\
disease                &                        &                                                           &                               &                                                                              &                                                                                                                                  

In [10]:
codes_table

Code Type,UK Biobank Self Report,ICD 10,ICD 9,Read V2,Read CTV3,disease
Code,1286.0,"F204, F32, F328, F33, F330, F331, F332, F333, ...","2962, 2963, 3004, 3119","Eu204, Eu32., Eu32A, Eu32B, Eu32y, Eu330, Eu33...","2257., 2257., E0043, E0043, E1131, E1132, E113...",Depression
Code,,G478,,"Fy05., Fy06., Fyu58","E274A, E274E, Fyu58, X008C, X008D, X008E, X008...",RBD
Code,1202.0,"N394, R32",7883,"Kyu5A, R083., R0830, R0831, R0832, R083z","1A22., 1A23., 1A23., 1A26., 3940., Kyu5A, R083...",UrinaryIncontinence
Code,1518.0,"N484, F522",3027,"Eu522, K27y1, K27y7","E2273, E2273, Eu522, K27y1, X400F, X400F, X400...",ErectileDysfunction
Code,1599.0,K590,"56409, 56402","J520., J5200, J5201, J5202, J5203, J5204, J520...","19EA., J520., J5200, J5201, J5202, J520y, J520...",Constipation
Code,1287.0,"F412, F413, F418, F419, F480, F488, F489, F064","3000, 3001, 3002, 3005, 3009","Eu054, Eu412, Eu413, Eu41y, Eu41z, Eu460, Eu46...","1682., E...., E20.., E20.., E200., E200., E200...",Anxiety
Code,,I951,4580,G870.,"G870., XM02W",OrthostaticHypotension
Code,,R430,7811,R0110,"2BP3., R0110, X008M, X008N, X008O, XE0rs, XM0C...",Hyposmia


In [3]:
# table of codes for HC selection
diseases = ['neurology']
selfes = [2]
icd10s = [12]
icd9s = [7]
tables = []
codes = ['UK Biobank Self Report','ICD 10','ICD 9','Read V2','Read CTV3']
for disease,sel,icd10,icd9 in zip(diseases,selfes,icd10s,icd9s):
    print(disease,sel,icd10,icd9)
    
    try:
        selfcodes = pheno_info.DIAGNOSESSELF[sel]
        selfcodes = pd.DataFrame(selfcodes).rename(columns={'name':'Code Type','codings':'Code'}).drop(columns='source')
        selfcodes['Code Type'] = 'UK Biobank Self Report'
    except:
        selfcodes = pd.DataFrame([['UK Biobank Self Report','']],columns=['Code Type','Code'])
    try:
        icd10codes = pheno_info.DIAGNOSESICD10[icd10]
        icd10codes = pd.DataFrame(icd10codes).rename(columns={'name':'Code Type','codings':'Code'}).drop(columns='source')
        icd10codes['Code Type'] = 'ICD 10'
    except:
        icd10codes = pd.DataFrame([['ICD 10','']],columns=['Code Type','Code'])
    try:
        icd9codes = pheno_info.DIAGNOSESICD9[icd9]
        icd9codes = pd.DataFrame(icd9codes).rename(columns={'name':'Code Type','codings':'Code'}).drop(columns='source')
        icd9codes['Code Type'] = 'ICD 9'
    except:
        icd9codes = pd.DataFrame([['ICD 9','']],columns=['Code Type','Code'])

    gp_codesr2 = pd.read_csv(f'{data_path}/codings/readv2ICD10_gp_{disease}.csv')
    gp_codesr2 = gp_codesr2.rename(columns={'READ_CODE':'Code','Unnamed: 0':'Code Type'}).drop(columns=gp_codesr2.columns[2:])
    gp_codesr2['Code Type'] = 'Read V2'
    if gp_codesr2.empty:
        print(gp_codesr2)
        gp_codesr2 = gp_codesr2.append({'Code Type': 'Read V2','Code':''},ignore_index=True)
        print(gp_codesr2)
    gp_codesr3 = pd.read_csv(f'{data_path}/codings/readv3ICD10_gp_{disease}.csv')
    gp_codesr3 = gp_codesr3.rename(columns={'READ_CODE':'Code','Unnamed: 0':'Code Type'}).drop(columns=gp_codesr3.columns[2:])
    gp_codesr3['Code Type'] = 'Read CTV3'
    own_codes = pd.concat([selfcodes,icd10codes,icd9codes,gp_codesr2,gp_codesr3])
    table = own_codes.groupby('Code Type')['Code'].apply(lambda x: ', '.join(x)).to_frame().T
    table['disease'] = disease
    tables.append(table[np.hstack([codes,'disease'])])
codes_table = pd.concat(tables)

neurology 2 12 7


In [9]:
codes_table['ICD 10'].sort_values().values

array(['F200, F481, F315, F648, F71, G003, F453, F127, G545, G444, F45, G730, F720, F59, F061, G251, G09, F120, F781, G400, G804, F918, F606, F145, G514, G252, G373, G249, F528, G700, G460, F071, F010, F519, F413, F430, F068, F138, G119, F514, G47, F700, G98, F002, F39, F220, F913, F316, G938, G80, G541, F000, G121, G731, F186, F608, F400, G441, F940, G11, F019, F802, F99, F30, F730, F011, F708, F20, F07, F942, G904, G542, G243, G51, F799, F83, G834, F164, F172, G969, G232, G214, G218, G26, F152, F941, G04, G543, F520, F658, G819, F531, G233, G724, F185, F432, G05, F323, F662, F649, G932, F341, G618, G32, F110, F183, F629, F505, G35, G371, G452, F51, F154, F02, G432, F339, G128, G553, F728, F721, F653, G991, G244, G979, G629, F809, G811, G701, G562, F428, G909, F458, F100, G576, G910, G062, F332, F603, G256, F147, F412, G95, F621, F175, G902, F638, G522, F17, F252, F050, F813, F111, F179, F329, G448, F196, G608, G630, F176, G370, F063, G110, F90, G13, F639, G603, G823, F13, F251, G021,