# Revised case normalization for Hirslanden Aarau 2018

Notes:

- 17 of 17 revised DtoD cases were joined with the BFS cases from the DB.



In [1]:
import sys

import pandas as pd
import numpy as np

from loguru import logger

sys.path.insert(0, '/home/jovyan/work')

from src.revised_case_normalization.py.global_configs import *
from src.revised_case_normalization.py.normalize import normalize, remove_leading_zeros
from src.service import bfs_cases_db_service as bfs_db
from src.service.bfs_cases_db_service import session, get_sociodemographics_for_hospital_year, get_earliest_revisions_for_aimedic_ids, get_codes, apply_revisions

In [2]:
file_info = FileInfo(
        os.path.join(ROOT_DIR, 'raw_data/HI_Aarau_Birshof_ST. Anna.xlsx'),
        'Hirslanden Aarau',
        '2018',
        ['Aarau 2018'])

print(file_info)

FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/HI_Aarau_Birshof_ST. Anna.xlsx', hospital_name_db='Hirslanden Aarau', year='2018', sheets=['Aarau 2018'])


In [3]:
df_revised_case_d2d = normalize(file_info, 0)

2022-10-24 14:43:50.470 | INFO     | src.revised_case_normalization.py.normalize:normalize:35 - Read 17 cases for Hirslanden Aarau 2018
2022-10-24 14:43:50.473 | INFO     | src.revised_case_normalization.py.normalize:normalize:57 - TYPES:
tranche                   string
datum                     string
case_id                   object
patient_id                object
kkik                      string
gender                    string
age_years                  int64
bfs_code                  string
duration_of_stay           int64
pflegetage neu            string
pccl                       int64
pccl neu                  string
old_pd                    string
primary_diagnosis         string
added_icds                string
removed_icds              string
added_chops               string
removed_chops             string
drg                       string
drg neu                   string
cw alt                    string
cw neu                    string
cw-änderung möglich       string
cw

In [4]:
df_revised_case_d2d.head()

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,41623461,1059029,W,77,2,41623461,I7024,I7024,M100,3,F59E,[J4481],[J4483],[],[]
1,41665678,2043896,M,82,17,41665678,I5001,I5001,M100,3,F62C,[R18],[],[],[]
2,41698586,22177655,M,93,2,41698586,I7024,I7024,M100,3,F59E,[I743],[],[],[]
3,41719219,1339311,M,91,4,41719219,I495,I495,M100,3,F71B,[N182],[],[],[]
4,41777951,22177655,M,93,2,41777951,I7024,I7024,M100,0,F59E,"[N184, T828]",[],[],[]


# Match revised DtoD data with the BfS data from the database

In [5]:
cases_in_db = get_sociodemographics_for_hospital_year(file_info.hospital_name_db, int(file_info.year))
cases_in_db[NORM_CASE_ID_COL] = cases_in_db[CASE_ID_COL].apply(remove_leading_zeros)
print(cases_in_db.head())

2022-10-24 14:43:51.844 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:124 - Read 10112 rows from the DB, for the hospital 'Hirslanden Aarau' in 2018


   aimedic_id     case_id        patient_id  age_years  age_days  admission_weight  gestation_age gender admission_date grouper_admission_type discharge_date grouper_discharge_type  duration_of_stay  ventilation_hours case_id_norm
0      115452  0041415230  A0E36E42C141D541         29         0                 0              0      M     2018-01-16                     01     2018-01-18                     00                 2                  0     41415230
1      115453  0041453333  10CA4E4A4D6A6648         34         0                 0              0      W     2018-01-19                     01     2018-01-20                     00                 1                  0     41453333
2      115454  0041473239  E7A902368D82844A         63         0                 0              0      M     2018-03-19                     01     2018-03-23                     00                 4                  0     41473239
3      115455  0041506268  5FCEADE16E4FD6EC         44         0            

In [6]:
cols_to_join = list(VALIDATION_COLS)
cols_to_join.remove(CASE_ID_COL)
cols_to_join.append(NORM_CASE_ID_COL)

cols_to_join.remove(PATIENT_ID_COL)

joined = pd.merge(df_revised_case_d2d, cases_in_db,
                  how='left',
                  on=cols_to_join,
                  suffixes=('', '_db'))

In [7]:
# Extract grouper relevant columns

joined = joined[GROUPER_INPUT_BFS + [NEW_PRIMARY_DIAGNOSIS_COL, ADDED_ICD_CODES, REMOVED_ICD_CODES, ADDED_CHOP_CODES, REMOVED_CHOP_CODES]]

unmatched = joined[joined[AIMEDIC_ID_COL].isna()]
num_unmatched = unmatched.shape[0]
print(f'{num_unmatched} rows could not be matched, given {sorted(cols_to_join)}')

joined

0 rows could not be matched, given ['age_years', 'case_id_norm', 'duration_of_stay', 'gender']


Unnamed: 0,aimedic_id,age_years,age_days,admission_weight,gestation_age,gender,admission_date,grouper_admission_type,discharge_date,grouper_discharge_type,duration_of_stay,ventilation_hours,primary_diagnosis,added_icds,removed_icds,added_chops,removed_chops
0,115875,77,0,0,0,W,2018-01-10,1,2018-01-12,0,2,0,I7024,[J4481],[J4483],[],[]
1,116851,82,0,0,0,M,2018-01-30,1,2018-02-16,0,17,0,I5001,[R18],[],[],[]
2,117722,93,0,0,0,M,2018-03-15,1,2018-03-17,0,2,0,I7024,[I743],[],[],[]
3,118215,91,0,0,0,M,2018-03-17,1,2018-03-21,0,4,0,I495,[N182],[],[],[]
4,120279,93,0,0,0,M,2018-05-31,1,2018-06-02,0,2,0,I7024,"[N184, T828]",[],[],[]
5,116472,64,0,0,0,M,2018-01-17,1,2018-01-25,0,8,8,I5014,[E1191],[E1190],[],[]
6,117999,58,0,0,0,W,2018-03-09,1,2018-03-14,0,5,0,N390,[B370],[],[],[]
7,120078,79,0,0,0,W,2018-05-23,1,2018-06-01,0,9,0,N390,[E440],[],[890A32],[]
8,119991,70,0,0,0,M,2018-05-19,1,2018-05-25,0,6,0,N1781,[J9610],[],[],[]
9,121288,51,0,0,0,M,2018-07-04,1,2018-07-12,0,8,8,I208,[J9580],[],[],[]


In [8]:
# Format admission_date and discharge_date the (SwissDRG Batchgrouper Format 2017 (YYYYMMDD)

joined['admission_date'] = joined['admission_date'].astype(str)
joined['admission_date'] = joined['admission_date'].str.replace("-","")

joined['discharge_date'] = joined['discharge_date'].astype(str)
joined['discharge_date'] = joined['discharge_date'].str.replace("-","")                                            
joined.head()


Unnamed: 0,aimedic_id,age_years,age_days,admission_weight,gestation_age,gender,admission_date,grouper_admission_type,discharge_date,grouper_discharge_type,duration_of_stay,ventilation_hours,primary_diagnosis,added_icds,removed_icds,added_chops,removed_chops
0,115875,77,0,0,0,W,20180110,1,20180112,0,2,0,I7024,[J4481],[J4483],[],[]
1,116851,82,0,0,0,M,20180130,1,20180216,0,17,0,I5001,[R18],[],[],[]
2,117722,93,0,0,0,M,20180315,1,20180317,0,2,0,I7024,[I743],[],[],[]
3,118215,91,0,0,0,M,20180317,1,20180321,0,4,0,I495,[N182],[],[],[]
4,120279,93,0,0,0,M,20180531,1,20180602,0,2,0,I7024,"[N184, T828]",[],[],[]


In [9]:
original_revision_ids = get_earliest_revisions_for_aimedic_ids(joined[AIMEDIC_ID_COL].values.tolist())
original_cases = get_codes(original_revision_ids)

In [10]:
original_cases

Unnamed: 0,aimedic_id,revision_id,old_pd,secondary_diagnoses,primary_procedure,secondary_procedures
0,115875,115875,I7024,"[I7020, Z9588, I1090, N184, J4483, ...]",395014,"[395011, 397510, 004C12, 005599, 0043, ...]"
1,116472,116472,I5014,"[I5001, I2513, I2522, N184, I1320, ...]",99B712,"[8945, 860A06, 948X40]"
2,116851,116851,I5001,"[J91, I5013, I7020, L304, N185, ...]",887211,"[887963, 887964, 887975, 3491]"
3,117722,117722,I7024,"[Z9588, N184, Z922]",395011,"[397511, 395021, 397510, 0042, 004B18, ...]"
4,117999,117999,N390,"[B965, R400, G3520, F446, Z936]",992217,[]
5,118215,118215,I495,"[I484, I480, I1090, I2519, I340, ...]",893909,[]
6,119991,119991,N1781,"[Y579, I1100, I480, I5001, G2010, ...]",992909,[]
7,120078,120078,N390,"[B962, A4158, R650, E1190, E788, ...]",992217,[]
8,120279,120279,I7024,"[Z9588, Z921, Z922]",395014,"[004C12, 395011, 0043, 004B1A, 004B46, ...]"
9,121288,121288,I208,"[I2513, D684, U6912, E788]",361226,"[361121, 361122, 361C14, 3963, 3964, ...]"


In [11]:
revised_cases = apply_revisions(original_cases, joined)

In [12]:
# Formatting primary_procedure and secondary_procedures to fit SwissDRG Batchgrouper Format 2017
# NOTE: Sideness and procedure date are not taken into account

# Formatting primary procedure column
revised_cases["primary_procedure"] = [procedure + "::" for procedure in revised_cases["primary_procedure"]] 


# Formatting secondary procedure column
revised_cases["secondary_procedures"] = revised_cases['secondary_procedures'].map(str)
revised_cases["secondary_procedures"] = revised_cases["secondary_procedures"].str.strip("[]")
revised_cases["secondary_procedures"] = revised_cases["secondary_procedures"].str.replace("'","").str.replace(",","::|").str.replace(" ","")

# Formatting grouper procedures column
revised_cases["grouper_procedures"] = revised_cases['primary_procedure'].map(str) + "|" + revised_cases['secondary_procedures'].map(str)
revised_cases["grouper_procedures"] = revised_cases["grouper_procedures"].str.rstrip("::|") + "::"

revised_cases


Unnamed: 0,aimedic_id,primary_diagnosis,secondary_diagnoses,primary_procedure,secondary_procedures,grouper_procedures
0,115875,I7024,"[I7020, Z9588, I1090, N184, N390, ...]",395014::,395011::|397510::|004C12::|005599::|0043::|004B18::|004B1A::|0046::|393019,395014::|395011::|397510::|004C12::|005599::|0043::|004B18::|004B1A::|0046::|393019::
1,116472,I5014,"[I5001, I2513, I2522, N184, I1320, ...]",99B712::,8945::|860A06::|948X40,99B712::|8945::|860A06::|948X40::
2,116851,I5001,"[J91, I5013, I7020, L304, N185, ...]",887211::,887963::|887964::|887975::|3491,887211::|887963::|887964::|887975::|3491::
3,117722,I7024,"[Z9588, N184, Z922, I743]",395011::,397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012,395011::|397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012::
4,117999,N390,"[B965, R400, G3520, F446, Z936, ...]",992217::,,992217::
5,118215,I495,"[I484, I480, I1090, I2519, I340, ...]",893909::,,893909::
6,119991,N1781,"[Y579, I1100, I480, I5001, G2010, ...]",992909::,,992909::
7,120078,N390,"[B962, A4158, R650, E1190, E788, ...]",992217::,890A32,992217::|890A32::
8,120279,I7024,"[Z9588, Z921, Z922, N184, T828]",395014::,004C12::|395011::|0043::|004B1A::|004B46::|004B18::|004B45::|395021::|397511::|005599::|0045::|8...,395014::|004C12::|395011::|0043::|004B1A::|004B46::|004B18::|004B45::|395021::|397511::|005599::...
9,121288,I208,"[I2513, D684, U6912, E788, J9580]",361226::,361121::|361122::|361C14::|3963::|3964::|396122::|372A21::|990A::|99B711,361226::|361121::|361122::|361C14::|3963::|3964::|396122::|372A21::|990A::|99B711::


In [13]:
# Formatting primary_diagnosis and secondary_diagnosis to fit SwissDRG Batchgrouper Format 2017

revised_cases["secondary_diagnoses"] = revised_cases['secondary_diagnoses'].map(str)
revised_cases["secondary_diagnoses"] = revised_cases["secondary_diagnoses"].str.strip("[]")
revised_cases["secondary_diagnoses"] = revised_cases["secondary_diagnoses"].str.replace("'","").str.replace(",","|").str.replace(" ","")
revised_cases["grouper_diagnoses"] = revised_cases['primary_diagnosis'].map(str) + "|" + revised_cases['secondary_diagnoses'].map(str)



In [14]:
revised_cases

Unnamed: 0,aimedic_id,primary_diagnosis,secondary_diagnoses,primary_procedure,secondary_procedures,grouper_procedures,grouper_diagnoses
0,115875,I7024,I7020|Z9588|I1090|N184|N390|B962|J4481,395014::,395011::|397510::|004C12::|005599::|0043::|004B18::|004B1A::|0046::|393019,395014::|395011::|397510::|004C12::|005599::|0043::|004B18::|004B1A::|0046::|393019::,I7024|I7020|Z9588|I1090|N184|N390|B962|J4481
1,116472,I5014,I5001|I2513|I2522|N184|I1320|E875|L0301|E788|M1000|G4731|D508|K5730|Z4500|Z922|F328|Z950|Z955|Z9...,99B712::,8945::|860A06::|948X40,99B712::|8945::|860A06::|948X40::,I5014|I5001|I2513|I2522|N184|I1320|E875|L0301|E788|M1000|G4731|D508|K5730|Z4500|Z922|F328|Z950|Z...
2,116851,I5001,J91|I5013|I7020|L304|N185|I480|I1100|E1120|N083|E790|N40|I340|I361|R600|L998|I351|I2728|N1789|Y5...,887211::,887963::|887964::|887975::|3491,887211::|887963::|887964::|887975::|3491::,I5001|J91|I5013|I7020|L304|N185|I480|I1100|E1120|N083|E790|N40|I340|I361|R600|L998|I351|I2728|N1...
3,117722,I7024,Z9588|N184|Z922|I743,395011::,397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012,395011::|397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012::,I7024|Z9588|N184|Z922|I743
4,117999,N390,B965|R400|G3520|F446|Z936|B370,992217::,,992217::,N390|B965|R400|G3520|F446|Z936|B370
5,118215,I495,I484|I480|I1090|I2519|I340|I351|I2728|J90|Z950|Z951|Z921|N182,893909::,,893909::,I495|I484|I480|I1090|I2519|I340|I351|I2728|J90|Z950|Z951|Z921|N182
6,119991,N1781,Y579|I1100|I480|I5001|G2010|M4507|M8189|Z921|J9610,992909::,,992909::,N1781|Y579|I1100|I480|I5001|G2010|M4507|M8189|Z921|J9610
7,120078,N390,B962|A4158|R650|E1190|E788|N183|J459|E038|E440,992217::,890A32,992217::|890A32::,N390|B962|A4158|R650|E1190|E788|N183|J459|E038|E440
8,120279,I7024,Z9588|Z921|Z922|N184|T828,395014::,004C12::|395011::|0043::|004B1A::|004B46::|004B18::|004B45::|395021::|397511::|005599::|0045::|8...,395014::|004C12::|395011::|0043::|004B1A::|004B46::|004B18::|004B45::|395021::|397511::|005599::...,I7024|Z9588|Z921|Z922|N184|T828
9,121288,I208,I2513|D684|U6912|E788|J9580,361226::,361121::|361122::|361C14::|3963::|3964::|396122::|372A21::|990A::|99B711,361226::|361121::|361122::|361C14::|3963::|3964::|396122::|372A21::|990A::|99B711::,I208|I2513|D684|U6912|E788|J9580


In [15]:
# Extract and reorder relevant columns from BFS DB data to retrieve SwissDRG Grouper Format (SwissDRG Batchgrouper Format 2017)

joined_grouper = joined[GROUPER_INPUT_BFS]
joined_grouper.head()

Unnamed: 0,aimedic_id,age_years,age_days,admission_weight,gestation_age,gender,admission_date,grouper_admission_type,discharge_date,grouper_discharge_type,duration_of_stay,ventilation_hours
0,115875,77,0,0,0,W,20180110,1,20180112,0,2,0
1,116851,82,0,0,0,M,20180130,1,20180216,0,17,0
2,117722,93,0,0,0,M,20180315,1,20180317,0,2,0
3,118215,91,0,0,0,M,20180317,1,20180321,0,4,0
4,120279,93,0,0,0,M,20180531,1,20180602,0,2,0


In [16]:
# Extract and reorder relevant columns from revised cases to retrieve SwissDRG Grouper Format (SwissDRG Batchgrouper Format 2017)

revised_cases_grouper = revised_cases[GROUPER_INPUT_REVISED_CASES]
revised_cases_grouper

Unnamed: 0,aimedic_id,grouper_procedures,grouper_diagnoses
0,115875,395014::|395011::|397510::|004C12::|005599::|0043::|004B18::|004B1A::|0046::|393019::,I7024|I7020|Z9588|I1090|N184|N390|B962|J4481
1,116472,99B712::|8945::|860A06::|948X40::,I5014|I5001|I2513|I2522|N184|I1320|E875|L0301|E788|M1000|G4731|D508|K5730|Z4500|Z922|F328|Z950|Z...
2,116851,887211::|887963::|887964::|887975::|3491::,I5001|J91|I5013|I7020|L304|N185|I480|I1100|E1120|N083|E790|N40|I340|I361|R600|L998|I351|I2728|N1...
3,117722,395011::|397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012::,I7024|Z9588|N184|Z922|I743
4,117999,992217::,N390|B965|R400|G3520|F446|Z936|B370
5,118215,893909::,I495|I484|I480|I1090|I2519|I340|I351|I2728|J90|Z950|Z951|Z921|N182
6,119991,992909::,N1781|Y579|I1100|I480|I5001|G2010|M4507|M8189|Z921|J9610
7,120078,992217::|890A32::,N390|B962|A4158|R650|E1190|E788|N183|J459|E038|E440
8,120279,395014::|004C12::|395011::|0043::|004B1A::|004B46::|004B18::|004B45::|395021::|397511::|005599::...,I7024|Z9588|Z921|Z922|N184|T828
9,121288,361226::|361121::|361122::|361C14::|3963::|3964::|396122::|372A21::|990A::|99B711::,I208|I2513|D684|U6912|E788|J9580


In [17]:
# Join revised diagnoses and procedures to cases in db:

grouper_input_data = pd.merge(joined_grouper,revised_cases_grouper, 
                  how='inner',
                  on='aimedic_id',
                 suffixes=('', '_db'))

In [18]:
# Added empty medication column (Placeholder!!)

grouper_input_data["medication"] = "" 

grouper_input_data.head()

Unnamed: 0,aimedic_id,age_years,age_days,admission_weight,gestation_age,gender,admission_date,grouper_admission_type,discharge_date,grouper_discharge_type,duration_of_stay,ventilation_hours,grouper_procedures,grouper_diagnoses,medication
0,115875,77,0,0,0,W,20180110,1,20180112,0,2,0,395014::|395011::|397510::|004C12::|005599::|0043::|004B18::|004B1A::|0046::|393019::,I7024|I7020|Z9588|I1090|N184|N390|B962|J4481,
1,116851,82,0,0,0,M,20180130,1,20180216,0,17,0,887211::|887963::|887964::|887975::|3491::,I5001|J91|I5013|I7020|L304|N185|I480|I1100|E1120|N083|E790|N40|I340|I361|R600|L998|I351|I2728|N1...,
2,117722,93,0,0,0,M,20180315,1,20180317,0,2,0,395011::|397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012::,I7024|Z9588|N184|Z922|I743,
3,118215,91,0,0,0,M,20180317,1,20180321,0,4,0,893909::,I495|I484|I480|I1090|I2519|I340|I351|I2728|J90|Z950|Z951|Z921|N182,
4,120279,93,0,0,0,M,20180531,1,20180602,0,2,0,395014::|004C12::|395011::|0043::|004B1A::|004B46::|004B18::|004B45::|395021::|397511::|005599::...,I7024|Z9588|Z921|Z922|N184|T828,


In [19]:
# Format to string file for grouper

grouper_input_data_string = grouper_input_data.astype(str)

grouper_input_data_string = grouper_input_data_string.apply(';'.join, axis=1)


0    115875;77;0;0;0;W;20180110;01;20180112;00;2;0;395014::|395011::|397510::|004C12::|005599::|0043:...
1    116851;82;0;0;0;M;20180130;01;20180216;00;17;0;887211::|887963::|887964::|887975::|3491::;I5001|...
2    117722;93;0;0;0;M;20180315;01;20180317;00;2;0;395011::|397511::|395021::|397510::|0042::|004B18:...
3    118215;91;0;0;0;M;20180317;01;20180321;00;4;0;893909::;I495|I484|I480|I1090|I2519|I340|I351|I272...
4    120279;93;0;0;0;M;20180531;01;20180602;00;2;0;395014::|004C12::|395011::|0043::|004B1A::|004B46:...
dtype: object

In [20]:
# TODO:
# - Add medication 
# - Add and compare CHOP Codes for sideness and procedure date