# Revised case normalization for St. Anna 2018

Notes:

- 17 of 17 revised DtoD cases were joined with the BFS cases from the DB.



In [1]:
import sys
import itertools
import pandas as pd
import numpy as np

from loguru import logger

sys.path.insert(0, '/home/jovyan/work')

from src.revised_case_normalization.py.global_configs import *
from src.revised_case_normalization.py.normalize import normalize, remove_leading_zeros
from src.service import bfs_cases_db_service as bfs_db
from src.service.bfs_cases_db_service import session, get_sociodemographics_for_hospital_year, get_earliest_revisions_for_aimedic_ids, get_codes, apply_revisions, get_primary_procedures_codes, get_primary_procedures_codes_side_date, get_secondary_procedures_codes_side_date, get_secondary_procedures_codes
from src.revised_case_normalization.py.format_for_grouper import format_for_grouper

In [2]:
# Describe the revision workbook to normalize: file location, hospital name as stored
# in the DB, revision year and the sheet(s) that hold the revised cases.
revision_workbook = os.path.join(ROOT_DIR, 'raw_data/HI_Aarau_Birshof_ST. Anna.xlsx')
revision_sheets = ['KOPIE_Änderungen_ST. Anna_2018']

file_info = FileInfo(revision_workbook, 'Hirslanden St. Anna', '2018', revision_sheets)

print(file_info)

FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/HI_Aarau_Birshof_ST. Anna.xlsx', hospital_name_db='Hirslanden St. Anna', year='2018', sheets=['KOPIE_Änderungen_ST. Anna_2018'])


In [3]:
# Read and normalize the revised cases described by `file_info`.
# NOTE(review): the second argument (0) presumably selects the first entry of
# `file_info.sheets` — confirm against the signature of `normalize`.
df_revised_case_d2d = normalize(file_info, 0)


2022-11-01 13:18:45.308 | INFO     | src.revised_case_normalization.py.normalize:normalize:35 - Read 51 cases for Hirslanden St. Anna 2018
2022-11-01 13:18:45.311 | INFO     | src.revised_case_normalization.py.normalize:normalize:57 - TYPES:
tranche                   string
datum                     string
case_id                   object
patient_id                object
gender                    string
age_years                  int64
bfs_code                  string
duration_of_stay           int64
pflegetage neu            string
pccl                       int64
pccl neu                  string
old_pd                    string
primary_diagnosis         string
added_icds                string
removed_icds              string
added_chops               string
removed_chops             string
drg                       string
drg neu                   string
cw alt                    string
cw neu                    string
cw-änderung möglich       string
cw änderung akzeptiert    string

In [4]:
# Preview the first rows of the normalized revised cases.
df_revised_case_d2d.head()

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,9358410,EB395DA315D5B285,W,79,9,9358410,M169,M169,M200,3,I46C,[D62],[D648],[],[]
1,9364697,6586D083924C0E99,W,68,16,9364697,M4806,M4806,M200,3,I09C,[E1191],[E1190],[],[]
2,9366989,9FBF3AE4CEA4A950,M,27,16,9366989,M511,M511,M200,3,I10C,[B956],[],[],[]
3,9371704,FDE409238A9BEC4F,M,65,13,9371704,I5001,I5001,M100,3,F49C,[N183],[],[],[]
4,9375929,036F0863E37E3493,M,74,18,9375929,M4806,M4806,M200,3,I27C,[T846],[T844],[],[]


# Match revised DtoD data with the BfS data from the database

In [5]:
# Load the sociodemographic data of the cases stored in the DB for this hospital and year.
hospital_name = file_info.hospital_name_db
revision_year = int(file_info.year)
cases_in_db = get_sociodemographics_for_hospital_year(hospital_name, revision_year)

# Add the case ID with leading zeros removed; it is used as a join key further down.
normalized_case_ids = cases_in_db[CASE_ID_COL].apply(remove_leading_zeros)
cases_in_db[NORM_CASE_ID_COL] = normalized_case_ids
print(cases_in_db.head())

2022-11-01 13:18:46.306 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:125 - Read 12990 rows from the DB, for the hospital 'Hirslanden St. Anna' in 2018


   aimedic_id     case_id        patient_id  age_years  age_days  admission_weight  gestation_age gender admission_date grouper_admission_type discharge_date grouper_discharge_type  duration_of_stay  ventilation_hours case_id_norm
0      138498  0009264434  E9E14AD826B70BFA         59         0                 0              0      M     2018-03-14                     01     2018-03-17                     00                 3                  0      9264434
1      138499  0009268866  0EE461E7ED376814         58         0                 0              0      M     2018-01-11                     01     2018-01-16                     00                 5                  0      9268866
2      138500  0009286976  AD0FB688816E4930         49         0                 0              0      W     2018-01-10                     01     2018-01-11                     00                 1                  0      9286976
3      138501  0009306256  8AA6481F39077A85         29         0            

In [6]:
# Join keys: the validation columns, with the raw case ID swapped for its normalized
# form and the patient ID left out.
cols_to_join = [*VALIDATION_COLS, NORM_CASE_ID_COL]
for excluded_col in (CASE_ID_COL, PATIENT_ID_COL):
    cols_to_join.remove(excluded_col)

# Left join: every revised case is kept; the DB columns stay empty where no DB case matches.
# Non-key columns that exist on both sides get the '_db' suffix on the DB side.
joined = df_revised_case_d2d.merge(
    cases_in_db,
    how='left',
    on=cols_to_join,
    suffixes=('', '_db'),
)

joined

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops,aimedic_id,case_id_db,patient_id_db,age_days,admission_weight,gestation_age,admission_date,grouper_admission_type,discharge_date,grouper_discharge_type,ventilation_hours
0,9358410,EB395DA315D5B285,W,79,9,9358410,M169,M169,M200,3,I46C,[D62],[D648],[],[],138741,9358410,EB395DA315D5B285,0,0,0,2018-02-01,1,2018-02-10,0,0
1,9364697,6586D083924C0E99,W,68,16,9364697,M4806,M4806,M200,3,I09C,[E1191],[E1190],[],[],138969,9364697,6586D083924C0E99,0,0,0,2018-01-14,1,2018-01-30,0,0
2,9366989,9FBF3AE4CEA4A950,M,27,16,9366989,M511,M511,M200,3,I10C,[B956],[],[],[],139157,9366989,9FBF3AE4CEA4A950,0,0,0,2017-12-25,1,2018-01-10,0,0
3,9371704,FDE409238A9BEC4F,M,65,13,9371704,I5001,I5001,M100,3,F49C,[N183],[],[],[],139683,9371704,FDE409238A9BEC4F,0,0,0,2018-01-12,1,2018-01-25,0,0
4,9375929,036F0863E37E3493,M,74,18,9375929,M4806,M4806,M200,3,I27C,[T846],[T844],[],[],140167,9375929,036F0863E37E3493,0,0,0,2018-03-27,1,2018-04-28,0,0
5,9377457,B172FAC0D995D8A0,M,77,5,9377457,I5000,I5000,M100,3,F62C,"[K746, E870]",[],[],[],140323,9377457,B172FAC0D995D8A0,0,0,0,2018-01-27,1,2018-02-01,0,0
6,9381325,E3D0603E52140F32,W,55,15,9381325,J4410,J4410,M100,3,E65B,[J9610],[],[],[],140730,9381325,E3D0603E52140F32,0,0,0,2018-02-07,6,2018-02-22,0,0
7,9382082,4ED15568FB7BDE9E,M,82,8,9382082,J100,J100,M100,3,E77E,[E870],[],[],[],140838,9382082,4ED15568FB7BDE9E,0,0,0,2018-02-12,1,2018-02-20,0,0
8,9384055,44197C8BC8569771,W,87,14,9384055,S8180,S8180,M200,3,X06B,[E871],[],[],[],141112,9384055,44197C8BC8569771,0,0,0,2018-02-19,1,2018-03-05,0,0
9,9391002,8BD23E59AF760596,M,64,14,9391002,J128,J128,M100,3,E77E,[C9000],[],[],[],141794,9391002,8BD23E59AF760596,0,0,0,2018-03-08,1,2018-03-22,0,0


In [7]:
# Report how many revised cases have no DB counterpart (no aimedic_id after the left join).
is_unmatched = joined[AIMEDIC_ID_COL].isna()
unmatched = joined.loc[is_unmatched]
num_unmatched = len(unmatched)
logger.info(f'{num_unmatched} rows could not be matched, given {sorted(cols_to_join)}')

2022-11-01 13:18:46.369 | INFO     | __main__:<module>:4 - 0 rows could not be matched, given ['age_years', 'case_id_norm', 'duration_of_stay', 'gender']


In [8]:
# Keep only the revised cases that were matched to a DB case (aimedic_id present),
# and blank out any remaining missing values.
joined = joined[joined['aimedic_id'].notna()]
joined = joined.replace(np.nan, "")

# Cast the ID and the newborn-related columns to int so they are not rendered in
# float format in the grouper input.
joined = joined.astype({
    'aimedic_id': int,
    'age_days': int,
    'admission_weight': int,
    'gestation_age': int,
})

# Baby data is "<admission_weight>|<gestation_age>"; it is left empty when both values are 0.
baby_data = joined['admission_weight'].map(str) + "|" + joined['gestation_age'].map(str)
joined["baby_data"] = baby_data.replace("0|0", "")

# Extract the grouper-relevant columns plus the revision columns.
# .copy() makes the selection an independent frame, so the column assignments below
# do not operate on a slice of the previous frame (SettingWithCopyWarning).
grouper_and_revision_cols = GROUPER_INPUT_BFS + [
    NEW_PRIMARY_DIAGNOSIS_COL,
    ADDED_ICD_CODES,
    REMOVED_ICD_CODES,
    ADDED_CHOP_CODES,
    REMOVED_CHOP_CODES,
]
joined = joined[grouper_and_revision_cols].copy()

# Format admission_date and discharge_date as YYYYMMDD (SwissDRG Batchgrouper Format 2017).
# regex=False: the "-" is a literal separator, not a pattern.
for date_col in ('admission_date', 'discharge_date'):
    joined[date_col] = joined[date_col].astype(str).str.replace("-", "", regex=False)

In [9]:
# Fetch the earliest revision of every matched case and load its codes.
# NOTE(review): the earliest revision is presumably the case as originally coded — confirm.
matched_aimedic_ids = joined[AIMEDIC_ID_COL].values.tolist()
original_revision_ids = get_earliest_revisions_for_aimedic_ids(matched_aimedic_ids)

original_cases = get_codes(original_revision_ids)

In [10]:
# Apply the revisions held in `joined` to the original cases.
# NOTE(review): presumably this sets the new primary diagnosis and adds/removes the ICD and
# CHOP codes from the revision columns — confirm in `apply_revisions`.
revised_cases = apply_revisions(original_cases, joined)

In [11]:
# Primary procedures: retrieve side and date.
# The original primary procedures are loaded for the same revisions and handed over
# together with the revised primary procedure of each case.
original_cases_primary_procedures = get_primary_procedures_codes(original_revision_ids)
primary_procedures_revised = get_primary_procedures_codes_side_date(
    original_cases_primary_procedures,
    revised_cases[["aimedic_id", "primary_procedure"]],
)

In [12]:
def _with_side_and_date_fields(procedure):
    """Append '::' to a procedure entry of at most 6 characters; return longer entries unchanged.

    NOTE(review): entries this short presumably carry only the bare code, so the '::'
    adds empty side and date fields — confirm against the grouper input format.
    """
    if len(procedure) > 6:
        return procedure
    return procedure + "::"


revised_cases['primary_procedure_grouper'] = (
    primary_procedures_revised['primary_procedure_grouper_bfs'].apply(_with_side_and_date_fields)
)

In [13]:
# SECONDARY PROCEDURES

In [14]:
# Load the original secondary procedures and pick the revised ones per case.
original_cases_secondary_procedures = get_secondary_procedures_codes(original_revision_ids)

secondary_procedure_cols = ["aimedic_id", "secondary_procedures"]
secondary_procedures_revised = revised_cases.loc[:, secondary_procedure_cols]

In [15]:
# Retrieve side and date for the revised secondary procedures.
secondary_procedures_revised = get_secondary_procedures_codes_side_date(
    original_cases_secondary_procedures,
    secondary_procedures_revised,
)

# Attach them to the revised cases (left join on aimedic_id) and give the new column
# its grouper name.
revised_cases = (
    revised_cases
    .merge(secondary_procedures_revised, on="aimedic_id", how="left")
    .rename(columns={'secondary_procedures_grouper_bfs': 'secondary_procedures_grouper'})
)

In [16]:
# Procedures field for the grouper: primary procedure, then the secondary ones, joined by "|".
primary_part = revised_cases['primary_procedure_grouper'].map(str)
secondary_part = revised_cases['secondary_procedures_grouper'].map(str)
revised_cases['grouper_procedures'] = primary_part + "|" + secondary_part

In [17]:
# Diagnoses

# Turn the textual form of each secondary-diagnosis list into a "|"-separated string:
# drop the surrounding brackets, then the quotes, turn commas into pipes and drop spaces.
secondary_as_text = revised_cases['secondary_diagnoses'].map(str).str.strip("[]")
for old_text, new_text in (("'", ""), (",", "|"), (" ", "")):
    secondary_as_text = secondary_as_text.str.replace(old_text, new_text)
revised_cases["secondary_diagnoses"] = secondary_as_text

# Diagnoses field for the grouper: primary diagnosis first, then the secondary ones.
primary_as_text = revised_cases['primary_diagnosis'].map(str)
revised_cases["grouper_diagnoses"] = primary_as_text + "|" + revised_cases["secondary_diagnoses"].map(str)



In [18]:
# Assemble the grouper input: case data from the BFS DB plus the revised codes.

# Case-level columns from the BFS DB data, in grouper order
joined_grouper = joined[GROUPER_INPUT_BFS]

# Revised diagnoses and procedures, in grouper order
revised_cases_grouper = revised_cases[GROUPER_INPUT_REVISED_CASES]

# Inner join on aimedic_id, then append an empty medication column (placeholder!)
grouper_input_data = (
    joined_grouper
    .merge(revised_cases_grouper, how='inner', on='aimedic_id', suffixes=('', '_db'))
    .assign(medication="")
)

# One string per case, with the ";" delimiter the grouper needs between fields
grouper_input_data_string = grouper_input_data.astype(str).apply(lambda row: ';'.join(row), axis=1)

grouper_input_data_string

0     138741;79;0;;W;20180201;01;20180210;00;9;0;M169|I2513|Z950|Z955|I420|E210|M8189|E1190|E788|F328|...
1     138969;68;0;;W;20180114;01;20180130;00;16;0;M4806|G553|T844|M4786|M513|G961|I1090|D699|Y828|E119...
2     139157;27;0;;M;20171225;01;20180110;00;16;0;M511|G9780|T814|G9788|T813|G474|B353|Y849|B956;862A2...
3     139683;65;0;;M;20180112;01;20180125;00;13;0;I5001|I5019|I2519|Z955|I2522|I340|I361|I481|Z921|Z95...
4     140167;74;0;;M;20180327;01;20180428;00;18;0;M4806|G553|I1090|Y828|B957|T846;848010::20180327|030...
5     140323;77;0;;M;20180127;01;20180201;00;5;0;I5000|I425|Z950|Z921|N183|J4489|K746|E870;893909::201...
6     140730;55;0;;W;20180207;06;20180222;00;15;0;J4410|J101|F328|K219|D508|J9610;998426::20180207|948...
7     140838;82;0;;M;20180212;01;20180220;00;8;0;J100|J91|J9600|B023|I2519|I489|Z921|Z953|E870;992218:...
8     141112;87;0;;W;20180219;01;20180305;00;14;0;S8180|X599|L908|E440|F328|I1090|E871;867A9E:L:201802...
9     141794;64;0;;M;20180308;01;20180322;00;1

In [19]:
# Spot check: show one complete line of the grouper input (SwissDRG Batchgrouper 2017 Format).

# NOTE(review): formatting via the imported `format_for_grouper` helper is currently disabled:
#grouper_input_data_string = format_for_grouper(joined)

#grouper_input_data_string

grouper_input_data_string[17]

'143608;85;0;;M;20180429;01;20180507;00;8;0;S722|X599|Z9664|G2090|F03|F058|T509|R451|R443|R391|I519|Z880|G248|M5486|D62;793511:R:20180429|948X40::20180429|9900::20180429|99B811::20180429|883830:R:20180429|990410:B:20180429;'

In [20]:
import subprocess

_GROUPER_JAR_PATH = "/home/jovyan/work/resources/aimedic-grouper-assembly-0.0.0-SNAPSHOT.jar"


def _run_grouper(main_class, *arguments):
    """Run `main_class` from the grouper JAR with `arguments` and return its stdout as text.

    Raises subprocess.CalledProcessError when the Java process exits with a non-zero status.
    """
    command = ["java", "-cp", _GROUPER_JAR_PATH, main_class, *arguments]
    return subprocess.check_output(command).decode("utf-8")


# Reference batch line, kept for comparison with the generated input (not passed to the grouper).
example_batch_line = """0044007489;57;0;0|0;W;20190101;01;20190101;00;1;0;C20|Z432|N40|I440|I493;465110::20190101|4823::20190101|009A13::20190101;"""

# Earlier test line, also unused here. Open points noted for it:
# add date to procedures
# change ; with | at one spot
test_df_line = """17722;93;;;M;20180315;01;20180317;00;2;0;I7024|Z9588|N184|Z922|I743;395011::|397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012::;"""

# Group a single case.
# NOTE(review): the class name is spelled "BatchGroupeOne"; it is kept as is because it must
# match the class inside the JAR — confirm the spelling there.
grouper_result = _run_grouper("ch.aimedic.grouper.BatchGroupeOne", grouper_input_data_string[6])

# Group several cases at once: the batch lines are concatenated with a separator, and the
# separator itself is passed as the second argument.
batch_separator = "#"
selected_batch_lines = batch_separator.join(grouper_input_data_string[label] for label in (1, 6, 16))
grouperResults = _run_grouper("ch.aimedic.grouper.BatchGroupMany", selected_batch_lines, batch_separator)

print(grouperResults)

[{"DRG" : "I06C", "MDC" : "08", "PCCL" : 4, "GST" : "NORMAL_GROUP"}, {"DRG" : "E65A", "MDC" : "04", "PCCL" : 4, "GST" : "NORMAL_GROUP"}, {"DRG" : "F18A", "MDC" : "05", "PCCL" : 4, "GST" : "NORMAL_GROUP"}]


In [21]:
# TODO:
# - Add medication 
# - Add and compare CHOP codes for sidedness and procedure date