# Revised case normalization for USZ 2019

Notes:

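- For USZ 2019, 150 revised DtoD cases were read; 76 rows could not be joined with the BFS cases from the DB, and 72 cases were formatted for the grouper (see the log outputs below).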



In [1]:
import sys

import pandas as pd
import numpy as np

from loguru import logger

sys.path.insert(0, '/home/jovyan/work')

from src.revised_case_normalization.py.global_configs import *
from src.revised_case_normalization.py.normalize import normalize, remove_leading_zeros
from src.service import bfs_cases_db_service as bfs_db
from src.service.bfs_cases_db_service import session, get_sociodemographics_for_hospital_year, get_earliest_revisions_for_aimedic_ids, get_codes, apply_revisions
from src.revised_case_normalization.py.format_for_grouper import format_for_grouper

In [2]:
# Describe the revised-case workbook to normalize: file location, hospital name
# as used in the DB, year, and the sheet(s) to read.
# NOTE(review): `os` and `ROOT_DIR` are not imported explicitly in the imports
# cell; presumably they come from the star import of global_configs — confirm.
file_info = FileInfo(
    path=os.path.join(ROOT_DIR, 'raw_data/USZ_2018-2019_20200730.xlsx'),
    hospital_name_db='USZ',
    year='2019',
    sheets=['Gesamtauffällige_USZ_2019'],
)

print(file_info)

FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/USZ_2018-2019_20200730.xlsx', hospital_name_db='USZ', year='2019', sheets=['Gesamtauffällige_USZ_2019'])


In [3]:
# Read and normalize the revised cases described by `file_info`.
# NOTE(review): the second argument (0) presumably selects the first entry of
# `file_info.sheets` — confirm against `normalize`.
df_revised_case_d2d = normalize(file_info, 0)

2022-10-28 10:05:18.636 | INFO     | src.revised_case_normalization.py.normalize:normalize:35 - Read 150 cases for USZ 2019
2022-10-28 10:05:18.639 | INFO     | src.revised_case_normalization.py.normalize:normalize:57 - TYPES:
datum/ lieferung     string
case_id              object
patient_id           object
kkik                 string
gender               string
age_years             int64
bfs_code             string
duration_of_stay      int64
pflegetage neu       string
pccl                  int64
pccl neu             string
old_pd               string
primary_diagnosis    string
added_icds           string
removed_icds         string
added_chops          string
removed_chops        string
drg                  string
drg neu              string
cw alt               string
cw neu               string
cw-änderung          string
kommentar            string
case_id_norm         object
dtype: object
2022-10-28 10:05:18.651 | INFO     | src.utils.dataframe_utils:validate_icd_codes:68 - 

In [4]:
# Preview the first rows of the normalized revised cases.
df_revised_case_d2d.head()

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,6400282213,3ED8F10117FCF5F5,M,54,13,6400282213,J4400,J4400,M200,3,E06A,[F058],[],[],[]
1,6400287076,55C5A17F71DADEA0,M,62,5,6400287076,J068,J068,M800,3,D62C,"[I8028, C770, C787]",[],[],[]
2,6400306817,0150AC67E3196B63,W,74,36,6400306817,R11,M8088,M100,3,G72C,[R64],[],[],[]
3,6400316927,153929A139272585,W,79,6,6400316927,G3531,G3531,M100,3,B68C,[R471],[],[],[]
4,6400278895,014A89BB816D9215,W,71,44,6400278895,T827,T827,M200,3,F75D,[D508],[D648],[],[]


# Match revised DtoD data with the BfS data from the database

In [5]:
# Load the sociodemographic data of all DB cases for this hospital and year, and
# add a normalized case ID column (computed with `remove_leading_zeros`) so the
# DB cases can be joined with the revised cases.
# `.assign` builds the column in the same expression instead of mutating the
# frame in a separate statement, so the cell stays a single, re-runnable step.
cases_in_db = (
    get_sociodemographics_for_hospital_year(file_info.hospital_name_db, int(file_info.year))
    .assign(**{
        NORM_CASE_ID_COL: lambda db_cases: db_cases[CASE_ID_COL].apply(remove_leading_zeros),
    })
)

# End the cell with the expression (rich table display) rather than print(),
# which renders a DataFrame as wrapped plain text.
cases_in_db.head()

2022-10-28 10:05:20.259 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:124 - Read 14430 rows from the DB, for the hospital 'USZ' in 2019


   aimedic_id     case_id        patient_id  age_years  age_days  admission_weight  gestation_age gender admission_date grouper_admission_type discharge_date grouper_discharge_type  duration_of_stay  ventilation_hours case_id_norm
0      564429  6400338029  E4F4ECE39AB53C9C         56         0                 0              0      M     2019-12-03                     01     2019-12-07                     00                 4                  0   6400338029
1      564430  6400331968  3CF54273AF86873D         67         0                 0              0      W     2019-12-30                     01     2019-12-31                     00                 1                  0   6400331968
2      564431  6400340252  26A4B9A2ED096E0B         33         0                 0              0      W     2019-09-09                     01     2019-09-12                     00                 3                  0   6400340252
3      564432  6400338455  062551800ED625B7         66         0            

In [6]:
# Join keys: the validation columns plus the normalized case ID, without the raw
# case ID and the patient ID.
cols_to_join = [*VALIDATION_COLS, NORM_CASE_ID_COL]
for excluded_col in (CASE_ID_COL, PATIENT_ID_COL):
    cols_to_join.remove(excluded_col)

# Left join: every revised case is kept, whether or not a DB case matches.
# Clashing column names coming from the DB frame get the '_db' suffix.
joined = df_revised_case_d2d.merge(
    cases_in_db,
    how='left',
    on=cols_to_join,
    suffixes=('', '_db'),
)

In [7]:
# Report how many rows found no DB counterpart: after the left join, such rows
# have a missing value in the aimedic ID column.
has_no_db_match = joined[AIMEDIC_ID_COL].isna()
unmatched = joined.loc[has_no_db_match]
num_unmatched = len(unmatched)
logger.info(f'{num_unmatched} rows could not be matched, given {sorted(cols_to_join)}')

2022-10-28 10:05:20.286 | INFO     | __main__:<module>:4 - 76 rows could not be matched, given ['age_years', 'case_id_norm', 'duration_of_stay', 'gender']


In [8]:
# Format the joined dataset into the SwissDRG Batchgrouper 2017 format.
# NOTE(review): `joined` is the result of a left join, so it still contains the
# revised cases without a DB match (missing aimedic ID) — confirm that
# `format_for_grouper` handles or drops these rows.

grouper_input_data_string = format_for_grouper(joined)

# Show the formatted result (the recorded output lists one ';'-separated line per case).
grouper_input_data_string

2022-10-28 10:05:21.087 | INFO     | src.revised_case_normalization.py.format_for_grouper:format_for_grouper:80 - Formatted 72 cases


0     567005;52;0.0;0.0|0.0;M;20191111;01;20191118;00;7;0.0;K432|T8611|Z940|D90|I2513|I1000|E784|Z855|...
1     567244;61;0.0;0.0|0.0;W;20191023;06;20191028;00;5;0.0;C712|G941|G936|G935|G819;015910::|012421::...
2     571629;81;0.0;0.0|0.0;W;20191102;06;20191113;00;11;0.0;G4533|U5010|I724|D62|I725|R31|I2511|Z955|...
3     569583;67;0.0;0.0|0.0;M;20191113;01;20191127;00;14;0.0;C679|C778|N132|E1191|S366|Y69|E871;577110...
4     568184;48;0.0;0.0|0.0;M;20191110;01;20191120;00;10;0.0;I7022|Z9588|Z921|T823|I652|I2511|I2522|Z9...
                                                     ...                                                 
67    574732;44;0.0;0.0|0.0;M;20190918;01;20190926;00;8;0.0;M511|G551|M9953|G573|B182|Z21|F4541|F321|U...
68    575252;56;0.0;0.0|0.0;M;20190923;01;20191025;00;32;0.0;C9000|G832|R202|M8955|M5413|Z857|I420|L30...
69    572959;90;0.0;0.0|0.0;M;20190903;01;20191003;00;30;0.0;K550|K631|K5730|Z9588|I2512|I2522|Z955|I1...
70    564903;84;0.0;0.0|0.0;M;20190810;01;2019

In [9]:
import subprocess

# Location of the grouper JAR, and the separator placed between cases when
# several cases are passed to BatchGroupMany in a single argument.
GROUPER_JAR_PATH = "/home/jovyan/work/resources/aimedic-grouper-assembly-0.0.0-SNAPSHOT.jar"
BATCH_CASE_DELIMITER = "#"


def run_grouper(main_class, *args):
    """Run one of the grouper's Java entry points and return its stdout as text.

    Parameters
    ----------
    main_class : str
        Fully qualified name of the Java class to run from the grouper JAR.
    *args : str
        Command-line arguments passed on to that class.

    Returns
    -------
    str
        Standard output of the Java process, decoded as UTF-8.

    Raises
    ------
    subprocess.CalledProcessError
        If the Java process exits with a non-zero status.
    """
    return subprocess.check_output(
        ["java", "-cp", GROUPER_JAR_PATH, main_class, *args]
    ).decode("utf-8")


# Hand-written batch lines kept for manual checks; either one can be passed to
# run_grouper in place of a generated line.
example_batch_line = """257678;49;0;;M;20190531;01;20190607;00;7;0;A46|I5014|I1100|I480|Z921|F102|E039|K746;::;"""

# add date to procedures
# change ; with | at one spot
test_df_line = """17722;93;;;M;20180315;01;20180317;00;2;0;I7024|Z9588|N184|Z922|I743;395011::|397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012::;"""

# Group a single case.
# NOTE(review): the class name is spelled "BatchGroupeOne"; it is kept as-is
# because it has to match the class inside the JAR.
grouper_result = run_grouper("ch.aimedic.grouper.BatchGroupeOne", grouper_input_data_string[6])

# Group several cases in one call: the cases are concatenated with the
# delimiter, and the delimiter itself is passed as the second argument.
cases_to_group = [str(grouper_input_data_string[case_idx]) for case_idx in (1, 6, 13)]
grouperResults = run_grouper(
    "ch.aimedic.grouper.BatchGroupMany",
    BATCH_CASE_DELIMITER.join(cases_to_group),
    BATCH_CASE_DELIMITER,
)

# NOTE(review): the recorded output reports DRG "960Z" with GST "INVALID_AGE"
# for all three cases. Presumably the generated lines deviate from the expected
# batch format (they contain e.g. "0.0" and "0.0|0.0" where example_batch_line
# has "0" and an empty field) — confirm against format_for_grouper.
print(grouperResults)

[{"DRG" : "960Z", "MDC" : "01", "PCCL" : 0, "GST" : "INVALID_AGE"}, {"DRG" : "960Z", "MDC" : "04", "PCCL" : 0, "GST" : "INVALID_AGE"}, {"DRG" : "960Z", "MDC" : "11", "PCCL" : 0, "GST" : "INVALID_AGE"}]


In [10]:
# TODO:
# - Add medication 
# - Add and compare CHOP codes for laterality (side) and procedure date