# Revised case normalization for KSSG Linth 2019

Notes:

- 15 of 15 revised DtoD cases were joined with the BFS cases from the DB.



In [1]:
import sys

import pandas as pd
import numpy as np

from loguru import logger

sys.path.insert(0, '/home/jovyan/work')

from src.revised_case_normalization.py.global_configs import *
from src.revised_case_normalization.py.normalize import normalize, remove_leading_zeros
from src.service import bfs_cases_db_service as bfs_db
from src.service.bfs_cases_db_service import session, get_sociodemographics_for_hospital_year, get_earliest_revisions_for_aimedic_ids, get_codes, apply_revisions
from src.revised_case_normalization.py.format_for_grouper import format_for_grouper

In [2]:
# Describe the revised-case workbook to process: file path, hospital name as
# used in the DB, year, and the sheet(s) holding the revised cases.
file_info = FileInfo(
    path=os.path.join(ROOT_DIR, 'raw_data/Linth_Toggenburg_SRRWS_2019.xlsx'),
    hospital_name_db='KSSG Linth',
    year='2019',
    sheets=['Änderungen_Spital_Linth_2019'],
)

print(file_info)

FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/Linth_Toggenburg_SRRWS_2019.xlsx', hospital_name_db='KSSG Linth', year='2019', sheets=['Änderungen_Spital_Linth_2019'])


In [3]:
# Read the revised cases described by file_info and normalize them into a DataFrame.
# NOTE(review): the second argument (0) presumably selects the first entry of
# file_info.sheets — confirm against normalize().
df_revised_case_d2d = normalize(file_info, 0)

2022-10-28 10:02:58.440 | INFO     | src.revised_case_normalization.py.normalize:normalize:35 - Read 15 cases for KSSG Linth 2019
2022-10-28 10:02:58.444 | INFO     | src.revised_case_normalization.py.normalize:normalize:57 - TYPES:
tranche                                string
datum                                  string
case_id                                object
patient_id                             object
kkik                                   string
gender                                 string
age_years                               int64
bfs_code                               string
duration_of_stay                        int64
pflegetage neu                         string
pccl                                    int64
pccl neu                               string
old_pd                                 string
primary_diagnosis                      string
added_icds                             string
removed_icds                           string
added_chops                    

In [4]:
# Preview the first rows of the normalized revised cases.
df_revised_case_d2d.head()

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,3108134918,DDB3BDB39DE3F695,M,49,7,3108134918,A46,A46,M100,3,J64C,[K746],[],[],[]
1,3108136936,1C6A9D7FAA9A7548,W,89,5,3108136936,K226,K226,M100,2,G46C,[N184],[D6833],[],[]
2,3108141419,69231F9C61534D42,M,69,10,3108141419,C183,C183,M100,2,G60B,[E43],[],[],[]
3,3150158985,29F13DDB90E1976D,W,70,2,3150158985,K922,K922,M100,3,G48C,"[D62, D684]",[D500],[],[]
4,3150164388,11721C6EB40D5BB5,M,78,5,3150164388,K250,K250,M100,2,G46C,[D62],[],[],[]


# Match revised DtoD data with the BFS data from the database

In [5]:
# Fetch the sociodemographic records stored in the DB for this hospital and year
# (the year is kept as a string in file_info, so it is converted to int here).
cases_in_db = get_sociodemographics_for_hospital_year(
    file_info.hospital_name_db,
    int(file_info.year),
)

# Add the case ID with leading zeros stripped; it is used as a join key further down.
cases_in_db[NORM_CASE_ID_COL] = (
    cases_in_db[CASE_ID_COL].apply(remove_leading_zeros)
)
print(cases_in_db.head())

2022-10-28 10:02:59.591 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:124 - Read 5110 rows from the DB, for the hospital 'KSSG Linth' in 2019


   aimedic_id     case_id        patient_id  age_years  age_days  admission_weight  gestation_age gender admission_date grouper_admission_type discharge_date grouper_discharge_type  duration_of_stay  ventilation_hours case_id_norm
0      255491  3108130426  FA4FEB649DF59BF5         64         0                 0              0      W     2019-02-02                     01     2019-02-08                     00                 6                  0   3108130426
1      255492  3150153245  6DEC4D4214A30CBC         18         0                 0              0      W     2019-02-04                     01     2019-02-06                     00                 2                  0   3150153245
2      255493  3108130687  7E86E2BF62F384CD         20         0                 0              0      M     2019-02-08                     01     2019-02-09                     00                 1                  0   3108130687
3      255494  3108131372  6BD39AF8128666A9         62         0            

In [6]:
# Join keys: the validation columns without the raw case ID and the patient ID,
# plus the normalized case ID.
cols_to_join = list(VALIDATION_COLS)
for excluded_col in (CASE_ID_COL, PATIENT_ID_COL):
    cols_to_join.remove(excluded_col)
cols_to_join.append(NORM_CASE_ID_COL)

# Merge cases in db with the revised cases.
# Left join: every revised case is kept; columns that exist in both frames but
# are not join keys get the '_db' suffix on the DB side.
joined = df_revised_case_d2d.merge(
    cases_in_db,
    how='left',
    on=cols_to_join,
    suffixes=('', '_db'),
)

joined

In [None]:
# Scratch cell: the incomplete expression `sys.` was removed because it raises a
# SyntaxError and prevents the notebook from running top to bottom.

In [7]:
# Log how many revised cases found no DB counterpart in the left join (their aimedic ID is missing)
unmatched = joined.loc[joined[AIMEDIC_ID_COL].isna()]
num_unmatched = len(unmatched)
logger.info(f'{num_unmatched} rows could not be matched, given {sorted(cols_to_join)}')

2022-10-28 10:02:59.621 | INFO     | __main__:<module>:4 - 0 rows could not be matched, given ['age_years', 'case_id_norm', 'duration_of_stay', 'gender']


In [8]:
# Format the joined dataset to the SwissDRG Batchgrouper 2017 format.
# The result is indexed by position in a later cell, where individual entries
# are passed as arguments to the Java grouper.

grouper_input_data_string = format_for_grouper(joined)

# Display the formatted grouper input lines.
grouper_input_data_string

2022-10-28 10:03:00.200 | INFO     | src.revised_case_normalization.py.format_for_grouper:format_for_grouper:81 - Formatted 15 cases


0                 257678;49;0;;M;20190531;01;20190607;00;7;0;A46|I5014|I1100|I480|Z921|F102|E039|K746;::;
1     257737;89;0;;W;20190718;01;20190723;00;5;0;K226|K254|I480|I1190|I2519|Z954|Z950|E780|F329|M8098|...
2     260484;69;0;;M;20191023;01;20191102;00;10;0;C183|Z850|C772|C786|I1190|I2512|Z955|E789|E538|E43;4...
3     259560;70;0;;W;20190618;01;20190620;06;2;0;K922|E1190|I1090|E039|E788|R571|D62|D684;990410::|451...
4     257321;78;0;;M;20191214;01;20191219;00;5;0;K250|D6835|T810|Y849|N1781|N182|I2519|I480|Z955|Z950|...
5     257673;83;0;;M;20190626;01;20190702;00;6;0;K565|S3640|Y69|T855|Y828|R060|I2511|Z951|Z953|Z9588|E...
6     259149;77;0;;W;20191121;01;20191231;00;40;0;T818|Y849|T810|L308|I2519|Z955|E788|I5019|Z854|D62|R...
7     257691;84;0;;M;20190401;01;20190410;00;9;0;T846|Y828|B957|B968|J984|I480|Z921|N183|N40|M8098|E44...
8     258973;97;0;;W;20190719;01;20190724;00;5;0;M6285|Y349|D62|E440|E559|E538|F03|I1090|D6835|F058;83...
9     255998;70;0;;W;20190919;01;20190924;00;5

In [9]:
import subprocess


# Sample batch-grouper input lines, kept for manual experiments.
example_batch_line = """257678;49;0;;M;20190531;01;20190607;00;7;0;A46|I5014|I1100|I480|Z921|F102|E039|K746;::;"""

# add date to procedures
# change ; with | at one spot
test_df_line = """17722;93;;;M;20180315;01;20180317;00;2;0;I7024|Z9588|N184|Z922|I743;395011::|397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012::;"""

# Shared prefix of both grouper invocations: the JVM launcher with the grouper JAR on the classpath.
java_with_grouper_jar = [
    "java",
    "-cp",
    "/home/jovyan/work/resources/aimedic-grouper-assembly-0.0.0-SNAPSHOT.jar",
]

# Group a single case (entry 6 of the formatted input).
# test_df_line can be substituted for the last argument when experimenting.
single_case_cmd = java_with_grouper_jar + [
    "ch.aimedic.grouper.BatchGroupeOne",
    grouper_input_data_string[6],
]
grouper_result = subprocess.check_output(single_case_cmd).decode("utf-8")


# Group several cases in one call: the cases are concatenated with '#' and '#'
# is passed as the last argument (presumably the case delimiter — confirm
# against the grouper's BatchGroupMany entry point).
many_cases_cmd = java_with_grouper_jar + [
    "ch.aimedic.grouper.BatchGroupMany",
    f'{grouper_input_data_string[1]}#{grouper_input_data_string[6]}#{grouper_input_data_string[13]}',
    "#",
]
grouperResults = subprocess.check_output(many_cases_cmd).decode("utf-8")

print(grouperResults)

[{"DRG" : "G48C", "MDC" : "06", "PCCL" : 3, "GST" : "NORMAL_GROUP"}, {"DRG" : "X62A", "MDC" : "21B", "PCCL" : 4, "GST" : "NORMAL_GROUP"}, {"DRG" : "H63B", "MDC" : "07", "PCCL" : 4, "GST" : "NORMAL_GROUP"}]


In [10]:
# TODO:
# - Add medication 
# - Add and compare CHOP codes for sidedness (laterality) and procedure date