# Revised case normalization for KSSG SRRWS 2019

Notes:

- 17 of 17 revised DtoD cases were joined with the BFS cases from the DB.



In [1]:
import sys

import pandas as pd
import numpy as np

from loguru import logger

sys.path.insert(0, '/home/jovyan/work')

from src.revised_case_normalization.py.global_configs import *
from src.revised_case_normalization.py.normalize import normalize, remove_leading_zeros
from src.service import bfs_cases_db_service as bfs_db
from src.service.bfs_cases_db_service import session, get_sociodemographics_for_hospital_year, get_earliest_revisions_for_aimedic_ids, get_codes, apply_revisions
from src.revised_case_normalization.py.format_for_grouper import format_for_grouper

In [2]:
# Describe the revision workbook to process: its location on disk, the hospital
# name as stored in the DB, the year, and the sheet(s) to read.
file_info = FileInfo(
    path=os.path.join(ROOT_DIR, 'raw_data/Linth_Toggenburg_SRRWS_2019.xlsx'),
    hospital_name_db='KSSG SRRWS',
    year='2019',
    sheets=['Änderungen SRRWS_2019'],
)

print(file_info)

FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/Linth_Toggenburg_SRRWS_2019.xlsx', hospital_name_db='KSSG SRRWS', year='2019', sheets=['Änderungen SRRWS_2019'])


In [3]:
# Read the revised cases described by file_info and normalize them into a DataFrame.
# NOTE(review): the meaning of the second positional argument (0) is not visible
# here — presumably an index into file_info.sheets; confirm against normalize().
df_revised_case_d2d = normalize(file_info, 0)

2022-10-28 12:30:17.304 | INFO     | src.revised_case_normalization.py.normalize:normalize:35 - Read 48 cases for KSSG SRRWS 2019
2022-10-28 12:30:17.307 | INFO     | src.revised_case_normalization.py.normalize:normalize:57 - TYPES:
tranche                   string
case_id                   object
patient_id                object
kkik                      string
gender                    string
age_years                  int64
bfs_code                  string
duration_of_stay           int64
pflegetage neu            string
pccl                       int64
pccl neu                  string
old_pd                    string
primary_diagnosis         string
added_icds                string
removed_icds              string
added_chops               string
removed_chops             string
drg                       string
drg neu                   string
cw alt                    string
cw neu                    string
cw-änderung möglich       string
cw änderung akzeptiert    string
kommenta

In [4]:
# Preview the first rows of the normalized revised cases.
df_revised_case_d2d.head()

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,2010216467,971401A627916522,W,67,16,2010216467,M7285,M7285,M200,2,I27D,"[D62, L0311]",[D648],[],[]
1,2010230894,0FD7D457C22B4ECD,M,75,10,2010230894,S510,S510,M100,3,X06B,[R18],[],[],[]
2,2010233003,D6D96CE0A68739FB,W,70,14,2010233003,K432,K432,M200,3,G21B,[D62],[D649],[],[]
3,2010234179,FB8869E03D85B3F5,M,59,7,2010234179,K868,K868,M100,3,H62B,[I5014],[I509],[],[]
4,2070263179,A8239CB0553D4656,W,79,8,2070263179,I441,I441,M100,3,F12D,[J9580],[],[],[]


# Match revised DtoD data with the BFS data from the database

In [5]:
# Load the sociodemographic rows stored in the DB for this hospital and year.
hospital_year_cases = get_sociodemographics_for_hospital_year(
    file_info.hospital_name_db,
    int(file_info.year),
)

# Add the normalized case ID column, derived by applying remove_leading_zeros to
# every case ID; it is used as a join key against the revised cases.
cases_in_db = hospital_year_cases.assign(
    **{NORM_CASE_ID_COL: hospital_year_cases[CASE_ID_COL].apply(remove_leading_zeros)}
)
print(cases_in_db.head())

cases_in_db.dtypes

2022-10-28 12:30:18.450 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:124 - Read 15005 rows from the DB, for the hospital 'KSSG SRRWS' in 2019


   aimedic_id     case_id        patient_id  age_years  age_days  admission_weight  gestation_age gender admission_date grouper_admission_type discharge_date grouper_discharge_type  duration_of_stay  ventilation_hours case_id_norm
0      297259  2070262755  8B44BDC832C5451F         65         0                 0              0      M     2019-01-09                     01     2019-01-12                     00                 3                  0   2070262755
1      297260  2010215646  81890D5F20771E3C         80         0                 0              0      W     2019-01-09                     01     2019-02-04                     00                26                  0   2010215646
2      297261  2070264919  E6441AAE25BA49E6         82         0                 0              0      W     2019-02-01                     01     2019-02-05                     00                 4                  0   2070264919
3      297262  2070266822  2EAC70CD7BFA4C05         62         0            

aimedic_id                 int64
case_id                   object
patient_id                object
age_years                  int64
age_days                   int64
admission_weight           int64
gestation_age              int64
gender                    object
admission_date            object
grouper_admission_type    object
discharge_date            object
grouper_discharge_type    object
duration_of_stay           int64
ventilation_hours          int64
case_id_norm              object
dtype: object

In [9]:
# Build the join keys: start from the validation columns, drop the raw case ID and
# the patient ID, and join on the normalized case ID instead.
cols_to_join = [*VALIDATION_COLS]
for excluded_col in (CASE_ID_COL, PATIENT_ID_COL):
    cols_to_join.remove(excluded_col)
cols_to_join.append(NORM_CASE_ID_COL)

# Merge cases in db with the revised cases.
# A left join keeps every revised case; overlapping non-key columns coming from
# the DB side get the '_db' suffix.
joined = df_revised_case_d2d.merge(
    cases_in_db,
    how='left',
    on=cols_to_join,
    suffixes=('', '_db'),
)


aimedic_id                 int64
case_id                   object
patient_id                object
age_years                  int64
age_days                   int64
admission_weight           int64
gestation_age              int64
gender                    object
admission_date            object
grouper_admission_type    object
discharge_date            object
grouper_discharge_type    object
duration_of_stay           int64
ventilation_hours          int64
case_id_norm              object
dtype: object

In [None]:
# Log how many revised cases found no DB counterpart: after the left join, such
# rows have no value in the aimedic ID column.
is_unmatched = joined[AIMEDIC_ID_COL].isna()
unmatched = joined[is_unmatched]
num_unmatched = len(unmatched)
logger.info(f'{num_unmatched} rows could not be matched, given {sorted(cols_to_join)}')


In [None]:
# Format the joined dataset into the SwissDRG Batchgrouper 2017 format.
# NOTE(review): despite the "_string" suffix, the result is indexed by position
# further down (e.g. [6]) — presumably one formatted line per case; confirm
# against format_for_grouper().

grouper_input_data_string = format_for_grouper(joined)


# Show the formatted grouper input as the cell output.
grouper_input_data_string

In [None]:
import subprocess


# Sample batch-grouper input lines kept for manual experiments. They are not passed
# to the grouper below; either can be substituted for a formatted case when debugging.
example_batch_line = """257678;49;0;;M;20190531;01;20190607;00;7;0;A46|I5014|I1100|I480|Z921|F102|E039|K746;::;"""

# add date to procedures
# change ; with | at one spot
test_df_line = """17722;93;;;M;20180315;01;20180317;00;2;0;I7024|Z9588|N184|Z922|I743;395011::|397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012::;"""

# Location of the grouper JAR used by every invocation below.
GROUPER_JAR = "/home/jovyan/work/resources/aimedic-grouper-assembly-0.0.0-SNAPSHOT.jar"


def _run_grouper(main_class, *arguments):
    """Run `main_class` from the grouper JAR via `java -cp` and return its stdout.

    `arguments` are appended to the command line unchanged. The output is decoded
    as UTF-8. `subprocess.check_output` raises `CalledProcessError` when the
    process exits with a non-zero status.
    """
    command = ["java", "-cp", GROUPER_JAR, main_class, *arguments]
    return subprocess.check_output(command).decode("utf-8")


# Group a single case.
# NOTE(review): "BatchGroupeOne" is spelled differently from "BatchGroupMany" —
# confirm the class name against the JAR.
grouper_result = _run_grouper(
    "ch.aimedic.grouper.BatchGroupeOne",
    grouper_input_data_string[6],
)

# Group several cases in one call: the cases are concatenated with '#', and the
# same '#' is passed as a separate argument (presumably the delimiter — confirm).
grouperResults = _run_grouper(
    "ch.aimedic.grouper.BatchGroupMany",
    f'{grouper_input_data_string[1]}#{grouper_input_data_string[6]}#{grouper_input_data_string[13]}',
    "#",
)

print(grouperResults)

In [None]:
# TODO:
# - Add medication 
# - Add and compare CHOP codes for laterality (side) and procedure date