# Revised case normalization for St. Anna 2019

Notes:

- 17 of 17 revised DtoD cases were joined with the BFS cases from the DB.



In [1]:
import sys

import pandas as pd
import numpy as np

from loguru import logger

sys.path.insert(0, '/home/jovyan/work')

from src.revised_case_normalization.py.global_configs import *
from src.revised_case_normalization.py.normalize import normalize, remove_leading_zeros
from src.service import bfs_cases_db_service as bfs_db
from src.service.bfs_cases_db_service import session, get_sociodemographics_for_hospital_year, get_earliest_revisions_for_aimedic_ids, get_codes, apply_revisions
#from src.revised_case_normalization.py.format_for_grouper import format_for_grouper

In [2]:
file_info = FileInfo(
        os.path.join(ROOT_DIR, 'raw_data/HI_Aarau_Birshof_ST. Anna.xlsx'),
        'Hirslanden St. Anna',
        '2018',
        ['KOPIE_Änderungen_ST. Anna_2018'])

print(file_info)

FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/HI_Aarau_Birshof_ST. Anna.xlsx', hospital_name_db='Hirslanden St. Anna', year='2018', sheets=['KOPIE_Änderungen_ST. Anna_2018'])


In [3]:
df_revised_case_d2d = normalize(file_info, 0)

2022-11-02 14:21:57.388 | INFO     | src.revised_case_normalization.py.normalize:normalize:35 - Read 51 cases for Hirslanden St. Anna 2018
2022-11-02 14:21:57.395 | INFO     | src.revised_case_normalization.py.normalize:normalize:57 - TYPES:
tranche                   string
datum                     string
case_id                   object
patient_id                object
gender                    string
age_years                  int64
bfs_code                  string
duration_of_stay           int64
pflegetage neu            string
pccl                       int64
pccl neu                  string
old_pd                    string
primary_diagnosis         string
added_icds                string
removed_icds              string
added_chops               string
removed_chops             string
drg                       string
drg neu                   string
cw alt                    string
cw neu                    string
cw-änderung möglich       string
cw änderung akzeptiert    string

In [4]:
df_revised_case_d2d

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,9358410,EB395DA315D5B285,W,79,9,9358410,M169,M169,M200,3,I46C,[D62],[D648],[],[]
1,9364697,6586D083924C0E99,W,68,16,9364697,M4806,M4806,M200,3,I09C,[E1191],[E1190],[],[]
2,9366989,9FBF3AE4CEA4A950,M,27,16,9366989,M511,M511,M200,3,I10C,[B956],[],[],[]
3,9371704,FDE409238A9BEC4F,M,65,13,9371704,I5001,I5001,M100,3,F49C,[N183],[],[],[]
4,9375929,036F0863E37E3493,M,74,18,9375929,M4806,M4806,M200,3,I27C,[T846],[T844],[],[]
5,9377457,B172FAC0D995D8A0,M,77,5,9377457,I5000,I5000,M100,3,F62C,"[K746, E870]",[],[],[]
6,9381325,E3D0603E52140F32,W,55,15,9381325,J4410,J4410,M100,3,E65B,[J9610],[],[],[]
7,9382082,4ED15568FB7BDE9E,M,82,8,9382082,J100,J100,M100,3,E77E,[E870],[],[],[]
8,9384055,44197C8BC8569771,W,87,14,9384055,S8180,S8180,M200,3,X06B,[E871],[],[],[]
9,9391002,8BD23E59AF760596,M,64,14,9391002,J128,J128,M100,3,E77E,[C9000],[],[],[]


# Match revised DtoD data with the BfS data from the database

In [5]:
cases_in_db = get_sociodemographics_for_hospital_year(file_info.hospital_name_db, int(file_info.year))
cases_in_db[NORM_CASE_ID_COL] = cases_in_db[CASE_ID_COL].apply(remove_leading_zeros)
print(cases_in_db.head())

cases_in_db.dtypes

2022-11-02 14:21:59.026 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:125 - Read 12990 rows from the DB, for the hospital 'Hirslanden St. Anna' in 2018


   aimedic_id     case_id        patient_id  age_years  age_days  admission_weight  gestation_age gender admission_date grouper_admission_type discharge_date grouper_discharge_type  duration_of_stay  ventilation_hours case_id_norm
0      138498  0009264434  E9E14AD826B70BFA         59         0                 0              0      M     2018-03-14                     01     2018-03-17                     00                 3                  0      9264434
1      138499  0009268866  0EE461E7ED376814         58         0                 0              0      M     2018-01-11                     01     2018-01-16                     00                 5                  0      9268866
2      138500  0009286976  AD0FB688816E4930         49         0                 0              0      W     2018-01-10                     01     2018-01-11                     00                 1                  0      9286976
3      138501  0009306256  8AA6481F39077A85         29         0            

aimedic_id                 int64
case_id                   object
patient_id                object
age_years                  int64
age_days                   int64
admission_weight           int64
gestation_age              int64
gender                    object
admission_date            object
grouper_admission_type    object
discharge_date            object
grouper_discharge_type    object
duration_of_stay           int64
ventilation_hours          int64
case_id_norm              object
dtype: object

In [6]:
cols_to_join = list(VALIDATION_COLS)
cols_to_join.remove(CASE_ID_COL)
cols_to_join.append(NORM_CASE_ID_COL)

cols_to_join.remove(PATIENT_ID_COL)

# Merge cases in db with the revised cases

joined = pd.merge(df_revised_case_d2d, cases_in_db,
                  how='left',
                  on=cols_to_join,
                  suffixes=('', '_db'))

In [7]:

# Select cases in which the aimedic_id is not an NA
joined = joined[joined['aimedic_id'].notna()]
joined = joined.replace(np.nan, "")
joined['aimedic_id'] = joined['aimedic_id'].astype(int)

# set type of age_days, admission_weight and gestation_age to integer (to avoid float format)
joined['age_days'] = joined['age_days'].astype(int)
joined['admission_weight'] = joined['admission_weight'].astype(int)
joined['gestation_age'] = joined['gestation_age'].astype(int)

# Formatting baby data

joined["baby_data"] = joined['admission_weight'].map(str) + "|" + joined['gestation_age'].map(str)
joined["baby_data"] = joined["baby_data"].replace("0|0", "")

# Extract grouper relevant columns
joined = joined[
        GROUPER_INPUT_BFS + [NEW_PRIMARY_DIAGNOSIS_COL, ADDED_ICD_CODES, REMOVED_ICD_CODES, ADDED_CHOP_CODES,
                             REMOVED_CHOP_CODES]]

# Format admission_date and discharge_date the (SwissDRG Batchgrouper Format 2017 (YYYYMMDD)
joined['admission_date'] = joined['admission_date'].astype(str).str.replace("-", "")
joined['discharge_date'] = joined['discharge_date'].astype(str).str.replace("-", "")

original_revision_ids = get_earliest_revisions_for_aimedic_ids(joined[AIMEDIC_ID_COL].values.tolist())
original_cases = get_codes(original_revision_ids)


In [8]:
## apply_revisions function

joined = pd.merge(original_cases, joined, on=AIMEDIC_ID_COL, how='left')

joined['secondary_procedures_split'] = joined['secondary_procedures'].map(str).str.replace("'","") 

def _split_chop_codes(codes_list: list[str]) -> list[list[str]]:
    """From a list of CHOP codes, which are formatted as '<code>:<side>:<date>', split them into their components.

    @param codes_list: The list of CHOP codes.
    @return: A list of the info for each code, split into strings.
    """
    return [code_with_colons.split(':') for code_with_colons in codes_list]

joined['secondary_procedures_split'] = _split_chop_codes(joined['secondary_procedures_split'])

joined

Unnamed: 0,aimedic_id,revision_id,old_pd,secondary_diagnoses,primary_procedure,secondary_procedures,age_years,age_days,baby_data,gender,admission_date,grouper_admission_type,discharge_date,grouper_discharge_type,duration_of_stay,ventilation_hours,primary_diagnosis,added_icds,removed_icds,added_chops,removed_chops,secondary_procedures_split
0,138741,138741,M169,"[I2513, Z950, Z955, I420, E210, ...]",815111:L:20180201,"[009940::20180201, 009960::20180201, 9900::20180201, 948X40::20180202, 990410::20180207]",79,0,,W,20180201,1,20180210,0,9,0,M169,[D62],[D648],[],[],"[[009940, , 20180201, 009960, , 20180201, 9900, ...]"
1,138969,138969,M4806,"[G553, T844, M4786, M513, G961, ...]",8138::20180115,"[816399::20180115, 030935::20180115, 036::20180115, 030991::20180115, 845110::20180115, ...]",68,0,,W,20180114,1,20180130,0,16,0,M4806,[E1191],[E1190],[],[],"[[816399, , 20180115, 030935, , 20180115, 036, ...]"
2,139157,139157,M511,"[G9780, T814, G9788, T813, G474, ...]",862A2E::20171225,"[035913::20171225, 030934::20171225, 848199::20171225, 99221A::20171225, 99B812::20171225, ...]",27,0,,M,20171225,1,20180110,0,16,0,M511,[B956],[],[],[],"[[035913, , 20171225, 030934, , 20171225, 848199, ...]"
3,139683,139683,I5001,"[I5019, I2519, Z955, I2522, I340, ...]",8856::20180122,"[887221::20180118, 3723::20180122, 8854::20180122, 393013:R:20180122]",65,0,,M,20180112,1,20180125,0,13,0,I5001,[N183],[],[],[],"[[887221, , 20180118, 3723, , 20180122, 8854, ...]"
4,140167,140167,M4806,"[G553, I1090, T844, Y828, B957]",848010::20180327,"[030991::20180327, 030934::20180327, 009920::20180327, 862A2E::20180413, 832121::20180413, ...]",74,0,,M,20180327,1,20180428,0,18,0,M4806,[T846],[T844],[],[],"[[030991, , 20180327, 030934, , 20180327, 009920, ...]"
5,140323,140323,I5000,"[I425, Z950, Z921, N183, J4489]",893909::20180127,[],77,0,,M,20180127,1,20180201,0,5,0,I5000,"[K746, E870]",[],[],[],[[]]
6,140730,140730,J4410,"[J101, F328, K219, D508]",998426::20180207,"[948X40::20180212, 992909::20180212]",55,0,,W,20180207,6,20180222,0,15,0,J4410,[J9610],[],[],[],"[[948X40, , 20180212, 992909, , 20180212]]"
7,140838,140838,J100,"[J91, J9600, B023, I2519, I489, ...]",992218::20180212,[998425::20180212],82,0,,M,20180212,1,20180220,0,8,0,J100,[E870],[],[],[],"[[998425, , 20180212]]"
8,141112,141112,S8180,"[X599, L908, E440, F328, I1090]",867A9E:L:20180219,"[9938::20180219, 890A32::20180223]",87,0,,W,20180219,1,20180305,0,14,0,S8180,[E871],[],[],[],"[[9938, , 20180219, 890A32, , 20180223]]"
9,141794,141794,J128,"[J9600, K528, Y579, E871, G6288, ...]",998425::20180308,"[992217::20180309, 890A11::20180314]",64,0,,M,20180308,1,20180322,0,14,0,J128,[C9000],[],[],[],"[[992217, , 20180309, 890A11, , 20180314]]"


In [9]:
sys.exit()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# review apply_revisions Function

joined = pd.merge(original_cases, joined, on=AIMEDIC_ID_COL, how='left')

joined.head()


#revised_codes = list(joined[SECONDARY_PROCEDURES_COL])
#revised_codes
#        for code_to_add in joined[ADDED_CHOP_CODES]:
#            revised_codes.append(code_to_add)

#        for code_to_remove in joined[REMOVED_CHOP_CODES]:
            # We need to check whether the code is present in this list, because it may appear as primary procedure
#            if code_to_remove in revised_codes:
#                revised_codes.remove(code_to_remove)

#        joined[SECONDARY_PROCEDURES_COL] = revised_codes
#        return joined
    
#joined = joined.apply(revise_secondary_procedure_codes, axis=1)
#revised_cases = joined[[AIMEDIC_ID_COL, NEW_PRIMARY_DIAGNOSIS_COL, SECONDARY_DIAGNOSES_COL, PRIMARY_PROCEDURE_COL, SECONDARY_PROCEDURES_COL]]

In [None]:
sys.exit()

In [None]:
# Print out how many rows could not be matched
unmatched = joined[joined[AIMEDIC_ID_COL].isna()]
num_unmatched = unmatched.shape[0]
logger.info(f'{num_unmatched} rows could not be matched, given {sorted(cols_to_join)}')


In [None]:
# Format joined dataset to the SwissDRG Batchgrouper 2017 Format 

grouper_input_data_string = format_for_grouper(joined)


grouper_input_data_string[17]

In [None]:
import subprocess


example_batch_line = """257678;49;0;;M;20190531;01;20190607;00;7;0;A46|I5014|I1100|I480|Z921|F102|E039|K746;::;"""

# add date to procedures
# change ; with | at one spot
test_df_line = """17722;93;;;M;20180315;01;20180317;00;2;0;I7024|Z9588|N184|Z922|I743;395011::|397511::|395021::|397510::|0042::|004B18::|004B1A::|884911::|005599::|0046::|393012::;"""

grouper_result = subprocess.check_output(["java",
                                "-cp",
                                "/home/jovyan/work/resources/aimedic-grouper-assembly-0.0.0-SNAPSHOT.jar",
                                "ch.aimedic.grouper.BatchGroupeOne",
                                grouper_input_data_string[6]
                                #test_df_line,
                                ]).decode("utf-8")


grouperResults = subprocess.check_output(["java",
                                "-cp",
                                "/home/jovyan/work/resources/aimedic-grouper-assembly-0.0.0-SNAPSHOT.jar",
                                "ch.aimedic.grouper.BatchGroupMany",
                                f'{grouper_input_data_string[17]}#{grouper_input_data_string[6]}#{grouper_input_data_string[13]}',
                                "#"
                                #test_df_line,
                                ]).decode("utf-8")

print(grouperResults)

In [None]:
# TODO:
# - Add medication 
# - Add and compare CHOP Codes for sideness and procedure date