# Revised case normalization for USZ 2019

Notes:

- 17 of 17 revised DtoD cases were joined with the BfS cases from the DB. (TODO: verify this count; the log output below reports 150 cases read for USZ 2019.)



In [1]:
import sys
sys.path.insert(0, '/home/jovyan/work')
from loguru import logger
import pandas as pd
from src.service.aimedic_grouper import group_batch_group_cases

from src.revised_case_normalization.notebook_functions.global_configs import *
from src.revised_case_normalization.notebook_functions.normalize import normalize
from src.revised_case_normalization.notebook_functions.revise import revise
from src.revised_case_normalization.notebook_functions.group import group
from src.revised_case_normalization.notebook_functions.update_db import update_db

In [2]:
# Describe the Excel sheet holding the revised cases: file path, hospital name, year and sheet name
raw_data_path = os.path.join(ROOT_DIR, 'raw_data/USZ_2018-2019_20200730.xlsx')
file_info = FileInfo(raw_data_path, 'USZ', '2019', 'Gesamtauffällige_USZ_2019')

print(file_info)

FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/USZ_2018-2019_20200730.xlsx', hospital_name_db='USZ', year='2019', sheet='Gesamtauffällige_USZ_2019')


In [3]:
# Read the sheet described by `file_info` and normalize it; the trailing expression displays the result
revised_cases_df = normalize(file_info)
revised_cases_df

2022-11-09 16:54:54.498 | INFO     | src.revised_case_normalization.notebook_functions.normalize:normalize:35 - Read 150 cases for USZ 2019
2022-11-09 16:54:54.502 | INFO     | src.revised_case_normalization.notebook_functions.normalize:normalize:59 - TYPES:
datum/ lieferung     string
case_id              object
patient_id           object
kkik                 string
gender               string
age_years             int64
bfs_code             string
duration_of_stay      int64
pflegetage neu       string
pccl                  int64
pccl neu             string
old_pd               string
primary_diagnosis    string
added_icds           string
removed_icds         string
added_chops          string
removed_chops        string
drg                  string
drg neu              string
cw alt               string
cw neu               string
cw-änderung          string
kommentar            string
case_id_norm         object
dtype: object
2022-11-09 16:54:54.513 | INFO     | src.utils.datafram

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,6400282213,3ED8F10117FCF5F5,M,54,13,6400282213,J4400,J4400,M200,3,E06A,[F058],[],[],[]
1,6400287076,55C5A17F71DADEA0,M,62,5,6400287076,J068,J068,M800,3,D62C,"[I8028, C770, C787]",[],[],[]
2,6400306817,0150AC67E3196B63,W,74,36,6400306817,R11,M8088,M100,3,G72C,[R64],[],[],[]
3,6400316927,153929A139272585,W,79,6,6400316927,G3531,G3531,M100,3,B68C,[R471],[],[],[]
4,6400278895,014A89BB816D9215,W,71,44,6400278895,T827,T827,M200,3,F75D,[D508],[D648],[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,6400305282,B2EFB1C7F864D6BE,M,54,4,6400305282,I1001,R522,M100,2,F67B,[],[],[],[]
146,6400306506,2BD4038A71FFAFCE,M,51,7,6400306506,C61,A410,M200,2,M60B,"[R651, C795, C774, N184]",[],[],[]
147,6400307264,45BD6CAFB4B36938,W,84,14,6400307264,L8924,L8924,M200,2,J02B,[B965],[],[],[]
148,6400308115,CC100ED2DB478B7E,M,46,8,6400308115,K830,K830,M100,2,H64B,[K831],[],[5185::20190610],[]


# Match revised DtoD data with the BfS data from the database

In [4]:
# Validate on the default columns, except:
# - the raw case ID, which is replaced by the normalized case ID
# - the patient ID, which is ignored in this dataset because it is encrypted
cols_to_join = list(VALIDATION_COLS)
for excluded_col in (CASE_ID_COL, PATIENT_ID_COL):
    cols_to_join.remove(excluded_col)
cols_to_join.append(NORM_CASE_ID_COL)

revise_result = revise(file_info, revised_cases_df, validation_cols=cols_to_join)
revised_cases, unmatched = revise_result
revised_cases.head()

2022-11-09 16:54:57.739 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:90 - Read 14430 rows from the DB, for the hospital 'USZ' in 2019


Unnamed: 0,aimedic_id,case_id,primary_diagnosis,secondary_diagnoses,primary_procedure,secondary_procedures,gender,age_years,age_days,gestation_age,duration_of_stay,ventilation_hours,grouper_admission_type,admission_date,admission_weight,grouper_discharge_type,discharge_date
0,340174,6400339053,K768,"[E854, D62, E875, D638, N1783, ...]",502311::20191115,"[99B812::20191116, 99C121::20191125, 990410::20191115, 8847::20191114, 880110::20191114, ...]",W,76,0,0,13,0,6,2019-11-14,0,0,2019-11-27
1,340178,6400340009,J702,"[C342, Z9680, J459, E891, B965]",874199::20191225,[998425::20191227],M,77,0,0,5,0,1,2019-12-24,0,0,2019-12-29
2,340424,6400343280,J690,"[F051, F03, R33, G408, E038, ...]",99C124::20191129,[9915::20191129],M,84,0,0,8,0,1,2019-11-22,0,7,2019-11-30
3,340455,6400343130,J101,"[D7011, T8609, Z9480, Y849, C9200]",998426::20191119,[],M,56,0,0,7,0,1,2019-11-18,0,0,2019-11-25
4,340641,6400314953,M5416,"[R33, G6288, Z4500, E1191, I1190, ...]",99B810::20190810,"[99C122::20190822, 8703::20190810, 883850::20190810, 883840::20190812, 891520::20190813, ...]",M,84,0,0,13,0,1,2019-08-10,0,0,2019-08-23


In [5]:
# A bare expression nested inside an `if` block is not rendered by the notebook (only the last
# top-level expression of a cell is), so the unmatched cases have to be displayed explicitly.
if unmatched.shape[0] > 0:
    display(unmatched)

In [6]:
def format_for_grouper_one_case(row: pd.Series) -> pd.Series:
    """Format a single case for the SwissDRG batch grouper.

    This function is applied row by row to the revised cases in function 'format_for_grouper'.
    Documentation on the grouper format: https://grouper-docs.swissdrg.org/batchgrouper2017-format.html

    The generated line consists of 15 fields joined with ';' (i.e. 14 separators), in this order:
    aimedic ID, case ID, age in years, age in days, baby data, gender, admission date, admission type,
    discharge date, discharge type, duration of stay, ventilation hours, diagnoses, procedures, medications.

    - The baby data is 'admission_weight|gestation_age', or empty if both values are 0.
    - The dates are the string form of the date columns with the dashes removed (2019-11-14 -> 20191114).
    - Diagnoses and procedures are the primary code followed by the secondary codes, joined with '|'.
      The primary diagnosis is read from NEW_PRIMARY_DIAGNOSIS_COL.
    - The medications field is always left empty.

    @param row: One case; it must contain all the columns read below, with the secondary diagnoses and
        procedures stored as iterables of strings.
    @return: A copy of the row with the grouper-formatted line stored under GROUPER_FORMAT_COL.
        The row passed in is left untouched.
    """
    # Going through int() makes sure that the numeric fields are written without decimals
    aimedic_id = int(row[AIMEDIC_ID_COL])
    case_id = int(row[CASE_ID_COL])

    age_years = int(row[AGE_COL])
    age_days = int(row[AGE_DAYS_COL])
    admission_weight = int(row[ADMISSION_WEIGHT_COL])
    gestation_age = int(row[GESTATION_AGE_COL])

    duration_of_stay = int(row[DURATION_OF_STAY_COL])
    ventilation_hours = int(row[VENTILATION_HOURS_COL])

    # The baby-data field stays empty unless a weight or a gestation age is recorded
    if admission_weight == 0 and gestation_age == 0:
        baby_data = ''
    else:
        baby_data = f'{admission_weight}|{gestation_age}'

    gender = row[GENDER_COL]

    admission_date = str(row[ADMISSION_DATE_COL]).replace("-", "")
    admission_type = row[ADMISSION_TYPE_COL]
    discharge_date = str(row[DISCHARGE_DATE_COL]).replace("-", "")
    discharge_type = row[DISCHARGE_TYPE_COL]

    primary_procedure = row[PRIMARY_PROCEDURE_COL]
    secondary_procedures = '|'.join(row[SECONDARY_PROCEDURES_COL])
    procedures = f'{primary_procedure}|{secondary_procedures}'

    primary_diagnosis = str(row[NEW_PRIMARY_DIAGNOSIS_COL])
    secondary_diagnoses = '|'.join(row[SECONDARY_DIAGNOSES_COL])
    diagnoses = f'{primary_diagnosis}|{secondary_diagnoses}'

    medications = ''

    grouper_fields = [str(aimedic_id), str(case_id), str(age_years), str(age_days), baby_data, gender,
                      admission_date, admission_type, discharge_date, discharge_type,
                      str(duration_of_stay), str(ventilation_hours),
                      diagnoses, procedures, medications]

    # Write into a copy: functions passed to DataFrame.apply must not mutate the object they receive
    formatted_row = row.copy()
    formatted_row[GROUPER_FORMAT_COL] = ';'.join(grouper_fields)
    return formatted_row


In [7]:

def format_for_grouper(revised_cases: pd.DataFrame) -> pd.DataFrame:
    """Format all the revised cases for the SwissDRG grouper.

    @param revised_cases: The revised cases, one case per row.
    @return: The dataframe obtained by applying 'format_for_grouper_one_case' to each row of the
        revised cases.
    """
    return revised_cases.apply(format_for_grouper_one_case, axis=1)

In [8]:
# Apply the grouper formatting to every revised case and display the resulting dataframe
grouper_data = format_for_grouper(revised_cases)
grouper_data

Unnamed: 0,aimedic_id,case_id,primary_diagnosis,secondary_diagnoses,primary_procedure,secondary_procedures,gender,age_years,age_days,gestation_age,duration_of_stay,ventilation_hours,grouper_admission_type,admission_date,admission_weight,grouper_discharge_type,discharge_date,batchgrouper_format
0,340174,6400339053,K768,"[E854, D62, E875, D638, N1783, ...]",502311::20191115,"[99B812::20191116, 99C121::20191125, 990410::20191115, 8847::20191114, 880110::20191114, ...]",W,76,0,0,13,0,06,2019-11-14,0,00,2019-11-27,340174;6400339053;76;0;;W;20191114;06;20191127;00;13;0;K768|E854|D62|E875|D638|N1783|N184|I431|N...
1,340178,6400340009,J702,"[C342, Z9680, J459, E891, B965]",874199::20191225,[998425::20191227],M,77,0,0,5,0,01,2019-12-24,0,00,2019-12-29,340178;6400340009;77;0;;M;20191224;01;20191229;00;5;0;J702|C342|Z9680|J459|E891|B965;874199::201...
2,340424,6400343280,J690,"[F051, F03, R33, G408, E038, ...]",99C124::20191129,[9915::20191129],M,84,0,0,8,0,01,2019-11-22,0,07,2019-11-30,340424;6400343280;84;0;;M;20191122;01;20191130;07;8;0;J690|F051|F03|R33|G408|E038|F258|R139|J960...
3,340455,6400343130,J101,"[D7011, T8609, Z9480, Y849, C9200]",998426::20191119,[],M,56,0,0,7,0,01,2019-11-18,0,00,2019-11-25,340455;6400343130;56;0;;M;20191118;01;20191125;00;7;0;J101|D7011|T8609|Z9480|Y849|C9200;998426::...
4,340641,6400314953,M5416,"[R33, G6288, Z4500, E1191, I1190, ...]",99B810::20190810,"[99C122::20190822, 8703::20190810, 883850::20190810, 883840::20190812, 891520::20190813, ...]",M,84,0,0,13,0,01,2019-08-10,0,00,2019-08-23,340641;6400314953;84;0;;M;20190810;01;20190823;00;13;0;M5416|R33|G6288|Z4500|E1191|I1190|I489|Z9...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,353878,6400332602,E871,"[I440, I447, I471, E784, E871, ...]",887211::20190906,[891499::20190904],W,91,0,0,5,0,01,2019-09-04,0,00,2019-09-09,353878;6400332602;91;0;;W;20190904;01;20190909;00;5;0;E871|I440|I447|I471|E784|E871|I5012|I361|Z...
68,354152,6400339976,J690,"[R130, I693, R53, Z921, E43, ...]",99C11D::20191226,[966::20191226],M,91,0,0,5,0,01,2019-12-22,0,00,2019-12-27,354152;6400339976;91;0;;M;20191222;01;20191227;00;5;0;J690|R130|I693|R53|Z921|E43|L8907|L8914|F0...
69,354259,6400331761,K703,"[K766, I1000, I982, F102, R18]",890714::20191227,"[451310::20191224, 4523::20191224, 887211::20191224, 887214::20191224, 874199::20191223, ...]",W,72,0,0,10,0,11,2019-12-20,0,00,2019-12-30,354259;6400331761;72;0;;W;20191220;11;20191230;00;10;0;K703|K766|I1000|I982|F102|R18;890714::201...
70,354430,6400325852,C530,"[T835, E876, R073, Y828, B962, ...]",6862::20191030,"[99C11D::20191111, 948X41::20191119, 877610::20191113, 5732::20191113, 8993::20191112, ...]",W,62,0,0,23,0,01,2019-10-29,0,00,2019-11-21,354430;6400325852;62;0;;W;20191029;01;20191121;00;23;0;C530|T835|E876|R073|Y828|B962|U8121|C7982...


In [9]:
# Use the same column constant that 'format_for_grouper_one_case' writes to
batchgrouper = grouper_data[GROUPER_FORMAT_COL]
# Sanity check on the first line (by position, not by index label):
# 15 fields joined with ';' must give 14 separators
print(batchgrouper.iloc[0].count(";"))

14


In [10]:
# Preview the first grouper-formatted lines
batchgrouper.head()

0    340174;6400339053;76;0;;W;20191114;06;20191127;00;13;0;K768|E854|D62|E875|D638|N1783|N184|I431|N...
1    340178;6400340009;77;0;;M;20191224;01;20191229;00;5;0;J702|C342|Z9680|J459|E891|B965;874199::201...
2    340424;6400343280;84;0;;M;20191122;01;20191130;07;8;0;J690|F051|F03|R33|G408|E038|F258|R139|J960...
3    340455;6400343130;56;0;;M;20191118;01;20191125;00;7;0;J101|D7011|T8609|Z9480|Y849|C9200;998426::...
4    340641;6400314953;84;0;;M;20190810;01;20190823;00;13;0;M5416|R33|G6288|Z4500|E1191|I1190|I489|Z9...
Name: batchgrouper_format, dtype: object

In [11]:
# NOTE(review): this looks like a deliberate stop, so that "Run All" does not reach the cells below,
# which group the revised cases and call update_db — confirm before removing.
sys.exit()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# Group the revised cases; the result is unpacked into the updates for revisions, diagnoses and procedures
grouped_updates = group(revised_cases)
revisions_update, diagnoses_update, procedures_update = grouped_updates
# NOTE(review): the revision date is hard-coded for all the cases — confirm the value before updating the DB
revisions_update[REVISION_DATE_COL] = '2022-12-31'

In [None]:
# Display the revisions for a visual check before they are passed to update_db
revisions_update

In [None]:
# NOTE(review): presumably writes the revisions, diagnoses and procedures to the DB — run deliberately
update_db(revisions_update, diagnoses_update, procedures_update)