# Revised case normalization for USZ 2019

Notes:

- 17 of 17 revised DtoD cases were joined with the BFS cases from the DB. **TODO:** verify this count; the logged run below reads 150 cases and groups 72.



In [1]:
import sys
sys.path.insert(0, '/home/jovyan/work')
from loguru import logger
import pandas as pd
from src.service.aimedic_grouper import group_batch_group_cases

from src.revised_case_normalization.notebook_functions.global_configs import *
from src.revised_case_normalization.notebook_functions.normalize import normalize
from src.revised_case_normalization.notebook_functions.revise import revise
from src.revised_case_normalization.notebook_functions.group import group
from src.revised_case_normalization.notebook_functions.update_db import update_db

In [2]:
# Describe the workbook to process: the file path under ROOT_DIR followed by three
# positional values, which the printed repr labels hospital_name_db, year and sheet.
# NOTE(review): `os`, `ROOT_DIR` and `FileInfo` are not imported by name in this
# notebook; presumably they come from the wildcard import of global_configs. Confirm.
file_info = FileInfo(os.path.join(ROOT_DIR, 'raw_data/USZ_2018-2019_20200730.xlsx'),
                     'USZ', '2019', 'Gesamtauffällige_USZ_2019')

# Print the object so the resolved path and sheet name appear in the cell output.
print(file_info)

FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/USZ_2018-2019_20200730.xlsx', hospital_name_db='USZ', year='2019', sheet='Gesamtauffällige_USZ_2019')


In [3]:
# Run the project's normalize() step on the file described by `file_info`.
# The logged run reports "Read 150 cases for USZ 2019" and the column types.
revised_cases_df = normalize(file_info)
# Bare last expression: Jupyter renders the returned object as the cell output.
revised_cases_df

2022-11-09 15:58:00.948 | INFO     | src.revised_case_normalization.notebook_functions.normalize:normalize:35 - Read 150 cases for USZ 2019
2022-11-09 15:58:00.952 | INFO     | src.revised_case_normalization.notebook_functions.normalize:normalize:59 - TYPES:
datum/ lieferung     string
case_id              object
patient_id           object
kkik                 string
gender               string
age_years             int64
bfs_code             string
duration_of_stay      int64
pflegetage neu       string
pccl                  int64
pccl neu             string
old_pd               string
primary_diagnosis    string
added_icds           string
removed_icds         string
added_chops          string
removed_chops        string
drg                  string
drg neu              string
cw alt               string
cw neu               string
cw-änderung          string
kommentar            string
case_id_norm         object
dtype: object
2022-11-09 15:58:00.962 | INFO     | src.utils.datafram

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,6400282213,3ED8F10117FCF5F5,M,54,13,6400282213,J4400,J4400,M200,3,E06A,[F058],[],[],[]
1,6400287076,55C5A17F71DADEA0,M,62,5,6400287076,J068,J068,M800,3,D62C,"[I8028, C770, C787]",[],[],[]
2,6400306817,0150AC67E3196B63,W,74,36,6400306817,R11,M8088,M100,3,G72C,[R64],[],[],[]
3,6400316927,153929A139272585,W,79,6,6400316927,G3531,G3531,M100,3,B68C,[R471],[],[],[]
4,6400278895,014A89BB816D9215,W,71,44,6400278895,T827,T827,M200,3,F75D,[D508],[D648],[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,6400305282,B2EFB1C7F864D6BE,M,54,4,6400305282,I1001,R522,M100,2,F67B,[],[],[],[]
146,6400306506,2BD4038A71FFAFCE,M,51,7,6400306506,C61,A410,M200,2,M60B,"[R651, C795, C774, N184]",[],[],[]
147,6400307264,45BD6CAFB4B36938,W,84,14,6400307264,L8924,L8924,M200,2,J02B,[B965],[],[],[]
148,6400308115,CC100ED2DB478B7E,M,46,8,6400308115,K830,K830,M100,2,H64B,[K831],[],[5185::20190610],[]


# Match revised DtoD data with the BFS data from the database

In [4]:
# Build the column list that is passed to revise() as `validation_cols`.
# Work on a copy so the shared VALIDATION_COLS constant is not mutated.
cols_to_join = list(VALIDATION_COLS)
# Replace the raw case ID column with the normalized case ID column.
# list.remove raises ValueError if the column is missing, so a changed
# VALIDATION_COLS definition fails loudly here instead of being silently ignored.
cols_to_join.remove(CASE_ID_COL)
cols_to_join.append(NORM_CASE_ID_COL)
# Ignore the patient ID in this dataset, because it is encrypted
cols_to_join.remove(PATIENT_ID_COL)

# revise() returns two objects, unpacked as the matched cases and the unmatched ones.
# The log shows it reading the hospital/year rows from the DB during this call.
revised_cases, unmatched = revise(file_info, revised_cases_df, validation_cols=cols_to_join)
# Show only the first rows of the matched cases.
revised_cases.head()

2022-11-09 15:58:02.202 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:90 - Read 14430 rows from the DB, for the hospital 'USZ' in 2019


Unnamed: 0,aimedic_id,case_id,primary_diagnosis,secondary_diagnoses,primary_procedure,secondary_procedures,gender,age_years,age_days,gestation_age,duration_of_stay,ventilation_hours,grouper_admission_type,admission_date,admission_weight,grouper_discharge_type,discharge_date
0,340174,6400339053,K768,"[E854, D62, E875, D638, N1783, ...]",502311::20191115,"[99B812::20191116, 99C121::20191125, 990410::20191115, 8847::20191114, 880110::20191114, ...]",W,76,0,0,13,0,6,2019-11-14,0,0,2019-11-27
1,340178,6400340009,J702,"[C342, Z9680, J459, E891, B965]",874199::20191225,[998425::20191227],M,77,0,0,5,0,1,2019-12-24,0,0,2019-12-29
2,340424,6400343280,J690,"[F051, F03, R33, G408, E038, ...]",99C124::20191129,[9915::20191129],M,84,0,0,8,0,1,2019-11-22,0,7,2019-11-30
3,340455,6400343130,J101,"[D7011, T8609, Z9480, Y849, C9200]",998426::20191119,[],M,56,0,0,7,0,1,2019-11-18,0,0,2019-11-25
4,340641,6400314953,M5416,"[R33, G6288, Z4500, E1191, I1190, ...]",99B810::20190810,"[99C122::20190822, 8703::20190810, 883850::20190810, 883840::20190812, 891520::20190813, ...]",M,84,0,0,13,0,1,2019-08-10,0,0,2019-08-23


In [5]:
# Show the revised cases that could not be matched, if there are any.
# Jupyter only auto-renders a bare expression when it is the last top-level
# statement of a cell; an expression nested inside an `if` is evaluated and then
# discarded. Calling display() explicitly makes the unmatched rows visible.
# `display` is provided as a built-in by the IPython kernel, so no import is needed.
if unmatched.shape[0] > 0:
    display(unmatched)

In [6]:
# Run the project's group() step on the matched cases; it returns three objects,
# unpacked as the revision, diagnosis and procedure updates.
# NOTE(review): the recorded run of this cell logs "Grouping 72 cases ..." and then
# stops with KeyError "Key 'procedures' not found ..." — presumably raised inside
# group(), in which case the assignment below never ran. Confirm and fix in group().
revisions_update, diagnoses_update, procedures_update = group(revised_cases)
# Stamp every revision with one fixed revision date.
# NOTE(review): the date is hard-coded; confirm '2022-12-31' is the intended value.
revisions_update[REVISION_DATE_COL] = '2022-12-31'

2022-11-09 15:58:02.595 | INFO     | src.revised_case_normalization.notebook_functions.group:group:78 - Grouping 72 cases ...


KeyError: "Key 'procedures' not found. If specifying a record_path, all elements of data should have the path."

In [None]:
# Inspect the revisions before they are passed to update_db() in the next cell.
revisions_update

In [None]:
# Hand the three update objects produced by group() to the project's update_db().
# NOTE(review): presumably this writes to the database. Confirm, and only run it
# after the grouping cell has completed without errors.
update_db(revisions_update, diagnoses_update, procedures_update)