# Revised case normalization for Hirslanden Aarau 2018

Notes:

- 17 of 17 revised DtoD cases were joined with the BFS cases from the DB.



In [1]:
import sys

import pandas as pd
import numpy as np

from loguru import logger

sys.path.insert(0, '/home/jovyan/work')

from src.revised_case_normalization.py.global_configs import *
from src.revised_case_normalization.py.normalize import normalize, remove_leading_zeros
from src.service import bfs_cases_db_service as bfs_db
from src.service.bfs_cases_db_service import session, get_sociodemographics_for_hospital_year

In [2]:
# Describe the revised-case workbook to process: where it lives, the hospital
# name and year used for the DB lookup, and the worksheet(s) to read.
# NOTE(review): `os` and `ROOT_DIR` are not imported explicitly in this
# notebook; presumably they arrive via the `global_configs` star import.
workbook_path = os.path.join(ROOT_DIR, 'raw_data/HI_Aarau_Birshof_ST. Anna.xlsx')
file_info = FileInfo(workbook_path, 'Hirslanden Aarau', '2018', ['Aarau 2018'])

print(file_info)

FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/HI_Aarau_Birshof_ST. Anna.xlsx', hospital_name_db='Hirslanden Aarau', year='2018', sheets=['Aarau 2018'])


In [3]:
# Read the revised cases described by `file_info` and normalize them into a DataFrame.
# NOTE(review): the meaning of the positional `0` is not visible here -- presumably
# it selects the first entry of `file_info.sheets`; confirm against `normalize`.
df_revised_case_d2d = normalize(file_info, 0)

2022-10-20 09:07:47.475 | INFO     | src.revised_case_normalization.py.normalize:normalize:35 - Read 17 cases for Hirslanden Aarau 2018
2022-10-20 09:07:47.480 | INFO     | src.revised_case_normalization.py.normalize:normalize:57 - TYPES:
tranche                   string
datum                     string
case_id                   object
patient_id                object
kkik                      string
gender                    string
age_years                  int64
bfs_code                  string
duration_of_stay           int64
pflegetage neu            string
pccl                       int64
pccl neu                  string
old_pd                    string
new_pd                    string
added_icds                string
removed_icds              string
added_chops               string
removed_chops             string
drg                       string
drg neu                   string
cw alt                    string
cw neu                    string
cw-änderung möglich       string
cw

In [4]:
# Preview the first rows of the normalized revised cases as a sanity check.
df_revised_case_d2d.head()

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,new_pd,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,41623461,1059029,W,77,2,41623461,I7024,I7024,M100,3,F59E,[J4481],[J4483],[],[]
1,41665678,2043896,M,82,17,41665678,I5001,I5001,M100,3,F62C,[R18],[],[],[]
2,41698586,22177655,M,93,2,41698586,I7024,I7024,M100,3,F59E,[I743],[],[],[]
3,41719219,1339311,M,91,4,41719219,I495,I495,M100,3,F71B,[N182],[],[],[]
4,41777951,22177655,M,93,2,41777951,I7024,I7024,M100,0,F59E,"[N184, T828]",[],[],[]


# Match revised DtoD data with the BFS data from the database

In [5]:
# Load the sociodemographic data of all cases stored in the DB for this
# hospital and year (the year is kept as a string in `file_info`, hence int()).
cases_in_db = get_sociodemographics_for_hospital_year(file_info.hospital_name_db, int(file_info.year))

# Add a normalized case ID (leading zeros removed) next to the raw DB case ID,
# so the DB cases can be joined on NORM_CASE_ID_COL.
cases_in_db[NORM_CASE_ID_COL] = cases_in_db[CASE_ID_COL].apply(remove_leading_zeros)

# End the cell with the expression instead of print(): the notebook then
# renders the frame as a table rather than as wrapped plain text.
cases_in_db.head()

2022-10-20 09:07:49.346 | INFO     | __main__:<module>:7 - Read 10112 rows from the DB, for the hospital 'Hirslanden Aarau' in 2018


   aimedic_id     case_id        patient_id gender  age_years  \
0      115452  0041415230  A0E36E42C141D541      M         29   
1      115453  0041453333  10CA4E4A4D6A6648      W         34   
2      115454  0041473239  E7A902368D82844A      M         63   
3      115455  0041506268  5FCEADE16E4FD6EC      M         44   
4      115456  0041511524  A2B2DB76985BA881      M         43   

   duration_of_stay case_id_norm  
0                 2     41415230  
1                 1     41453333  
2                 4     41473239  
3                 3     41506268  
4                 2     41511524  


In [16]:
# Join keys: the validation columns, with the raw case ID replaced by the
# normalized case ID and without the patient ID.
# NOTE(review): the patient ID is presumably excluded because the DB stores it
# in a different format than the revised-case file -- confirm.
cols_to_join = list(VALIDATION_COLS)
cols_to_join.remove(CASE_ID_COL)
cols_to_join.append(NORM_CASE_ID_COL)
cols_to_join.remove(PATIENT_ID_COL)

# Left join: every revised case is kept; DB columns are empty where no DB case
# matches. Columns present in both frames but not used as keys get the '_db'
# suffix on the DB side.
merged_with_db = pd.merge(df_revised_case_d2d, cases_in_db,
                          how='left',
                          on=cols_to_join,
                          suffixes=('', '_db'))

# A left join never drops rows from the left frame, so any surplus rows mean
# that at least one revised case matched several DB rows and was duplicated.
num_revised_cases = df_revised_case_d2d.shape[0]
num_extra_rows = merged_with_db.shape[0] - num_revised_cases
if num_extra_rows > 0:
    logger.warning(f'{num_extra_rows} extra rows after the join: some revised cases matched more than one DB case, given {sorted(cols_to_join)}')

# Keep only the DB identifier and the revised coding information.
joined = merged_with_db[[AIMEDIC_ID, NEW_PRIMARY_DIAGNOSIS_COL, ADDED_ICD_CODES, REMOVED_ICD_CODES, ADDED_CHOP_CODES, REMOVED_CHOP_CODES]]

# Rows without a DB identifier are revised cases that found no DB match.
unmatched = joined[joined[AIMEDIC_ID].isna()]
num_unmatched = unmatched.shape[0]
print(f'{num_unmatched} rows could not be matched, given {sorted(cols_to_join)}')

joined




0 rows could not be matched, given ['age_years', 'case_id_norm', 'duration_of_stay', 'gender']


Unnamed: 0,aimedic_id,new_pd,added_icds,removed_icds,added_chops,removed_chops
0,115875,I7024,[J4481],[J4483],[],[]
1,116851,I5001,[R18],[],[],[]
2,117722,I7024,[I743],[],[],[]
3,118215,I495,[N182],[],[],[]
4,120279,I7024,"[N184, T828]",[],[],[]
5,116472,I5014,[E1191],[E1190],[],[]
6,117999,N390,[B370],[],[],[]
7,120078,N390,[E440],[],[890A32],[]
8,119991,N1781,[J9610],[],[],[]
9,121288,I208,[J9580],[],[],[]
