# Revised case normalization for Winterthur 2017

Notes:

- 216 of the 218 revised cases from DtoD could be matched to the BfS database records for Winterthur (KSW) 2017; the 2 unmatched cases are displayed after the matching step.

In [1]:
import sys
# Put the project root first on the import path so the `src.` imports below resolve.
# NOTE(review): the absolute path is hard-coded — presumably the container's project mount; confirm before running elsewhere.
sys.path.insert(0, '/home/jovyan/work')

# NOTE(review): no other import in this cell supplies names such as `os`, `ROOT_DIR`, `FileInfo`
# or the `*_COL` constants used in later cells, so they appear to come from this star import — verify.
from src.revised_case_normalization.notebook_functions.global_configs import *
from src.revised_case_normalization.notebook_functions.normalize import normalize
from src.revised_case_normalization.notebook_functions.revise import revise
from src.revised_case_normalization.notebook_functions.group import group
from src.revised_case_normalization.notebook_functions.update_db import update_db

In [2]:

# Describe the workbook holding the revised cases: where it lives, which hospital
# and year it belongs to, and which sheet to read.
file_info = FileInfo(
    path=os.path.join(ROOT_DIR, 'raw_data/Winterthur.xlsx'),
    hospital_name_db='KSW',
    year='2017',
    sheet='Änderungen _Winterthur_2017',
)

print(file_info)


FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/Winterthur.xlsx', hospital_name_db='KSW', year='2017', sheet='Änderungen _Winterthur_2017')


In [3]:
# Read and normalize the revised cases from the sheet described by `file_info`.
revised_cases_df = normalize(file_info)
# Trailing expression: render the normalized frame as the cell output.
revised_cases_df

2022-11-14 09:56:19.228 | INFO     | src.revised_case_normalization.notebook_functions.normalize:normalize:35 - Read 218 cases for KSW 2017
2022-11-14 09:56:19.232 | INFO     | src.revised_case_normalization.notebook_functions.normalize:normalize:63 - TYPES:
tranche                   string
datum                     string
fid                       string
case_id                   object
patient_id                object
kkik                      string
gender                    string
age_years                  int64
bfs_code                  string
duration_of_stay           int64
pflegetage neu            string
pccl                       int64
pccl neu                  string
old_pd                    string
primary_diagnosis         string
added_icds                string
removed_icds              string
added_chops               string
removed_chops             string
drg                       string
drg neu                   string
cw alt                    string
cw neu         

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,20066273027,D1B4E3C92520FC01,M,35,4,20066273027,C61,C61,M100,3,M60B,[C787],[],[],[]
1,20093870027,13320B741B5845D4,M,84,17,20093870027,S7210,S7210,M200,3,I46C,[T840],[],[],[]
2,20158223027,BA43D16D9B8B472A,M,75,11,20158223027,C787,C787,M200,3,H01B,[D684],[],[],[]
3,20219240027,7D29F200ABFD59CD,M,87,14,20219240027,S7201,S7201,M100,3,I46C,[B370],[],[],[]
4,20237303027,2BDCB561504F6656,W,83,7,20237303027,S7201,S7201,M200,3,I46C,[N182],[],[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213,21190583027,4B61EE1D52949A9D,M,59,4,21190583027,K922,K922,M100,2,G46C,[I81],[],[],[]
214,21305870027,25DA94C53B280F17,W,62,4,21305870027,C343,C343,M100,3,E08B,[C771],[],[],[]
215,22510853027,FA8BE33FAB475106,M,77,4,22510853027,B022,B022,M100,3,B72C,[E871],[],[],[]
216,22881800027,0A54ABA2AA2F5F6D,W,64,4,22881800027,C793,C793,M100,3,B66C,[C780],[],[],[]


# Match revised DtoD data with the BfS data from the database

In [4]:
# Build the validation columns passed to `revise`: start from a copy of the shared
# validation columns, then swap the raw case ID for the normalized case ID.
cols_to_join = [*VALIDATION_COLS]
cols_to_join.remove(CASE_ID_COL)
cols_to_join += [NORM_CASE_ID_COL]
# NOTE(review): the patient ID is encrypted in this dataset, but the line that would drop it
# from the validation columns is disabled — confirm whether it should be excluded.
# cols_to_join.remove(PATIENT_ID_COL)

# `revise` hands back the revised cases plus the rows it could not match.
revised_cases, unmatched = revise(file_info, revised_cases_df, validation_cols=cols_to_join)
revised_cases.head()

2022-11-14 09:56:21.473 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:90 - Read 26985 rows from the DB, for the hospital 'KSW' in 2017


Unnamed: 0,aimedic_id,case_id,primary_diagnosis,secondary_diagnoses,primary_procedure,secondary_procedures,gender,age_years,age_days,gestation_age,duration_of_stay,ventilation_hours,grouper_admission_type,admission_date,admission_weight,grouper_discharge_type,discharge_date
0,88497,21712640027,N390,"[B962, E86, N1791, N183, I1090, ...]",,[],W,90,0,0,1,0,1,2017-04-23,0,0,2017-04-24
1,88511,20197613027,I480,"[I1190, Z954, I7020, Z9588, E1190, ...]",890A42::20170926,[9969::20171002],W,91,0,0,10,0,1,2017-09-25,0,0,2017-10-05
2,88636,21945623027,N40,"[N3941, N184, E871, I1000, K920]",6022::20170802,[5718::20170802],M,76,0,0,3,0,1,2017-08-02,0,0,2017-08-05
3,88652,21395996027,K565,"[Z960, I1090, Z855, R18, A099, ...]",5459::20171221,"[5425::20171221, 9607::20171220, 9915::20171222]",M,71,0,0,11,0,1,2017-12-20,0,0,2017-12-31
4,88658,20418260027,J100,"[N390, B962, R471, E876, E871, ...]",998411::20170123,[990410::20170121],W,85,0,0,6,0,1,2017-01-21,0,0,2017-01-27


In [5]:
# Show the revised cases that could not be matched; stay silent when there are none.
if len(unmatched) > 0:
    display(unmatched)

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops,aimedic_id,case_id_db,age_days,admission_weight,gestation_age,admission_date,grouper_admission_type,discharge_date,grouper_discharge_type,ventilation_hours
83,21524636027,AC6005CB8E5BC82C,M,86,13,21524636027,D466,D466,M100,3,R61E,"[I5014, I5001]",[],[],[],,,,,,,,,,
122,22129106027,CE19F1ACF3C71462,M,77,14,22129106027,I5001,I5001,M100,3,F12F,"[I361, E871]",[],[],[],,,,,,,,,,


In [6]:
# Group the matched cases into the three frames later passed to `update_db`:
# revisions, diagnoses and procedures.
revisions_update, diagnoses_update, procedures_update = group(revised_cases)
# Stamp every revision row with the same revision date.
# NOTE(review): the date is hard-coded — confirm it is the intended value for this delivery.
revisions_update[REVISION_DATE_COL] = '2022-12-31'

2022-11-14 09:56:22.159 | INFO     | src.revised_case_normalization.notebook_functions.group:group:83 - Grouping 216 cases ...
2022-11-14 09:56:23.251 | SUCCESS  | src.revised_case_normalization.notebook_functions.group:group:88 - Grouped 216 cases into: 216 revisions, 1966 diagnoses rows, 759 procedure rows


In [7]:
# Inspect the revisions frame (including the revision date set above) as the cell output.
revisions_update

Unnamed: 0,aimedic_id,drg,drg_cost_weight,effective_cost_weight,pccl,revision_date
0,88497,L63D,0.67,0.27,3,2022-12-31
1,88511,F71B,0.74,0.74,3,2022-12-31
2,88636,M02A,1.24,1.24,4,2022-12-31
3,88652,G04B,3.21,3.21,4,2022-12-31
4,88658,E77D,1.18,1.18,4,2022-12-31
...,...,...,...,...,...,...
211,114770,I10A,2.67,2.67,4,2022-12-31
212,114831,X06A,2.96,2.96,4,2022-12-31
213,114935,F75A,1.60,1.60,4,2022-12-31
214,115253,I46A,2.66,2.66,4,2022-12-31


In [8]:
# Write the grouped revisions, diagnoses and procedures to the database.
# NOTE(review): the run log recorded with this notebook reports existing 'Revisions' rows being
# deleted before the insert — treat this cell as destructive and confirm before re-running.
update_db(revisions_update, diagnoses_update, procedures_update)

2022-11-14 09:56:23.267 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_revisions:249 - Trying to insert 216 cases into the 'Revisions' table ...
2022-11-14 09:56:24.527 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_revisions:275 - Deleted 216 rows from the "Revisions" table, which is about to be updated
2022-11-14 09:56:24.858 | SUCCESS  | src.service.bfs_cases_db_service:insert_revised_cases_into_revisions:286 - Inserted 216 cases into the 'Revisions' table
2022-11-14 09:56:24.865 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_diagnoses:298 - Trying to insert 1966 rows into the 'Diagnoses' table ...
2022-11-14 09:56:33.588 | SUCCESS  | src.service.bfs_cases_db_service:insert_revised_cases_into_diagnoses:336 - Inserted 1966 rows into the 'Diagnoses' table
2022-11-14 09:56:33.596 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_procedures:347 - Trying to insert 759 rows into the 'Procedure