# Revised case normalization for Winterthur 2018

Notes:

- The file has no 'Admno' or 'Fall Nummer' column that could be matched to the DB; the 'fid' column is used as the case ID instead.

In [1]:
import sys
sys.path.insert(0, '/home/jovyan/work')

import pandas as pd
from src.revised_case_normalization.notebook_functions.global_configs import *
from src.revised_case_normalization.notebook_functions.normalize import normalize
from src.revised_case_normalization.notebook_functions.revise import revise, get_sociodemographics_for_hospital_year
from src.revised_case_normalization.notebook_functions.group import group
from src.revised_case_normalization.notebook_functions.update_db import update_db

In [2]:
# Describe the revision file to process: its location, the hospital name and
# year it belongs to, and the Excel sheet that holds the revised cases.
winterthur_xlsx = os.path.join(ROOT_DIR, 'raw_data/Winterthur.xlsx')
file_info = FileInfo(
    path=winterthur_xlsx,
    hospital_name_db='Kantonsspital Winterthur',
    year='2018',
    sheet='Änderungen Winterthur 2018',
)

print(file_info)



FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/Winterthur.xlsx', hospital_name_db='Kantonsspital Winterthur', year='2018', sheet='Änderungen Winterthur 2018')


In [3]:
# Start from a copy of the default column mapping so the shared constant is
# not mutated, then adapt it to this file: there is no 'admno' column, and the
# case ID is read from the 'fid' column instead.
columns_to_rename = dict(COLUMNS_TO_RENAME)
del columns_to_rename["admno"]
columns_to_rename.update(fid=CASE_ID_COL)

revised_cases_df = normalize(file_info, columns_mapper=columns_to_rename)
revised_cases_df.head()

2022-11-21 09:33:41.852 | INFO     | src.revised_case_normalization.notebook_functions.normalize:normalize:38 - Read 193 cases for Kantonsspital Winterthur 2018
2022-11-21 09:33:41.856 | INFO     | src.revised_case_normalization.notebook_functions.normalize:normalize:73 - TYPES:
tranche                                                 string
datum                                                   string
case_id                                                 object
patient_id                                              object
kkik                                                    string
gender                                                  string
age_years                                                int64
bfs_code                                                string
duration_of_stay                                         int64
pflegetage neu                                          string
pccl                                                     int64
pccl neu                   

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,900362200,120A84ED0B5D412F,W,54,8,900362200,C795,C795,M200,3,I10B,[M4957],[],[],[]
1,900449400,2833C0B8F3FAF2B2,W,80,12,900449400,N1793,N1793,M100,3,L60D,[G2090],[],[],[]
2,900404910,220435AE84965999,M,79,3,900404910,S3702,S3702,M200,3,L09B,[S3683],[S3681],[],[]
3,900454320,472E599EDB7090A6,W,66,2,900454320,R410,R410,M100,3,B64B,[L8918],[L8908],[],[]
4,900475230,B127DAEB25A7546F,W,58,9,900475230,C56,C56,M200,3,N60B,"[I1091, K658, Y849]",[I1090],[],[]


In [4]:
# Load the DtoD case-ID mapping for KSW 2018. Every column is cast to str,
# presumably so the IDs compare equal to the string case IDs they are merged on.
case_id_mapping_path = os.path.join(ROOT_DIR, 'case_id_mappings/case_id_mapping_KSW_2018.xlsx')
case_id_mapped = pd.read_excel(case_id_mapping_path).astype(str)

In [5]:
# Attach the DtoD-mapped case IDs to the revised cases and use them as the
# normalized case ID ('case_id_norm') in place of the one produced by normalize().
n_cases_before_merge = len(revised_cases_df)

revised_cases_df = (
    pd.merge(revised_cases_df, case_id_mapped, on="case_id", how="left")
    .drop(columns='case_id_norm')
    .rename(columns={'case_id_mapped': 'case_id_norm'})
)

# A left merge silently duplicates a revised case when the mapping lists its
# case_id more than once; fail here rather than insert duplicates into the DB.
assert len(revised_cases_df) == n_cases_before_merge, (
    f"Case ID mapping changed the number of revised cases "
    f"({n_cases_before_merge} -> {len(revised_cases_df)}); "
    f"check the mapping file for duplicated case_id values"
)

# Match revised DtoD data with the BfS data from the database

In [6]:
# Build the list of columns used to match revised cases against the DB:
# start from the default validation columns, drop the raw case ID (the
# normalized/mapped case ID is used instead) and the patient ID
# (ignored in this dataset, because it is encrypted).
cols_to_join = list(VALIDATION_COLS)
for ignored_col in (CASE_ID_COL, PATIENT_ID_COL):
    cols_to_join.remove(ignored_col)
cols_to_join.append(NORM_CASE_ID_COL)

revised_cases, unmatched = revise(
    file_info,
    revised_cases_df,
    validation_cols=cols_to_join,
)

2022-11-21 09:33:48.217 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:95 - Read 27458 rows from the DB, for the hospital 'Kantonsspital Winterthur' in 2018


In [7]:
# Show the revised cases that could not be matched to a DB case, if any.
n_unmatched = len(unmatched.index)
if n_unmatched > 0:
    display(unmatched)

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops,case_id_norm,aimedic_id,case_id_db,patient_id_db,age_days,admission_weight,gestation_age,admission_date,grouper_admission_type,discharge_date,grouper_discharge_type,ventilation_hours
66,900481157,801704,M,2,6,J208,J208,M400,3,E69C,[Q623],[],[],[],22253552027,,,,,,,,,,,


In [8]:
# Group the matched cases into the three update sets (revisions, diagnoses,
# procedures), then stamp every revision with one fixed revision date.
revision_date = '2022-12-31'

grouped_updates = group(revised_cases)
revisions_update, diagnoses_update, procedures_update = grouped_updates
revisions_update[REVISION_DATE_COL] = revision_date

2022-11-21 09:33:48.799 | INFO     | src.revised_case_normalization.notebook_functions.group:group:86 - Grouping 192 cases ...
2022-11-21 09:33:50.052 | SUCCESS  | src.revised_case_normalization.notebook_functions.group:group:91 - Grouped 192 cases into: 192 revisions, 1781 diagnoses rows, 631 procedure rows


In [9]:
# Inspect the grouped revisions (one row per case) before they are written to the DB.
revisions_update

Unnamed: 0,aimedic_id,drg,drg_cost_weight,effective_cost_weight,pccl,revision_date
0,756721,H61A,1.35,1.35,4,2022-12-31
1,756865,O01D,1.22,1.22,3,2022-12-31
2,756932,E77C,1.13,1.13,4,2022-12-31
3,756959,E69B,0.60,0.60,3,2022-12-31
4,757264,L18A,1.70,1.70,4,2022-12-31
...,...,...,...,...,...,...
187,783476,E71A,1.37,1.37,4,2022-12-31
188,783493,I02B,5.21,5.21,4,2022-12-31
189,783635,G72A,0.95,0.95,4,2022-12-31
190,783957,L63D,0.65,0.65,3,2022-12-31


In [10]:
# Write the grouped revisions, diagnoses and procedures to the DB.
# NOTE(review): per the log output this inserts rows into the 'Revisions',
# 'Diagnoses' and 'Procedures' tables; re-running the cell may insert them
# again — confirm how update_db handles existing rows before re-running.
update_db(revisions_update, diagnoses_update, procedures_update)

2022-11-21 09:33:50.070 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_revisions:259 - Trying to insert 192 cases into the 'Revisions' table ...
2022-11-21 09:33:51.612 | SUCCESS  | src.service.bfs_cases_db_service:insert_revised_cases_into_revisions:296 - Inserted 192 cases into the 'Revisions' table
2022-11-21 09:33:51.615 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_diagnoses:309 - Trying to insert 1781 rows into the 'Diagnoses' table ...
2022-11-21 09:34:02.498 | SUCCESS  | src.service.bfs_cases_db_service:insert_revised_cases_into_diagnoses:347 - Inserted 1781 rows into the 'Diagnoses' table
2022-11-21 09:34:02.502 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_procedures:358 - Trying to insert 631 rows into the 'Procedures' table ...
2022-11-21 09:34:04.235 | SUCCESS  | src.service.bfs_cases_db_service:insert_revised_cases_into_procedures:404 - Inserted 631 rows into the 'Procedures' table
