# Revised case normalization for Winterthur 2018

Notes:

- The source file contains no 'Admno' or 'Fall Nummer' column that could be matched to the DB.

In [1]:
import sys
sys.path.insert(0, '/home/jovyan/work')

import pandas as pd
from src.revised_case_normalization.notebook_functions.global_configs import *
from src.revised_case_normalization.notebook_functions.normalize import normalize
from src.revised_case_normalization.notebook_functions.revise import revise, get_sociodemographics_for_hospital_year
from src.revised_case_normalization.notebook_functions.group import group
from src.revised_case_normalization.notebook_functions.update_db import update_db

In [2]:
# Describe the revised-case workbook to normalize: file location, hospital
# name as used in the DB, year, and the Excel sheet holding the revisions.
file_info = FileInfo(
    path=os.path.join(ROOT_DIR, 'raw_data/Winterthur.xlsx'),
    hospital_name_db='KSW',
    year='2018',
    sheet='Änderungen Winterthur 2018',
)

print(file_info)



FileInfo(path='/home/jovyan/work/src/revised_case_normalization/raw_data/Winterthur.xlsx', hospital_name_db='KSW', year='2018', sheet='Änderungen Winterthur 2018')


In [3]:
# Start from the shared column mapping, use the 'fid' column as the case ID,
# and drop the "admno" entry (raises KeyError if the shared mapping has no "admno" key).
columns_to_rename = dict(COLUMNS_TO_RENAME, fid=CASE_ID_COL)
del columns_to_rename["admno"]

revised_cases_df = normalize(
    file_info,
    columns_mapper=columns_to_rename,
)
revised_cases_df.head()

2022-11-16 11:18:52.273 | INFO     | src.revised_case_normalization.notebook_functions.normalize:normalize:35 - Read 193 cases for KSW 2018
2022-11-16 11:18:52.278 | INFO     | src.revised_case_normalization.notebook_functions.normalize:normalize:70 - TYPES:
tranche                                                 string
datum                                                   string
case_id                                                 object
patient_id                                              object
kkik                                                    string
gender                                                  string
age_years                                                int64
bfs_code                                                string
duration_of_stay                                         int64
pflegetage neu                                          string
pccl                                                     int64
pccl neu                                        

Unnamed: 0,case_id,patient_id,gender,age_years,duration_of_stay,case_id_norm,old_pd,primary_diagnosis,bfs_code,pccl,drg,added_icds,removed_icds,added_chops,removed_chops
0,900362200,120A84ED0B5D412F,W,54,8,900362200,C795,C795,M200,3,I10B,[M4957],[],[],[]
1,900449400,2833C0B8F3FAF2B2,W,80,12,900449400,N1793,N1793,M100,3,L60D,[G2090],[],[],[]
2,900404910,220435AE84965999,M,79,3,900404910,S3702,S3702,M200,3,L09B,[S3683],[S3681],[],[]
3,900454320,472E599EDB7090A6,W,66,2,900454320,R410,R410,M100,3,B64B,[L8918],[L8908],[],[]
4,900475230,B127DAEB25A7546F,W,58,9,900475230,C56,C56,M200,3,N60B,"[I1091, K658, Y849]",[I1090],[],[]


In [4]:
# Load the KSW 2018 case-ID mapping (mapped case IDs from DtoD).
# Every column is cast to str after reading.
case_id_mapped = pd.read_excel(
    os.path.join(ROOT_DIR, 'case_id_mappings/case_id_mapping_KSW_2018.xlsx')
).astype(str)

In [5]:
# Attach the case-ID mapping to the revised cases and use the mapped ID as the
# normalized case ID ('case_id_norm').
# - how="left" keeps every revised case, even when the mapping has no entry for it.
# - validate="many_to_one" raises pandas.errors.MergeError if a 'case_id' occurs
#   more than once in the mapping, instead of silently duplicating revised cases.
# Written as a single chain (no inplace=True), so `revised_cases_df` is only
# rebound once all steps have succeeded.
revised_cases_df = (
    pd.merge(
        revised_cases_df,
        case_id_mapped,
        on="case_id",
        how="left",
        validate="many_to_one",
    )
    .drop(columns="case_id_norm")
    .rename(columns={"case_id_mapped": "case_id_norm"})
)

# Match revised DtoD data with the BfS data from the database

In [6]:
# Columns passed to revise() as validation columns: take VALIDATION_COLS and
# swap the raw case ID for the normalized (mapped) case ID.
cols_to_join = [*VALIDATION_COLS]
cols_to_join.remove(CASE_ID_COL)
cols_to_join += [NORM_CASE_ID_COL]

# The patient ID is encrypted in this dataset, so it is left out as well.
cols_to_join.remove(PATIENT_ID_COL)

revised_cases, unmatched = revise(
    file_info,
    revised_cases_df,
    validation_cols=cols_to_join,
)

2022-11-16 11:18:53.913 | INFO     | src.service.bfs_cases_db_service:get_sociodemographics_for_hospital_year:90 - Read 27501 rows from the DB, for the hospital 'KSW' in 2018


In [7]:
# Only render `unmatched` when it contains at least one row.
if len(unmatched) > 0:
    display(unmatched)

In [8]:
# Group the revised cases; group() returns three objects (revisions, diagnoses,
# procedures) that are passed to update_db() further down.
revisions_update, diagnoses_update, procedures_update = group(revised_cases)
# NOTE(review): the revision date is a hard-coded literal applied to every row —
# confirm that this is the intended date for this import.
revisions_update[REVISION_DATE_COL] = '2022-12-31'

2022-11-16 11:18:54.379 | INFO     | src.revised_case_normalization.notebook_functions.group:group:83 - Grouping 193 cases ...
2022-11-16 11:18:55.461 | SUCCESS  | src.revised_case_normalization.notebook_functions.group:group:88 - Grouped 193 cases into: 193 revisions, 1785 diagnoses rows, 631 procedure rows


In [9]:
# Inspect the revisions that are handed to update_db() in the next cell.
revisions_update

Unnamed: 0,aimedic_id,drg,drg_cost_weight,effective_cost_weight,pccl,revision_date
0,198354,H61A,1.35,1.35,4,2022-12-31
1,198498,O01D,1.22,1.22,3,2022-12-31
2,198565,E77C,1.13,1.13,4,2022-12-31
3,198592,E69B,0.60,0.60,3,2022-12-31
4,198897,L18A,1.70,1.70,4,2022-12-31
...,...,...,...,...,...,...
188,225152,E71A,1.37,1.37,4,2022-12-31
189,225169,I02B,5.21,5.21,4,2022-12-31
190,225311,G72A,0.95,0.95,4,2022-12-31
191,225633,L63D,0.65,0.65,3,2022-12-31


In [10]:
# Write the grouped revisions, diagnoses and procedures to the database.
# NOTE(review): the captured log of this cell reports rows being deleted from the
# 'Revisions' table before the insert — confirm that replacing existing
# revisions is intended before re-running.
update_db(revisions_update, diagnoses_update, procedures_update)

2022-11-16 11:18:55.481 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_revisions:249 - Trying to insert 193 cases into the 'Revisions' table ...
2022-11-16 11:18:56.020 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_revisions:275 - Deleted 193 rows from the "Revisions" table, which is about to be updated
2022-11-16 11:18:56.106 | SUCCESS  | src.service.bfs_cases_db_service:insert_revised_cases_into_revisions:286 - Inserted 193 cases into the 'Revisions' table
2022-11-16 11:18:56.109 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_diagnoses:298 - Trying to insert 1785 rows into the 'Diagnoses' table ...
2022-11-16 11:19:00.300 | SUCCESS  | src.service.bfs_cases_db_service:insert_revised_cases_into_diagnoses:336 - Inserted 1785 rows into the 'Diagnoses' table
2022-11-16 11:19:00.304 | INFO     | src.service.bfs_cases_db_service:insert_revised_cases_into_procedures:347 - Trying to insert 631 rows into the 'Procedure