In [1]:
import pandas as pd
pd.options.mode.copy_on_write = True
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

from tqdm import tqdm

import COMBINE_harmonizer
from COMBINE_harmonizer import cfg

## 00. Init

In [2]:
root_dir = '../'

cfg.init(filename=f"{root_dir}/config.yaml")

input_dir = f"{cfg.config['out_dir']}/out-publish-normalized"

In [3]:
COMBINE_harmonizer.init_redcap(token=cfg.config["redcap_token"], host=cfg.config["redcap_host"])

## 01. Construct REDCap Sheet Column Map

In [4]:
data_dict_filename = f'{root_dir}/Dictionary_HIE_clinical_variables.xlsx'
_REDCAP_MAIN_COLUMN_MAP = COMBINE_harmonizer.build_redcap_column_map(data_dict_filename, COMBINE_harmonizer.SHEET_MAIN)
_REDCAP_FOLLOWUP_COLUMN_MAP = COMBINE_harmonizer.build_redcap_column_map(data_dict_filename, COMBINE_harmonizer.SHEET_FOLLOW_UP)
_REDCAP_ANALYSIS_COLUMN_MAP = COMBINE_harmonizer.build_redcap_column_map(data_dict_filename, COMBINE_harmonizer.SHEET_DERIVED_DATA)

_REDCAP_SHEET_COLUMN_MAP = {
    COMBINE_harmonizer.SHEET_MAIN: _REDCAP_MAIN_COLUMN_MAP,
    COMBINE_harmonizer.SHEET_FOLLOW_UP: _REDCAP_FOLLOWUP_COLUMN_MAP,
    COMBINE_harmonizer.SHEET_DERIVED_DATA: _REDCAP_ANALYSIS_COLUMN_MAP,
}

## 03. Construct Filename Infos

In [5]:
filename_infos = COMBINE_harmonizer.build_redcap_filename_infos(_REDCAP_SHEET_COLUMN_MAP)

In [6]:
len(filename_infos)

70

## 04. Normalize and Put to REDCap

In [7]:
start_idx = 14
end_idx = 70
process_filename_infos = filename_infos[start_idx:end_idx]
for idx, filename_info in enumerate(process_filename_infos):
    print(f"[INFO] ({idx}/{len(process_filename_infos)}) {filename_info['name']}")
    df_records = COMBINE_harmonizer.redcap_normalize_filename_info(filename_info, input_dir)

    all_records = [dict(row) for idx, row in df_records.iterrows()]
    for record in tqdm(all_records):
        COMBINE_harmonizer.put_redcap_data('record', [record])

[INFO] (0/56) 02-04-blood-gas.csv


100%|██████████| 10768/10768 [53:11<00:00,  3.37it/s]


[INFO] (1/56) 02-05-hematology.csv


100%|██████████| 2742/2742 [13:35<00:00,  3.36it/s]


[INFO] (2/56) 02-05_s-hematology.csv


100%|██████████| 532/532 [02:32<00:00,  3.49it/s]


[INFO] (3/56) 02-06_s-blood-value.csv


100%|██████████| 532/532 [02:32<00:00,  3.49it/s]


[INFO] (4/56) 02-07-infection.csv


100%|██████████| 532/532 [02:45<00:00,  3.22it/s]


[INFO] (5/56) 02-08-other-med.csv


100%|██████████| 2757/2757 [13:36<00:00,  3.38it/s]


[INFO] (6/56) 02-09-imaging.csv


100%|██████████| 525/525 [02:46<00:00,  3.15it/s]


[INFO] (7/56) 02-11-elevated-temperature.csv


100%|██████████| 445/445 [02:08<00:00,  3.46it/s]


[INFO] (8/56) 02-12-fluctuated-temperature.csv


100%|██████████| 54/54 [00:15<00:00,  3.53it/s]


[INFO] (9/56) 02-13-bradycardia.csv


100%|██████████| 210/210 [01:02<00:00,  3.36it/s]


[INFO] (10/56) 02-14-adverse-event.csv


100%|██████████| 111/111 [00:36<00:00,  3.07it/s]


[INFO] (11/56) 02-15-violation.csv


100%|██████████| 156/156 [00:47<00:00,  3.27it/s]


[INFO] (12/56) 02-16-interrupt.csv


100%|██████████| 249/249 [01:14<00:00,  3.32it/s]


[INFO] (13/56) 02-17-discontinue.csv


100%|██████████| 532/532 [02:41<00:00,  3.30it/s]


[INFO] (14/56) 03-01-post-temperature.csv


100%|██████████| 2333/2333 [11:55<00:00,  3.26it/s]


[INFO] (15/56) 03-01_s-post-temperature.csv


100%|██████████| 364/364 [01:43<00:00,  3.53it/s]


[INFO] (16/56) 03-02-post-blood-value.csv


100%|██████████| 364/364 [01:49<00:00,  3.32it/s]


[INFO] (17/56) 03-03-post-imaging.csv


100%|██████████| 529/529 [02:47<00:00,  3.15it/s]


[INFO] (18/56) 03-04-post-neuro-exam.csv


100%|██████████| 527/527 [02:55<00:00,  3.00it/s]


[INFO] (19/56) 03-05-mri.csv


100%|██████████| 915/915 [06:03<00:00,  2.52it/s]


[INFO] (20/56) 03-05_s-mri.csv


100%|██████████| 477/477 [02:21<00:00,  3.37it/s]


[INFO] (21/56) 04-01-status.csv


100%|██████████| 532/532 [02:47<00:00,  3.18it/s]


[INFO] (22/56) 04-03-cardiovascular.csv


100%|██████████| 532/532 [02:46<00:00,  3.19it/s]


[INFO] (23/56) 04-04-respiratory.csv


100%|██████████| 532/532 [02:49<00:00,  3.14it/s]


[INFO] (24/56) 04-05-hematology.csv


100%|██████████| 532/532 [02:38<00:00,  3.36it/s]


[INFO] (25/56) 04-06-metabolic.csv


100%|██████████| 532/532 [02:36<00:00,  3.40it/s]


[INFO] (26/56) 04-07-renal.csv


100%|██████████| 532/532 [02:38<00:00,  3.36it/s]


[INFO] (27/56) 04-08-gastrointestinal.csv


100%|██████████| 532/532 [02:50<00:00,  3.13it/s]


[INFO] (28/56) 04-09-skin.csv


100%|██████████| 532/532 [02:37<00:00,  3.37it/s]


[INFO] (29/56) 04-10-auditory.csv


100%|██████████| 532/532 [02:35<00:00,  3.42it/s]


[INFO] (30/56) 04-11-surgery.csv


100%|██████████| 532/532 [02:36<00:00,  3.40it/s]


[INFO] (31/56) 04-12-infection.csv


100%|██████████| 532/532 [02:37<00:00,  3.38it/s]


[INFO] (32/56) 04-02-neuro-exam.csv


100%|██████████| 532/532 [02:48<00:00,  3.16it/s]


[INFO] (33/56) 04-13-seizure.csv


100%|██████████| 532/532 [02:41<00:00,  3.30it/s]


[INFO] (34/56) 04-14-birth-defect.csv


100%|██████████| 532/532 [02:37<00:00,  3.38it/s]


[INFO] (35/56) 04-15-home-therapy.csv


100%|██████████| 532/532 [02:40<00:00,  3.32it/s]


[INFO] (36/56) 04-16-wdraw-support.csv


100%|██████████| 532/532 [02:41<00:00,  3.29it/s]


[INFO] (37/56) 04-17-limit-care.csv


100%|██████████| 532/532 [02:38<00:00,  3.36it/s]


[INFO] (38/56) 20-00-follow-up.csv


100%|██████████| 467/467 [02:16<00:00,  3.43it/s]


[INFO] (39/56) 20-01-ses.csv


100%|██████████| 467/467 [03:16<00:00,  2.38it/s]


[INFO] (40/56) 20-02-medical-history.csv


100%|██████████| 467/467 [02:48<00:00,  2.77it/s]


[INFO] (41/56) 20-03-medical-exam.csv


100%|██████████| 467/467 [03:20<00:00,  2.33it/s]


[INFO] (42/56) 20-04-bayley-iii.csv


100%|██████████| 467/467 [02:44<00:00,  2.85it/s]


[INFO] (43/56) 20-05-gmfcs.csv


100%|██████████| 467/467 [02:19<00:00,  3.35it/s]


[INFO] (44/56) 20-06-status.csv


100%|██████████| 467/467 [02:21<00:00,  3.29it/s]


[INFO] (45/56) 20-07-readmission.csv


100%|██████████| 212/212 [01:06<00:00,  3.19it/s]


[INFO] (46/56) 20-08-lost.csv


100%|██████████| 467/467 [02:24<00:00,  3.22it/s]


[INFO] (47/56) 30-01-secondary.csv


100%|██████████| 532/532 [03:01<00:00,  2.93it/s]


[INFO] (48/56) 30-02-outcome.csv


100%|██████████| 532/532 [02:46<00:00,  3.20it/s]


[INFO] (49/56) 30-03-mri.csv


100%|██████████| 532/532 [03:14<00:00,  2.73it/s]


[INFO] (50/56) 31-02-total-modified-sarnat.csv


100%|██████████| 532/532 [03:19<00:00,  2.67it/s]


[INFO] (51/56) 31-03-mri.csv


100%|██████████| 532/532 [02:41<00:00,  3.29it/s]


[INFO] (52/56) 31-04-pse.csv


100%|██████████| 532/532 [02:36<00:00,  3.39it/s]


[INFO] (53/56) 31-05-disability-level-death.csv


100%|██████████| 532/532 [02:38<00:00,  3.36it/s]


[INFO] (54/56) 31-06-emergency-csection.csv


100%|██████████| 532/532 [02:34<00:00,  3.45it/s]


[INFO] (55/56) 31-07-length-of-stay.csv


100%|██████████| 532/532 [02:37<00:00,  3.39it/s]
