In [1]:
import pandas as pd
# Turn on pandas copy-on-write mode for the whole session.
pd.options.mode.copy_on_write = True
import warnings
# Ignore pandas PerformanceWarning for the rest of the session.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import urllib3
# Suppress urllib3's InsecureRequestWarning for the rest of the session.
# NOTE(review): presumably the REDCap host is reached over HTTPS without
# certificate verification - confirm that this is intended before reusing
# the notebook against another host.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Progress display for the per-record upload loop.
from tqdm import tqdm

# Project package providing the REDCap helpers and the shared config object (cfg).
import COMBINE_harmonizer
from COMBINE_harmonizer import cfg

## 00. Init

In [2]:
# Base directory, relative to the notebook's working directory, under which
# config.yaml and the data dictionary are looked up.
# Note: root_dir already ends with '/', so the paths built from it below
# contain a doubled separator (e.g. '..//config.yaml').
root_dir = '../'

# Load the project configuration from config.yaml.
cfg.init(filename=f"{root_dir}/config.yaml")

# Input directory for the upload step: the 'out-publish-normalized' folder
# under the configured out_dir.
input_dir = f"{cfg.config['out_dir']}/out-publish-normalized"

In [3]:
# Initialise COMBINE_harmonizer's REDCap access with the API token and host
# read from the loaded config (nothing is hardcoded in the notebook).
COMBINE_harmonizer.init_redcap(token=cfg.config["redcap_token"], host=cfg.config["redcap_host"])

## 01. Construct REDCap Sheet Column Map

In [4]:
# Data dictionary workbook from which the per-sheet column maps are built.
data_dict_filename = f'{root_dir}/Dictionary_HIE_clinical_variables.xlsx'

# Build one column map per dictionary sheet, in the order main, follow-up,
# derived data, and bind each to its own module-level name.
(
    _REDCAP_MAIN_COLUMN_MAP,
    _REDCAP_FOLLOWUP_COLUMN_MAP,
    _REDCAP_ANALYSIS_COLUMN_MAP,
) = (
    COMBINE_harmonizer.build_redcap_column_map(data_dict_filename, sheet_name)
    for sheet_name in (
        COMBINE_harmonizer.SHEET_MAIN,
        COMBINE_harmonizer.SHEET_FOLLOW_UP,
        COMBINE_harmonizer.SHEET_DERIVED_DATA,
    )
)

# Lookup table: sheet identifier -> column map for that sheet.
_REDCAP_SHEET_COLUMN_MAP = dict((
    (COMBINE_harmonizer.SHEET_MAIN, _REDCAP_MAIN_COLUMN_MAP),
    (COMBINE_harmonizer.SHEET_FOLLOW_UP, _REDCAP_FOLLOWUP_COLUMN_MAP),
    (COMBINE_harmonizer.SHEET_DERIVED_DATA, _REDCAP_ANALYSIS_COLUMN_MAP),
))

## 03. Construct Filename Infos

In [5]:
# Build the sequence of per-file descriptors from the sheet -> column maps.
# Each entry is later indexed with 'name' (the CSV file name) by the upload loop.
filename_infos = COMBINE_harmonizer.build_redcap_filename_infos(_REDCAP_SHEET_COLUMN_MAP)

In [6]:
# Sanity check: how many file descriptors were built (this is the default
# upper bound of the upload loop's slice).
len(filename_infos)

70

## 04. Normalize and Put to REDCap

In [7]:
# Normalize each CSV described by filename_infos and upload its rows to REDCap.
#
# start_idx / end_idx select the slice of filename_infos to process, so an
# interrupted run can be resumed from a given file instead of starting over.
start_idx = 0
end_idx = len(filename_infos)
process_filename_infos = filename_infos[start_idx:end_idx]

# Positions of the selected files within filename_infos. Slicing a range with
# the same bounds keeps the logged index absolute (also for negative or None
# bounds), so the number printed for a file is the start_idx to resume from.
file_indices = range(len(filename_infos))[start_idx:end_idx]

for idx, filename_info in zip(file_indices, process_filename_infos):
    print(f"[INFO] ({idx}/{len(filename_infos)}) {filename_info['name']}")
    df_records = COMBINE_harmonizer.redcap_normalize_filename_info(filename_info, input_dir)

    # One plain dict per row of the normalized frame.
    all_records = [dict(row) for _, row in df_records.iterrows()]

    # Iterate the list itself (not enumerate(...)) so tqdm can read its length
    # and display a percentage and ETA rather than a bare iteration counter.
    # NOTE(review): every record is sent in its own request (about 3 records/s
    # in the recorded run); if put_redcap_data accepts multi-record lists,
    # batching would shorten the run considerably - confirm before changing.
    for record in tqdm(all_records):
        COMBINE_harmonizer.put_redcap_data('record', [record])

[INFO] (0/70) 01-02-screening.csv


532it [02:56,  3.02it/s]


[INFO] (1/70) 01-03-maternal-demographics.csv


532it [02:44,  3.24it/s]


[INFO] (2/70) 01-04-pregnancy-history.csv


532it [02:45,  3.21it/s]


[INFO] (3/70) 01-05-labor-delivery.csv


532it [03:00,  2.94it/s]


[INFO] (4/70) 01-06-birth.csv


532it [03:37,  2.44it/s]


[INFO] (5/70) 01-07-pre-temperature.csv


519it [02:49,  3.07it/s]


[INFO] (6/70) 01-08-pre-cardiovascular.csv


524it [02:46,  3.15it/s]


[INFO] (7/70) 01-09-pre-infection.csv


532it [02:41,  3.30it/s]


[INFO] (8/70) 01-10-pre-other-med.csv


492it [02:29,  3.30it/s]


[INFO] (9/70) 01-11-pre-imaging.csv


518it [02:41,  3.20it/s]


[INFO] (10/70) 01-12-neuro-exam.csv


532it [02:52,  3.08it/s]


[INFO] (11/70) 02-01-temperature.csv


24439it [2:42:29,  2.51it/s] 


[INFO] (12/70) 02-02-cardiovascular.csv


13629it [1:14:00,  3.07it/s]


[INFO] (13/70) 02-03-respiratory.csv


2794it [15:46,  2.95it/s]


[INFO] (14/70) 02-04-blood-gas.csv


10768it [59:15,  3.03it/s]


[INFO] (15/70) 02-05-hematology.csv


2742it [15:10,  3.01it/s]


[INFO] (16/70) 02-05_s-hematology.csv


532it [02:43,  3.26it/s]


[INFO] (17/70) 02-06_s-blood-value.csv


532it [03:50,  2.31it/s]


[INFO] (18/70) 02-07-infection.csv


532it [02:47,  3.18it/s]


[INFO] (19/70) 02-08-other-med.csv


2757it [14:51,  3.09it/s]


[INFO] (20/70) 02-09-imaging.csv


525it [02:49,  3.09it/s]


[INFO] (21/70) 02-11-elevated-temperature.csv


445it [02:24,  3.08it/s]


[INFO] (22/70) 02-12-fluctuated-temperature.csv


54it [00:17,  3.02it/s]


[INFO] (23/70) 02-13-bradycardia.csv


210it [01:07,  3.09it/s]


[INFO] (24/70) 02-14-adverse-event.csv


111it [00:41,  2.66it/s]


[INFO] (25/70) 02-15-violation.csv


156it [00:49,  3.18it/s]


[INFO] (26/70) 02-16-interrupt.csv


249it [01:16,  3.24it/s]


[INFO] (27/70) 02-17-discontinue.csv


532it [02:47,  3.18it/s]


[INFO] (28/70) 03-01-post-temperature.csv


2333it [12:31,  3.10it/s]


[INFO] (29/70) 03-01_s-post-temperature.csv


364it [02:01,  2.98it/s]


[INFO] (30/70) 03-02-post-blood-value.csv


364it [01:55,  3.14it/s]


[INFO] (31/70) 03-03-post-imaging.csv


529it [02:52,  3.06it/s]


[INFO] (32/70) 03-04-post-neuro-exam.csv


527it [02:54,  3.02it/s]


[INFO] (33/70) 03-05-mri.csv


915it [06:20,  2.41it/s]


[INFO] (34/70) 03-05_s-mri.csv


477it [02:32,  3.12it/s]


[INFO] (35/70) 04-01-status.csv


532it [02:51,  3.10it/s]


[INFO] (36/70) 04-03-cardiovascular.csv


532it [02:45,  3.21it/s]


[INFO] (37/70) 04-04-respiratory.csv


532it [02:58,  2.97it/s]


[INFO] (38/70) 04-05-hematology.csv


532it [02:45,  3.21it/s]


[INFO] (39/70) 04-06-metabolic.csv


532it [02:50,  3.12it/s]


[INFO] (40/70) 04-07-renal.csv


532it [02:51,  3.10it/s]


[INFO] (41/70) 04-08-gastrointestinal.csv


532it [02:50,  3.12it/s]


[INFO] (42/70) 04-09-skin.csv


532it [02:47,  3.17it/s]


[INFO] (43/70) 04-10-auditory.csv


532it [02:49,  3.14it/s]


[INFO] (44/70) 04-11-surgery.csv


532it [02:44,  3.23it/s]


[INFO] (45/70) 04-12-infection.csv


532it [02:44,  3.23it/s]


[INFO] (46/70) 04-02-neuro-exam.csv


532it [02:59,  2.96it/s]


[INFO] (47/70) 04-13-seizure.csv


532it [03:00,  2.95it/s]


[INFO] (48/70) 04-14-birth-defect.csv


532it [02:49,  3.14it/s]


[INFO] (49/70) 04-15-home-therapy.csv


532it [02:48,  3.15it/s]


[INFO] (50/70) 04-16-wdraw-support.csv


532it [02:46,  3.19it/s]


[INFO] (51/70) 04-17-limit-care.csv


532it [02:45,  3.21it/s]


[INFO] (52/70) 20-00-follow-up.csv


467it [02:24,  3.23it/s]


[INFO] (53/70) 20-01-ses.csv


467it [03:27,  2.25it/s]


[INFO] (54/70) 20-02-medical-history.csv


467it [03:07,  2.49it/s]


[INFO] (55/70) 20-03-medical-exam.csv


467it [03:32,  2.20it/s]


[INFO] (56/70) 20-04-bayley-iii.csv


467it [02:58,  2.62it/s]


[INFO] (57/70) 20-05-gmfcs.csv


467it [02:29,  3.12it/s]


[INFO] (58/70) 20-06-status.csv


467it [02:36,  2.98it/s]


[INFO] (59/70) 20-07-readmission.csv


212it [01:11,  2.98it/s]


[INFO] (60/70) 20-08-lost.csv


467it [02:35,  3.00it/s]


[INFO] (61/70) 30-01-secondary.csv


532it [03:14,  2.73it/s]


[INFO] (62/70) 30-02-outcome.csv


532it [02:59,  2.97it/s]


[INFO] (63/70) 30-03-mri.csv


532it [03:25,  2.59it/s]


[INFO] (64/70) 31-02-total-modified-sarnat.csv


532it [03:27,  2.56it/s]


[INFO] (65/70) 31-03-mri.csv


532it [02:49,  3.15it/s]


[INFO] (66/70) 31-04-pse.csv


532it [02:38,  3.35it/s]


[INFO] (67/70) 31-05-disability-level-death.csv


532it [02:48,  3.16it/s]


[INFO] (68/70) 31-06-emergency-csection.csv


532it [02:40,  3.31it/s]


[INFO] (69/70) 31-07-length-of-stay.csv


532it [02:45,  3.22it/s]
