In [1]:
import pandas as pd
pd.options.mode.copy_on_write = True
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import os
import re
import pydoc

import COMBINE_harmonizer
from COMBINE_harmonizer import cfg

## 00-0. Variables

In [2]:
# Study and data-dictionary sheet processed by this notebook.
study_name, sheet_name = COMBINE_harmonizer.STUDY_OC, COMBINE_harmonizer.SHEET_MAIN

# config.yaml is looked up in the parent directory of the notebook's working directory.
root_dir = '..'
COMBINE_harmonizer.init(root_dir + '/config.yaml')

In [3]:
# Input lives under the configured out_dir (presumably written by an earlier step — confirm);
# this notebook writes to the sibling "-normalized" directory.
input_dir = f"{cfg.config['out_dir']}/out-{study_name}"
out_dir = f"{input_dir}-normalized"
data_dict_filename = f"{root_dir}/{COMBINE_harmonizer.DATA_DICTIONARY_EXCEL}"

# exist_ok=True makes the cell safe to re-run.
os.makedirs(out_dir, exist_ok=True)

In [4]:
# Initialise the harmonizer's mappings from the data dictionary for this study
# (the log below prints one "init_mapping" line per mapping it loads).
COMBINE_harmonizer.init_mapping(data_dict_filename, study_name)
# Value map built from the main sheet of the data dictionary; it is passed to every
# normalize_value call in this notebook.
_VALUE_MAP = COMBINE_harmonizer.build_value_map(data_dict_filename, sheet_name)

[INFO] init_mapping (0/125): signOfHIETone
[INFO] init_mapping (1/125): signOfHIELvlOfCons
[INFO] init_mapping (2/125): signOfHIEPosture
[INFO] init_mapping (3/125): signOfHIEMoro
[INFO] init_mapping (4/125): signOfHIESuck
[INFO] init_mapping (5/125): signOfHIERespiratory
[INFO] init_mapping (6/125): signOfHIEHeartRate
[INFO] init_mapping (7/125): signOfHIEPupils
[INFO] init_mapping (8/125): signOfHIESpontaneousActivity
[INFO] init_mapping (9/125): noNeuroExamReason
[INFO] init_mapping (10/125): consentStatus
[INFO] init_mapping (11/125): treatmentAssign
[INFO] init_mapping (12/125): targetTreatmentTemperature
[INFO] init_mapping (13/125): blanketType
[INFO] init_mapping (14/125): encephalopathyLevel
[INFO] init_mapping (15/125): infantAge
[INFO] init_mapping (16/125): infantSex
[INFO] init_mapping (17/125): ethnicity
[INFO] init_mapping (18/125): education
[INFO] init_mapping (19/125): insurance
[INFO] init_mapping (20/125): race
[INFO] init_mapping (21/125): maritalStatus
[INFO] init

build_value_map: (0/1108) variable: center type: center
build_value_map: (1/1108) variable: subjectID type: text
build_value_map: (2/1108) variable: siteID type: text
build_value_map: (3/1108) variable: birthDate type: date
build_value_map: (4/1108) variable: birthNumber type: int
build_value_map: (5/1108) variable: screenComment type: text
build_value_map: (6/1108) variable: coreTempLess32p5CGreaterEq2Hr_e type: bool
build_value_map: (7/1108) variable: coreTempLess33p5CGreater1Hr_e type: bool
build_value_map: (8/1108) variable: coreTempLess34CGreater1Hr_e type: bool
build_value_map: (9/1108) variable: first6HrCoolByClinicalProtocol_e type: bool
build_value_map: (10/1108) variable: chromosomalAbnormality_e type: bool
build_value_map: (11/1108) variable: majorCongenitalAnomaly_e type: bool
build_value_map: (12/1108) variable: birthWeightLessEq1800g_e type: bool
build_value_map: (13/1108) variable: infantUnlikelySurvive_e type: bool
build_value_map: (14/1108) variable: first60MinAllBlood

In [5]:
# Variable-order map built from the same data-dictionary sheet; passed as `order_map`
# to every normalize_value call in this notebook.
_ORDER_MAP = COMBINE_harmonizer.build_variable_order_map(data_dict_filename, sheet_name)

## 00-screening

In [6]:
base_filename = '00-02-screening.csv'

In [7]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/37) normalize_value: column: center
(1/37) normalize_value: column: subjectID
(2/37) normalize_value: column: uniqueID
(3/37) normalize_value: column: siteID
(4/37) normalize_value: column: screenComment
(5/37) normalize_value: column: birthDate
(6/37) normalize_value: column: birthNumber
(7/37) normalize_value: column: coreTempLess33p5CGreater1Hr_e
(8/37) normalize_value: column: first6HrCoolByClinicalProtocol_e
(9/37) normalize_value: column: chromosomalAbnormality_e
(10/37) normalize_value: column: majorCongenitalAnomaly_e
(11/37) normalize_value: column: birthWeightLessEq1800g_e
(12/37) normalize_value: column: infantUnlikelySurvive_e
(13/37) normalize_value: column: first60MinAllBloodGasPHGreater7p15BaseDeficitLess10mEqPerL_e
(14/37) normalize_value: column: first60MinAnyBloodGasPHLessEq7_i
(15/37) normalize_value: column: first60MinAnyBloodGasBaseDeficitGreaterEq16mEqPerL_i
[WARN] unable to bool: val: (**/<class 'str'>)
[WARN] unable to bool: val: (**/<class 'str'>)
(16/37) no

In [8]:
COMBINE_harmonizer.column_info(df)

(0/74) center: (1261/0)
(1/74) center.orig: (1261/0)
(2/74) subjectID: (1261/0)
(3/74) subjectID.orig: (1261/0)
(4/74) uniqueID: (1261/0)
(5/74) uniqueID.orig: (1261/0)
(6/74) siteID: (1261/0)
(7/74) siteID.orig: (1261/0)
(8/74) birthNumber: (1261/0)
(9/74) birthNumber.orig: (1261/0)
(10/74) screenComment: (1261/0)
(11/74) screenComment.orig: (204/1057)
(12/74) coreTempLess32p5CGreaterEq2Hr_e: (1261/0)
(13/74) coreTempLess32p5CGreaterEq2Hr_e.orig: (413/848)
(14/74) coreTempLess33p5CGreater1Hr_e: (1261/0)
(15/74) coreTempLess33p5CGreater1Hr_e.orig: (848/413)
(16/74) first6HrCoolByClinicalProtocol_e: (1261/0)
(17/74) first6HrCoolByClinicalProtocol_e.orig: (1261/0)
(18/74) chromosomalAbnormality_e: (1261/0)
(19/74) chromosomalAbnormality_e.orig: (1261/0)
(20/74) majorCongenitalAnomaly_e: (1261/0)
(21/74) majorCongenitalAnomaly_e.orig: (1261/0)
(22/74) birthWeightLessEq1800g_e: (1261/0)
(23/74) birthWeightLessEq1800g_e.orig: (1261/0)
(24/74) infantUnlikelySurvive_e: (1261/0)
(25/74) infant

## 01-main-screening

In [9]:
base_filename = '01-02-screening.csv'

In [10]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/37) normalize_value: column: center
(1/37) normalize_value: column: subjectID
(2/37) normalize_value: column: uniqueID
(3/37) normalize_value: column: siteID
(4/37) normalize_value: column: screenComment
(5/37) normalize_value: column: birthDate
(6/37) normalize_value: column: birthNumber
(7/37) normalize_value: column: coreTempLess33p5CGreater1Hr_e
(8/37) normalize_value: column: first6HrCoolByClinicalProtocol_e
(9/37) normalize_value: column: chromosomalAbnormality_e
(10/37) normalize_value: column: majorCongenitalAnomaly_e
(11/37) normalize_value: column: birthWeightLessEq1800g_e
(12/37) normalize_value: column: infantUnlikelySurvive_e
(13/37) normalize_value: column: first60MinAllBloodGasPHGreater7p15BaseDeficitLess10mEqPerL_e
(14/37) normalize_value: column: first60MinAnyBloodGasPHLessEq7_i
(15/37) normalize_value: column: first60MinAnyBloodGasBaseDeficitGreaterEq16mEqPerL_i
(16/37) normalize_value: column: historyPerinatalEvent_i
(17/37) normalize_value: column: at10MinApgarLe

In [11]:
COMBINE_harmonizer.column_info(df)

(0/74) center: (364/0)
(1/74) center.orig: (364/0)
(2/74) subjectID: (364/0)
(3/74) subjectID.orig: (364/0)
(4/74) uniqueID: (364/0)
(5/74) uniqueID.orig: (364/0)
(6/74) siteID: (364/0)
(7/74) siteID.orig: (364/0)
(8/74) birthNumber: (364/0)
(9/74) birthNumber.orig: (364/0)
(10/74) screenComment: (364/0)
(11/74) screenComment.orig: (48/316)
(12/74) coreTempLess32p5CGreaterEq2Hr_e: (364/0)
(13/74) coreTempLess32p5CGreaterEq2Hr_e.orig: (95/269)
(14/74) coreTempLess33p5CGreater1Hr_e: (364/0)
(15/74) coreTempLess33p5CGreater1Hr_e.orig: (269/95)
(16/74) first6HrCoolByClinicalProtocol_e: (364/0)
(17/74) first6HrCoolByClinicalProtocol_e.orig: (364/0)
(18/74) chromosomalAbnormality_e: (364/0)
(19/74) chromosomalAbnormality_e.orig: (364/0)
(20/74) majorCongenitalAnomaly_e: (364/0)
(21/74) majorCongenitalAnomaly_e.orig: (364/0)
(22/74) birthWeightLessEq1800g_e: (364/0)
(23/74) birthWeightLessEq1800g_e.orig: (364/0)
(24/74) infantUnlikelySurvive_e: (364/0)
(25/74) infantUnlikelySurvive_e.orig: (3

## 00-12-neuro-exam

In [12]:
base_filename = '00-12-neuro-exam.csv'

In [13]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/19) normalize_value: column: center
(1/19) normalize_value: column: subjectID
(2/19) normalize_value: column: uniqueID
(3/19) normalize_value: column: pre_NeuroExamSeizure
(4/19) normalize_value: column: pre_NeuroExam
(5/19) normalize_value: column: pre_NeuroExamTone
[WARN] unable to float: val (**/<class 'str'>) e: could not convert string to float: '**'
[WARN] unable to get value: sheet_name: signOfHIETone value: ** value_float: ** value_int: **
[WARN] unable to float: val (**/<class 'str'>) e: could not convert string to float: '**'
[WARN] unable to get value: sheet_name: signOfHIETone value: ** value_float: ** value_int: **
(6/19) normalize_value: column: pre_NeuroExamRespiration
(7/19) normalize_value: column: pre_NeuroExamSignModerateSevereHIE3Category
(8/19) normalize_value: column: pre_NeuroExamSedate
(9/19) normalize_value: column: pre_NoNeuroExamReason
(10/19) normalize_value: column: pre_NeuroExamLevelConsciousness
(11/19) normalize_value: column: pre_NeuroExamSpontaneous

In [14]:
COMBINE_harmonizer.column_info(df)

(0/38) center: (1261/0)
(1/38) center.orig: (1261/0)
(2/38) subjectID: (1261/0)
(3/38) subjectID.orig: (1261/0)
(4/38) uniqueID: (1261/0)
(5/38) uniqueID.orig: (1261/0)
(6/38) pre_NeuroExam: (1261/0)
(7/38) pre_NeuroExam.orig: (882/379)
(8/38) pre_NoNeuroExamReason: (1261/0)
(9/38) pre_NoNeuroExamReason.orig: (59/1202)
(10/38) pre_NeuroExamSignModerateSevereHIE3Category: (1261/0)
(11/38) pre_NeuroExamSignModerateSevereHIE3Category.orig: (797/464)
(12/38) pre_NeuroExamLevelConsciousness: (1261/0)
(13/38) pre_NeuroExamLevelConsciousness.orig: (797/464)
(14/38) pre_NeuroExamSpontaneousActivity: (1261/0)
(15/38) pre_NeuroExamSpontaneousActivity.orig: (797/464)
(16/38) pre_NeuroExamPosture: (1261/0)
(17/38) pre_NeuroExamPosture.orig: (791/470)
(18/38) pre_NeuroExamTone: (1261/0)
(19/38) pre_NeuroExamTone.orig: (797/464)
(20/38) pre_NeuroExamSuck: (1261/0)
(21/38) pre_NeuroExamSuck.orig: (786/475)
(22/38) pre_NeuroExamMoro: (1261/0)
(23/38) pre_NeuroExamMoro.orig: (774/487)
(24/38) pre_Neuro

## 01-12-neuro-exam

In [15]:
base_filename = '01-12-neuro-exam.csv'

In [16]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/19) normalize_value: column: center
(1/19) normalize_value: column: subjectID
(2/19) normalize_value: column: uniqueID
(3/19) normalize_value: column: pre_NeuroExamSeizure
(4/19) normalize_value: column: pre_NeuroExam
(5/19) normalize_value: column: pre_NeuroExamTone
(6/19) normalize_value: column: pre_NeuroExamRespiration
(7/19) normalize_value: column: pre_NeuroExamSignModerateSevereHIE3Category
(8/19) normalize_value: column: pre_NeuroExamSedate
(9/19) normalize_value: column: pre_NoNeuroExamReason
(10/19) normalize_value: column: pre_NeuroExamLevelConsciousness
(11/19) normalize_value: column: pre_NeuroExamSpontaneousActivity
(12/19) normalize_value: column: pre_NeuroExamPosture
(13/19) normalize_value: column: pre_NeuroExamSuck
(14/19) normalize_value: column: pre_NeuroExamMoro
(15/19) normalize_value: column: pre_NeuroExamPupils
(16/19) normalize_value: column: pre_NeuroExamHeartRate
(17/19) normalize_value: column: pre_NeuroExamDate
(18/19) normalize_value: column: pre_NeuroE

In [17]:
COMBINE_harmonizer.column_info(df)

(0/38) center: (364/0)
(1/38) center.orig: (364/0)
(2/38) subjectID: (364/0)
(3/38) subjectID.orig: (364/0)
(4/38) uniqueID: (364/0)
(5/38) uniqueID.orig: (364/0)
(6/38) pre_NeuroExam: (364/0)
(7/38) pre_NeuroExam.orig: (364/0)
(8/38) pre_NoNeuroExamReason: (364/0)
(9/38) pre_NoNeuroExamReason.orig: (2/362)
(10/38) pre_NeuroExamSignModerateSevereHIE3Category: (364/0)
(11/38) pre_NeuroExamSignModerateSevereHIE3Category.orig: (362/2)
(12/38) pre_NeuroExamLevelConsciousness: (364/0)
(13/38) pre_NeuroExamLevelConsciousness.orig: (362/2)
(14/38) pre_NeuroExamSpontaneousActivity: (364/0)
(15/38) pre_NeuroExamSpontaneousActivity.orig: (362/2)
(16/38) pre_NeuroExamPosture: (364/0)
(17/38) pre_NeuroExamPosture.orig: (360/4)
(18/38) pre_NeuroExamTone: (364/0)
(19/38) pre_NeuroExamTone.orig: (362/2)
(20/38) pre_NeuroExamSuck: (364/0)
(21/38) pre_NeuroExamSuck.orig: (357/7)
(22/38) pre_NeuroExamMoro: (364/0)
(23/38) pre_NeuroExamMoro.orig: (348/16)
(24/38) pre_NeuroExamPupils: (364/0)
(25/38) pre_

## 01-03-maternal-demographics

In [18]:
base_filename = '01-03-maternal-demographics.csv'

In [19]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/18) normalize_value: column: center
(1/18) normalize_value: column: subjectID
(2/18) normalize_value: column: uniqueID
(3/18) normalize_value: column: motherAge_year
(4/18) normalize_value: column: motherRace
(5/18) normalize_value: column: motherRaceOther1
(6/18) normalize_value: column: motherRaceOther2
(7/18) normalize_value: column: motherRaceOther3
(8/18) normalize_value: column: motherRaceOther4
(9/18) normalize_value: column: motherRaceOther5
(10/18) normalize_value: column: motherRaceOther6
(11/18) normalize_value: column: motherEthnicity
(12/18) normalize_value: column: motherMaritalStatus
(13/18) normalize_value: column: motherEducation
(14/18) normalize_value: column: motherInsurance
(15/18) normalize_value: column: motherEducation2
(16/18) normalize_value: column: motherInsurancePublic
(17/18) normalize_value: column: motherRace2


In [20]:
COMBINE_harmonizer.column_info(df)

(0/36) center: (364/0)
(1/36) center.orig: (364/0)
(2/36) subjectID: (364/0)
(3/36) subjectID.orig: (364/0)
(4/36) uniqueID: (364/0)
(5/36) uniqueID.orig: (364/0)
(6/36) motherAge_year: (364/0)
(7/36) motherAge_year.orig: (364/0)
(8/36) motherRace: (364/0)
(9/36) motherRace.orig: (361/3)
(10/36) motherRaceOther1: (364/0)
(11/36) motherRaceOther1.orig: (1/363)
(12/36) motherRaceOther2: (364/0)
(13/36) motherRaceOther2.orig: (1/363)
(14/36) motherRaceOther3: (364/0)
(15/36) motherRaceOther3.orig: (0/364)
(16/36) motherRaceOther4: (364/0)
(17/36) motherRaceOther4.orig: (0/364)
(18/36) motherRaceOther5: (364/0)
(19/36) motherRaceOther5.orig: (0/364)
(20/36) motherRaceOther6: (364/0)
(21/36) motherRaceOther6.orig: (0/364)
(22/36) motherRace2: (364/0)
(23/36) motherRace2.orig: (358/6)
(24/36) motherEthnicity: (364/0)
(25/36) motherEthnicity.orig: (364/0)
(26/36) motherMaritalStatus: (364/0)
(27/36) motherMaritalStatus.orig: (364/0)
(28/36) motherEducation: (364/0)
(29/36) motherEducation.ori

## 01-04-pregnancy-history

In [21]:
base_filename = '01-04-pregnancy-history.csv'

In [22]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: multipleBirth
(4/12) normalize_value: column: prenatalCare
(5/12) normalize_value: column: hypertensionEclampsia
(6/12) normalize_value: column: antepartumHemorrhage
(7/12) normalize_value: column: thyroidMalfunction
(8/12) normalize_value: column: diabetes
(9/12) normalize_value: column: gravida
(10/12) normalize_value: column: parity
(11/12) normalize_value: column: numFetus


In [23]:
COMBINE_harmonizer.column_info(df)

(0/24) center: (364/0)
(1/24) center.orig: (364/0)
(2/24) subjectID: (364/0)
(3/24) subjectID.orig: (364/0)
(4/24) uniqueID: (364/0)
(5/24) uniqueID.orig: (364/0)
(6/24) gravida: (364/0)
(7/24) gravida.orig: (364/0)
(8/24) parity: (364/0)
(9/24) parity.orig: (364/0)
(10/24) multipleBirth: (364/0)
(11/24) multipleBirth.orig: (364/0)
(12/24) numFetus: (364/0)
(13/24) numFetus.orig: (10/354)
(14/24) prenatalCare: (364/0)
(15/24) prenatalCare.orig: (364/0)
(16/24) hypertensionEclampsia: (364/0)
(17/24) hypertensionEclampsia.orig: (364/0)
(18/24) antepartumHemorrhage: (364/0)
(19/24) antepartumHemorrhage.orig: (364/0)
(20/24) thyroidMalfunction: (364/0)
(21/24) thyroidMalfunction.orig: (364/0)
(22/24) diabetes: (364/0)
(23/24) diabetes.orig: (364/0)


## 01-05-labor-delivery

In [24]:
base_filename = '01-05-labor-delivery.csv'

In [25]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/32) normalize_value: column: center
(1/32) normalize_value: column: subjectID
(2/32) normalize_value: column: uniqueID
(3/32) normalize_value: column: fetalDecelerate
(4/32) normalize_value: column: cordMishap
(5/32) normalize_value: column: uterineRupture
(6/32) normalize_value: column: shoulderDystocia
(7/32) normalize_value: column: placentalProblem
(8/32) normalize_value: column: maternalHemorrhage
(9/32) normalize_value: column: maternalTrauma
(10/32) normalize_value: column: maternalCardioRespiratoryArrest
(11/32) normalize_value: column: maternalSeizure
(12/32) normalize_value: column: pyrexiaGreater37p6C
(13/32) normalize_value: column: chorioamnionitis
(14/32) normalize_value: column: placentalPathologyPerformed
(15/32) normalize_value: column: histologicChorioamionitis
(16/32) normalize_value: column: laborAntibiotics
(17/32) normalize_value: column: ruptureBeforeDelivery
(18/32) normalize_value: column: ruptureGreater18Hr
(19/32) normalize_value: column: maternalAdmission

In [26]:
df.columns

Index(['center', 'center.orig', 'subjectID', 'subjectID.orig', 'uniqueID',
       'uniqueID.orig', 'maternalAdmissionDate', 'maternalAdmissionDate.orig',
       'maternalAdmissionTime', 'maternalAdmissionTime.orig', 'ruptureDate',
       'ruptureDate.orig', 'ruptureTime', 'ruptureTime.orig',
       'ruptureGreater18Hr', 'ruptureGreater18Hr.orig',
       'ruptureBeforeDelivery', 'ruptureBeforeDelivery.orig', 'deliveryMode',
       'deliveryMode.orig', 'fetalDecelerate', 'fetalDecelerate.orig',
       'cordMishap', 'cordMishap.orig', 'uterineRupture',
       'uterineRupture.orig', 'shoulderDystocia', 'shoulderDystocia.orig',
       'placentalProblem', 'placentalProblem.orig', 'maternalHemorrhage',
       'maternalHemorrhage.orig', 'maternalTrauma', 'maternalTrauma.orig',
       'maternalCardioRespiratoryArrest',
       'maternalCardioRespiratoryArrest.orig', 'maternalSeizure',
       'maternalSeizure.orig', 'perinatalSentinelEvent',
       'perinatalSentinelEvent.orig', 'pyrexiaGreater37

In [27]:
COMBINE_harmonizer.column_info(df)

(0/64) center: (364/0)
(1/64) center.orig: (364/0)
(2/64) subjectID: (364/0)
(3/64) subjectID.orig: (364/0)
(4/64) uniqueID: (364/0)
(5/64) uniqueID.orig: (364/0)
(6/64) maternalAdmissionDate: (364/0)
(7/64) maternalAdmissionDate.orig: (340/24)
(8/64) maternalAdmissionTime: (364/0)
(9/64) maternalAdmissionTime.orig: (288/76)
(10/64) ruptureDate: (364/0)
(11/64) ruptureDate.orig: (245/119)
(12/64) ruptureTime: (364/0)
(13/64) ruptureTime.orig: (220/144)
(14/64) ruptureGreater18Hr: (364/0)
(15/64) ruptureGreater18Hr.orig: (47/317)
(16/64) ruptureBeforeDelivery: (364/0)
(17/64) ruptureBeforeDelivery.orig: (364/0)
(18/64) deliveryMode: (364/0)
(19/64) deliveryMode.orig: (364/0)
(20/64) fetalDecelerate: (364/0)
(21/64) fetalDecelerate.orig: (364/0)
(22/64) cordMishap: (364/0)
(23/64) cordMishap.orig: (364/0)
(24/64) uterineRupture: (364/0)
(25/64) uterineRupture.orig: (364/0)
(26/64) shoulderDystocia: (364/0)
(27/64) shoulderDystocia.orig: (364/0)
(28/64) placentalProblem: (364/0)
(29/64) p

## 01-06-birth

In [28]:
base_filename = '01-06-birth.csv'

In [29]:
# dtype='O' loads every column as object so normalize_value receives the values as exported.
filename = f"{input_dir}{os.sep}{base_filename}"
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# XXX hack for _is_delivery_resuscitation
def _is_delivery_resuscitation(x):
    for idx in ['deliveryOxygen', 'deliveryBaggingAndMask', 'deliveryChestCompression', 'deliveryIntubation', 'deliveryDrug']:
        if x[idx] == 'TRUE':
            return True

    return False

# Derive deliveryResuscitation: 'TRUE' when any delivery intervention column is 'TRUE',
# otherwise 'FALSE' -- the same rule as _is_delivery_resuscitation, evaluated column-wise.
# The column-wise comparison always yields a boolean Series aligned with df (including
# when df has no rows) and avoids one Python call per row.
resuscitation_columns = [
    'deliveryOxygen',
    'deliveryBaggingAndMask',
    'deliveryChestCompression',
    'deliveryIntubation',
    'deliveryDrug',
]

df.loc[:, 'deliveryResuscitation'] = 'FALSE'
is_delivery_resuscitation = df[resuscitation_columns].eq('TRUE').any(axis=1)
df.loc[is_delivery_resuscitation, 'deliveryResuscitation'] = 'TRUE'

# The derived column is added after normalize_value, so it is written without a
# matching ".orig" column.
out_filename = os.sep.join([out_dir, base_filename])
df.to_csv(out_filename, index=False)

(0/58) normalize_value: column: center
(1/58) normalize_value: column: subjectID
(2/58) normalize_value: column: uniqueID
(3/58) normalize_value: column: birthDate
(4/58) normalize_value: column: encephalopathyLevel
(5/58) normalize_value: column: infantOutborn
(6/58) normalize_value: column: outbornInHospital
(7/58) normalize_value: column: outbornOutHospital
(8/58) normalize_value: column: deliveryOxygen
(9/58) normalize_value: column: deliveryBaggingAndMask
(10/58) normalize_value: column: deliveryChestCompression
(11/58) normalize_value: column: deliveryIntubation
(12/58) normalize_value: column: deliveryDrug
(13/58) normalize_value: column: at10MinContinueResuscitation
(14/58) normalize_value: column: at10MinOxygen
(15/58) normalize_value: column: at10MinBaggingAndMask
(16/58) normalize_value: column: at10MinChestCompression
(17/58) normalize_value: column: at10MinIntubation
(18/58) normalize_value: column: at10MinDrug
(19/58) normalize_value: column: cordBloodGas
(20/58) normaliz

In [30]:
COMBINE_harmonizer.column_info(df)

(0/117) center: (364/0)
(1/117) center.orig: (364/0)
(2/117) subjectID: (364/0)
(3/117) subjectID.orig: (364/0)
(4/117) uniqueID: (364/0)
(5/117) uniqueID.orig: (364/0)
(6/117) encephalopathyLevel: (364/0)
(7/117) encephalopathyLevel.orig: (364/0)
(8/117) birthTime: (364/0)
(9/117) birthTime.orig: (364/0)
(10/117) birthWeight_g: (364/0)
(11/117) birthWeight_g.orig: (363/1)
(12/117) birthLength_cm: (364/0)
(13/117) birthLength_cm.orig: (340/24)
(14/117) birthHeadCircumference_cm: (364/0)
(15/117) birthHeadCircumference_cm.orig: (344/20)
(16/117) birthGestationalAge_week: (364/0)
(17/117) birthGestationalAge_week.orig: (363/1)
(18/117) infantSex: (364/0)
(19/117) infantSex.orig: (364/0)
(20/117) maleSex: (364/0)
(21/117) maleSex.orig: (364/0)
(22/117) infantOutborn: (364/0)
(23/117) infantOutborn.orig: (364/0)
(24/117) outbornInHospital: (364/0)
(25/117) outbornInHospital.orig: (234/130)
(26/117) outbornOutHospital: (364/0)
(27/117) outbornOutHospital.orig: (234/130)
(28/117) neonateAdmi

## 01-07-pre-temperature

In [31]:
base_filename = '01-07-pre-temperature.csv'

In [32]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/25) normalize_value: column: center
(1/25) normalize_value: column: subjectID
(2/25) normalize_value: column: uniqueID
(3/25) normalize_value: column: pre_CoolInitiate
(4/25) normalize_value: column: pre_CoolbyIceGelPack
(5/25) normalize_value: column: pre_CoolPassively
(6/25) normalize_value: column: pre_CoolClinically
(7/25) normalize_value: column: pre_AfterOvershootReach33p5C
(8/25) normalize_value: column: targetTreatmentTemperature_C
(9/25) normalize_value: column: pre_CoolInitiateDate
(10/25) normalize_value: column: pre_CoolInitiateTime
(11/25) normalize_value: column: pre_AfterOvershootReach33p5CDate
(12/25) normalize_value: column: pre_AfterOvershootReach33p5CTime
(13/25) normalize_value: column: pre_TemperatureMinDate
(14/25) normalize_value: column: pre_TemperatureMinTime
(15/25) normalize_value: column: pre_SkinTemperatureMin_C
(16/25) normalize_value: column: pre_AxillaryTemperatureMin_C
(17/25) normalize_value: column: pre_EsophagealTemperatureMin_C
(18/25) normalize_

In [33]:
COMBINE_harmonizer.column_info(df)

(0/50) center: (364/0)
(1/50) center.orig: (364/0)
(2/50) subjectID: (364/0)
(3/50) subjectID.orig: (364/0)
(4/50) uniqueID: (364/0)
(5/50) uniqueID.orig: (364/0)
(6/50) targetTreatmentTemperature_C: (364/0)
(7/50) targetTreatmentTemperature_C.orig: (364/0)
(8/50) pre_CoolInitiate: (364/0)
(9/50) pre_CoolInitiate.orig: (364/0)
(10/50) pre_CoolbyIceGelPack: (364/0)
(11/50) pre_CoolbyIceGelPack.orig: (252/112)
(12/50) pre_CoolPassively: (364/0)
(13/50) pre_CoolPassively.orig: (252/112)
(14/50) pre_CoolClinically: (364/0)
(15/50) pre_CoolClinically.orig: (252/112)
(16/50) pre_CoolInitiateDate: (364/0)
(17/50) pre_CoolInitiateDate.orig: (201/163)
(18/50) pre_CoolInitiateTime: (364/0)
(19/50) pre_CoolInitiateTime.orig: (200/164)
(20/50) pre_AfterOvershootReach33p5C: (364/0)
(21/50) pre_AfterOvershootReach33p5C.orig: (364/0)
(22/50) pre_AfterOvershootReach33p5CDate: (364/0)
(23/50) pre_AfterOvershootReach33p5CDate.orig: (70/294)
(24/50) pre_AfterOvershootReach33p5CTime: (364/0)
(25/50) pre_A

## 01-08-pre-cardiovascular

In [34]:
base_filename = '01-08-pre-cardiovascular.csv'

In [35]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: pre_CardioVolumeExpand
(4/12) normalize_value: column: pre_CardioInotropicAgent
(5/12) normalize_value: column: pre_CardioBloodTransfusion
(6/12) normalize_value: column: pre_CardioPlatelets
(7/12) normalize_value: column: pre_CardioDate
(8/12) normalize_value: column: pre_CardioTime
(9/12) normalize_value: column: pre_CardioSystolicBloodPressure_mmHg
(10/12) normalize_value: column: pre_CardioDiastolicBloodPressure_mmHg
(11/12) normalize_value: column: pre_CardioHeartRate_BPM


In [36]:
COMBINE_harmonizer.column_info(df)

(0/24) center: (357/0)
(1/24) center.orig: (357/0)
(2/24) subjectID: (357/0)
(3/24) subjectID.orig: (357/0)
(4/24) uniqueID: (357/0)
(5/24) uniqueID.orig: (357/0)
(6/24) pre_CardioDate: (357/0)
(7/24) pre_CardioDate.orig: (357/0)
(8/24) pre_CardioTime: (357/0)
(9/24) pre_CardioTime.orig: (355/2)
(10/24) pre_CardioSystolicBloodPressure_mmHg: (357/0)
(11/24) pre_CardioSystolicBloodPressure_mmHg.orig: (350/7)
(12/24) pre_CardioDiastolicBloodPressure_mmHg: (357/0)
(13/24) pre_CardioDiastolicBloodPressure_mmHg.orig: (350/7)
(14/24) pre_CardioHeartRate_BPM: (357/0)
(15/24) pre_CardioHeartRate_BPM.orig: (351/6)
(16/24) pre_CardioVolumeExpand: (357/0)
(17/24) pre_CardioVolumeExpand.orig: (357/0)
(18/24) pre_CardioInotropicAgent: (357/0)
(19/24) pre_CardioInotropicAgent.orig: (357/0)
(20/24) pre_CardioBloodTransfusion: (357/0)
(21/24) pre_CardioBloodTransfusion.orig: (357/0)
(22/24) pre_CardioPlatelets: (357/0)
(23/24) pre_CardioPlatelets.orig: (357/0)


## 01-14-pre-respiratory

In [37]:
# base_filename = '01-14-pre-respiratory.csv'

In [38]:
# Disabled cell: the code is wrapped in a string literal, so nothing here runs and the
# cell merely echoes the string as its output.
# NOTE(review): the disabled code calls HIE_data.normalize_value, but HIE_data is not
# imported in this notebook's import cell; the active cells use
# COMBINE_harmonizer.normalize_value. The matching base_filename assignment is also
# commented out, so re-enabling this needs both fixed.
'''
filename = os.sep.join([input_dir, base_filename])
df = pd.read_csv(filename, dtype='O')
df = HIE_data.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

out_filename = os.sep.join([out_dir, base_filename])
df.to_csv(out_filename, index=False)
'''

"\nfilename = os.sep.join([input_dir, base_filename])\ndf = pd.read_csv(filename, dtype='O')\ndf = HIE_data.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)\n\nout_filename = os.sep.join([out_dir, base_filename])\ndf.to_csv(out_filename, index=False)\n"

In [39]:
# COMBINE_harmonizer.column_info(df)

## 01-15-pre-blood-gas

In [40]:
# base_filename = '01-15-pre-blood-gas.csv'

In [41]:
# Disabled cell: the code is wrapped in a string literal, so nothing here runs and the
# cell merely echoes the string as its output.
# NOTE(review): the disabled code calls HIE_data.normalize_value, but HIE_data is not
# imported in this notebook's import cell; the active cells use
# COMBINE_harmonizer.normalize_value. The matching base_filename assignment is also
# commented out, so re-enabling this needs both fixed.
'''
filename = os.sep.join([input_dir, base_filename])
df = pd.read_csv(filename, dtype='O')
df = HIE_data.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

out_filename = os.sep.join([out_dir, base_filename])
df.to_csv(out_filename, index=False)
'''


"\nfilename = os.sep.join([input_dir, base_filename])\ndf = pd.read_csv(filename, dtype='O')\ndf = HIE_data.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)\n\nout_filename = os.sep.join([out_dir, base_filename])\ndf.to_csv(out_filename, index=False)\n"

In [42]:
# COMBINE_harmonizer.column_info(df)

## 01-16-pre-hematology

In [43]:
# base_filename = '01-16-pre-hematology.csv'

In [44]:
# Disabled cell: the code is wrapped in a string literal, so nothing here runs and the
# cell merely echoes the string as its output.
# NOTE(review): the disabled code calls HIE_data.normalize_value, but HIE_data is not
# imported in this notebook's import cell; the active cells use
# COMBINE_harmonizer.normalize_value. The matching base_filename assignment is also
# commented out, so re-enabling this needs both fixed.
# NOTE(review): the CSV is read with dtype='O'; if normalize_value leaves the CBC columns
# as strings, `df[column] * 1000` would repeat each string instead of scaling the
# number — confirm the column dtype (or convert to numeric) before re-enabling.
'''
filename = os.sep.join([input_dir, base_filename])
df = pd.read_csv(filename, dtype='O')
df = HIE_data.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# XXX hematology in OC is counted as kcPermuL
cbc_columns = [
    'hematologyWBC_cPermuL',
    'hematologyPolymorphNeutrophils_cPermuL',
    'hematologyMonocytes_cPermuL',
    'hematologyLymphocytes_cPermuL',
    'hematologyPlatelet_cPermuL'
]
for column in cbc_columns:
    df.loc[:, column] = df[column] * 1000

out_filename = os.sep.join([out_dir, base_filename])
df.to_csv(out_filename, index=False)
'''

"\nfilename = os.sep.join([input_dir, base_filename])\ndf = pd.read_csv(filename, dtype='O')\ndf = HIE_data.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)\n\n# XXX hematology in OC is counted as kcPermuL\ncbc_columns = [\n    'hematologyWBC_cPermuL',\n    'hematologyPolymorphNeutrophils_cPermuL',\n    'hematologyMonocytes_cPermuL',\n    'hematologyLymphocytes_cPermuL',\n    'hematologyPlatelet_cPermuL'\n]\nfor column in cbc_columns:\n    df.loc[:, column] = df[column] * 1000\n\nout_filename = os.sep.join([out_dir, base_filename])\ndf.to_csv(out_filename, index=False)\n"

In [45]:
# COMBINE_harmonizer.column_info(df)

## 01-09-pre-infection

In [46]:
base_filename = '01-09-pre-infection.csv'

In [47]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/7) normalize_value: column: center


(1/7) normalize_value: column: subjectID
(2/7) normalize_value: column: uniqueID
(3/7) normalize_value: column: pre_Antibiotics
(4/7) normalize_value: column: pre_AntibioticsCode1
(5/7) normalize_value: column: pre_AntibioticsCode2
(6/7) normalize_value: column: pre_AntibioticsCode3


In [48]:
COMBINE_harmonizer.column_info(df)

(0/14) center: (364/0)
(1/14) center.orig: (364/0)
(2/14) subjectID: (364/0)
(3/14) subjectID.orig: (364/0)
(4/14) uniqueID: (364/0)
(5/14) uniqueID.orig: (364/0)
(6/14) pre_Antibiotics: (364/0)
(7/14) pre_Antibiotics.orig: (364/0)
(8/14) pre_AntibioticsCode1: (364/0)
(9/14) pre_AntibioticsCode1.orig: (308/56)
(10/14) pre_AntibioticsCode2: (364/0)
(11/14) pre_AntibioticsCode2.orig: (273/91)
(12/14) pre_AntibioticsCode3: (364/0)
(13/14) pre_AntibioticsCode3.orig: (8/356)


## 01-10-pre-other-med

In [49]:
base_filename = '01-10-pre-other-med.csv'

In [50]:
# Input and output share the base name; only the directory differs.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' loads every column as object so normalize_value receives the values as exported.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/17) normalize_value: column: center
(1/17) normalize_value: column: subjectID
(2/17) normalize_value: column: uniqueID
(3/17) normalize_value: column: pre_Anticonvulsants1
(4/17) normalize_value: column: pre_Anticonvulsants2
(5/17) normalize_value: column: pre_Anticonvulsants3
(6/17) normalize_value: column: pre_AnalgesicsSedatives1
(7/17) normalize_value: column: pre_AnalgesicsSedatives2
(8/17) normalize_value: column: pre_AnalgesicsSedatives3
(9/17) normalize_value: column: pre_Antipyretics1
(10/17) normalize_value: column: pre_Antipyretics2
(11/17) normalize_value: column: pre_Antipyretics3
(12/17) normalize_value: column: pre_Paralytics1
(13/17) normalize_value: column: pre_Paralytics2
(14/17) normalize_value: column: pre_Paralytics3
(15/17) normalize_value: column: pre_OtherMedFluidIntake_ccPerKg
(16/17) normalize_value: column: pre_OtherMedUrineOutput_ccPerKg


In [51]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/34) center: (350/0)
(1/34) center.orig: (350/0)
(2/34) subjectID: (350/0)
(3/34) subjectID.orig: (350/0)
(4/34) uniqueID: (350/0)
(5/34) uniqueID.orig: (350/0)
(6/34) pre_Anticonvulsants1: (350/0)
(7/34) pre_Anticonvulsants1.orig: (85/265)
(8/34) pre_Anticonvulsants2: (350/0)
(9/34) pre_Anticonvulsants2.orig: (13/337)
(10/34) pre_Anticonvulsants3: (350/0)
(11/34) pre_Anticonvulsants3.orig: (0/350)
(12/34) pre_AnalgesicsSedatives1: (350/0)
(13/34) pre_AnalgesicsSedatives1.orig: (74/276)
(14/34) pre_AnalgesicsSedatives2: (350/0)
(15/34) pre_AnalgesicsSedatives2.orig: (19/331)
(16/34) pre_AnalgesicsSedatives3: (350/0)
(17/34) pre_AnalgesicsSedatives3.orig: (1/349)
(18/34) pre_Antipyretics1: (350/0)
(19/34) pre_Antipyretics1.orig: (1/349)
(20/34) pre_Antipyretics2: (350/0)
(21/34) pre_Antipyretics2.orig: (0/350)
(22/34) pre_Antipyretics3: (350/0)
(23/34) pre_Antipyretics3.orig: (0/350)
(24/34) pre_Paralytics1: (350/0)
(25/34) pre_Paralytics1.orig: (2/348)
(26/34) pre_Paralytics2: (350/0

## 01-11-pre-imaging

In [52]:
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '01-11-pre-imaging.csv'

In [53]:
# Read every column as object dtype, then normalize values against the data dictionary map.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, order_map=_ORDER_MAP
)

# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/39) normalize_value: column: center
(1/39) normalize_value: column: subjectID
(2/39) normalize_value: column: uniqueID
(3/39) normalize_value: column: pre_HeadSonogram
(4/39) normalize_value: column: pre_HeadSonogramResultText
(5/39) normalize_value: column: pre_HeadCT
(6/39) normalize_value: column: pre_HeadCTResultText
(7/39) normalize_value: column: pre_BrainMRI
(8/39) normalize_value: column: pre_BrainMRIResultText
(9/39) normalize_value: column: pre_HeadSonogramDate
(10/39) normalize_value: column: pre_HeadSonogramTime
(11/39) normalize_value: column: pre_HeadSonogramResult1
(12/39) normalize_value: column: pre_HeadSonogramResult2
(13/39) normalize_value: column: pre_HeadSonogramResult3
(14/39) normalize_value: column: pre_HeadSonogramResult4
(15/39) normalize_value: column: pre_HeadSonogramResult5
(16/39) normalize_value: column: pre_HeadSonogramResult6
(17/39) normalize_value: column: pre_HeadSonogramResult7
(18/39) normalize_value: column: pre_HeadSonogramResult8
(19/39) nor

In [54]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/78) center: (350/0)
(1/78) center.orig: (350/0)
(2/78) subjectID: (350/0)
(3/78) subjectID.orig: (350/0)
(4/78) uniqueID: (350/0)
(5/78) uniqueID.orig: (350/0)
(6/78) pre_HeadSonogram: (350/0)
(7/78) pre_HeadSonogram.orig: (350/0)
(8/78) pre_HeadSonogramDate: (350/0)
(9/78) pre_HeadSonogramDate.orig: (17/333)
(10/78) pre_HeadSonogramTime: (350/0)
(11/78) pre_HeadSonogramTime.orig: (17/333)
(12/78) pre_HeadSonogramResult1: (350/0)
(13/78) pre_HeadSonogramResult1.orig: (16/334)
(14/78) pre_HeadSonogramResult2: (350/0)
(15/78) pre_HeadSonogramResult2.orig: (1/349)
(16/78) pre_HeadSonogramResult3: (350/0)
(17/78) pre_HeadSonogramResult3.orig: (0/350)
(18/78) pre_HeadSonogramResult4: (350/0)
(19/78) pre_HeadSonogramResult4.orig: (0/350)
(20/78) pre_HeadSonogramResult5: (350/0)
(21/78) pre_HeadSonogramResult5.orig: (0/350)
(22/78) pre_HeadSonogramResult6: (350/0)
(23/78) pre_HeadSonogramResult6.orig: (0/350)
(24/78) pre_HeadSonogramResult7: (350/0)
(25/78) pre_HeadSonogramResult7.orig: (0

## 02-01-temperature

In [55]:
# Column(s) passed to normalize_value as the flatten ids for this table.
_FLATTEN_IDS = ['temperatureTimeSlot_min']
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '02-01-temperature.csv'

In [56]:
# Read every column as object dtype so the raw time-slot text can be converted below.
filename = os.sep.join([input_dir, base_filename])
df = pd.read_csv(filename, dtype='O')

# The raw slot value is multiplied by 60 (presumably hours -> minutes) and stored
# back as an integer string. round() is used instead of int() because int()
# truncates: a product that lands just below a whole number due to binary
# floating point (e.g. 2.05 * 60 == 122.99999999999999) would lose a minute.
# A missing value still raises ValueError here, exactly as before.
df['temperatureTimeSlot_min'] = df['temperatureTimeSlot_min'].apply(lambda x: str(round(float(x) * 60)))

# Normalize values and build the flatten index from the converted time-slot column.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP)


(0/14) normalize_value: column: center
(1/14) normalize_value: column: subjectID
(2/14) normalize_value: column: uniqueID
(3/14) normalize_value: column: alterationSkinIntegrity
(4/14) normalize_value: column: shiver
(5/14) normalize_value: column: temperatureTimeSlot_min
(6/14) normalize_value: column: temperatureTimeSlotNoForm


(7/14) normalize_value: column: temperatureDate
(8/14) normalize_value: column: temperatureTime
(9/14) normalize_value: column: skinTemperature_C
(10/14) normalize_value: column: axillaryTemperature_C
(11/14) normalize_value: column: esophagealTemperature_C
(12/14) normalize_value: column: blanketTemperature_C
(13/14) normalize_value: column: servoSetTemperature_C


flatten_index: flatten_ids: ['temperatureTimeSlot_min'] unique_id_map: {np.int64(0): np.int64(0), np.int64(15): np.int64(15), np.int64(30): np.int64(30), np.int64(45): np.int64(45), np.int64(60): np.int64(60), np.int64(75): np.int64(75), np.int64(90): np.int64(90), np.int64(105): np.int64(105), np.int64(120): np.int64(120), np.int64(135): np.int64(135), np.int64(150): np.int64(150), np.int64(165): np.int64(165), np.int64(180): np.int64(180), np.int64(195): np.int64(195), np.int64(210): np.int64(210), np.int64(225): np.int64(225), np.int64(240): np.int64(240), np.int64(300): np.int64(300), np.int64(360): np.int64(360), np.int64(420): np.int64(420), np.int64(480): np.int64(480), np.int64(540): np.int64(540), np.int64(600): np.int64(600), np.int64(660): np.int64(660), np.int64(720): np.int64(720), np.int64(960): np.int64(960), np.int64(1200): np.int64(1200), np.int64(1440): np.int64(1440), np.int64(1680): np.int64(1680), np.int64(1920): np.int64(1920), np.int64(2160): np.int64(2160), np.i

In [57]:
# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

In [58]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/29) center: (18275/0)
(1/29) center.orig: (18275/0)
(2/29) subjectID: (18275/0)
(3/29) subjectID.orig: (18275/0)
(4/29) uniqueID: (18275/0)
(5/29) uniqueID.orig: (18275/0)
(6/29) _flatten_index: (18275/0)
(7/29) temperatureTimeSlot_min: (18275/0)
(8/29) temperatureTimeSlot_min.orig: (18275/0)
(9/29) temperatureTimeSlotNoForm: (18275/0)
(10/29) temperatureTimeSlotNoForm.orig: (0/18275)
(11/29) temperatureDate: (18275/0)
(12/29) temperatureDate.orig: (18226/49)
(13/29) temperatureTime: (18275/0)
(14/29) temperatureTime.orig: (18168/107)
(15/29) skinTemperature_C: (18275/0)
(16/29) skinTemperature_C.orig: (17237/1038)
(17/29) axillaryTemperature_C: (18275/0)
(18/29) axillaryTemperature_C.orig: (250/18025)
(19/29) esophagealTemperature_C: (18275/0)
(20/29) esophagealTemperature_C.orig: (17723/552)
(21/29) blanketTemperature_C: (18275/0)
(22/29) blanketTemperature_C.orig: (17242/1033)
(23/29) servoSetTemperature_C: (18275/0)
(24/29) servoSetTemperature_C.orig: (17677/598)
(25/29) alterat

## 02-02-cardiovascular

In [59]:
# Column(s) passed to normalize_value as the flatten ids for this table.
_FLATTEN_IDS = ['cardioTimeSlot_min']
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '02-02-cardiovascular.csv'

In [60]:
# Read every column as object dtype, then normalize and flatten on the time-slot column.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP
)

(0/13) normalize_value: column: center
(1/13) normalize_value: column: subjectID
(2/13) normalize_value: column: uniqueID
(3/13) normalize_value: column: cardioVolumeExpand
(4/13) normalize_value: column: cardioInotropicAgent
(5/13) normalize_value: column: cardioBloodTransfusion
(6/13) normalize_value: column: cardioPlatelets
(7/13) normalize_value: column: cardioTimeSlot_min
(8/13) normalize_value: column: cardioDate
(9/13) normalize_value: column: cardioTime
(10/13) normalize_value: column: cardioSystolicBloodPressure_mmHg
(11/13) normalize_value: column: cardioDiastolicBloodPressure_mmHg
(12/13) normalize_value: column: cardioHeartRate_BPM
flatten_index: flatten_ids: ['cardioTimeSlot_min'] unique_id_map: {np.int64(1): np.int64(1), np.int64(4): np.int64(4), np.int64(8): np.int64(8), np.int64(12): np.int64(12), np.int64(16): np.int64(16), np.int64(20): np.int64(20), np.int64(24): np.int64(24), np.int64(28): np.int64(28), np.int64(32): np.int64(32), np.int64(36): np.int64(36), np.int6

In [61]:
# XXX hack: rows whose _flatten_index equals 1 are re-labelled as 0.
is_flatten_index_1 = df[COMBINE_harmonizer.FLATTEN_INDEX].eq(1)
df.loc[is_flatten_index_1, COMBINE_harmonizer.FLATTEN_INDEX] = 0

# Sanity check: show the distinct index values and the column dtype after the remap.
print(f"_flatten_index: ({df[COMBINE_harmonizer.FLATTEN_INDEX].unique()} / {df[COMBINE_harmonizer.FLATTEN_INDEX].dtype})")

_flatten_index: ([  0   4   8  12  16  20  24  28  32  36  40  44  48  52  56  60  64  68
  72  76  80  84  88  92  96 100 104 108 112 116 120 124 128 132 136 140
 144 148 152 156 160 164 168 172 176 180 184 188 192 196 200 204 208 212
 216 220 224 228 232 236 240 244 248 252 256 260 264 268 272 276 280 284
 288] / int64)


In [62]:
# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

In [63]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/27) center: (9573/0)
(1/27) center.orig: (9573/0)
(2/27) subjectID: (9573/0)
(3/27) subjectID.orig: (9573/0)
(4/27) uniqueID: (9573/0)
(5/27) uniqueID.orig: (9573/0)
(6/27) _flatten_index: (9573/0)
(7/27) cardioTimeSlot_min: (9573/0)
(8/27) cardioTimeSlot_min.orig: (9573/0)
(9/27) cardioDate: (9573/0)
(10/27) cardioDate.orig: (9565/8)
(11/27) cardioTime: (9573/0)
(12/27) cardioTime.orig: (9551/22)
(13/27) cardioSystolicBloodPressure_mmHg: (9573/0)
(14/27) cardioSystolicBloodPressure_mmHg.orig: (9087/486)
(15/27) cardioDiastolicBloodPressure_mmHg: (9573/0)
(16/27) cardioDiastolicBloodPressure_mmHg.orig: (9087/486)
(17/27) cardioHeartRate_BPM: (9573/0)
(18/27) cardioHeartRate_BPM.orig: (9460/113)
(19/27) cardioVolumeExpand: (9573/0)
(20/27) cardioVolumeExpand.orig: (9565/8)
(21/27) cardioInotropicAgent: (9573/0)
(22/27) cardioInotropicAgent.orig: (9565/8)
(23/27) cardioBloodTransfusion: (9573/0)
(24/27) cardioBloodTransfusion.orig: (9565/8)
(25/27) cardioPlatelets: (9573/0)
(26/27) ca

## 02-03-respiratory

In [64]:
# Column(s) passed to normalize_value as the flatten ids for this table.
_FLATTEN_IDS = ['respiratoryTimeSlot_min']
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '02-03-respiratory.csv'

In [65]:
# Read every column as object dtype, then normalize and flatten on the time-slot column.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP
)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: respiratoryTimeSlot_min
(4/12) normalize_value: column: respiratoryDate
(5/12) normalize_value: column: respiratoryTime
(6/12) normalize_value: column: respiratorySupportType
(7/12) normalize_value: column: respiratoryFiO2
(8/12) normalize_value: column: respiratoryRate_Hz
(9/12) normalize_value: column: respiratoryPIP_cmH2O
(10/12) normalize_value: column: respiratoryMAP_cmH2O
(11/12) normalize_value: column: respiratoryPEEP_cmH2O
flatten_index: flatten_ids: ['respiratoryTimeSlot_min'] unique_id_map: {np.int64(1): np.int64(1), np.int64(24): np.int64(24), np.int64(48): np.int64(48), np.int64(72): np.int64(72), np.int64(96): np.int64(96), np.int64(120): np.int64(120), np.int64(144): np.int64(144), np.int64(168): np.int64(168), np.int64(192): np.int64(192), np.int64(216): np.int64(216)} the_type: int64


In [66]:
# XXX hack: rows whose _flatten_index equals 1 are re-labelled as 0.
is_flatten_index_1 = df[COMBINE_harmonizer.FLATTEN_INDEX].eq(1)
df.loc[is_flatten_index_1, COMBINE_harmonizer.FLATTEN_INDEX] = 0

# Sanity check: show the distinct index values and the column dtype after the remap.
print(f"_flatten_index: ({df[COMBINE_harmonizer.FLATTEN_INDEX].unique()} / {df[COMBINE_harmonizer.FLATTEN_INDEX].dtype})")

_flatten_index: ([  0  24  48  72  96 120 144 168 192 216] / int64)


In [67]:
# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

## 02-04-blood-gas

In [68]:
# Column(s) passed to normalize_value as the flatten ids for this table.
_FLATTEN_IDS = ['bloodGasTimeSlot_min']
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '02-04-blood-gas.csv'

In [69]:
# Read every column as object dtype, then normalize and flatten on the time-slot column.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP
)

(0/17) normalize_value: column: center
(1/17) normalize_value: column: subjectID
(2/17) normalize_value: column: uniqueID
(3/17) normalize_value: column: bloodGasSrc
[WARN] unable to get value: sheet_name: bloodGasSrc value: * value_float:  value_int: 
[WARN] unable to get value: sheet_name: bloodGasSrc value: * value_float:  value_int: 
[WARN] unable to get value: sheet_name: bloodGasSrc value: * value_float:  value_int: 
[WARN] unable to get value: sheet_name: bloodGasSrc value: * value_float:  value_int: 
[WARN] unable to get value: sheet_name: bloodGasSrc value: * value_float:  value_int: 
[WARN] unable to get value: sheet_name: bloodGasSrc value: * value_float:  value_int: 
[WARN] unable to get value: sheet_name: bloodGasSrc value: * value_float:  value_int: 
[WARN] unable to get value: sheet_name: bloodGasSrc value: * value_float:  value_int: 
[WARN] unable to get value: sheet_name: bloodGasSrc value: * value_float:  value_int: 
[WARN] unable to get value: sheet_name: bloodGasSrc

(9/17) normalize_value: column: bloodGasPO2_mmHg
(10/17) normalize_value: column: bloodGasHCO3_mEqPerL
(11/17) normalize_value: column: bloodGasBaseDeficit_mEqPerL
(12/17) normalize_value: column: bloodGasPHCorrect
(13/17) normalize_value: column: bloodGasPCO2Correct_mmHg
(14/17) normalize_value: column: bloodGasPO2Correct_mmHg
(15/17) normalize_value: column: bloodGasHCO3Correct_mEqPerL
(16/17) normalize_value: column: bloodGasBaseDeficitCorrect_mEqPerL
flatten_index: flatten_ids: ['bloodGasTimeSlot_min'] unique_id_map: {np.int64(1): np.int64(1), np.int64(4): np.int64(4), np.int64(8): np.int64(8), np.int64(12): np.int64(12), np.int64(16): np.int64(16), np.int64(20): np.int64(20), np.int64(24): np.int64(24), np.int64(28): np.int64(28), np.int64(32): np.int64(32), np.int64(36): np.int64(36), np.int64(40): np.int64(40), np.int64(44): np.int64(44), np.int64(48): np.int64(48), np.int64(52): np.int64(52), np.int64(56): np.int64(56), np.int64(60): np.int64(60), np.int64(64): np.int64(64), np

In [70]:
# XXX hack: rows whose _flatten_index equals 1 are re-labelled as 0.
is_flatten_index_1 = df[COMBINE_harmonizer.FLATTEN_INDEX].eq(1)
df.loc[is_flatten_index_1, COMBINE_harmonizer.FLATTEN_INDEX] = 0

# Sanity check: show the distinct index values and the column dtype after the remap.
print(f"_flatten_index: ({df[COMBINE_harmonizer.FLATTEN_INDEX].unique()} / {df[COMBINE_harmonizer.FLATTEN_INDEX].dtype})")

_flatten_index: ([  0   4   8  12  16  20  24  28  32  36  40  44  48  52  56  60  64  68
  72  76  80  84  88  92  96 100 104 108 112 116 120 124 128 132 136 140
 144 148 152 156 160 164 168 172 176 180 184 188 192 196 200 204 208 212
 216 220 224 228 232 236 240 244 248 252 256 260 264 268 272 276 280 284
 288] / int64)


In [71]:
# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

In [72]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/35) center: (9573/0)
(1/35) center.orig: (9573/0)
(2/35) subjectID: (9573/0)
(3/35) subjectID.orig: (9573/0)
(4/35) uniqueID: (9573/0)
(5/35) uniqueID.orig: (9573/0)
(6/35) _flatten_index: (9573/0)
(7/35) bloodGasTimeSlot_min: (9573/0)
(8/35) bloodGasTimeSlot_min.orig: (9573/0)
(9/35) bloodGasDate: (9573/0)
(10/35) bloodGasDate.orig: (5127/4446)
(11/35) bloodGasTime: (9573/0)
(12/35) bloodGasTime.orig: (4887/4686)
(13/35) bloodGasSrc: (9573/0)
(14/35) bloodGasSrc.orig: (3775/5798)
(15/35) bloodGasPH: (9573/0)
(16/35) bloodGasPH.orig: (2575/6998)
(17/35) bloodGasPCO2_mmHg: (9573/0)
(18/35) bloodGasPCO2_mmHg.orig: (2576/6997)
(19/35) bloodGasPO2_mmHg: (9573/0)
(20/35) bloodGasPO2_mmHg.orig: (2570/7003)
(21/35) bloodGasHCO3_mEqPerL: (9573/0)
(22/35) bloodGasHCO3_mEqPerL.orig: (2502/7071)
(23/35) bloodGasBaseDeficit_mEqPerL: (9573/0)
(24/35) bloodGasBaseDeficit_mEqPerL.orig: (2449/7124)
(25/35) bloodGasPHCorrect: (9573/0)
(26/35) bloodGasPHCorrect.orig: (3334/6239)
(27/35) bloodGasPCO2C

## 02-05-hematology

In [73]:
# Column(s) passed to normalize_value as the flatten ids for this table.
_FLATTEN_IDS = ['hematologyTimeSlot_min']
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '02-05-hematology.csv'

In [74]:
# Read every column as object dtype, then normalize and flatten on the time-slot column.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP
)

# XXX OC records these blood counts as kcPermuL, so each listed column is scaled
# by 1000 to agree with its *_cPermuL name. The *.orig columns are left untouched.
cbc_columns = [
    'hematologyWBC_cPermuL',
    'hematologyPolymorphNeutrophils_cPermuL',
    'hematologyMonocytes_cPermuL',
    'hematologyLymphocytes_cPermuL',
    'hematologyPlatelet_cPermuL',
]
for column in cbc_columns:
    df.loc[:, column] = df[column].mul(1000)

(0/15) normalize_value: column: center
(1/15) normalize_value: column: subjectID
(2/15) normalize_value: column: uniqueID
(3/15) normalize_value: column: hematology
(4/15) normalize_value: column: hematologyTimeSlot_min
(5/15) normalize_value: column: hematologyDate
(6/15) normalize_value: column: hematologyTime
(7/15) normalize_value: column: hematologyWBC_cPermuL
(8/15) normalize_value: column: hematologyHemoglobin_gPerdL
(9/15) normalize_value: column: hematologyPolymorphNeutrophils_cPermuL
(10/15) normalize_value: column: hematologyMonocytes_cPermuL
(11/15) normalize_value: column: hematologyLymphocytes_cPermuL
(12/15) normalize_value: column: hematologyPlatelet_cPermuL
(13/15) normalize_value: column: hematologyPT_s
(14/15) normalize_value: column: hematologyPTT_s
flatten_index: flatten_ids: ['hematologyTimeSlot_min'] unique_id_map: {np.int64(1): np.int64(1), np.int64(24): np.int64(24), np.int64(48): np.int64(48), np.int64(72): np.int64(72), np.int64(96): np.int64(96), np.int64(12

In [75]:
# XXX hack: rows whose _flatten_index equals 1 are re-labelled as 0.
is_flatten_index_1 = df[COMBINE_harmonizer.FLATTEN_INDEX].eq(1)
df.loc[is_flatten_index_1, COMBINE_harmonizer.FLATTEN_INDEX] = 0

# Sanity check: show the distinct index values and the column dtype after the remap.
print(f"_flatten_index: ({df[COMBINE_harmonizer.FLATTEN_INDEX].unique()} / {df[COMBINE_harmonizer.FLATTEN_INDEX].dtype})")

_flatten_index: ([  0  24  48  72  96 120 144 168 192 216] / int64)


In [76]:
# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

In [77]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/31) center: (1979/0)
(1/31) center.orig: (1979/0)
(2/31) subjectID: (1979/0)
(3/31) subjectID.orig: (1979/0)
(4/31) uniqueID: (1979/0)
(5/31) uniqueID.orig: (1979/0)
(6/31) _flatten_index: (1979/0)
(7/31) hematology: (1979/0)
(8/31) hematology.orig: (1951/28)
(9/31) hematologyTimeSlot_min: (1979/0)
(10/31) hematologyTimeSlot_min.orig: (1979/0)
(11/31) hematologyDate: (1979/0)
(12/31) hematologyDate.orig: (1979/0)
(13/31) hematologyTime: (1979/0)
(14/31) hematologyTime.orig: (1974/5)
(15/31) hematologyWBC_cPermuL: (1979/0)
(16/31) hematologyWBC_cPermuL.orig: (1239/740)
(17/31) hematologyHemoglobin_gPerdL: (1979/0)
(18/31) hematologyHemoglobin_gPerdL.orig: (1340/639)
(19/31) hematologyPolymorphNeutrophils_cPermuL: (1979/0)
(20/31) hematologyPolymorphNeutrophils_cPermuL.orig: (1067/912)
(21/31) hematologyMonocytes_cPermuL: (1979/0)
(22/31) hematologyMonocytes_cPermuL.orig: (1089/890)
(23/31) hematologyLymphocytes_cPermuL: (1979/0)
(24/31) hematologyLymphocytes_cPermuL.orig: (1112/867)


## 02-05_s-hematology

In [78]:
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '02-05_s-hematology.csv'

In [79]:
# Read every column as object dtype, then normalize values against the data dictionary map.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, order_map=_ORDER_MAP
)

# XXX OC records this platelet count as kcPermuL, so the column is scaled by 1000
# to agree with its *_cPermuL name. The *.orig column is left untouched.
cbc_columns = ['hematologyPlateletMin_cPermuL']
for column in cbc_columns:
    df.loc[:, column] = df[column].mul(1000)

# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/7) normalize_value: column: center
(1/7) normalize_value: column: subjectID
(2/7) normalize_value: column: uniqueID
(3/7) normalize_value: column: hematologyHematocritMin
(4/7) normalize_value: column: hematologyHematocritMinDate
(5/7) normalize_value: column: hematologyPlateletMin_cPermuL
(6/7) normalize_value: column: hematologyPlateletMin_cPermuLDate


In [80]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/14) center: (364/0)
(1/14) center.orig: (364/0)
(2/14) subjectID: (364/0)
(3/14) subjectID.orig: (364/0)
(4/14) uniqueID: (364/0)
(5/14) uniqueID.orig: (364/0)
(6/14) hematologyHematocritMin: (364/0)
(7/14) hematologyHematocritMin.orig: (364/0)
(8/14) hematologyHematocritMinDate: (364/0)
(9/14) hematologyHematocritMinDate.orig: (364/0)
(10/14) hematologyPlateletMin_cPermuL: (364/0)
(11/14) hematologyPlateletMin_cPermuL.orig: (361/3)
(12/14) hematologyPlateletMin_cPermuLDate: (364/0)
(13/14) hematologyPlateletMin_cPermuLDate.orig: (361/3)


## 02-06_s-blood-value

In [81]:
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '02-06_s-blood-value.csv'

In [82]:
# Read every column as object dtype, then normalize values against the data dictionary map.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, order_map=_ORDER_MAP
)

# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/61) normalize_value: column: center
(1/61) normalize_value: column: subjectID
(2/61) normalize_value: column: uniqueID
(3/61) normalize_value: column: bloodValuePHMin
(4/61) normalize_value: column: bloodValuePHMinDate
(5/61) normalize_value: column: bloodValuePHMax
(6/61) normalize_value: column: bloodValuePHMaxDate
(7/61) normalize_value: column: bloodValueHCO3Min_mEqPerL
(8/61) normalize_value: column: bloodValueHCO3Min_mEqPerLDate
(9/61) normalize_value: column: bloodValueBaseDeficitMax_mEqPerL
(10/61) normalize_value: column: bloodValueBaseDeficitMax_mEqPerLDate
(11/61) normalize_value: column: bloodValueSerumNaMin_mEqPerL
(12/61) normalize_value: column: bloodValueSerumNaMin_mEqPerLDate
(13/61) normalize_value: column: bloodValueSerumNaMax_mEqPerL
(14/61) normalize_value: column: bloodValueSerumNaMax_mEqPerLDate
(15/61) normalize_value: column: bloodValueSerumKMin_mEqPerL
(16/61) normalize_value: column: bloodValueSerumKMin_mEqPerLDate
(17/61) normalize_value: column: bloodVal

## 02-07-infection

In [83]:
# Column(s) passed to normalize_value as the flatten ids for this table.
_FLATTEN_IDS = ['positiveCultureNumber']
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '02-07-infection.csv'

In [84]:
# Read every column as object dtype, then normalize and flatten on the culture-number column.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP
)

# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/18) normalize_value: column: center
(1/18) normalize_value: column: subjectID
(2/18) normalize_value: column: uniqueID
(3/18) normalize_value: column: positiveCultureNumber
(4/18) normalize_value: column: positiveCultureSrc
(5/18) normalize_value: column: positiveCultureDate
(6/18) normalize_value: column: positiveCultureTime
(7/18) normalize_value: column: positiveCultureOrganismCode1
(8/18) normalize_value: column: positiveCultureOrganismCode2
(9/18) normalize_value: column: positiveCultureOrganismCode3
(10/18) normalize_value: column: antibiotics
(11/18) normalize_value: column: rewarmingAntibiotics
(12/18) normalize_value: column: antibioticsCode1
(13/18) normalize_value: column: antibioticsCode2
(14/18) normalize_value: column: antibioticsCode3
(15/18) normalize_value: column: rewarmingAntibioticsCode1
(16/18) normalize_value: column: rewarmingAntibioticsCode2
(17/18) normalize_value: column: rewarmingAntibioticsCode3
flatten_index: flatten_ids: ['positiveCultureNumber'] unique

In [85]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/37) center: (364/0)
(1/37) center.orig: (364/0)
(2/37) subjectID: (364/0)
(3/37) subjectID.orig: (364/0)
(4/37) uniqueID: (364/0)
(5/37) uniqueID.orig: (364/0)
(6/37) _flatten_index: (364/0)
(7/37) positiveCultureNumber: (364/0)
(8/37) positiveCultureNumber.orig: (364/0)
(9/37) positiveCultureSrc: (364/0)
(10/37) positiveCultureSrc.orig: (5/359)
(11/37) positiveCultureDate: (364/0)
(12/37) positiveCultureDate.orig: (5/359)
(13/37) positiveCultureTime: (364/0)
(14/37) positiveCultureTime.orig: (5/359)
(15/37) positiveCultureOrganismCode1: (364/0)
(16/37) positiveCultureOrganismCode1.orig: (5/359)
(17/37) positiveCultureOrganismCode2: (364/0)
(18/37) positiveCultureOrganismCode2.orig: (0/364)
(19/37) positiveCultureOrganismCode3: (364/0)
(20/37) positiveCultureOrganismCode3.orig: (0/364)
(21/37) antibiotics: (364/0)
(22/37) antibiotics.orig: (364/0)
(23/37) antibioticsCode1: (364/0)
(24/37) antibioticsCode1.orig: (152/212)
(25/37) antibioticsCode2: (364/0)
(26/37) antibioticsCode2.ori

## 02-08-other-med

In [86]:
# Column(s) passed to normalize_value as the flatten ids for this table.
_FLATTEN_IDS = ['otherMedTimeSlot_min']
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '02-08-other-med.csv'

In [87]:
# Read every column as object dtype, then normalize and flatten on the time-slot column.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP
)

(0/18) normalize_value: column: center
(1/18) normalize_value: column: subjectID
(2/18) normalize_value: column: uniqueID
(3/18) normalize_value: column: otherMedTimeSlot_min
(4/18) normalize_value: column: anticonvulsants1
(5/18) normalize_value: column: anticonvulsants2
(6/18) normalize_value: column: anticonvulsants3
(7/18) normalize_value: column: analgesicsSedatives1
(8/18) normalize_value: column: analgesicsSedatives2
(9/18) normalize_value: column: analgesicsSedatives3
(10/18) normalize_value: column: antipyretics1
(11/18) normalize_value: column: antipyretics2
(12/18) normalize_value: column: antipyretics3
(13/18) normalize_value: column: paralytics1
(14/18) normalize_value: column: paralytics2
(15/18) normalize_value: column: paralytics3
(16/18) normalize_value: column: otherMedFluidIntake_ccPerKg
(17/18) normalize_value: column: otherMedUrineOutput_ccPerKg
flatten_index: flatten_ids: ['otherMedTimeSlot_min'] unique_id_map: {np.int64(1): np.int64(1), np.int64(24): np.int64(24)

In [88]:
# XXX hack: rows whose _flatten_index equals 1 are re-labelled as 0.
is_flatten_index_1 = df[COMBINE_harmonizer.FLATTEN_INDEX].eq(1)
df.loc[is_flatten_index_1, COMBINE_harmonizer.FLATTEN_INDEX] = 0

# Sanity check: show the distinct index values and the column dtype after the remap.
print(f"_flatten_index: ({df[COMBINE_harmonizer.FLATTEN_INDEX].unique()} / {df[COMBINE_harmonizer.FLATTEN_INDEX].dtype})")

_flatten_index: ([  0  24  48  72  96 120 144 168 192 216] / int64)


In [89]:
# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

In [90]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/37) center: (1979/0)
(1/37) center.orig: (1979/0)
(2/37) subjectID: (1979/0)
(3/37) subjectID.orig: (1979/0)
(4/37) uniqueID: (1979/0)
(5/37) uniqueID.orig: (1979/0)
(6/37) _flatten_index: (1979/0)
(7/37) otherMedTimeSlot_min: (1979/0)
(8/37) otherMedTimeSlot_min.orig: (1979/0)
(9/37) anticonvulsants1: (1979/0)
(10/37) anticonvulsants1.orig: (628/1351)
(11/37) anticonvulsants2: (1979/0)
(12/37) anticonvulsants2.orig: (106/1873)
(13/37) anticonvulsants3: (1979/0)
(14/37) anticonvulsants3.orig: (12/1967)
(15/37) analgesicsSedatives1: (1979/0)
(16/37) analgesicsSedatives1.orig: (852/1127)
(17/37) analgesicsSedatives2: (1979/0)
(18/37) analgesicsSedatives2.orig: (279/1700)
(19/37) analgesicsSedatives3: (1979/0)
(20/37) analgesicsSedatives3.orig: (20/1959)
(21/37) antipyretics1: (1979/0)
(22/37) antipyretics1.orig: (9/1970)
(23/37) antipyretics2: (1979/0)
(24/37) antipyretics2.orig: (0/1979)
(25/37) antipyretics3: (1979/0)
(26/37) antipyretics3.orig: (0/1979)
(27/37) paralytics1: (1979/0

## 02-09-imaging

In [91]:
# Column(s) passed to normalize_value as the flatten ids for this table.
_FLATTEN_IDS = ['imagingNumber']
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '02-09-imaging.csv'

In [92]:
# Read every column as object dtype, then normalize and flatten on the imaging-number column.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP
)

(0/40) normalize_value: column: center
(1/40) normalize_value: column: subjectID
(2/40) normalize_value: column: uniqueID
(3/40) normalize_value: column: headSonogram
(4/40) normalize_value: column: headSonogramResultText
(5/40) normalize_value: column: headCT
(6/40) normalize_value: column: headCTResultText
(7/40) normalize_value: column: brainMRI
(8/40) normalize_value: column: brainMRIResultText
(9/40) normalize_value: column: imagingNumber
(10/40) normalize_value: column: headSonogramDate
(11/40) normalize_value: column: headSonogramTime
(12/40) normalize_value: column: headSonogramResult1
(13/40) normalize_value: column: headSonogramResult2
(14/40) normalize_value: column: headSonogramResult3
(15/40) normalize_value: column: headSonogramResult4
(16/40) normalize_value: column: headSonogramResult5
(17/40) normalize_value: column: headSonogramResult6
(18/40) normalize_value: column: headSonogramResult7
(19/40) normalize_value: column: headSonogramResult8
(20/40) normalize_value: col

In [93]:
# XXX hack: rows whose _flatten_index equals 2 are re-labelled as 1.
is_flatten_index_2 = df[COMBINE_harmonizer.FLATTEN_INDEX].eq(2)
df.loc[is_flatten_index_2, COMBINE_harmonizer.FLATTEN_INDEX] = 1

# Sanity check: show the distinct index values and the column dtype after the remap.
print(f"_flatten_index: ({df[COMBINE_harmonizer.FLATTEN_INDEX].unique()} / {df[COMBINE_harmonizer.FLATTEN_INDEX].dtype})")

_flatten_index: ([1] / int64)


In [94]:
# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

In [95]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/81) center: (357/0)
(1/81) center.orig: (357/0)
(2/81) subjectID: (357/0)
(3/81) subjectID.orig: (357/0)
(4/81) uniqueID: (357/0)
(5/81) uniqueID.orig: (357/0)
(6/81) _flatten_index: (357/0)
(7/81) imagingNumber: (357/0)
(8/81) imagingNumber.orig: (357/0)
(9/81) headSonogram: (357/0)
(10/81) headSonogram.orig: (357/0)
(11/81) headSonogramDate: (357/0)
(12/81) headSonogramDate.orig: (176/181)
(13/81) headSonogramTime: (357/0)
(14/81) headSonogramTime.orig: (174/183)
(15/81) headSonogramResult1: (357/0)
(16/81) headSonogramResult1.orig: (174/183)
(17/81) headSonogramResult2: (357/0)
(18/81) headSonogramResult2.orig: (25/332)
(19/81) headSonogramResult3: (357/0)
(20/81) headSonogramResult3.orig: (3/354)
(21/81) headSonogramResult4: (357/0)
(22/81) headSonogramResult4.orig: (0/357)
(23/81) headSonogramResult5: (357/0)
(24/81) headSonogramResult5.orig: (0/357)
(25/81) headSonogramResult6: (357/0)
(26/81) headSonogramResult6.orig: (0/357)
(27/81) headSonogramResult7: (357/0)
(28/81) headS

## 03-01-post-temperature

In [96]:
# Column(s) passed to normalize_value as the flatten ids for this table.
_FLATTEN_IDS = ['post_TemperatureTimeSlot_day']
# CSV name used both for the input read from input_dir and the output written to out_dir.
base_filename = '03-01-post-temperature.csv'

In [97]:
# Read every column as object dtype, then normalize and flatten on the day-slot column.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP
)

# Store the normalized table under the same file name in the output directory.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/10) normalize_value: column: center
(1/10) normalize_value: column: subjectID
(2/10) normalize_value: column: uniqueID
(3/10) normalize_value: column: post_AlterationSkinIntegrity
(4/10) normalize_value: column: post_Shiver
(5/10) normalize_value: column: post_TemperatureTimeSlot_day
(6/10) normalize_value: column: post_TemperatureDate
(7/10) normalize_value: column: post_TemperatureTime
(8/10) normalize_value: column: post_SkinTemperature_C
(9/10) normalize_value: column: post_AxillaryTemperature_C
flatten_index: flatten_ids: ['post_TemperatureTimeSlot_day'] unique_id_map: {np.int64(4): np.int64(4), np.int64(5): np.int64(5), np.int64(6): np.int64(6), np.int64(7): np.int64(7), np.int64(8): np.int64(8), np.int64(9): np.int64(9), np.int64(10): np.int64(10)} the_type: int64


In [98]:
# NOTE(review): this cell repeats the preceding cell line for line (same read,
# normalize and write of `base_filename`), so it should only regenerate the same
# output file. Consider removing one of the two.
filename = os.sep.join([input_dir, base_filename])
df = pd.read_csv(filename, dtype='O')
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP)

out_filename = os.sep.join([out_dir, base_filename])
df.to_csv(out_filename, index=False)

(0/10) normalize_value: column: center
(1/10) normalize_value: column: subjectID
(2/10) normalize_value: column: uniqueID
(3/10) normalize_value: column: post_AlterationSkinIntegrity
(4/10) normalize_value: column: post_Shiver
(5/10) normalize_value: column: post_TemperatureTimeSlot_day
(6/10) normalize_value: column: post_TemperatureDate
(7/10) normalize_value: column: post_TemperatureTime
(8/10) normalize_value: column: post_SkinTemperature_C
(9/10) normalize_value: column: post_AxillaryTemperature_C
flatten_index: flatten_ids: ['post_TemperatureTimeSlot_day'] unique_id_map: {np.int64(4): np.int64(4), np.int64(5): np.int64(5), np.int64(6): np.int64(6), np.int64(7): np.int64(7), np.int64(8): np.int64(8), np.int64(9): np.int64(9), np.int64(10): np.int64(10)} the_type: int64


In [99]:
# Print a per-column summary of the normalized frame: one line per column with a
# pair of counts (presumably non-null / null — TODO confirm).
COMBINE_harmonizer.column_info(df)

(0/21) center: (1883/0)
(1/21) center.orig: (1883/0)
(2/21) subjectID: (1883/0)
(3/21) subjectID.orig: (1883/0)
(4/21) uniqueID: (1883/0)
(5/21) uniqueID.orig: (1883/0)
(6/21) _flatten_index: (1883/0)
(7/21) post_TemperatureTimeSlot_day: (1883/0)
(8/21) post_TemperatureTimeSlot_day.orig: (1883/0)
(9/21) post_TemperatureDate: (1883/0)
(10/21) post_TemperatureDate.orig: (1847/36)
(11/21) post_TemperatureTime: (1883/0)
(12/21) post_TemperatureTime.orig: (1835/48)
(13/21) post_SkinTemperature_C: (1883/0)
(14/21) post_SkinTemperature_C.orig: (850/1033)
(15/21) post_AxillaryTemperature_C: (1883/0)
(16/21) post_AxillaryTemperature_C.orig: (1739/144)
(17/21) post_AlterationSkinIntegrity: (1883/0)
(18/21) post_AlterationSkinIntegrity.orig: (1847/36)
(19/21) post_Shiver: (1883/0)
(20/21) post_Shiver.orig: (1847/36)


## 03-01_s-post-temperature

In [100]:
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '03-01_s-post-temperature.csv'

In [101]:
# 03-01_s-post-temperature: load the extracted CSV as raw objects, normalize it,
# and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
# No flatten ids are passed for this table.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/10) normalize_value: column: center
(1/10) normalize_value: column: subjectID
(2/10) normalize_value: column: uniqueID
(3/10) normalize_value: column: normothermiaAtEndIntervention
(4/10) normalize_value: column: noNormothermiaReason
(5/10) normalize_value: column: coolAfterInterventionText
(6/10) normalize_value: column: normothermiaDate
(7/10) normalize_value: column: normothermiaTime
(8/10) normalize_value: column: normothermiaAxillaryTemperature_C
(9/10) normalize_value: column: coolAfterIntervention


In [102]:
# Per-column summary of the normalized 03-01_s-post-temperature frame; each output line
# shows a column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/20) center: (364/0)
(1/20) center.orig: (364/0)
(2/20) subjectID: (364/0)
(3/20) subjectID.orig: (364/0)
(4/20) uniqueID: (364/0)
(5/20) uniqueID.orig: (364/0)
(6/20) normothermiaAtEndIntervention: (364/0)
(7/20) normothermiaAtEndIntervention.orig: (364/0)
(8/20) normothermiaDate: (364/0)
(9/20) normothermiaDate.orig: (313/51)
(10/20) normothermiaTime: (364/0)
(11/20) normothermiaTime.orig: (313/51)
(12/20) normothermiaAxillaryTemperature_C: (364/0)
(13/20) normothermiaAxillaryTemperature_C.orig: (268/96)
(14/20) noNormothermiaReason: (364/0)
(15/20) noNormothermiaReason.orig: (50/314)
(16/20) coolAfterIntervention: (364/0)
(17/20) coolAfterIntervention.orig: (55/309)
(18/20) coolAfterInterventionText: (364/0)
(19/20) coolAfterInterventionText.orig: (25/339)


## 03-02-post-blood-value

In [103]:
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '03-02-post-blood-value.csv'

In [104]:
# 03-02-post-blood-value: load the extracted CSV as raw objects, normalize it,
# and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/9) normalize_value: column: center
(1/9) normalize_value: column: subjectID
(2/9) normalize_value: column: uniqueID
(3/9) normalize_value: column: post_BloodValueASTSGOT_UPerL
(4/9) normalize_value: column: post_BloodValueASTSGOT_UPerLDate
(5/9) normalize_value: column: post_BloodValueALTSGPT_UPerL
(6/9) normalize_value: column: post_BloodValueALTSGPT_UPerLDate
(7/9) normalize_value: column: post_BloodValueTotalBilirubin_mgPerdL
(8/9) normalize_value: column: post_BloodValueTotalBilirubin_mgPerdLDate


In [105]:
# Per-column summary of the normalized 03-02-post-blood-value frame; each output line
# shows a column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)


(0/18) center: (364/0)
(1/18) center.orig: (364/0)
(2/18) subjectID: (364/0)
(3/18) subjectID.orig: (364/0)
(4/18) uniqueID: (364/0)
(5/18) uniqueID.orig: (364/0)
(6/18) post_BloodValueASTSGOT_UPerL: (364/0)
(7/18) post_BloodValueASTSGOT_UPerL.orig: (272/92)
(8/18) post_BloodValueASTSGOT_UPerLDate: (364/0)
(9/18) post_BloodValueASTSGOT_UPerLDate.orig: (275/89)
(10/18) post_BloodValueALTSGPT_UPerL: (364/0)
(11/18) post_BloodValueALTSGPT_UPerL.orig: (278/86)
(12/18) post_BloodValueALTSGPT_UPerLDate: (364/0)
(13/18) post_BloodValueALTSGPT_UPerLDate.orig: (281/83)
(14/18) post_BloodValueTotalBilirubin_mgPerdL: (364/0)
(15/18) post_BloodValueTotalBilirubin_mgPerdL.orig: (281/83)
(16/18) post_BloodValueTotalBilirubin_mgPerdLDate: (364/0)
(17/18) post_BloodValueTotalBilirubin_mgPerdLDate.orig: (283/81)


## 03-03-post-imaging

In [106]:
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '03-03-post-imaging.csv'

In [107]:
# 03-03-post-imaging: load the extracted CSV as raw objects, normalize it,
# and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/39) normalize_value: column: center
(1/39) normalize_value: column: subjectID
(2/39) normalize_value: column: uniqueID
(3/39) normalize_value: column: post_HeadSonogram
(4/39) normalize_value: column: post_HeadSonogramResultText
(5/39) normalize_value: column: post_HeadCT
(6/39) normalize_value: column: post_HeadCTResultText
(7/39) normalize_value: column: post_BrainMRI
(8/39) normalize_value: column: post_BrainMRIResultText
(9/39) normalize_value: column: post_HeadSonogramDate
(10/39) normalize_value: column: post_HeadSonogramTime
(11/39) normalize_value: column: post_HeadSonogramResult1
(12/39) normalize_value: column: post_HeadSonogramResult2
(13/39) normalize_value: column: post_HeadSonogramResult3
(14/39) normalize_value: column: post_HeadSonogramResult4
(15/39) normalize_value: column: post_HeadSonogramResult5
(16/39) normalize_value: column: post_HeadSonogramResult6
(17/39) normalize_value: column: post_HeadSonogramResult7
(18/39) normalize_value: column: post_HeadSonogramRes

In [108]:
# Per-column summary of the normalized 03-03-post-imaging frame; each output line
# shows a column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)


(0/78) center: (361/0)
(1/78) center.orig: (361/0)
(2/78) subjectID: (361/0)
(3/78) subjectID.orig: (361/0)
(4/78) uniqueID: (361/0)
(5/78) uniqueID.orig: (361/0)
(6/78) post_HeadSonogram: (361/0)
(7/78) post_HeadSonogram.orig: (361/0)
(8/78) post_HeadSonogramDate: (361/0)
(9/78) post_HeadSonogramDate.orig: (148/213)
(10/78) post_HeadSonogramTime: (361/0)
(11/78) post_HeadSonogramTime.orig: (146/215)
(12/78) post_HeadSonogramResult1: (361/0)
(13/78) post_HeadSonogramResult1.orig: (147/214)
(14/78) post_HeadSonogramResult2: (361/0)
(15/78) post_HeadSonogramResult2.orig: (21/340)
(16/78) post_HeadSonogramResult3: (361/0)
(17/78) post_HeadSonogramResult3.orig: (8/353)
(18/78) post_HeadSonogramResult4: (361/0)
(19/78) post_HeadSonogramResult4.orig: (2/359)
(20/78) post_HeadSonogramResult5: (361/0)
(21/78) post_HeadSonogramResult5.orig: (0/361)
(22/78) post_HeadSonogramResult6: (361/0)
(23/78) post_HeadSonogramResult6.orig: (0/361)
(24/78) post_HeadSonogramResult7: (361/0)
(25/78) post_Head

## 03-04-post-neuro-exam

In [109]:
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '03-04-post-neuro-exam.csv'

In [110]:
# 03-04-post-neuro-exam: load the extracted CSV as raw objects, normalize it,
# and save the result under out_dir with the same file name.
# NOTE(review): the run log reports '**' values in post_NeuroExamTone and
# post_NeuroExamRespiration that normalize_value could not map — confirm the
# intended handling of that placeholder.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/21) normalize_value: column: center
(1/21) normalize_value: column: subjectID
(2/21) normalize_value: column: uniqueID
(3/21) normalize_value: column: post_NeuroExamTone
[WARN] unable to float: val (**/<class 'str'>) e: could not convert string to float: '**'
[WARN] unable to get value: sheet_name: signOfHIETone value: ** value_float: ** value_int: **
(4/21) normalize_value: column: post_NeuroExamRespiration
[WARN] unable to float: val (**/<class 'str'>) e: could not convert string to float: '**'
[WARN] unable to get value: sheet_name: signOfHIERespiratory value: ** value_float: ** value_int: **
(5/21) normalize_value: column: post_NeuroExamSeizure
(6/21) normalize_value: column: post_NeuroExamSedate
(7/21) normalize_value: column: post_NeuroExamClonusSustained
(8/21) normalize_value: column: post_NeuroExamFistedHand
(9/21) normalize_value: column: post_NeuroExamAbnormalMovement
(10/21) normalize_value: column: post_NeuroExamGagReflexAbsent
(11/21) normalize_value: column: post_Neur

In [111]:
# Per-column summary of the normalized 03-04-post-neuro-exam frame; each output line
# shows a column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/42) center: (364/0)
(1/42) center.orig: (364/0)
(2/42) subjectID: (364/0)
(3/42) subjectID.orig: (364/0)
(4/42) uniqueID: (364/0)
(5/42) uniqueID.orig: (364/0)
(6/42) post_NeuroExamDate: (364/0)
(7/42) post_NeuroExamDate.orig: (308/56)
(8/42) post_NeuroExamTime: (364/0)
(9/42) post_NeuroExamTime.orig: (307/57)
(10/42) post_NeuroExamLevelConsciousness: (364/0)
(11/42) post_NeuroExamLevelConsciousness.orig: (308/56)
(12/42) post_NeuroExamSpontaneousActivity: (364/0)
(13/42) post_NeuroExamSpontaneousActivity.orig: (308/56)
(14/42) post_NeuroExamPosture: (364/0)
(15/42) post_NeuroExamPosture.orig: (307/57)
(16/42) post_NeuroExamTone: (364/0)
(17/42) post_NeuroExamTone.orig: (308/56)
(18/42) post_NeuroExamSuck: (364/0)
(19/42) post_NeuroExamSuck.orig: (306/58)
(20/42) post_NeuroExamMoro: (364/0)
(21/42) post_NeuroExamMoro.orig: (289/75)
(22/42) post_NeuroExamPupils: (364/0)
(23/42) post_NeuroExamPupils.orig: (290/74)
(24/42) post_NeuroExamHeartRate: (364/0)
(25/42) post_NeuroExamHeartRat

## 03-05-mri

In [112]:
# Column(s) handed to normalize_value as its flatten ids for this table
# (echoed by the flatten_index log line of the next cell).
_FLATTEN_IDS = ['MRIReader']
# File name of the table handled in this section; following cells read it from
# input_dir and write the normalized copy to out_dir under the same name.
base_filename = '03-05-mri.csv'

In [113]:
# 03-05-mri: load the extracted CSV as raw objects and normalize it, flattening on
# _FLATTEN_IDS. The result is written out by a later cell.
filename = f"{input_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

(0/77) normalize_value: column: center
(1/77) normalize_value: column: subjectID
(2/77) normalize_value: column: uniqueID
(3/77) normalize_value: column: MRIIncrement
(4/77) normalize_value: column: MRIReader
(5/77) normalize_value: column: MRIReadDate
(6/77) normalize_value: column: MRIDate
(7/77) normalize_value: column: MRIStrength_T
(8/77) normalize_value: column: MRIAdequateQuality
(9/77) normalize_value: column: MRIAdequateQuality_c
(10/77) normalize_value: column: MRIT1Axial
(11/77) normalize_value: column: MRIT1Coronal
(12/77) normalize_value: column: MRIT1Sagittal
(13/77) normalize_value: column: MRIT1
(14/77) normalize_value: column: MRIT2Axial
(15/77) normalize_value: column: MRIT2Coronal
(16/77) normalize_value: column: MRIT2Sagittal
(17/77) normalize_value: column: MRIT2
(18/77) normalize_value: column: MRIT2FLAIRAxial
(19/77) normalize_value: column: MRIT2FLAIRCoronal
(20/77) normalize_value: column: MRIT2FLAIRSagittal
(21/77) normalize_value: column: MRIT2FLAIR
(22/77) n

flatten_index: flatten_ids: ['MRIReader'] unique_id_map: {'1': '1', '2': '2', '3': '3'} the_type: object


In [114]:
# Save the normalized frame to out_dir under the same file name, without the index column.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

In [115]:
# Per-column summary of the normalized 03-05-mri frame; each output line shows a
# column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/155) center: (671/0)
(1/155) center.orig: (671/0)
(2/155) subjectID: (671/0)
(3/155) subjectID.orig: (671/0)
(4/155) uniqueID: (671/0)
(5/155) uniqueID.orig: (671/0)
(6/155) MRI_ID: (671/0)
(7/155) MRI_ID.orig: (671/0)
(8/155) _flatten_index: (671/0)
(9/155) siteID: (671/0)
(10/155) siteID.orig: (671/0)
(11/155) MRIIncrement: (671/0)
(12/155) MRIIncrement.orig: (671/0)
(13/155) MRIReader: (671/0)
(14/155) MRIReader.orig: (671/0)
(15/155) MRIReadDate: (671/0)
(16/155) MRIReadDate.orig: (661/10)
(17/155) MRIStrength_T: (671/0)
(18/155) MRIStrength_T.orig: (256/415)
(19/155) MRIAdequateQuality: (671/0)
(20/155) MRIAdequateQuality.orig: (266/405)
(21/155) MRIAdequateQuality_c: (671/0)
(22/155) MRIAdequateQuality_c.orig: (670/1)
(23/155) MRIT1Axial: (671/0)
(24/155) MRIT1Axial.orig: (671/0)
(25/155) MRIT1Coronal: (671/0)
(26/155) MRIT1Coronal.orig: (671/0)
(27/155) MRIT1Sagittal: (671/0)
(28/155) MRIT1Sagittal.orig: (671/0)
(29/155) MRIT1: (671/0)
(30/155) MRIT1.orig: (671/0)
(31/155) MR

## 03-05_s-mri

In [116]:
# File name of the table handled in this section; following cells read it from
# input_dir and write the normalized copy to out_dir under the same name.
base_filename = '03-05_s-mri.csv'

In [117]:
# Display the value-map entry for MRICerebralAtrophyGlobalLocalMerge; it shows up as a
# lambda created inside _build_value_map.
# NOTE(review): looks like a leftover sanity check — the result is not used by the
# cells that follow; confirm whether it should stay.
_VALUE_MAP['MRICerebralAtrophyGlobalLocalMerge']

<function COMBINE_harmonizer.utils_values._build_value_map.<locals>.<lambda>(x)>

In [118]:
# 03-05_s-mri: load the extracted CSV as raw objects and normalize it. The result is
# written out by a later cell.
# NOTE(review): the run log shows repeated "unable to get value" warnings for '-'
# in MRINoObtainReason — confirm how that placeholder should be mapped.
filename = f"{input_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

(0/66) normalize_value: column: center
(1/66) normalize_value: column: subjectID
(2/66) normalize_value: column: uniqueID
(3/66) normalize_value: column: MRIAvailable
(4/66) normalize_value: column: MRIAvailable_c
(5/66) normalize_value: column: MRINoObtainReason
[WARN] unable to get value: sheet_name: MRINoObtainReason value: - value_float:  value_int: 
[WARN] unable to get value: sheet_name: MRINoObtainReason value: - value_float:  value_int: 
[WARN] unable to get value: sheet_name: MRINoObtainReason value: - value_float:  value_int: 
[WARN] unable to get value: sheet_name: MRINoObtainReason value: - value_float:  value_int: 
[WARN] unable to get value: sheet_name: MRINoObtainReason value: - value_float:  value_int: 
[WARN] unable to get value: sheet_name: MRINoObtainReason value: - value_float:  value_int: 
[WARN] unable to get value: sheet_name: MRINoObtainReason value: - value_float:  value_int: 
[WARN] unable to get value: sheet_name: MRINoObtainReason value: - value_float:  valu

In [119]:
# Save the normalized frame to out_dir under the same file name, without the index column.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

In [120]:
# Per-column summary of the normalized 03-05_s-mri frame; each output line shows a
# column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/132) center: (364/0)
(1/132) center.orig: (364/0)
(2/132) subjectID: (364/0)
(3/132) subjectID.orig: (364/0)
(4/132) uniqueID: (364/0)
(5/132) uniqueID.orig: (364/0)
(6/132) MRIAvailable: (364/0)
(7/132) MRIAvailable.orig: (327/37)
(8/132) MRIAvailable_c: (364/0)
(9/132) MRIAvailable_c.orig: (327/37)
(10/132) MRIObtainWindow: (364/0)
(11/132) MRIObtainWindow.orig: (327/37)
(12/132) MRIObtainWindow_c: (364/0)
(13/132) MRIObtainWindow_c.orig: (200/164)
(14/132) MRIObtainComment: (364/0)
(15/132) MRIObtainComment.orig: (1/363)
(16/132) MRISendRTIDate: (364/0)
(17/132) MRISendRTIDate.orig: (200/164)
(18/132) MRIReceiveRTIDate: (364/0)
(19/132) MRIReceiveRTIDate.orig: (0/364)
(20/132) MRINoObtainReason: (364/0)
(21/132) MRINoObtainReason.orig: (327/37)
(22/132) MRINoObtainReason_c: (364/0)
(23/132) MRINoObtainReason_c.orig: (10/354)
(24/132) MRINoObtainReasonText: (364/0)
(25/132) MRINoObtainReasonText.orig: (5/359)
(26/132) MRI2LevelPatternOfInjury: (364/0)
(27/132) MRI2LevelPatternOfIn

## 02-13-bradycardia

In [121]:
# Column(s) handed to normalize_value as its flatten ids for this table
# (echoed by the flatten_index log line of the next cell).
_FLATTEN_IDS = ['bradycardiaEventNumber']
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '02-13-bradycardia.csv'

In [122]:
# 02-13-bradycardia: load the extracted CSV as raw objects, normalize it (flattening
# on _FLATTEN_IDS), and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/13) normalize_value: column: center
(1/13) normalize_value: column: subjectID
(2/13) normalize_value: column: uniqueID
(3/13) normalize_value: column: bradycardiaLess70Greater15min
(4/13) normalize_value: column: bradycardiaEKG
(5/13) normalize_value: column: bradycardiaEKGResultOtherText
(6/13) normalize_value: column: bradycardiaAntiarrhythmiaMedication
(7/13) normalize_value: column: bradycardiaEventNumber
(8/13) normalize_value: column: bradycardiaEKGResult
(9/13) normalize_value: column: bradycardiaDate
(10/13) normalize_value: column: bradycardiaTime
(11/13) normalize_value: column: bradycardiaDuration
(12/13) normalize_value: column: bradycardiaHeartRateMin
flatten_index: flatten_ids: ['bradycardiaEventNumber'] unique_id_map: {np.int64(1): np.int64(1), np.int64(2): np.int64(2), np.int64(3): np.int64(3), np.int64(4): np.int64(4), np.int64(5): np.int64(5), np.int64(6): np.int64(6), np.int64(7): np.int64(7), np.int64(8): np.int64(8), np.int64(9): np.int64(9), np.int64(10): np.in

In [123]:
# Per-column summary of the normalized 02-13-bradycardia frame; each output line shows a
# column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/27) center: (210/0)
(1/27) center.orig: (210/0)
(2/27) subjectID: (210/0)
(3/27) subjectID.orig: (210/0)
(4/27) uniqueID: (210/0)
(5/27) uniqueID.orig: (210/0)
(6/27) _flatten_index: (210/0)
(7/27) bradycardiaEventNumber: (210/0)
(8/27) bradycardiaEventNumber.orig: (210/0)
(9/27) bradycardiaLess70Greater15min: (210/0)
(10/27) bradycardiaLess70Greater15min.orig: (210/0)
(11/27) bradycardiaEKG: (210/0)
(12/27) bradycardiaEKG.orig: (57/153)
(13/27) bradycardiaEKGResult: (210/0)
(14/27) bradycardiaEKGResult.orig: (3/207)
(15/27) bradycardiaEKGResultOtherText: (210/0)
(16/27) bradycardiaEKGResultOtherText.orig: (1/209)
(17/27) bradycardiaAntiarrhythmiaMedication: (210/0)
(18/27) bradycardiaAntiarrhythmiaMedication.orig: (57/153)
(19/27) bradycardiaDate: (210/0)
(20/27) bradycardiaDate.orig: (57/153)
(21/27) bradycardiaTime: (210/0)
(22/27) bradycardiaTime.orig: (57/153)
(23/27) bradycardiaDuration: (210/0)
(24/27) bradycardiaDuration.orig: (54/156)
(25/27) bradycardiaHeartRateMin: (210/0

## 02-14-adverse-event

In [124]:
# Column(s) handed to normalize_value as its flatten ids for this table.
_FLATTEN_IDS = ['adverseEventNumber']
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '02-14-adverse-event.csv'

In [125]:
# 02-14-adverse-event: load the extracted CSV as raw objects, normalize it (flattening
# on _FLATTEN_IDS), and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/58) normalize_value: column: center
(1/58) normalize_value: column: subjectID
(2/58) normalize_value: column: uniqueID
(3/58) normalize_value: column: SAECardiacExperienceComment
(4/58) normalize_value: column: SAEMetabolicAcidosisComment
(5/58) normalize_value: column: SAEThrombosisExperienceComment
(6/58) normalize_value: column: SAEBleedingExperienceComment
(7/58) normalize_value: column: SAEAlterationSkinIntegrityComment
(8/58) normalize_value: column: SAEDeathComment
(9/58) normalize_value: column: adverseEventNumber
(10/58) normalize_value: column: SAECardiacExperienceOnsetDate
(11/58) normalize_value: column: SAECardiacExperienceOnsetTime
(12/58) normalize_value: column: SAECardiacExperienceResolveDate
(13/58) normalize_value: column: SAECardiacExperienceResolveTime
(14/58) normalize_value: column: SAECardiacExperienceDueToHypothermia
(15/58) normalize_value: column: SAECardiacExperienceActionTaken
(16/58) normalize_value: column: SAECardiacExperienceOutcome
(17/58) normalize

In [126]:
# Per-column summary of the normalized 02-14-adverse-event frame; each output line shows a
# column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/117) center: (93/0)
(1/117) center.orig: (93/0)
(2/117) subjectID: (93/0)
(3/117) subjectID.orig: (93/0)
(4/117) uniqueID: (93/0)
(5/117) uniqueID.orig: (93/0)
(6/117) _flatten_index: (93/0)
(7/117) adverseEventNumber: (93/0)
(8/117) adverseEventNumber.orig: (93/0)
(9/117) SAECardiacExperienceOnsetDate: (93/0)
(10/117) SAECardiacExperienceOnsetDate.orig: (12/81)
(11/117) SAECardiacExperienceOnsetTime: (93/0)
(12/117) SAECardiacExperienceOnsetTime.orig: (12/81)
(13/117) SAECardiacExperienceResolveDate: (93/0)
(14/117) SAECardiacExperienceResolveDate.orig: (11/82)
(15/117) SAECardiacExperienceResolveTime: (93/0)
(16/117) SAECardiacExperienceResolveTime.orig: (11/82)
(17/117) SAECardiacExperienceDueToHypothermia: (93/0)
(18/117) SAECardiacExperienceDueToHypothermia.orig: (12/81)
(19/117) SAECardiacExperienceActionTaken: (93/0)
(20/117) SAECardiacExperienceActionTaken.orig: (12/81)
(21/117) SAECardiacExperienceOutcome: (93/0)
(22/117) SAECardiacExperienceOutcome.orig: (12/81)
(23/117) S

## 02-15-violation

In [127]:
# Column(s) handed to normalize_value as its flatten ids for this table
# (echoed by the flatten_index log line of the next cell).
_FLATTEN_IDS = ['violationNumber']
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '02-15-violation.csv'

In [128]:
# 02-15-violation: load the extracted CSV as raw objects, normalize it (flattening
# on _FLATTEN_IDS), and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: violationOtherText
(4/12) normalize_value: column: violationOtherCirumstanceText
(5/12) normalize_value: column: violationComment
(6/12) normalize_value: column: violationNumber
(7/12) normalize_value: column: violationDate
(8/12) normalize_value: column: violationNature
(9/12) normalize_value: column: violationTreatmentAssign
(10/12) normalize_value: column: violationTreatmentReceive
(11/12) normalize_value: column: violationCircumstance
flatten_index: flatten_ids: ['violationNumber'] unique_id_map: {np.int64(1): np.int64(1), np.int64(2): np.int64(2), np.int64(3): np.int64(3), np.int64(4): np.int64(4)} the_type: int64


In [129]:
# Per-column summary of the normalized 02-15-violation frame; each output line shows a
# column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/25) center: (104/0)
(1/25) center.orig: (104/0)
(2/25) subjectID: (104/0)
(3/25) subjectID.orig: (104/0)
(4/25) uniqueID: (104/0)
(5/25) uniqueID.orig: (104/0)
(6/25) _flatten_index: (104/0)
(7/25) violationNumber: (104/0)
(8/25) violationNumber.orig: (104/0)
(9/25) violationDate: (104/0)
(10/25) violationDate.orig: (104/0)
(11/25) violationNature: (104/0)
(12/25) violationNature.orig: (104/0)
(13/25) violationTreatmentAssign: (104/0)
(14/25) violationTreatmentAssign.orig: (0/104)
(15/25) violationTreatmentReceive: (104/0)
(16/25) violationTreatmentReceive.orig: (0/104)
(17/25) violationOtherText: (104/0)
(18/25) violationOtherText.orig: (102/2)
(19/25) violationCircumstance: (104/0)
(20/25) violationCircumstance.orig: (104/0)
(21/25) violationOtherCirumstanceText: (104/0)
(22/25) violationOtherCirumstanceText.orig: (94/10)
(23/25) violationComment: (104/0)
(24/25) violationComment.orig: (51/53)


## 02-16-interrupt

In [130]:
# Column(s) handed to normalize_value as its flatten ids for this table
# (echoed by the flatten_index log line of the next cell).
_FLATTEN_IDS = ['interruptNumber']
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '02-16-interrupt.csv'

In [131]:
# 02-16-interrupt: load the extracted CSV as raw objects, normalize it (flattening
# on _FLATTEN_IDS), and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)


(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: interrupt
(4/12) normalize_value: column: interruptReasonText
(5/12) normalize_value: column: interruptNumber
(6/12) normalize_value: column: interruptReason
(7/12) normalize_value: column: interruptDate
(8/12) normalize_value: column: interruptTime
(9/12) normalize_value: column: interruptRestartDate
(10/12) normalize_value: column: interruptRestartTime
(11/12) normalize_value: column: interruptRestartEsophagealTemperature_C
flatten_index: flatten_ids: ['interruptNumber'] unique_id_map: {np.int64(1): np.int64(1), np.int64(2): np.int64(2)} the_type: int64


In [132]:
# Per-column summary of the normalized 02-16-interrupt frame; each output line shows a
# column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/25) center: (249/0)
(1/25) center.orig: (249/0)
(2/25) subjectID: (249/0)
(3/25) subjectID.orig: (249/0)
(4/25) uniqueID: (249/0)
(5/25) uniqueID.orig: (249/0)
(6/25) _flatten_index: (249/0)
(7/25) interruptNumber: (249/0)
(8/25) interruptNumber.orig: (249/0)
(9/25) interrupt: (249/0)
(10/25) interrupt.orig: (249/0)
(11/25) interruptReason: (249/0)
(12/25) interruptReason.orig: (20/229)
(13/25) interruptReasonText: (249/0)
(14/25) interruptReasonText.orig: (16/233)
(15/25) interruptDate: (249/0)
(16/25) interruptDate.orig: (20/229)
(17/25) interruptTime: (249/0)
(18/25) interruptTime.orig: (19/230)
(19/25) interruptRestartDate: (249/0)
(20/25) interruptRestartDate.orig: (14/235)
(21/25) interruptRestartTime: (249/0)
(22/25) interruptRestartTime.orig: (14/235)
(23/25) interruptRestartEsophagealTemperature_C: (249/0)
(24/25) interruptRestartEsophagealTemperature_C.orig: (12/237)


## 02-17-discontinue

In [133]:
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '02-17-discontinue.csv'

In [134]:
# 02-17-discontinue: load the extracted CSV as raw objects, normalize it,
# and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/15) normalize_value: column: center
(1/15) normalize_value: column: subjectID
(2/15) normalize_value: column: uniqueID
(3/15) normalize_value: column: discontinueBeforeEndPeriod
(4/15) normalize_value: column: discontinueOtherText
(5/15) normalize_value: column: discontinueDate
(6/15) normalize_value: column: discontinueTime
(7/15) normalize_value: column: discontinueParentsWithdraw
(8/15) normalize_value: column: discontinuePhysicianWithdraw
(9/15) normalize_value: column: discontinueAdverseEvent
(10/15) normalize_value: column: discontinueECMO
(11/15) normalize_value: column: discontinueDNR
(12/15) normalize_value: column: discontinueWdrawSupport
(13/15) normalize_value: column: discontinueDeath
(14/15) normalize_value: column: discontinueOther


In [135]:
# Per-column summary of the normalized 02-17-discontinue frame; each output line shows a
# column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/30) center: (364/0)
(1/30) center.orig: (364/0)
(2/30) subjectID: (364/0)
(3/30) subjectID.orig: (364/0)
(4/30) uniqueID: (364/0)
(5/30) uniqueID.orig: (364/0)
(6/30) discontinueDate: (364/0)
(7/30) discontinueDate.orig: (55/309)
(8/30) discontinueTime: (364/0)
(9/30) discontinueTime.orig: (55/309)
(10/30) discontinueBeforeEndPeriod: (364/0)
(11/30) discontinueBeforeEndPeriod.orig: (364/0)
(12/30) discontinueParentsWithdraw: (364/0)
(13/30) discontinueParentsWithdraw.orig: (53/311)
(14/30) discontinuePhysicianWithdraw: (364/0)
(15/30) discontinuePhysicianWithdraw.orig: (53/311)
(16/30) discontinueAdverseEvent: (364/0)
(17/30) discontinueAdverseEvent.orig: (53/311)
(18/30) discontinueECMO: (364/0)
(19/30) discontinueECMO.orig: (53/311)
(20/30) discontinueDNR: (364/0)
(21/30) discontinueDNR.orig: (53/311)
(22/30) discontinueWdrawSupport: (364/0)
(23/30) discontinueWdrawSupport.orig: (53/311)
(24/30) discontinueDeath: (364/0)
(25/30) discontinueDeath.orig: (53/311)
(26/30) discontinueO

## 04-16-wdraw-support

In [136]:
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '04-16-wdraw-support.csv'

In [137]:
# 04-16-wdraw-support: load the extracted CSV as raw objects, normalize it,
# and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/16) normalize_value: column: center
(1/16) normalize_value: column: subjectID
(2/16) normalize_value: column: uniqueID
(3/16) normalize_value: column: wdrawSupportDiscussedWithFamily
(4/16) normalize_value: column: wdrawSupportRecommendSolelyByClinicalTeam
(5/16) normalize_value: column: wdrawSupportNeurologicalExam
(6/16) normalize_value: column: wdrawSupportImagingStudy
(7/16) normalize_value: column: wdrawSupportEEGFinding
(8/16) normalize_value: column: wdrawSupportMultisystemOrganFailureOtherThanCNS
(9/16) normalize_value: column: wdrawSupportBrainBloodFlowScan
(10/16) normalize_value: column: wdrawSupportParentWish
(11/16) normalize_value: column: wdrawSupportOther
(12/16) normalize_value: column: wdrawSupportOtherText
(13/16) normalize_value: column: wdrawSupport
(14/16) normalize_value: column: wdrawSupportDate
(15/16) normalize_value: column: wdrawSupportTime


In [138]:
# Per-column summary of the normalized 04-16-wdraw-support frame; each output line shows a
# column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/32) center: (364/0)
(1/32) center.orig: (364/0)
(2/32) subjectID: (364/0)
(3/32) subjectID.orig: (364/0)
(4/32) uniqueID: (364/0)
(5/32) uniqueID.orig: (364/0)
(6/32) wdrawSupport: (364/0)
(7/32) wdrawSupport.orig: (59/305)
(8/32) wdrawSupportDate: (364/0)
(9/32) wdrawSupportDate.orig: (44/320)
(10/32) wdrawSupportTime: (364/0)
(11/32) wdrawSupportTime.orig: (43/321)
(12/32) wdrawSupportDiscussedWithFamily: (364/0)
(13/32) wdrawSupportDiscussedWithFamily.orig: (364/0)
(14/32) wdrawSupportRecommendSolelyByClinicalTeam: (364/0)
(15/32) wdrawSupportRecommendSolelyByClinicalTeam.orig: (59/305)
(16/32) wdrawSupportNeurologicalExam: (364/0)
(17/32) wdrawSupportNeurologicalExam.orig: (59/305)
(18/32) wdrawSupportImagingStudy: (364/0)
(19/32) wdrawSupportImagingStudy.orig: (59/305)
(20/32) wdrawSupportEEGFinding: (364/0)
(21/32) wdrawSupportEEGFinding.orig: (59/305)
(22/32) wdrawSupportMultisystemOrganFailureOtherThanCNS: (364/0)
(23/32) wdrawSupportMultisystemOrganFailureOtherThanCNS.orig:

## 04-17-limit-care

In [139]:
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '04-17-limit-care.csv'

In [140]:
# 04-17-limit-care: load the extracted CSV as raw objects, normalize it,
# and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/14) normalize_value: column: center


(1/14) normalize_value: column: subjectID
(2/14) normalize_value: column: uniqueID
(3/14) normalize_value: column: limitCareDiscussedWithFamily
(4/14) normalize_value: column: limitCareRecommendSolelyByClinicalTeam
(5/14) normalize_value: column: limitCareAgreedByFamilyAndCareTeam
(6/14) normalize_value: column: limitCareNoFurtherMechanicalVentilationAndIntubation
(7/14) normalize_value: column: limitCareNoFurtherVentilationWithBagAndMask
(8/14) normalize_value: column: limitCareNoFurtherMedicationsToSupportBP
(9/14) normalize_value: column: limitCareNoFurtherChestCompression
(10/14) normalize_value: column: limitCareNoFurtherEmergencyMedication
(11/14) normalize_value: column: limitCareDNR
(12/14) normalize_value: column: limitCareDNRDate
(13/14) normalize_value: column: limitCareDNRTime


In [141]:
# Per-column summary of the normalized 04-17-limit-care frame; each output line shows a
# column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/28) center: (364/0)
(1/28) center.orig: (364/0)
(2/28) subjectID: (364/0)
(3/28) subjectID.orig: (364/0)
(4/28) uniqueID: (364/0)
(5/28) uniqueID.orig: (364/0)
(6/28) limitCareDiscussedWithFamily: (364/0)
(7/28) limitCareDiscussedWithFamily.orig: (364/0)
(8/28) limitCareRecommendSolelyByClinicalTeam: (364/0)
(9/28) limitCareRecommendSolelyByClinicalTeam.orig: (45/319)
(10/28) limitCareAgreedByFamilyAndCareTeam: (364/0)
(11/28) limitCareAgreedByFamilyAndCareTeam.orig: (45/319)
(12/28) limitCareNoFurtherMechanicalVentilationAndIntubation: (364/0)
(13/28) limitCareNoFurtherMechanicalVentilationAndIntubation.orig: (40/324)
(14/28) limitCareNoFurtherVentilationWithBagAndMask: (364/0)
(15/28) limitCareNoFurtherVentilationWithBagAndMask.orig: (40/324)
(16/28) limitCareNoFurtherMedicationsToSupportBP: (364/0)
(17/28) limitCareNoFurtherMedicationsToSupportBP.orig: (40/324)
(18/28) limitCareNoFurtherChestCompression: (364/0)
(19/28) limitCareNoFurtherChestCompression.orig: (40/324)
(20/28) li

## 04-01-status

In [142]:
# File name of the table handled in this section; the next cell reads it from
# input_dir and writes the normalized copy to out_dir under the same name.
base_filename = '04-01-status.csv'

In [143]:
# 04-01-status: load the extracted CSV as raw objects, normalize it,
# and save the result under out_dir with the same file name.
filename = f"{input_dir}{os.sep}{base_filename}"
out_filename = f"{out_dir}{os.sep}{base_filename}"

# dtype='O' keeps every column as Python objects, so no type inference happens on read.
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)
df.to_csv(out_filename, index=False)

(0/31) normalize_value: column: center
(1/31) normalize_value: column: subjectID
(2/31) normalize_value: column: uniqueID
(3/31) normalize_value: column: homeTherapyStatus
(4/31) normalize_value: column: homeTherapyVentilator
(5/31) normalize_value: column: homeTherapyOxygen
(6/31) normalize_value: column: homeTherapyGavageTubeFeed
(7/31) normalize_value: column: homeTherapyGastrostomyTubeFeed
(8/31) normalize_value: column: homeTherapyTemperatureBlanket
(9/31) normalize_value: column: homeTherapyAnticonvulsantMedication
(10/31) normalize_value: column: homeTherapyOther
(11/31) normalize_value: column: homeTherapyOtherText
(12/31) normalize_value: column: deathAutopsy
(13/31) normalize_value: column: deathCauseText
(14/31) normalize_value: column: status
(15/31) normalize_value: column: statusDate
(16/31) normalize_value: column: dischargeWeight_g
(17/31) normalize_value: column: dischargeLength_cm
(18/31) normalize_value: column: dischargeHeadCircumference_cm
(19/31) normalize_value: 

In [144]:
# Per-column summary of the normalized 04-01-status frame; each output line shows a
# column name with a pair of counts (looks like non-null/null — confirm in column_info).
COMBINE_harmonizer.column_info(df)

(0/62) center: (364/0)
(1/62) center.orig: (364/0)
(2/62) subjectID: (364/0)
(3/62) subjectID.orig: (364/0)
(4/62) uniqueID: (364/0)
(5/62) uniqueID.orig: (364/0)
(6/62) status: (364/0)
(7/62) status.orig: (364/0)
(8/62) statusDate: (364/0)
(9/62) statusDate.orig: (364/0)
(10/62) dischargeDate: (364/0)
(11/62) dischargeDate.orig: (290/74)
(12/62) dischargeWeight_g: (364/0)
(13/62) dischargeWeight_g.orig: (335/29)
(14/62) dischargeLength_cm: (364/0)
(15/62) dischargeLength_cm.orig: (316/48)
(16/62) dischargeHeadCircumference_cm: (364/0)
(17/62) dischargeHeadCircumference_cm.orig: (318/46)
(18/62) transferReason: (364/0)
(19/62) transferReason.orig: (26/338)
(20/62) transferDate: (364/0)
(21/62) transferDate.orig: (23/341)
(22/62) transferWeight_g: (364/0)
(23/62) transferWeight_g.orig: (24/340)
(24/62) transferLength_cm: (364/0)
(25/62) transferLength_cm.orig: (21/343)
(26/62) transferHeadCircumference_cm: (364/0)
(27/62) transferHeadCircumference_cm.orig: (22/342)
(28/62) transferOutco

## 04-12-neuro-exam

In [145]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-12-neuro-exam.csv'

In [146]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
# NOTE(review): the recorded run logs "[WARN] unable to get value" for '**' in
# dischargeNeuroExamTone — confirm how that raw value should be normalized.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/22) normalize_value: column: center
(1/22) normalize_value: column: subjectID
(2/22) normalize_value: column: uniqueID
(3/22) normalize_value: column: dischargeNeuroExamTone
[WARN] unable to float: val (**/<class 'str'>) e: could not convert string to float: '**'
[WARN] unable to get value: sheet_name: signOfHIETone value: ** value_float: ** value_int: **
[WARN] unable to float: val (**/<class 'str'>) e: could not convert string to float: '**'
[WARN] unable to get value: sheet_name: signOfHIETone value: ** value_float: ** value_int: **
(4/22) normalize_value: column: dischargeNeuroExamRespiration
(5/22) normalize_value: column: dischargeNeuroExamSeizure
(6/22) normalize_value: column: dischargeNeuroExamSedate
(7/22) normalize_value: column: dischargeNeuroExamClonusSustained
(8/22) normalize_value: column: dischargeNeuroExamFistedHand
(9/22) normalize_value: column: dischargeNeuroExamAbnormalMovement
(10/22) normalize_value: column: dischargeNeuroExamGagReflexAbsent
(11/22) normalize

In [147]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/44) center: (364/0)
(1/44) center.orig: (364/0)
(2/44) subjectID: (364/0)
(3/44) subjectID.orig: (364/0)
(4/44) uniqueID: (364/0)
(5/44) uniqueID.orig: (364/0)
(6/44) dischargeNeuroExamStatus: (364/0)
(7/44) dischargeNeuroExamStatus.orig: (331/33)
(8/44) dischargeNeuroExamDate: (364/0)
(9/44) dischargeNeuroExamDate.orig: (333/31)
(10/44) dischargeNeuroExamTime: (364/0)
(11/44) dischargeNeuroExamTime.orig: (326/38)
(12/44) dischargeNeuroExamLevelConsciousness: (364/0)
(13/44) dischargeNeuroExamLevelConsciousness.orig: (333/31)
(14/44) dischargeNeuroExamSpontaneousActivity: (364/0)
(15/44) dischargeNeuroExamSpontaneousActivity.orig: (332/32)
(16/44) dischargeNeuroExamPosture: (364/0)
(17/44) dischargeNeuroExamPosture.orig: (332/32)
(18/44) dischargeNeuroExamTone: (364/0)
(19/44) dischargeNeuroExamTone.orig: (333/31)
(20/44) dischargeNeuroExamSuck: (364/0)
(21/44) dischargeNeuroExamSuck.orig: (332/32)
(22/44) dischargeNeuroExamMoro: (364/0)
(23/44) dischargeNeuroExamMoro.orig: (330/34)

## 04-02-cardiovascular

In [148]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-02-cardiovascular.csv'

In [149]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/10) normalize_value: column: center
(1/10) normalize_value: column: subjectID
(2/10) normalize_value: column: uniqueID
(3/10) normalize_value: column: dischargeCardiomegaly
(4/10) normalize_value: column: dischargeCardiacFailure
(5/10) normalize_value: column: dischargeCardiacDysfunctionByEcho
(6/10) normalize_value: column: dischargeCardiacIschemiaByEKG
(7/10) normalize_value: column: dischargeHypotension
(8/10) normalize_value: column: dischargeArrhythmia
(9/10) normalize_value: column: dischargeInotropicAgent


In [150]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/20) center: (364/0)
(1/20) center.orig: (364/0)
(2/20) subjectID: (364/0)
(3/20) subjectID.orig: (364/0)
(4/20) uniqueID: (364/0)
(5/20) uniqueID.orig: (364/0)
(6/20) dischargeCardiomegaly: (364/0)
(7/20) dischargeCardiomegaly.orig: (364/0)
(8/20) dischargeCardiacFailure: (364/0)
(9/20) dischargeCardiacFailure.orig: (364/0)
(10/20) dischargeCardiacDysfunctionByEcho: (364/0)
(11/20) dischargeCardiacDysfunctionByEcho.orig: (364/0)
(12/20) dischargeCardiacIschemiaByEKG: (364/0)
(13/20) dischargeCardiacIschemiaByEKG.orig: (364/0)
(14/20) dischargeHypotension: (364/0)
(15/20) dischargeHypotension.orig: (364/0)
(16/20) dischargeArrhythmia: (364/0)
(17/20) dischargeArrhythmia.orig: (364/0)
(18/20) dischargeInotropicAgent: (364/0)
(19/20) dischargeInotropicAgent.orig: (364/0)


## 04-03-respiratory

In [151]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-03-respiratory.csv'

In [152]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/13) normalize_value: column: center
(1/13) normalize_value: column: subjectID
(2/13) normalize_value: column: uniqueID
(3/13) normalize_value: column: dischargeMeconiumAspirationSyndrome
(4/13) normalize_value: column: dischargePPHN


(5/13) normalize_value: column: dischargePulmonaryHemorrhage
(6/13) normalize_value: column: dischargePenumonia
(7/13) normalize_value: column: dischargeChronicLungDisease
(8/13) normalize_value: column: dischargeECMO
(9/13) normalize_value: column: dischargeINO
(10/13) normalize_value: column: dischargeVentilator_day
(11/13) normalize_value: column: dischargeOxygen_day
(12/13) normalize_value: column: dischargeCPAP_day


In [153]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/26) center: (364/0)
(1/26) center.orig: (364/0)
(2/26) subjectID: (364/0)
(3/26) subjectID.orig: (364/0)
(4/26) uniqueID: (364/0)
(5/26) uniqueID.orig: (364/0)
(6/26) dischargeMeconiumAspirationSyndrome: (364/0)
(7/26) dischargeMeconiumAspirationSyndrome.orig: (364/0)
(8/26) dischargePPHN: (364/0)
(9/26) dischargePPHN.orig: (364/0)
(10/26) dischargePulmonaryHemorrhage: (364/0)
(11/26) dischargePulmonaryHemorrhage.orig: (364/0)
(12/26) dischargePenumonia: (364/0)
(13/26) dischargePenumonia.orig: (364/0)
(14/26) dischargeChronicLungDisease: (364/0)
(15/26) dischargeChronicLungDisease.orig: (364/0)
(16/26) dischargeECMO: (364/0)
(17/26) dischargeECMO.orig: (364/0)
(18/26) dischargeINO: (364/0)
(19/26) dischargeINO.orig: (364/0)
(20/26) dischargeVentilator_day: (364/0)
(21/26) dischargeVentilator_day.orig: (364/0)
(22/26) dischargeOxygen_day: (364/0)
(23/26) dischargeOxygen_day.orig: (364/0)
(24/26) dischargeCPAP_day: (364/0)
(25/26) dischargeCPAP_day.orig: (364/0)


## 04-04-hematology

In [154]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-04-hematology.csv'

In [155]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/4) normalize_value: column: center
(1/4) normalize_value: column: subjectID
(2/4) normalize_value: column: uniqueID
(3/4) normalize_value: column: dischargeDIC


In [156]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/8) center: (364/0)
(1/8) center.orig: (364/0)
(2/8) subjectID: (364/0)
(3/8) subjectID.orig: (364/0)
(4/8) uniqueID: (364/0)
(5/8) uniqueID.orig: (364/0)
(6/8) dischargeDIC: (364/0)
(7/8) dischargeDIC.orig: (364/0)


## 04-05-metabolic

In [157]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-05-metabolic.csv'

In [158]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/6) normalize_value: column: center
(1/6) normalize_value: column: subjectID
(2/6) normalize_value: column: uniqueID
(3/6) normalize_value: column: dischargeHypoglycemia
(4/6) normalize_value: column: dischargeHypocalcemia
(5/6) normalize_value: column: dischargeHypomagnesemia


In [159]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/12) center: (364/0)
(1/12) center.orig: (364/0)
(2/12) subjectID: (364/0)
(3/12) subjectID.orig: (364/0)
(4/12) uniqueID: (364/0)
(5/12) uniqueID.orig: (364/0)
(6/12) dischargeHypoglycemia: (364/0)
(7/12) dischargeHypoglycemia.orig: (364/0)
(8/12) dischargeHypocalcemia: (364/0)
(9/12) dischargeHypocalcemia.orig: (364/0)
(10/12) dischargeHypomagnesemia: (364/0)
(11/12) dischargeHypomagnesemia.orig: (364/0)


## 04-06-renal

In [160]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-06-renal.csv'

In [161]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/6) normalize_value: column: center
(1/6) normalize_value: column: subjectID
(2/6) normalize_value: column: uniqueID
(3/6) normalize_value: column: dischargeOliguria
(4/6) normalize_value: column: dischargeAnuria
(5/6) normalize_value: column: dischargeDialysis


In [162]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/12) center: (364/0)
(1/12) center.orig: (364/0)
(2/12) subjectID: (364/0)
(3/12) subjectID.orig: (364/0)
(4/12) uniqueID: (364/0)
(5/12) uniqueID.orig: (364/0)
(6/12) dischargeOliguria: (364/0)
(7/12) dischargeOliguria.orig: (364/0)
(8/12) dischargeAnuria: (364/0)
(9/12) dischargeAnuria.orig: (364/0)
(10/12) dischargeDialysis: (364/0)
(11/12) dischargeDialysis.orig: (364/0)


## 04-07-gastrointestinal

In [163]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-07-gastrointestinal.csv'

In [164]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/9) normalize_value: column: center
(1/9) normalize_value: column: subjectID
(2/9) normalize_value: column: uniqueID
(3/9) normalize_value: column: dischargeFullNippleFeed
(4/9) normalize_value: column: dischargeNEC
(5/9) normalize_value: column: dischargeHepaticDysfunction
(6/9) normalize_value: column: dischargeEnteralFeedStart_day
(7/9) normalize_value: column: dischargeTubeFeedingDuration_day
(8/9) normalize_value: column: dischargeFullNippleFeed_day


In [165]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/18) center: (364/0)
(1/18) center.orig: (364/0)
(2/18) subjectID: (364/0)
(3/18) subjectID.orig: (364/0)
(4/18) uniqueID: (364/0)
(5/18) uniqueID.orig: (364/0)
(6/18) dischargeEnteralFeedStart_day: (364/0)
(7/18) dischargeEnteralFeedStart_day.orig: (337/27)
(8/18) dischargeTubeFeedingDuration_day: (364/0)
(9/18) dischargeTubeFeedingDuration_day.orig: (343/21)
(10/18) dischargeFullNippleFeed: (364/0)
(11/18) dischargeFullNippleFeed.orig: (364/0)
(12/18) dischargeFullNippleFeed_day: (364/0)
(13/18) dischargeFullNippleFeed_day.orig: (237/127)
(14/18) dischargeNEC: (364/0)
(15/18) dischargeNEC.orig: (364/0)
(16/18) dischargeHepaticDysfunction: (364/0)
(17/18) dischargeHepaticDysfunction.orig: (364/0)


## 04-08-skin

In [166]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-08-skin.csv'

In [167]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/16) normalize_value: column: center
(1/16) normalize_value: column: subjectID
(2/16) normalize_value: column: uniqueID
(3/16) normalize_value: column: dischargeAlteredSkinItegrityPostIntervention
(4/16) normalize_value: column: dischargeErythema
(5/16) normalize_value: column: dischargeSclerema
(6/16) normalize_value: column: dischargeCyanosis
(7/16) normalize_value: column: dischargeSubFatNecrosis
(8/16) normalize_value: column: dischargeErythemaOnsetDate
(9/16) normalize_value: column: dischargeErythemaResolveDate
(10/16) normalize_value: column: dischargeScleremaOnsetDate
(11/16) normalize_value: column: dischargeScleremaResolveDate
(12/16) normalize_value: column: dischargeCyanosisOnsetDate
(13/16) normalize_value: column: dischargeCyanosisResolveDate
(14/16) normalize_value: column: dischargeSubFatNecrosisOnsetDate
(15/16) normalize_value: column: dischargeSubFatNecrosisResolveDate


In [168]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/32) center: (364/0)
(1/32) center.orig: (364/0)
(2/32) subjectID: (364/0)
(3/32) subjectID.orig: (364/0)
(4/32) uniqueID: (364/0)
(5/32) uniqueID.orig: (364/0)
(6/32) dischargeAlteredSkinItegrityPostIntervention: (364/0)
(7/32) dischargeAlteredSkinItegrityPostIntervention.orig: (364/0)
(8/32) dischargeErythema: (364/0)
(9/32) dischargeErythema.orig: (28/336)
(10/32) dischargeErythemaOnsetDate: (364/0)
(11/32) dischargeErythemaOnsetDate.orig: (13/351)
(12/32) dischargeErythemaResolveDate: (364/0)
(13/32) dischargeErythemaResolveDate.orig: (10/354)
(14/32) dischargeSclerema: (364/0)
(15/32) dischargeSclerema.orig: (28/336)
(16/32) dischargeScleremaOnsetDate: (364/0)
(17/32) dischargeScleremaOnsetDate.orig: (0/364)
(18/32) dischargeScleremaResolveDate: (364/0)
(19/32) dischargeScleremaResolveDate.orig: (0/364)
(20/32) dischargeCyanosis: (364/0)
(21/32) dischargeCyanosis.orig: (28/336)
(22/32) dischargeCyanosisOnsetDate: (364/0)
(23/32) dischargeCyanosisOnsetDate.orig: (1/363)
(24/32) d

## 04-09-auditory

In [169]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-09-auditory.csv'

In [170]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/5) normalize_value: column: center
(1/5) normalize_value: column: subjectID
(2/5) normalize_value: column: uniqueID
(3/5) normalize_value: column: dischargeHearingTest
(4/5) normalize_value: column: dischargeHearingTestNormal


In [171]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/10) center: (364/0)
(1/10) center.orig: (364/0)
(2/10) subjectID: (364/0)
(3/10) subjectID.orig: (364/0)
(4/10) uniqueID: (364/0)
(5/10) uniqueID.orig: (364/0)
(6/10) dischargeHearingTest: (364/0)
(7/10) dischargeHearingTest.orig: (364/0)
(8/10) dischargeHearingTestNormal: (364/0)
(9/10) dischargeHearingTestNormal.orig: (290/74)


## 04-10-surgery

In [172]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-10-surgery.csv'

In [173]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/7) normalize_value: column: center
(1/7) normalize_value: column: subjectID
(2/7) normalize_value: column: uniqueID
(3/7) normalize_value: column: dischargeMajorSurgery
(4/7) normalize_value: column: dischargeSurgeryCode1
(5/7) normalize_value: column: dischargeSurgeryCode2
(6/7) normalize_value: column: dischargeSurgeryCode3


In [174]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/14) center: (364/0)
(1/14) center.orig: (364/0)
(2/14) subjectID: (364/0)
(3/14) subjectID.orig: (364/0)
(4/14) uniqueID: (364/0)
(5/14) uniqueID.orig: (364/0)
(6/14) dischargeMajorSurgery: (364/0)
(7/14) dischargeMajorSurgery.orig: (364/0)
(8/14) dischargeSurgeryCode1: (364/0)
(9/14) dischargeSurgeryCode1.orig: (42/322)
(10/14) dischargeSurgeryCode2: (364/0)
(11/14) dischargeSurgeryCode2.orig: (12/352)
(12/14) dischargeSurgeryCode3: (364/0)
(13/14) dischargeSurgeryCode3.orig: (5/359)


## 04-11-infection

In [175]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-11-infection.csv'

In [176]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/11) normalize_value: column: center
(1/11) normalize_value: column: subjectID
(2/11) normalize_value: column: uniqueID
(3/11) normalize_value: column: dischargeSepticemia
(4/11) normalize_value: column: dischargeMeningitisEncephalitis
(5/11) normalize_value: column: dischargeSepticemiaOrganismCode1
(6/11) normalize_value: column: dischargeSepticemiaOrganismCode2
(7/11) normalize_value: column: dischargeSepticemiaOrganismCode3
(8/11) normalize_value: column: dischargeMeningitisOrganismCode1
(9/11) normalize_value: column: dischargeMeningitisOrganismCode2
(10/11) normalize_value: column: dischargeMeningitisOrganismCode3


In [177]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/22) center: (364/0)
(1/22) center.orig: (364/0)
(2/22) subjectID: (364/0)
(3/22) subjectID.orig: (364/0)
(4/22) uniqueID: (364/0)
(5/22) uniqueID.orig: (364/0)
(6/22) dischargeSepticemia: (364/0)
(7/22) dischargeSepticemia.orig: (364/0)
(8/22) dischargeSepticemiaOrganismCode1: (364/0)
(9/22) dischargeSepticemiaOrganismCode1.orig: (13/351)
(10/22) dischargeSepticemiaOrganismCode2: (364/0)
(11/22) dischargeSepticemiaOrganismCode2.orig: (1/363)
(12/22) dischargeSepticemiaOrganismCode3: (364/0)
(13/22) dischargeSepticemiaOrganismCode3.orig: (0/364)
(14/22) dischargeMeningitisEncephalitis: (364/0)
(15/22) dischargeMeningitisEncephalitis.orig: (364/0)
(16/22) dischargeMeningitisOrganismCode1: (364/0)
(17/22) dischargeMeningitisOrganismCode1.orig: (0/364)
(18/22) dischargeMeningitisOrganismCode2: (364/0)
(19/22) dischargeMeningitisOrganismCode2.orig: (0/364)
(20/22) dischargeMeningitisOrganismCode3: (364/0)
(21/22) dischargeMeningitisOrganismCode3.orig: (0/364)


## 04-13-seizure

In [178]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-13-seizure.csv'

In [179]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/18) normalize_value: column: center
(1/18) normalize_value: column: subjectID
(2/18) normalize_value: column: uniqueID
(3/18) normalize_value: column: dischargeSeizure
(4/18) normalize_value: column: dischargeSeizurePreIntervention
(5/18) normalize_value: column: dischargeSeizureAfterBaseline
(6/18) normalize_value: column: dischargeSeizureMaintenance
(7/18) normalize_value: column: dischargeSeizureRewarming
(8/18) normalize_value: column: dischargeSeizurePostIntervention
(9/18) normalize_value: column: dischargeEEG
(10/18) normalize_value: column: dischargeEEGFindingConsistentWithSeizure
(11/18) normalize_value: column: dischargeEEGAbnormalBackgroundActivity
(12/18) normalize_value: column: dischargeAnticonvulsantsGreater72H
(13/18) normalize_value: column: dischargeEEGFindingConsistentWithSeizureDate
(14/18) normalize_value: column: dischargeEEGFindingConsistentWithSeizureTime
(15/18) normalize_value: column: dischargeEEGAbnormalBackgroundActivityDate
(16/18) normalize_value: colu

In [180]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/36) center: (364/0)
(1/36) center.orig: (364/0)
(2/36) subjectID: (364/0)
(3/36) subjectID.orig: (364/0)
(4/36) uniqueID: (364/0)
(5/36) uniqueID.orig: (364/0)
(6/36) dischargeSeizure: (364/0)
(7/36) dischargeSeizure.orig: (364/0)
(8/36) dischargeSeizurePreIntervention: (364/0)
(9/36) dischargeSeizurePreIntervention.orig: (177/187)
(10/36) dischargeSeizureAfterBaseline: (364/0)
(11/36) dischargeSeizureAfterBaseline.orig: (177/187)
(12/36) dischargeSeizureMaintenance: (364/0)
(13/36) dischargeSeizureMaintenance.orig: (177/187)
(14/36) dischargeSeizureRewarming: (364/0)
(15/36) dischargeSeizureRewarming.orig: (177/187)
(16/36) dischargeSeizurePostIntervention: (364/0)
(17/36) dischargeSeizurePostIntervention.orig: (177/187)
(18/36) dischargeEEG: (364/0)
(19/36) dischargeEEG.orig: (364/0)
(20/36) dischargeEEGFindingConsistentWithSeizure: (364/0)
(21/36) dischargeEEGFindingConsistentWithSeizure.orig: (263/101)
(22/36) dischargeEEGFindingConsistentWithSeizureDate: (364/0)
(23/36) dischar

## 04-14-birth-defect

In [181]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-14-birth-defect.csv'

In [182]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/7) normalize_value: column: center
(1/7) normalize_value: column: subjectID
(2/7) normalize_value: column: uniqueID
(3/7) normalize_value: column: dischargeSyndromeMalformation
(4/7) normalize_value: column: dischargeBirthDefectCode1
(5/7) normalize_value: column: dischargeBirthDefectCode2
(6/7) normalize_value: column: dischargeBirthDefectCode3


In [183]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/14) center: (364/0)
(1/14) center.orig: (364/0)
(2/14) subjectID: (364/0)
(3/14) subjectID.orig: (364/0)
(4/14) uniqueID: (364/0)
(5/14) uniqueID.orig: (364/0)
(6/14) dischargeSyndromeMalformation: (364/0)
(7/14) dischargeSyndromeMalformation.orig: (364/0)
(8/14) dischargeBirthDefectCode1: (364/0)
(9/14) dischargeBirthDefectCode1.orig: (11/353)
(10/14) dischargeBirthDefectCode2: (364/0)
(11/14) dischargeBirthDefectCode2.orig: (1/363)
(12/14) dischargeBirthDefectCode3: (364/0)
(13/14) dischargeBirthDefectCode3.orig: (0/364)


## 04-15-home-therapy

In [184]:
# File name of this section's table; the next cell joins it onto both input_dir and out_dir.
base_filename = '04-15-home-therapy.csv'

In [185]:
# Load this section's CSV with every column read as object dtype, normalize it
# with the shared value map, and save the result under out_dir (row index omitted).
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O').pipe(
    COMBINE_harmonizer.normalize_value,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: dischargeHomeTherapy
(4/12) normalize_value: column: dischargeHomeTherapyVentilator
(5/12) normalize_value: column: dischargeHomeTherapyOxygen
(6/12) normalize_value: column: dischargeHomeTherapyGavageTubeFeed
(7/12) normalize_value: column: dischargeHomeTherapyGastrostomyTubeFeed
(8/12) normalize_value: column: dischargeHomeTherapyTemperatureBlanket
(9/12) normalize_value: column: dischargeHomeTherapyAnticonvulsantMedication
(10/12) normalize_value: column: dischargeHomeTherapyOther
(11/12) normalize_value: column: dischargeHomeTherapyOtherText


In [186]:
# Print one summary line per column of the frame normalized in the previous cell.
# NOTE(review): the paired numbers look like non-missing/missing counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/24) center: (364/0)
(1/24) center.orig: (364/0)
(2/24) subjectID: (364/0)
(3/24) subjectID.orig: (364/0)
(4/24) uniqueID: (364/0)
(5/24) uniqueID.orig: (364/0)
(6/24) dischargeHomeTherapy: (364/0)
(7/24) dischargeHomeTherapy.orig: (314/50)
(8/24) dischargeHomeTherapyVentilator: (364/0)
(9/24) dischargeHomeTherapyVentilator.orig: (118/246)
(10/24) dischargeHomeTherapyOxygen: (364/0)
(11/24) dischargeHomeTherapyOxygen.orig: (118/246)
(12/24) dischargeHomeTherapyGavageTubeFeed: (364/0)
(13/24) dischargeHomeTherapyGavageTubeFeed.orig: (118/246)
(14/24) dischargeHomeTherapyGastrostomyTubeFeed: (364/0)
(15/24) dischargeHomeTherapyGastrostomyTubeFeed.orig: (118/246)
(16/24) dischargeHomeTherapyTemperatureBlanket: (364/0)
(17/24) dischargeHomeTherapyTemperatureBlanket.orig: (118/246)
(18/24) dischargeHomeTherapyAnticonvulsantMedication: (364/0)
(19/24) dischargeHomeTherapyAnticonvulsantMedication.orig: (118/246)
(20/24) dischargeHomeTherapyOther: (364/0)
(21/24) dischargeHomeTherapyOther.or