In [1]:
import pandas as pd
pd.options.mode.copy_on_write = True
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import os
import re
import pydoc

import COMBINE_harmonizer
from COMBINE_harmonizer import cfg

## 00-0. Variables

In [2]:
# Study and data-dictionary sheet that every later cell in this notebook works on.
study_name = COMBINE_harmonizer.STUDY_LH
sheet_name = COMBINE_harmonizer.SHEET_MAIN

# Base directory (relative to the working directory) holding config.yaml and the data dictionary.
root_dir = '..'
# Initialize the harmonizer from the config file.
# NOTE(review): cfg.config is read in the next cell, presumably populated by this call — confirm.
COMBINE_harmonizer.init(f'{root_dir}/config.yaml')


In [3]:
# Directory the per-section CSVs are read from.
input_dir = f"{cfg.config['out_dir']}/out-{study_name}"
# Normalized CSVs go to a sibling directory: same path with a "-normalized" suffix.
out_dir = f"{input_dir}-normalized"
# Excel data dictionary used to build the value and variable-order maps.
data_dict_filename = f"{root_dir}/{COMBINE_harmonizer.DATA_DICTIONARY_EXCEL}"

# Create the output directory up front; a pre-existing directory is not an error.
os.makedirs(out_dir, exist_ok=True)


In [4]:
# Initialize the harmonizer's mappings for this study from the data dictionary
# (progress is logged per mapping, see the [INFO] lines in the output).
COMBINE_harmonizer.init_mapping(data_dict_filename, study_name)
# Value map built from the selected data-dictionary sheet; passed to normalize_value in every section below.
# NOTE(review): presumably relies on init_mapping having run first — confirm before reordering.
_VALUE_MAP = COMBINE_harmonizer.build_value_map(data_dict_filename, sheet_name)

[INFO] init_mapping (0/125): signOfHIETone
[INFO] init_mapping (1/125): signOfHIELvlOfCons
[INFO] init_mapping (2/125): signOfHIEPosture
[INFO] init_mapping (3/125): signOfHIEMoro
[INFO] init_mapping (4/125): signOfHIESuck
[INFO] init_mapping (5/125): signOfHIERespiratory
[INFO] init_mapping (6/125): signOfHIEHeartRate
[INFO] init_mapping (7/125): signOfHIEPupils
[INFO] init_mapping (8/125): signOfHIESpontaneousActivity
[INFO] init_mapping (9/125): noNeuroExamReason
[INFO] init_mapping (10/125): consentStatus
[INFO] init_mapping (11/125): treatmentAssign
[INFO] init_mapping (12/125): targetTreatmentTemperature
[INFO] init_mapping (13/125): blanketType
[INFO] init_mapping (14/125): encephalopathyLevel
[INFO] init_mapping (15/125): infantAge
[INFO] init_mapping (16/125): infantSex
[INFO] init_mapping (17/125): ethnicity
[INFO] init_mapping (18/125): education
[INFO] init_mapping (19/125): insurance
[INFO] init_mapping (20/125): race
[INFO] init_mapping (21/125): maritalStatus
[INFO] init

build_value_map: (0/1108) variable: center type: center
build_value_map: (1/1108) variable: subjectID type: text
build_value_map: (2/1108) variable: siteID type: text
build_value_map: (3/1108) variable: birthDate type: date
build_value_map: (4/1108) variable: birthNumber type: int
build_value_map: (5/1108) variable: screenComment type: text
build_value_map: (6/1108) variable: coreTempLess32p5CGreaterEq2Hr_e type: bool
build_value_map: (7/1108) variable: coreTempLess33p5CGreater1Hr_e type: bool
build_value_map: (8/1108) variable: coreTempLess34CGreater1Hr_e type: bool
build_value_map: (9/1108) variable: first6HrCoolByClinicalProtocol_e type: bool
build_value_map: (10/1108) variable: chromosomalAbnormality_e type: bool
build_value_map: (11/1108) variable: majorCongenitalAnomaly_e type: bool
build_value_map: (12/1108) variable: birthWeightLessEq1800g_e type: bool
build_value_map: (13/1108) variable: infantUnlikelySurvive_e type: bool
build_value_map: (14/1108) variable: first60MinAllBlood

In [5]:
# Variable-order map built from the same data-dictionary sheet; passed as order_map to normalize_value below.
_ORDER_MAP = COMBINE_harmonizer.build_variable_order_map(data_dict_filename, sheet_name)

## 00-screening

In [6]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '00-02-screening.csv'

In [7]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/28) normalize_value: column: center
(1/28) normalize_value: column: subjectID
(2/28) normalize_value: column: uniqueID
(3/28) normalize_value: column: siteID
(4/28) normalize_value: column: birthDate
(5/28) normalize_value: column: birthNumber
(6/28) normalize_value: column: coreTempLess34CGreater1Hr_e
(7/28) normalize_value: column: first6HrCoolByClinicalProtocol_e
(8/28) normalize_value: column: chromosomalAbnormality_e
(9/28) normalize_value: column: majorCongenitalAnomaly_e
(10/28) normalize_value: column: birthWeightLessEq1800g_e
(11/28) normalize_value: column: infantUnlikelySurvive_e
(12/28) normalize_value: column: first60MinAllBloodGasPHGreater7p15BaseDeficitLess10mEqPerL_e
(13/28) normalize_value: column: postnatalAgeLess6HrOrGreater24Hr_e
(14/28) normalize_value: column: enrolledConflictingTrial_e
(15/28) normalize_value: column: first60MinAnyBloodGasPHLessEq7_i
(16/28) normalize_value: column: first60MinAnyBloodGasBaseDeficitGreaterEq16mEqPerL_i
(17/28) normalize_value: 

In [8]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/56) center: (3088/0)
(1/56) center.orig: (3088/0)
(2/56) subjectID: (3088/0)
(3/56) subjectID.orig: (3088/0)
(4/56) uniqueID: (3088/0)
(5/56) uniqueID.orig: (3088/0)
(6/56) siteID: (3088/0)
(7/56) siteID.orig: (3088/0)
(8/56) birthNumber: (3088/0)
(9/56) birthNumber.orig: (3087/1)
(10/56) coreTempLess34CGreater1Hr_e: (3088/0)
(11/56) coreTempLess34CGreater1Hr_e.orig: (3088/0)
(12/56) first6HrCoolByClinicalProtocol_e: (3088/0)
(13/56) first6HrCoolByClinicalProtocol_e.orig: (1295/1793)
(14/56) chromosomalAbnormality_e: (3088/0)
(15/56) chromosomalAbnormality_e.orig: (3088/0)
(16/56) majorCongenitalAnomaly_e: (3088/0)
(17/56) majorCongenitalAnomaly_e.orig: (3088/0)
(18/56) birthWeightLessEq1800g_e: (3088/0)
(19/56) birthWeightLessEq1800g_e.orig: (3088/0)
(20/56) infantUnlikelySurvive_e: (3088/0)
(21/56) infantUnlikelySurvive_e.orig: (3088/0)
(22/56) first60MinAllBloodGasPHGreater7p15BaseDeficitLess10mEqPerL_e: (3088/0)
(23/56) first60MinAllBloodGasPHGreater7p15BaseDeficitLess10mEqPerL_

## 01-main-screening

In [9]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-02-screening.csv'

In [10]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/28) normalize_value: column: center
(1/28) normalize_value: column: subjectID
(2/28) normalize_value: column: uniqueID
(3/28) normalize_value: column: siteID
(4/28) normalize_value: column: birthDate
(5/28) normalize_value: column: birthNumber
(6/28) normalize_value: column: coreTempLess34CGreater1Hr_e
(7/28) normalize_value: column: first6HrCoolByClinicalProtocol_e
(8/28) normalize_value: column: chromosomalAbnormality_e
(9/28) normalize_value: column: majorCongenitalAnomaly_e
(10/28) normalize_value: column: birthWeightLessEq1800g_e
(11/28) normalize_value: column: infantUnlikelySurvive_e
(12/28) normalize_value: column: first60MinAllBloodGasPHGreater7p15BaseDeficitLess10mEqPerL_e
(13/28) normalize_value: column: postnatalAgeLess6HrOrGreater24Hr_e
(14/28) normalize_value: column: enrolledConflictingTrial_e
(15/28) normalize_value: column: first60MinAnyBloodGasPHLessEq7_i
(16/28) normalize_value: column: first60MinAnyBloodGasBaseDeficitGreaterEq16mEqPerL_i
(17/28) normalize_value: 

In [11]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/56) center: (168/0)
(1/56) center.orig: (168/0)
(2/56) subjectID: (168/0)
(3/56) subjectID.orig: (168/0)
(4/56) uniqueID: (168/0)
(5/56) uniqueID.orig: (168/0)
(6/56) siteID: (168/0)
(7/56) siteID.orig: (168/0)
(8/56) birthNumber: (168/0)
(9/56) birthNumber.orig: (168/0)
(10/56) coreTempLess34CGreater1Hr_e: (168/0)
(11/56) coreTempLess34CGreater1Hr_e.orig: (168/0)
(12/56) first6HrCoolByClinicalProtocol_e: (168/0)
(13/56) first6HrCoolByClinicalProtocol_e.orig: (0/168)
(14/56) chromosomalAbnormality_e: (168/0)
(15/56) chromosomalAbnormality_e.orig: (168/0)
(16/56) majorCongenitalAnomaly_e: (168/0)
(17/56) majorCongenitalAnomaly_e.orig: (168/0)
(18/56) birthWeightLessEq1800g_e: (168/0)
(19/56) birthWeightLessEq1800g_e.orig: (168/0)
(20/56) infantUnlikelySurvive_e: (168/0)
(21/56) infantUnlikelySurvive_e.orig: (168/0)
(22/56) first60MinAllBloodGasPHGreater7p15BaseDeficitLess10mEqPerL_e: (168/0)
(23/56) first60MinAllBloodGasPHGreater7p15BaseDeficitLess10mEqPerL_e.orig: (153/15)
(24/56) p

## 00-12-neuro-exam

In [12]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '00-12-neuro-exam.csv'

In [13]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
# Values that cannot be mapped (e.g. '**') are reported as [WARN] lines in the output.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/19) normalize_value: column: center
(1/19) normalize_value: column: subjectID
(2/19) normalize_value: column: uniqueID
(3/19) normalize_value: column: pre_NeuroExamSeizure
(4/19) normalize_value: column: pre_NeuroExam
(5/19) normalize_value: column: pre_NeuroExamSignModerateSevereHIE3Category
(6/19) normalize_value: column: pre_NeuroExamRespiration
[WARN] unable to float: val (**/<class 'str'>) e: could not convert string to float: '**'
[WARN] unable to get value: sheet_name: signOfHIERespiratory value: ** value_float: ** value_int: **
[WARN] unable to float: val (**/<class 'str'>) e: could not convert string to float: '**'
[WARN] unable to get value: sheet_name: signOfHIERespiratory value: ** value_float: ** value_int: **
[WARN] unable to float: val (**/<class 'str'>) e: could not convert string to float: '**'
[WARN] unable to get value: sheet_name: signOfHIERespiratory value: ** value_float: ** value_int: **
[WARN] unable to float: val (**/<class 'str'>) e: could not convert strin

In [14]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/38) center: (3088/0)
(1/38) center.orig: (3088/0)
(2/38) subjectID: (3088/0)
(3/38) subjectID.orig: (3088/0)
(4/38) uniqueID: (3088/0)
(5/38) uniqueID.orig: (3088/0)
(6/38) pre_NeuroExam: (3088/0)
(7/38) pre_NeuroExam.orig: (816/2272)
(8/38) pre_NoNeuroExamReason: (3088/0)
(9/38) pre_NoNeuroExamReason.orig: (63/3025)
(10/38) pre_NeuroExamSignModerateSevereHIE3Category: (3088/0)
(11/38) pre_NeuroExamSignModerateSevereHIE3Category.orig: (734/2354)
(12/38) pre_NeuroExamLevelConsciousness: (3088/0)
(13/38) pre_NeuroExamLevelConsciousness.orig: (730/2358)
(14/38) pre_NeuroExamSpontaneousActivity: (3088/0)
(15/38) pre_NeuroExamSpontaneousActivity.orig: (730/2358)
(16/38) pre_NeuroExamPosture: (3088/0)
(17/38) pre_NeuroExamPosture.orig: (727/2361)
(18/38) pre_NeuroExamTone: (3088/0)
(19/38) pre_NeuroExamTone.orig: (730/2358)
(20/38) pre_NeuroExamSuck: (3088/0)
(21/38) pre_NeuroExamSuck.orig: (719/2369)
(22/38) pre_NeuroExamMoro: (3088/0)
(23/38) pre_NeuroExamMoro.orig: (706/2382)
(24/38) p

## 01-12-neuro-exam

In [15]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-12-neuro-exam.csv'

In [16]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
# Values that cannot be mapped (e.g. '**') are reported as [WARN] lines in the output.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/19) normalize_value: column: center
(1/19) normalize_value: column: subjectID
(2/19) normalize_value: column: uniqueID
(3/19) normalize_value: column: pre_NeuroExamSeizure
(4/19) normalize_value: column: pre_NeuroExam
(5/19) normalize_value: column: pre_NeuroExamSignModerateSevereHIE3Category
(6/19) normalize_value: column: pre_NeuroExamRespiration
[WARN] unable to float: val (**/<class 'str'>) e: could not convert string to float: '**'
[WARN] unable to get value: sheet_name: signOfHIERespiratory value: ** value_float: ** value_int: **
(7/19) normalize_value: column: pre_NeuroExamSedate
(8/19) normalize_value: column: pre_NoNeuroExamReason
(9/19) normalize_value: column: pre_NeuroExamLevelConsciousness
(10/19) normalize_value: column: pre_NeuroExamSpontaneousActivity
(11/19) normalize_value: column: pre_NeuroExamPosture
(12/19) normalize_value: column: pre_NeuroExamTone
(13/19) normalize_value: column: pre_NeuroExamSuck
(14/19) normalize_value: column: pre_NeuroExamMoro
(15/19) norm

In [17]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/38) center: (168/0)
(1/38) center.orig: (168/0)
(2/38) subjectID: (168/0)
(3/38) subjectID.orig: (168/0)
(4/38) uniqueID: (168/0)
(5/38) uniqueID.orig: (168/0)
(6/38) pre_NeuroExam: (168/0)
(7/38) pre_NeuroExam.orig: (168/0)
(8/38) pre_NoNeuroExamReason: (168/0)
(9/38) pre_NoNeuroExamReason.orig: (0/168)
(10/38) pre_NeuroExamSignModerateSevereHIE3Category: (168/0)
(11/38) pre_NeuroExamSignModerateSevereHIE3Category.orig: (168/0)
(12/38) pre_NeuroExamLevelConsciousness: (168/0)
(13/38) pre_NeuroExamLevelConsciousness.orig: (168/0)
(14/38) pre_NeuroExamSpontaneousActivity: (168/0)
(15/38) pre_NeuroExamSpontaneousActivity.orig: (168/0)
(16/38) pre_NeuroExamPosture: (168/0)
(17/38) pre_NeuroExamPosture.orig: (167/1)
(18/38) pre_NeuroExamTone: (168/0)
(19/38) pre_NeuroExamTone.orig: (168/0)
(20/38) pre_NeuroExamSuck: (168/0)
(21/38) pre_NeuroExamSuck.orig: (168/0)
(22/38) pre_NeuroExamMoro: (168/0)
(23/38) pre_NeuroExamMoro.orig: (164/4)
(24/38) pre_NeuroExamPupils: (168/0)
(25/38) pre_N

## 01-03-maternal-demographics

In [18]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-03-maternal-demographics.csv'

In [19]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: motherAge_year
(4/12) normalize_value: column: motherRace
(5/12) normalize_value: column: motherRaceOther1
(6/12) normalize_value: column: motherRaceOther2
(7/12) normalize_value: column: motherRaceOther3
(8/12) normalize_value: column: motherRaceOther4
(9/12) normalize_value: column: motherEthnicity
(10/12) normalize_value: column: motherMaritalStatus
(11/12) normalize_value: column: motherEducation


In [20]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/24) center: (168/0)
(1/24) center.orig: (168/0)
(2/24) subjectID: (168/0)
(3/24) subjectID.orig: (168/0)
(4/24) uniqueID: (168/0)
(5/24) uniqueID.orig: (168/0)
(6/24) motherAge_year: (168/0)
(7/24) motherAge_year.orig: (168/0)
(8/24) motherRace: (168/0)
(9/24) motherRace.orig: (168/0)
(10/24) motherRaceOther1: (168/0)
(11/24) motherRaceOther1.orig: (0/168)
(12/24) motherRaceOther2: (168/0)
(13/24) motherRaceOther2.orig: (0/168)
(14/24) motherRaceOther3: (168/0)
(15/24) motherRaceOther3.orig: (0/168)
(16/24) motherRaceOther4: (168/0)
(17/24) motherRaceOther4.orig: (0/168)
(18/24) motherEthnicity: (168/0)
(19/24) motherEthnicity.orig: (168/0)
(20/24) motherMaritalStatus: (168/0)
(21/24) motherMaritalStatus.orig: (168/0)
(22/24) motherEducation: (168/0)
(23/24) motherEducation.orig: (167/1)


## 01-04-pregnancy-history

In [21]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-04-pregnancy-history.csv'

In [22]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: multipleBirth
(4/12) normalize_value: column: prenatalCare
(5/12) normalize_value: column: hypertensionEclampsia
(6/12) normalize_value: column: antepartumHemorrhage
(7/12) normalize_value: column: thyroidMalfunction
(8/12) normalize_value: column: diabetes
(9/12) normalize_value: column: gravida
(10/12) normalize_value: column: parity
(11/12) normalize_value: column: numFetus


In [23]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/24) center: (168/0)
(1/24) center.orig: (168/0)
(2/24) subjectID: (168/0)
(3/24) subjectID.orig: (168/0)
(4/24) uniqueID: (168/0)
(5/24) uniqueID.orig: (168/0)
(6/24) gravida: (168/0)
(7/24) gravida.orig: (168/0)
(8/24) parity: (168/0)
(9/24) parity.orig: (168/0)
(10/24) multipleBirth: (168/0)
(11/24) multipleBirth.orig: (168/0)
(12/24) numFetus: (168/0)
(13/24) numFetus.orig: (3/165)
(14/24) prenatalCare: (168/0)
(15/24) prenatalCare.orig: (168/0)
(16/24) hypertensionEclampsia: (168/0)
(17/24) hypertensionEclampsia.orig: (168/0)
(18/24) antepartumHemorrhage: (168/0)
(19/24) antepartumHemorrhage.orig: (168/0)
(20/24) thyroidMalfunction: (168/0)
(21/24) thyroidMalfunction.orig: (168/0)
(22/24) diabetes: (168/0)
(23/24) diabetes.orig: (168/0)


## 01-05-labor-delivery

In [24]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-05-labor-delivery.csv'

In [25]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/29) normalize_value: column: center
(1/29) normalize_value: column: subjectID
(2/29) normalize_value: column: uniqueID
(3/29) normalize_value: column: fetalDecelerate
(4/29) normalize_value: column: cordMishap
(5/29) normalize_value: column: uterineRupture
(6/29) normalize_value: column: shoulderDystocia
(7/29) normalize_value: column: placentalProblem
(8/29) normalize_value: column: maternalHemorrhage
(9/29) normalize_value: column: maternalTrauma
(10/29) normalize_value: column: maternalCardioRespiratoryArrest
(11/29) normalize_value: column: maternalSeizure
(12/29) normalize_value: column: pyrexiaGreater37p6C
(13/29) normalize_value: column: chorioamnionitis
(14/29) normalize_value: column: placentalPathologyPerformed
(15/29) normalize_value: column: histologicChorioamionitis
(16/29) normalize_value: column: laborAntibiotics
(17/29) normalize_value: column: ruptureGreater18Hr
(18/29) normalize_value: column: labor
(19/29) normalize_value: column: laborAntibioticsCode1
(20/29) nor

In [26]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/58) center: (168/0)
(1/58) center.orig: (168/0)
(2/58) subjectID: (168/0)
(3/58) subjectID.orig: (168/0)
(4/58) uniqueID: (168/0)
(5/58) uniqueID.orig: (168/0)
(6/58) maternalAdmissionDate: (168/0)
(7/58) maternalAdmissionDate.orig: (155/13)
(8/58) maternalAdmissionTime: (168/0)
(9/58) maternalAdmissionTime.orig: (121/47)
(10/58) ruptureDate: (168/0)
(11/58) ruptureDate.orig: (160/8)
(12/58) ruptureTime: (168/0)
(13/58) ruptureTime.orig: (150/18)
(14/58) ruptureGreater18Hr: (168/0)
(15/58) ruptureGreater18Hr.orig: (18/150)
(16/58) labor: (168/0)
(17/58) labor.orig: (168/0)
(18/58) laborOnsetDate: (168/0)
(19/58) laborOnsetDate.orig: (116/52)
(20/58) laborOnsetTime: (168/0)
(21/58) laborOnsetTime.orig: (85/83)
(22/58) deliveryMode: (168/0)
(23/58) deliveryMode.orig: (168/0)
(24/58) fetalDecelerate: (168/0)
(25/58) fetalDecelerate.orig: (168/0)
(26/58) cordMishap: (168/0)
(27/58) cordMishap.orig: (168/0)
(28/58) uterineRupture: (168/0)
(29/58) uterineRupture.orig: (168/0)
(30/58) shou

## 01-06-birth

In [27]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-06-birth.csv'

In [28]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/50) normalize_value: column: center
(1/50) normalize_value: column: subjectID
(2/50) normalize_value: column: uniqueID
(3/50) normalize_value: column: birthDate
(4/50) normalize_value: column: randomInfantAge
(5/50) normalize_value: column: encephalopathyLevel
(6/50) normalize_value: column: infantOutborn
(7/50) normalize_value: column: outbornInHospital
(8/50) normalize_value: column: outbornOutHospital
(9/50) normalize_value: column: deliveryResuscitation
(10/50) normalize_value: column: deliveryOxygen
(11/50) normalize_value: column: deliveryBaggingAndMask
(12/50) normalize_value: column: deliveryChestCompression
(13/50) normalize_value: column: deliveryIntubation
(14/50) normalize_value: column: deliveryDrug
(15/50) normalize_value: column: at10MinContinueResuscitation
(16/50) normalize_value: column: at10MinOxygen
(17/50) normalize_value: column: at10MinBaggingAndMask
(18/50) normalize_value: column: at10MinChestCompression
(19/50) normalize_value: column: at10MinIntubation
(20

In [29]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/100) center: (168/0)
(1/100) center.orig: (168/0)
(2/100) subjectID: (168/0)
(3/100) subjectID.orig: (168/0)
(4/100) uniqueID: (168/0)
(5/100) uniqueID.orig: (168/0)
(6/100) encephalopathyLevel: (168/0)
(7/100) encephalopathyLevel.orig: (168/0)
(8/100) randomInfantAge: (168/0)
(9/100) randomInfantAge.orig: (168/0)
(10/100) birthTime: (168/0)
(11/100) birthTime.orig: (168/0)
(12/100) birthWeight_g: (168/0)
(13/100) birthWeight_g.orig: (168/0)
(14/100) birthLength_cm: (168/0)
(15/100) birthLength_cm.orig: (166/2)
(16/100) birthHeadCircumference_cm: (168/0)
(17/100) birthHeadCircumference_cm.orig: (165/3)
(18/100) birthGestationalAge_week: (168/0)
(19/100) birthGestationalAge_week.orig: (168/0)
(20/100) infantSex: (168/0)
(21/100) infantSex.orig: (168/0)
(22/100) infantOutborn: (168/0)
(23/100) infantOutborn.orig: (168/0)
(24/100) outbornInHospital: (168/0)
(25/100) outbornInHospital.orig: (146/22)
(26/100) outbornOutHospital: (168/0)
(27/100) outbornOutHospital.orig: (146/22)
(28/100)

## 01-07-pre-temperature

In [30]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-07-pre-temperature.csv'

In [31]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/15) normalize_value: column: center
(1/15) normalize_value: column: subjectID
(2/15) normalize_value: column: uniqueID
(3/15) normalize_value: column: pre_TemperatureMinDate
(4/15) normalize_value: column: pre_TemperatureMinTime
(5/15) normalize_value: column: pre_SkinTemperatureMin_C
(6/15) normalize_value: column: pre_AxillaryTemperatureMin_C
(7/15) normalize_value: column: pre_EsophagealTemperatureMin_C
(8/15) normalize_value: column: pre_ServoSetMin_C
(9/15) normalize_value: column: pre_TemperatureMaxDate
(10/15) normalize_value: column: pre_TemperatureMaxTime
(11/15) normalize_value: column: pre_SkinTemperatureMax_C
(12/15) normalize_value: column: pre_AxillaryTemperatureMax_C
(13/15) normalize_value: column: pre_EsophagealTemperatureMax_C
(14/15) normalize_value: column: pre_ServoSetMax_C


In [32]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/30) center: (155/0)
(1/30) center.orig: (155/0)
(2/30) subjectID: (155/0)
(3/30) subjectID.orig: (155/0)
(4/30) uniqueID: (155/0)
(5/30) uniqueID.orig: (155/0)
(6/30) pre_TemperatureMinDate: (155/0)
(7/30) pre_TemperatureMinDate.orig: (155/0)
(8/30) pre_TemperatureMinTime: (155/0)
(9/30) pre_TemperatureMinTime.orig: (153/2)
(10/30) pre_SkinTemperatureMin_C: (155/0)
(11/30) pre_SkinTemperatureMin_C.orig: (66/89)
(12/30) pre_AxillaryTemperatureMin_C: (155/0)
(13/30) pre_AxillaryTemperatureMin_C.orig: (147/8)
(14/30) pre_EsophagealTemperatureMin_C: (155/0)
(15/30) pre_EsophagealTemperatureMin_C.orig: (0/155)
(16/30) pre_ServoSetMin_C: (155/0)
(17/30) pre_ServoSetMin_C.orig: (69/86)
(18/30) pre_TemperatureMaxDate: (155/0)
(19/30) pre_TemperatureMaxDate.orig: (151/4)
(20/30) pre_TemperatureMaxTime: (155/0)
(21/30) pre_TemperatureMaxTime.orig: (148/7)
(22/30) pre_SkinTemperatureMax_C: (155/0)
(23/30) pre_SkinTemperatureMax_C.orig: (70/85)
(24/30) pre_AxillaryTemperatureMax_C: (155/0)
(25/

## 01-08-pre-cardiovascular

In [33]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-08-pre-cardiovascular.csv'

In [34]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: pre_CardioVolumeExpand
(4/12) normalize_value: column: pre_CardioInotropicAgent
(5/12) normalize_value: column: pre_CardioBloodTransfusion
(6/12) normalize_value: column: pre_CardioPlatelets
(7/12) normalize_value: column: pre_CardioDate
(8/12) normalize_value: column: pre_CardioTime
(9/12) normalize_value: column: pre_CardioSystolicBloodPressure_mmHg
(10/12) normalize_value: column: pre_CardioDiastolicBloodPressure_mmHg
(11/12) normalize_value: column: pre_CardioHeartRate_BPM


In [35]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/24) center: (167/0)
(1/24) center.orig: (167/0)
(2/24) subjectID: (167/0)
(3/24) subjectID.orig: (167/0)
(4/24) uniqueID: (167/0)
(5/24) uniqueID.orig: (167/0)
(6/24) pre_CardioDate: (167/0)
(7/24) pre_CardioDate.orig: (167/0)
(8/24) pre_CardioTime: (167/0)
(9/24) pre_CardioTime.orig: (164/3)
(10/24) pre_CardioSystolicBloodPressure_mmHg: (167/0)
(11/24) pre_CardioSystolicBloodPressure_mmHg.orig: (158/9)
(12/24) pre_CardioDiastolicBloodPressure_mmHg: (167/0)
(13/24) pre_CardioDiastolicBloodPressure_mmHg.orig: (158/9)
(14/24) pre_CardioHeartRate_BPM: (167/0)
(15/24) pre_CardioHeartRate_BPM.orig: (162/5)
(16/24) pre_CardioVolumeExpand: (167/0)
(17/24) pre_CardioVolumeExpand.orig: (167/0)
(18/24) pre_CardioInotropicAgent: (167/0)
(19/24) pre_CardioInotropicAgent.orig: (167/0)
(20/24) pre_CardioBloodTransfusion: (167/0)
(21/24) pre_CardioBloodTransfusion.orig: (167/0)
(22/24) pre_CardioPlatelets: (167/0)
(23/24) pre_CardioPlatelets.orig: (167/0)


## 01-09-pre-infection

In [36]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-09-pre-infection.csv'

In [37]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/14) normalize_value: column: center
(1/14) normalize_value: column: subjectID
(2/14) normalize_value: column: uniqueID
(3/14) normalize_value: column: pre_PositiveCulture
(4/14) normalize_value: column: pre_Antibiotics
(5/14) normalize_value: column: pre_PositiveCultureSrc
(6/14) normalize_value: column: pre_PositiveCultureDate
(7/14) normalize_value: column: pre_PositiveCultureTime
(8/14) normalize_value: column: pre_PositiveCultureOrganismCode1
(9/14) normalize_value: column: pre_PositiveCultureOrganismCode2
(10/14) normalize_value: column: pre_PositiveCultureOrganismCode3
(11/14) normalize_value: column: pre_AntibioticsCode1
(12/14) normalize_value: column: pre_AntibioticsCode2
(13/14) normalize_value: column: pre_AntibioticsCode3


In [38]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/28) center: (168/0)
(1/28) center.orig: (168/0)
(2/28) subjectID: (168/0)
(3/28) subjectID.orig: (168/0)
(4/28) uniqueID: (168/0)
(5/28) uniqueID.orig: (168/0)
(6/28) pre_PositiveCulture: (168/0)
(7/28) pre_PositiveCulture.orig: (168/0)
(8/28) pre_PositiveCultureSrc: (168/0)
(9/28) pre_PositiveCultureSrc.orig: (1/167)
(10/28) pre_PositiveCultureDate: (168/0)
(11/28) pre_PositiveCultureDate.orig: (1/167)
(12/28) pre_PositiveCultureTime: (168/0)
(13/28) pre_PositiveCultureTime.orig: (0/168)
(14/28) pre_PositiveCultureOrganismCode1: (168/0)
(15/28) pre_PositiveCultureOrganismCode1.orig: (1/167)
(16/28) pre_PositiveCultureOrganismCode2: (168/0)
(17/28) pre_PositiveCultureOrganismCode2.orig: (0/168)
(18/28) pre_PositiveCultureOrganismCode3: (168/0)
(19/28) pre_PositiveCultureOrganismCode3.orig: (0/168)
(20/28) pre_Antibiotics: (168/0)
(21/28) pre_Antibiotics.orig: (166/2)
(22/28) pre_AntibioticsCode1: (168/0)
(23/28) pre_AntibioticsCode1.orig: (146/22)
(24/28) pre_AntibioticsCode2: (168/

## 01-10-pre-other-med

In [39]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-10-pre-other-med.csv'

In [40]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/18) normalize_value: column: center
(1/18) normalize_value: column: subjectID
(2/18) normalize_value: column: uniqueID
(3/18) normalize_value: column: pre_OtherMedTargetDate
(4/18) normalize_value: column: pre_OtherMedTargetTime
(5/18) normalize_value: column: pre_Anticonvulsants
(6/18) normalize_value: column: pre_Anticonvulsants1
(7/18) normalize_value: column: pre_Anticonvulsants2
(8/18) normalize_value: column: pre_Anticonvulsants3
(9/18) normalize_value: column: pre_AnalgesicsSedatives1
(10/18) normalize_value: column: pre_AnalgesicsSedatives2
(11/18) normalize_value: column: pre_AnalgesicsSedatives3
(12/18) normalize_value: column: pre_Antipyretics1
(13/18) normalize_value: column: pre_Antipyretics2
(14/18) normalize_value: column: pre_Antipyretics3
(15/18) normalize_value: column: pre_Paralytics1
(16/18) normalize_value: column: pre_Paralytics2
(17/18) normalize_value: column: pre_Paralytics3


In [41]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/36) center: (142/0)
(1/36) center.orig: (142/0)
(2/36) subjectID: (142/0)
(3/36) subjectID.orig: (142/0)
(4/36) uniqueID: (142/0)
(5/36) uniqueID.orig: (142/0)
(6/36) pre_OtherMedTargetDate: (142/0)
(7/36) pre_OtherMedTargetDate.orig: (3/139)
(8/36) pre_OtherMedTargetTime: (142/0)
(9/36) pre_OtherMedTargetTime.orig: (2/140)
(10/36) pre_Anticonvulsants: (142/0)
(11/36) pre_Anticonvulsants.orig: (1/141)
(12/36) pre_Anticonvulsants1: (142/0)
(13/36) pre_Anticonvulsants1.orig: (97/45)
(14/36) pre_Anticonvulsants2: (142/0)
(15/36) pre_Anticonvulsants2.orig: (15/127)
(16/36) pre_Anticonvulsants3: (142/0)
(17/36) pre_Anticonvulsants3.orig: (0/142)
(18/36) pre_AnalgesicsSedatives1: (142/0)
(19/36) pre_AnalgesicsSedatives1.orig: (51/91)
(20/36) pre_AnalgesicsSedatives2: (142/0)
(21/36) pre_AnalgesicsSedatives2.orig: (23/119)
(22/36) pre_AnalgesicsSedatives3: (142/0)
(23/36) pre_AnalgesicsSedatives3.orig: (1/141)
(24/36) pre_Antipyretics1: (142/0)
(25/36) pre_Antipyretics1.orig: (2/140)
(26/3

## 01-11-pre-imaging

In [42]:
# CSV handled in this section; the next cell reads it from input_dir and writes it under the same name to out_dir.
base_filename = '01-11-pre-imaging.csv'

In [43]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values with the data-dictionary value map and variable-order map.
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized table under the same file name in the output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/39) normalize_value: column: center
(1/39) normalize_value: column: subjectID
(2/39) normalize_value: column: uniqueID
(3/39) normalize_value: column: pre_HeadSonogram
(4/39) normalize_value: column: pre_HeadSonogramResultText
(5/39) normalize_value: column: pre_HeadCT
(6/39) normalize_value: column: pre_HeadCTResultText
(7/39) normalize_value: column: pre_BrainMRI
(8/39) normalize_value: column: pre_BrainMRIResultText
(9/39) normalize_value: column: pre_HeadSonogramDate
(10/39) normalize_value: column: pre_HeadSonogramTime
(11/39) normalize_value: column: pre_HeadSonogramResult1
(12/39) normalize_value: column: pre_HeadSonogramResult2
(13/39) normalize_value: column: pre_HeadSonogramResult3
(14/39) normalize_value: column: pre_HeadSonogramResult4
(15/39) normalize_value: column: pre_HeadSonogramResult5
(16/39) normalize_value: column: pre_HeadSonogramResult6
(17/39) normalize_value: column: pre_HeadSonogramResult7
(18/39) normalize_value: column: pre_HeadSonogramResult8
(19/39) nor

In [44]:
# Print a one-line summary per column of the normalized frame.
# NOTE(review): the "(a/b)" pair in the output looks like non-null / null counts — confirm against column_info.
COMBINE_harmonizer.column_info(df)

(0/78) center: (168/0)
(1/78) center.orig: (168/0)
(2/78) subjectID: (168/0)
(3/78) subjectID.orig: (168/0)
(4/78) uniqueID: (168/0)
(5/78) uniqueID.orig: (168/0)
(6/78) pre_HeadSonogram: (168/0)
(7/78) pre_HeadSonogram.orig: (168/0)
(8/78) pre_HeadSonogramDate: (168/0)
(9/78) pre_HeadSonogramDate.orig: (30/138)
(10/78) pre_HeadSonogramTime: (168/0)
(11/78) pre_HeadSonogramTime.orig: (27/141)
(12/78) pre_HeadSonogramResult1: (168/0)
(13/78) pre_HeadSonogramResult1.orig: (29/139)
(14/78) pre_HeadSonogramResult2: (168/0)
(15/78) pre_HeadSonogramResult2.orig: (5/163)
(16/78) pre_HeadSonogramResult3: (168/0)
(17/78) pre_HeadSonogramResult3.orig: (1/167)
(18/78) pre_HeadSonogramResult4: (168/0)
(19/78) pre_HeadSonogramResult4.orig: (1/167)
(20/78) pre_HeadSonogramResult5: (168/0)
(21/78) pre_HeadSonogramResult5.orig: (0/168)
(22/78) pre_HeadSonogramResult6: (168/0)
(23/78) pre_HeadSonogramResult6.orig: (0/168)
(24/78) pre_HeadSonogramResult7: (168/0)
(25/78) pre_HeadSonogramResult7.orig: (0

## 02-01-temperature

In [45]:
# Column(s) handed to normalize_value as flatten ids for this table (see the next cell).
_FLATTEN_IDS = ['temperatureTimeSlot_min']
# CSV handled in this section; the next cell reads it from input_dir.
base_filename = '02-01-temperature.csv'

In [46]:
# Read the section CSV with every column as object dtype (no pandas type inference).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype='O')
# Normalize the values; unlike the earlier sections, _FLATTEN_IDS is passed as the
# third positional argument (the output logs a flatten_index step for it).
df = COMBINE_harmonizer.normalize_value(df, _VALUE_MAP, _FLATTEN_IDS, order_map=_ORDER_MAP)

(0/11) normalize_value: column: center
(1/11) normalize_value: column: subjectID
(2/11) normalize_value: column: uniqueID
(3/11) normalize_value: column: temperatureTimeSlot_min
(4/11) normalize_value: column: temperatureDate
(5/11) normalize_value: column: temperatureTime
(6/11) normalize_value: column: skinTemperature_C
(7/11) normalize_value: column: axillaryTemperature_C
(8/11) normalize_value: column: esophagealTemperature_C
(9/11) normalize_value: column: blanketTemperature_C
(10/11) normalize_value: column: servoSetTemperature_C


flatten_index: flatten_ids: ['temperatureTimeSlot_min'] unique_id_map: {np.int64(2): np.int64(2), np.int64(15): np.int64(15), np.int64(30): np.int64(30), np.int64(45): np.int64(45), np.int64(60): np.int64(60), np.int64(75): np.int64(75), np.int64(90): np.int64(90), np.int64(105): np.int64(105), np.int64(120): np.int64(120), np.int64(135): np.int64(135), np.int64(150): np.int64(150), np.int64(165): np.int64(165), np.int64(180): np.int64(180), np.int64(240): np.int64(240), np.int64(300): np.int64(300), np.int64(360): np.int64(360), np.int64(420): np.int64(420), np.int64(480): np.int64(480), np.int64(540): np.int64(540), np.int64(600): np.int64(600), np.int64(660): np.int64(660), np.int64(720): np.int64(720), np.int64(960): np.int64(960), np.int64(1200): np.int64(1200), np.int64(1440): np.int64(1440), np.int64(1680): np.int64(1680), np.int64(1920): np.int64(1920), np.int64(2160): np.int64(2160), np.int64(2400): np.int64(2400), np.int64(2640): np.int64(2640), np.int64(2880): np.int64(2880)

In [47]:
# XXX hack for _flatten_index: 2 as 0
# Rows whose _flatten_index is 2 are re-labelled 0; the print is a visual check
# of the resulting unique values and dtype.
# NOTE(review): the reason for the remap is not recorded here -- presumably it
# aligns the first time slot with 0; confirm.
is_flatten_index_2 = df[COMBINE_harmonizer.FLATTEN_INDEX].eq(2)
df.loc[is_flatten_index_2, COMBINE_harmonizer.FLATTEN_INDEX] = 0

flatten_index_col = df[COMBINE_harmonizer.FLATTEN_INDEX]
print(f"_flatten_index: ({flatten_index_col.unique()} / {flatten_index_col.dtype})")

_flatten_index: ([   0  240  480  720  960 1200 1440 1680 1920 2160 2400 2640 2880 3120
 3360 3600 3840 4080 4320 4560 4800 5040 5280 5520 5760 6000 6240 6480
   15   30   45   60   75   90  105  120  135  150  165  180  300  360
  420  540  600  660 6720 6960 7200 7440] / int64)


In [48]:
# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

In [49]:
# Sanity check: list every column of the normalized temperature frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/23) center: (6164/0)
(1/23) center.orig: (6164/0)
(2/23) subjectID: (6164/0)
(3/23) subjectID.orig: (6164/0)
(4/23) uniqueID: (6164/0)
(5/23) uniqueID.orig: (6164/0)
(6/23) _flatten_index: (6164/0)
(7/23) temperatureTimeSlot_min: (6164/0)
(8/23) temperatureTimeSlot_min.orig: (6164/0)
(9/23) temperatureDate: (6164/0)
(10/23) temperatureDate.orig: (6137/27)
(11/23) temperatureTime: (6164/0)
(12/23) temperatureTime.orig: (6127/37)
(13/23) skinTemperature_C: (6164/0)
(14/23) skinTemperature_C.orig: (5683/481)
(15/23) axillaryTemperature_C: (6164/0)
(16/23) axillaryTemperature_C.orig: (4569/1595)
(17/23) esophagealTemperature_C: (6164/0)
(18/23) esophagealTemperature_C.orig: (5814/350)
(19/23) blanketTemperature_C: (6164/0)
(20/23) blanketTemperature_C.orig: (3497/2667)
(21/23) servoSetTemperature_C: (6164/0)
(22/23) servoSetTemperature_C.orig: (4959/1205)


## 02-02-cardiovascular

In [50]:
# Column(s) handed to normalize_value to build the _flatten_index column.
_FLATTEN_IDS = ['cardioTimeSlot_min']
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
base_filename = '02-02-cardiovascular.csv'

In [51]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map and the flatten ids.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

(0/13) normalize_value: column: center
(1/13) normalize_value: column: subjectID
(2/13) normalize_value: column: uniqueID
(3/13) normalize_value: column: cardioVolumeExpand
(4/13) normalize_value: column: cardioInotropicAgent
(5/13) normalize_value: column: cardioBloodTransfusion
(6/13) normalize_value: column: cardioPlatelets
(7/13) normalize_value: column: cardioTimeSlot_min
(8/13) normalize_value: column: cardioDate
(9/13) normalize_value: column: cardioTime
(10/13) normalize_value: column: cardioSystolicBloodPressure_mmHg
(11/13) normalize_value: column: cardioDiastolicBloodPressure_mmHg
(12/13) normalize_value: column: cardioHeartRate_BPM
flatten_index: flatten_ids: ['cardioTimeSlot_min'] unique_id_map: {np.int64(1): np.int64(1), np.int64(4): np.int64(4), np.int64(8): np.int64(8), np.int64(12): np.int64(12), np.int64(16): np.int64(16), np.int64(20): np.int64(20), np.int64(24): np.int64(24), np.int64(28): np.int64(28), np.int64(32): np.int64(32), np.int64(36): np.int64(36), np.int6

In [52]:
# XXX hack for _flatten_index: 1 as 0
# Rows whose _flatten_index is 1 are re-labelled 0; the print is a visual check
# of the resulting unique values and dtype.
# NOTE(review): the reason for the remap is not recorded here -- presumably it
# aligns the first time slot with 0; confirm.
is_flatten_index_1 = df[COMBINE_harmonizer.FLATTEN_INDEX].eq(1)
df.loc[is_flatten_index_1, COMBINE_harmonizer.FLATTEN_INDEX] = 0

flatten_index_col = df[COMBINE_harmonizer.FLATTEN_INDEX]
print(f"_flatten_index: ({flatten_index_col.unique()} / {flatten_index_col.dtype})")

_flatten_index: ([ 0  4  8 12 16 20 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80 84 88 92
 96] / int64)


In [53]:
# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

In [54]:
# Sanity check: list every column of the normalized cardiovascular frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/27) center: (4056/0)
(1/27) center.orig: (4056/0)
(2/27) subjectID: (4056/0)
(3/27) subjectID.orig: (4056/0)
(4/27) uniqueID: (4056/0)
(5/27) uniqueID.orig: (4056/0)
(6/27) _flatten_index: (4056/0)
(7/27) cardioTimeSlot_min: (4056/0)
(8/27) cardioTimeSlot_min.orig: (4056/0)
(9/27) cardioDate: (4056/0)
(10/27) cardioDate.orig: (4053/3)
(11/27) cardioTime: (4056/0)
(12/27) cardioTime.orig: (4053/3)
(13/27) cardioSystolicBloodPressure_mmHg: (4056/0)
(14/27) cardioSystolicBloodPressure_mmHg.orig: (3657/399)
(15/27) cardioDiastolicBloodPressure_mmHg: (4056/0)
(16/27) cardioDiastolicBloodPressure_mmHg.orig: (3656/400)
(17/27) cardioHeartRate_BPM: (4056/0)
(18/27) cardioHeartRate_BPM.orig: (4025/31)
(19/27) cardioVolumeExpand: (4056/0)
(20/27) cardioVolumeExpand.orig: (4056/0)
(21/27) cardioInotropicAgent: (4056/0)
(22/27) cardioInotropicAgent.orig: (4056/0)
(23/27) cardioBloodTransfusion: (4056/0)
(24/27) cardioBloodTransfusion.orig: (4056/0)
(25/27) cardioPlatelets: (4056/0)
(26/27) card

## 02-03-respiratory

In [55]:
# Column(s) handed to normalize_value to build the _flatten_index column.
_FLATTEN_IDS = ['respiratoryTimeSlot_min']
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
base_filename = '02-03-respiratory.csv'

In [56]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map and the flatten ids.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: respiratoryTimeSlot_min
(4/12) normalize_value: column: respiratoryDate
(5/12) normalize_value: column: respiratoryTime
(6/12) normalize_value: column: respiratorySupportType
(7/12) normalize_value: column: respiratoryFiO2
(8/12) normalize_value: column: respiratoryRate_Hz
(9/12) normalize_value: column: respiratoryPIP_cmH2O
(10/12) normalize_value: column: respiratoryMAP_cmH2O
(11/12) normalize_value: column: respiratoryPEEP_cmH2O
flatten_index: flatten_ids: ['respiratoryTimeSlot_min'] unique_id_map: {np.int64(0): np.int64(0), np.int64(24): np.int64(24), np.int64(48): np.int64(48), np.int64(72): np.int64(72), np.int64(96): np.int64(96)} the_type: int64


In [57]:
# Sanity check: list every column of the normalized respiratory frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/25) center: (815/0)
(1/25) center.orig: (815/0)
(2/25) subjectID: (815/0)
(3/25) subjectID.orig: (815/0)
(4/25) uniqueID: (815/0)
(5/25) uniqueID.orig: (815/0)
(6/25) _flatten_index: (815/0)
(7/25) respiratoryTimeSlot_min: (815/0)
(8/25) respiratoryTimeSlot_min.orig: (815/0)
(9/25) respiratoryDate: (815/0)
(10/25) respiratoryDate.orig: (814/1)
(11/25) respiratoryTime: (815/0)
(12/25) respiratoryTime.orig: (814/1)
(13/25) respiratorySupportType: (815/0)
(14/25) respiratorySupportType.orig: (813/2)
(15/25) respiratoryFiO2: (815/0)
(16/25) respiratoryFiO2.orig: (412/403)
(17/25) respiratoryRate_Hz: (815/0)
(18/25) respiratoryRate_Hz.orig: (275/540)
(19/25) respiratoryPIP_cmH2O: (815/0)
(20/25) respiratoryPIP_cmH2O.orig: (269/546)
(21/25) respiratoryMAP_cmH2O: (815/0)
(22/25) respiratoryMAP_cmH2O.orig: (262/553)
(23/25) respiratoryPEEP_cmH2O: (815/0)
(24/25) respiratoryPEEP_cmH2O.orig: (227/588)


## 02-04-blood-gas

In [58]:
# Column(s) handed to normalize_value to build the _flatten_index column.
_FLATTEN_IDS = ['bloodGasTimeSlot_min']
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
base_filename = '02-04-blood-gas.csv'


In [59]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map and the flatten ids.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/11) normalize_value: column: center
(1/11) normalize_value: column: subjectID
(2/11) normalize_value: column: uniqueID
(3/11) normalize_value: column: bloodGasTimeSlot_min
(4/11) normalize_value: column: bloodGasDate
(5/11) normalize_value: column: bloodGasTime
(6/11) normalize_value: column: bloodGasPH
(7/11) normalize_value: column: bloodGasPCO2_mmHg
(8/11) normalize_value: column: bloodGasPO2_mmHg
(9/11) normalize_value: column: bloodGasHCO3_mEqPerL
(10/11) normalize_value: column: bloodGasBaseDeficit_mEqPerL
flatten_index: flatten_ids: ['bloodGasTimeSlot_min'] unique_id_map: {np.int64(0): np.int64(0), np.int64(4): np.int64(4), np.int64(8): np.int64(8), np.int64(12): np.int64(12), np.int64(24): np.int64(24), np.int64(48): np.int64(48), np.int64(72): np.int64(72), np.int64(96): np.int64(96)} the_type: int64


## 02-05-hematology

In [60]:
# Column(s) handed to normalize_value to build the _flatten_index column.
_FLATTEN_IDS = ['hematologyTimeSlot_min']
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
base_filename = '02-05-hematology.csv'

In [61]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map and the flatten ids.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

# XXX hematology in LH is counted as kcPermuL (thousands of cells per uL), while
# the harmonized *_cPermuL columns are cells per uL -- hence the x1000 rescale.
# NOTE(review): column_info reports no nulls in these normalized columns although
# their .orig counterparts have many, so normalize_value appears to fill missing
# values; confirm that whatever it fills in is safe to multiply by 1000.
cbc_columns = [
    'hematologyWBC_cPermuL',
    'hematologyPolymorphNeutrophils_cPermuL',
    'hematologyMonocytes_cPermuL',
    'hematologyLymphocytes_cPermuL',
    'hematologyPlatelet_cPermuL',
]
for column in cbc_columns:
    df.loc[:, column] = df[column].mul(1000)

# Persist the rescaled frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/14) normalize_value: column: center
(1/14) normalize_value: column: subjectID
(2/14) normalize_value: column: uniqueID
(3/14) normalize_value: column: hematologyTimeSlot_min
(4/14) normalize_value: column: hematologyDate
(5/14) normalize_value: column: hematologyTime
(6/14) normalize_value: column: hematologyWBC_cPermuL
(7/14) normalize_value: column: hematologyHemoglobin_gPerdL
(8/14) normalize_value: column: hematologyPolymorphNeutrophils_cPermuL
(9/14) normalize_value: column: hematologyMonocytes_cPermuL
(10/14) normalize_value: column: hematologyLymphocytes_cPermuL
(11/14) normalize_value: column: hematologyPlatelet_cPermuL
(12/14) normalize_value: column: hematologyPT_s
(13/14) normalize_value: column: hematologyPTT_s
flatten_index: flatten_ids: ['hematologyTimeSlot_min'] unique_id_map: {np.int64(0): np.int64(0), np.int64(24): np.int64(24), np.int64(48): np.int64(48), np.int64(72): np.int64(72), np.int64(96): np.int64(96)} the_type: int64


In [62]:
# Sanity check: list every column of the normalized hematology frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/29) center: (763/0)
(1/29) center.orig: (763/0)
(2/29) subjectID: (763/0)
(3/29) subjectID.orig: (763/0)
(4/29) uniqueID: (763/0)
(5/29) uniqueID.orig: (763/0)
(6/29) _flatten_index: (763/0)
(7/29) hematologyTimeSlot_min: (763/0)
(8/29) hematologyTimeSlot_min.orig: (763/0)
(9/29) hematologyDate: (763/0)
(10/29) hematologyDate.orig: (625/138)
(11/29) hematologyTime: (763/0)
(12/29) hematologyTime.orig: (564/199)
(13/29) hematologyWBC_cPermuL: (763/0)
(14/29) hematologyWBC_cPermuL.orig: (454/309)
(15/29) hematologyHemoglobin_gPerdL: (763/0)
(16/29) hematologyHemoglobin_gPerdL.orig: (475/288)
(17/29) hematologyPolymorphNeutrophils_cPermuL: (763/0)
(18/29) hematologyPolymorphNeutrophils_cPermuL.orig: (400/363)
(19/29) hematologyMonocytes_cPermuL: (763/0)
(20/29) hematologyMonocytes_cPermuL.orig: (399/364)
(21/29) hematologyLymphocytes_cPermuL: (763/0)
(22/29) hematologyLymphocytes_cPermuL.orig: (405/358)
(23/29) hematologyPlatelet_cPermuL: (763/0)
(24/29) hematologyPlatelet_cPermuL.orig

## 02-05_s-hematology

In [63]:
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
# This table is normalized without flatten ids.
base_filename = '02-05_s-hematology.csv'

In [64]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map (no flatten ids for this table).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# XXX hematology in LH is counted as kcPermuL (thousands of cells per uL), while
# the harmonized *_cPermuL column is cells per uL -- hence the x1000 rescale.
# NOTE(review): column_info reports no nulls in the normalized column although
# its .orig counterpart has some, so normalize_value appears to fill missing
# values; confirm that whatever it fills in is safe to multiply by 1000.
cbc_columns = [
    'hematologyPlateletMin_cPermuL',
]
for column in cbc_columns:
    df.loc[:, column] = df[column].mul(1000)

# Persist the rescaled frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/5) normalize_value: column: center
(1/5) normalize_value: column: subjectID
(2/5) normalize_value: column: uniqueID
(3/5) normalize_value: column: hematologyHematocritMin
(4/5) normalize_value: column: hematologyPlateletMin_cPermuL


In [65]:
# Sanity check: list every column of the normalized hematology summary frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/10) center: (168/0)
(1/10) center.orig: (168/0)
(2/10) subjectID: (168/0)
(3/10) subjectID.orig: (168/0)
(4/10) uniqueID: (168/0)
(5/10) uniqueID.orig: (168/0)
(6/10) hematologyHematocritMin: (168/0)
(7/10) hematologyHematocritMin.orig: (163/5)
(8/10) hematologyPlateletMin_cPermuL: (168/0)
(9/10) hematologyPlateletMin_cPermuL.orig: (161/7)


## 02-06_s-blood-value

In [66]:
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
# This table is normalized without flatten ids.
base_filename = '02-06_s-blood-value.csv'

In [67]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map (no flatten ids for this table).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/39) normalize_value: column: center
(1/39) normalize_value: column: subjectID
(2/39) normalize_value: column: uniqueID
(3/39) normalize_value: column: bloodValuePHMin
(4/39) normalize_value: column: bloodValuePHMinDate
(5/39) normalize_value: column: bloodValuePHMax
(6/39) normalize_value: column: bloodValuePHMaxDate
(7/39) normalize_value: column: bloodValueHCO3Min_mEqPerL
(8/39) normalize_value: column: bloodValueHCO3Min_mEqPerLDate
(9/39) normalize_value: column: bloodValueBaseDeficitMax_mEqPerL
(10/39) normalize_value: column: bloodValueBaseDeficitMax_mEqPerLDate
(11/39) normalize_value: column: bloodValueSerumNaMin_mEqPerL
(12/39) normalize_value: column: bloodValueSerumNaMin_mEqPerLDate
(13/39) normalize_value: column: bloodValueSerumNaMax_mEqPerL
(14/39) normalize_value: column: bloodValueSerumNaMax_mEqPerLDate
(15/39) normalize_value: column: bloodValueSerumKMin_mEqPerL
(16/39) normalize_value: column: bloodValueSerumKMin_mEqPerLDate
(17/39) normalize_value: column: bloodVal

In [68]:
# Sanity check: list every column of the normalized blood-value frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/78) center: (168/0)
(1/78) center.orig: (168/0)
(2/78) subjectID: (168/0)
(3/78) subjectID.orig: (168/0)
(4/78) uniqueID: (168/0)
(5/78) uniqueID.orig: (168/0)
(6/78) bloodValuePHMin: (168/0)
(7/78) bloodValuePHMin.orig: (156/12)
(8/78) bloodValuePHMinDate: (168/0)
(9/78) bloodValuePHMinDate.orig: (156/12)
(10/78) bloodValueHCO3Min_mEqPerL: (168/0)
(11/78) bloodValueHCO3Min_mEqPerL.orig: (155/13)
(12/78) bloodValueHCO3Min_mEqPerLDate: (168/0)
(13/78) bloodValueHCO3Min_mEqPerLDate.orig: (155/13)
(14/78) bloodValueSerumNaMin_mEqPerL: (168/0)
(15/78) bloodValueSerumNaMin_mEqPerL.orig: (168/0)
(16/78) bloodValueSerumNaMin_mEqPerLDate: (168/0)
(17/78) bloodValueSerumNaMin_mEqPerLDate.orig: (168/0)
(18/78) bloodValueSerumKMin_mEqPerL: (168/0)
(19/78) bloodValueSerumKMin_mEqPerL.orig: (168/0)
(20/78) bloodValueSerumKMin_mEqPerLDate: (168/0)
(21/78) bloodValueSerumKMin_mEqPerLDate.orig: (168/0)
(22/78) bloodValueClMin_mEqPerL: (168/0)
(23/78) bloodValueClMin_mEqPerL.orig: (166/2)
(24/78) bl

## 02-07-infection

In [69]:
# Column(s) handed to normalize_value to build the _flatten_index column.
_FLATTEN_IDS = ['positiveCultureNumber']
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
base_filename = '02-07-infection.csv'

In [70]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map and the flatten ids.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/15) normalize_value: column: center
(1/15) normalize_value: column: subjectID
(2/15) normalize_value: column: uniqueID
(3/15) normalize_value: column: positiveCulture
(4/15) normalize_value: column: antibiotics
(5/15) normalize_value: column: positiveCultureSrc
(6/15) normalize_value: column: positiveCultureDate
(7/15) normalize_value: column: positiveCultureTime
(8/15) normalize_value: column: positiveCultureOrganismCode1
(9/15) normalize_value: column: positiveCultureOrganismCode2
(10/15) normalize_value: column: positiveCultureOrganismCode3
(11/15) normalize_value: column: antibioticsCode1
(12/15) normalize_value: column: antibioticsCode2
(13/15) normalize_value: column: antibioticsCode3
(14/15) normalize_value: column: positiveCultureNumber
flatten_index: flatten_ids: ['positiveCultureNumber'] unique_id_map: {np.int64(1): np.int64(1)} the_type: int64


In [71]:
# Sanity check: list every column of the normalized infection frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/31) center: (168/0)
(1/31) center.orig: (168/0)
(2/31) subjectID: (168/0)
(3/31) subjectID.orig: (168/0)
(4/31) uniqueID: (168/0)
(5/31) uniqueID.orig: (168/0)
(6/31) _flatten_index: (168/0)
(7/31) positiveCultureNumber: (168/0)
(8/31) positiveCultureNumber.orig: (168/0)
(9/31) positiveCulture: (168/0)
(10/31) positiveCulture.orig: (163/5)
(11/31) positiveCultureSrc: (168/0)
(12/31) positiveCultureSrc.orig: (2/166)
(13/31) positiveCultureDate: (168/0)
(14/31) positiveCultureDate.orig: (2/166)
(15/31) positiveCultureTime: (168/0)
(16/31) positiveCultureTime.orig: (2/166)
(17/31) positiveCultureOrganismCode1: (168/0)
(18/31) positiveCultureOrganismCode1.orig: (2/166)
(19/31) positiveCultureOrganismCode2: (168/0)
(20/31) positiveCultureOrganismCode2.orig: (0/168)
(21/31) positiveCultureOrganismCode3: (168/0)
(22/31) positiveCultureOrganismCode3.orig: (0/168)
(23/31) antibiotics: (168/0)
(24/31) antibiotics.orig: (157/11)
(25/31) antibioticsCode1: (168/0)
(26/31) antibioticsCode1.orig: 

## 02-08-other-med

In [72]:
# Column(s) handed to normalize_value to build the _flatten_index column.
_FLATTEN_IDS = ['otherMedTimeSlot_min']
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
base_filename = '02-08-other-med.csv'

In [73]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map and the flatten ids.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

(0/24) normalize_value: column: center
(1/24) normalize_value: column: subjectID
(2/24) normalize_value: column: uniqueID
(3/24) normalize_value: column: otherMedTimeSlot_min
(4/24) normalize_value: column: otherMedTargetDate
(5/24) normalize_value: column: otherMedTargetTime
(6/24) normalize_value: column: anticonvulsants
(7/24) normalize_value: column: anticonvulsants1
(8/24) normalize_value: column: anticonvulsants2
(9/24) normalize_value: column: anticonvulsants3
(10/24) normalize_value: column: analgesicsSedatives
(11/24) normalize_value: column: analgesicsSedatives1
(12/24) normalize_value: column: analgesicsSedatives2
(13/24) normalize_value: column: analgesicsSedatives3
(14/24) normalize_value: column: antipyretics
(15/24) normalize_value: column: antipyretics1
(16/24) normalize_value: column: antipyretics2
(17/24) normalize_value: column: antipyretics3
(18/24) normalize_value: column: paralytics
(19/24) normalize_value: column: paralytics1
(20/24) normalize_value: column: para

In [74]:
# XXX hack for _flatten_index: 1 as 0
# Rows whose _flatten_index is 1 are re-labelled 0; the print is a visual check
# of the resulting unique values and dtype.
# NOTE(review): the reason for the remap is not recorded here -- presumably it
# aligns the first time slot with 0; confirm.
is_flatten_index_1 = df[COMBINE_harmonizer.FLATTEN_INDEX].eq(1)
df.loc[is_flatten_index_1, COMBINE_harmonizer.FLATTEN_INDEX] = 0

flatten_index_col = df[COMBINE_harmonizer.FLATTEN_INDEX]
print(f"_flatten_index: ({flatten_index_col.unique()} / {flatten_index_col.dtype})")

_flatten_index: ([ 0 24 48 72 96] / int64)


In [75]:
# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

In [76]:
# Sanity check: list every column of the normalized other-med frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/49) center: (778/0)
(1/49) center.orig: (778/0)
(2/49) subjectID: (778/0)
(3/49) subjectID.orig: (778/0)
(4/49) uniqueID: (778/0)
(5/49) uniqueID.orig: (778/0)
(6/49) _flatten_index: (778/0)
(7/49) otherMedTimeSlot_min: (778/0)
(8/49) otherMedTimeSlot_min.orig: (778/0)
(9/49) otherMedTargetDate: (778/0)
(10/49) otherMedTargetDate.orig: (25/753)
(11/49) otherMedTargetTime: (778/0)
(12/49) otherMedTargetTime.orig: (25/753)
(13/49) anticonvulsants: (778/0)
(14/49) anticonvulsants.orig: (5/773)
(15/49) anticonvulsants1: (778/0)
(16/49) anticonvulsants1.orig: (421/357)
(17/49) anticonvulsants2: (778/0)
(18/49) anticonvulsants2.orig: (68/710)
(19/49) anticonvulsants3: (778/0)
(20/49) anticonvulsants3.orig: (10/768)
(21/49) analgesicsSedatives: (778/0)
(22/49) analgesicsSedatives.orig: (6/772)
(23/49) analgesicsSedatives1: (778/0)
(24/49) analgesicsSedatives1.orig: (231/547)
(25/49) analgesicsSedatives2: (778/0)
(26/49) analgesicsSedatives2.orig: (76/702)
(27/49) analgesicsSedatives3: (778

## 02-09-imaging

In [77]:
# Column(s) handed to normalize_value to build the _flatten_index column.
_FLATTEN_IDS = ['imagingNumber']
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
base_filename = '02-09-imaging.csv'

In [78]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map and the flatten ids.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

(0/40) normalize_value: column: center
(1/40) normalize_value: column: subjectID
(2/40) normalize_value: column: uniqueID
(3/40) normalize_value: column: headSonogram
(4/40) normalize_value: column: headSonogramResultText
(5/40) normalize_value: column: headCT
(6/40) normalize_value: column: headCTResultText
(7/40) normalize_value: column: brainMRI
(8/40) normalize_value: column: brainMRIResultText
(9/40) normalize_value: column: imagingNumber
(10/40) normalize_value: column: headSonogramDate
(11/40) normalize_value: column: headSonogramTime
(12/40) normalize_value: column: headSonogramResult1
(13/40) normalize_value: column: headSonogramResult2
(14/40) normalize_value: column: headSonogramResult3
(15/40) normalize_value: column: headSonogramResult4
(16/40) normalize_value: column: headSonogramResult5
(17/40) normalize_value: column: headSonogramResult6
(18/40) normalize_value: column: headSonogramResult7
(19/40) normalize_value: column: headSonogramResult8
(20/40) normalize_value: col

In [79]:
# XXX hack for _flatten_index: 2 as 1
# Rows whose _flatten_index is 2 are re-labelled 1; the print is a visual check
# of the resulting unique values and dtype.
# NOTE(review): the reason for the remap is not recorded here; confirm it is
# still needed for this table.
is_flatten_index_2 = df[COMBINE_harmonizer.FLATTEN_INDEX].eq(2)
df.loc[is_flatten_index_2, COMBINE_harmonizer.FLATTEN_INDEX] = 1

flatten_index_col = df[COMBINE_harmonizer.FLATTEN_INDEX]
print(f"_flatten_index: ({flatten_index_col.unique()} / {flatten_index_col.dtype})")

_flatten_index: ([1] / int64)


In [80]:
# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

In [81]:
# Sanity check: list every column of the normalized imaging frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/81) center: (168/0)
(1/81) center.orig: (168/0)
(2/81) subjectID: (168/0)
(3/81) subjectID.orig: (168/0)
(4/81) uniqueID: (168/0)
(5/81) uniqueID.orig: (168/0)
(6/81) _flatten_index: (168/0)
(7/81) imagingNumber: (168/0)
(8/81) imagingNumber.orig: (168/0)
(9/81) headSonogram: (168/0)
(10/81) headSonogram.orig: (168/0)
(11/81) headSonogramDate: (168/0)
(12/81) headSonogramDate.orig: (79/89)
(13/81) headSonogramTime: (168/0)
(14/81) headSonogramTime.orig: (79/89)
(15/81) headSonogramResult1: (168/0)
(16/81) headSonogramResult1.orig: (79/89)
(17/81) headSonogramResult2: (168/0)
(18/81) headSonogramResult2.orig: (11/157)
(19/81) headSonogramResult3: (168/0)
(20/81) headSonogramResult3.orig: (3/165)
(21/81) headSonogramResult4: (168/0)
(22/81) headSonogramResult4.orig: (1/167)
(23/81) headSonogramResult5: (168/0)
(24/81) headSonogramResult5.orig: (0/168)
(25/81) headSonogramResult6: (168/0)
(26/81) headSonogramResult6.orig: (0/168)
(27/81) headSonogramResult7: (168/0)
(28/81) headSonogra

## 03-01-post-intervention-temperature

In [82]:
# Column(s) handed to normalize_value to build the _flatten_index column.
_FLATTEN_IDS = ['post_TemperatureTimeSlot_day']
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
base_filename = '03-01-post-temperature.csv'

In [83]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map and the flatten ids.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/8) normalize_value: column: center
(1/8) normalize_value: column: subjectID
(2/8) normalize_value: column: uniqueID
(3/8) normalize_value: column: post_TemperatureTimeSlot_day
(4/8) normalize_value: column: post_TemperatureDate
(5/8) normalize_value: column: post_TemperatureTime
(6/8) normalize_value: column: post_SkinTemperature_C
(7/8) normalize_value: column: post_AxillaryTemperature_C
flatten_index: flatten_ids: ['post_TemperatureTimeSlot_day'] unique_id_map: {np.int64(5): np.int64(5), np.int64(6): np.int64(6), np.int64(7): np.int64(7)} the_type: int64


In [84]:
# Sanity check: list every column of the normalized post-temperature frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/17) center: (450/0)
(1/17) center.orig: (450/0)
(2/17) subjectID: (450/0)
(3/17) subjectID.orig: (450/0)
(4/17) uniqueID: (450/0)
(5/17) uniqueID.orig: (450/0)
(6/17) _flatten_index: (450/0)
(7/17) post_TemperatureTimeSlot_day: (450/0)
(8/17) post_TemperatureTimeSlot_day.orig: (450/0)
(9/17) post_TemperatureDate: (450/0)
(10/17) post_TemperatureDate.orig: (437/13)
(11/17) post_TemperatureTime: (450/0)
(12/17) post_TemperatureTime.orig: (432/18)
(13/17) post_SkinTemperature_C: (450/0)
(14/17) post_SkinTemperature_C.orig: (229/221)
(15/17) post_AxillaryTemperature_C: (450/0)
(16/17) post_AxillaryTemperature_C.orig: (417/33)


## 03-03-post-intervention-imaging

In [85]:
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
# This table is normalized without flatten ids.
base_filename = '03-03-post-imaging.csv'

In [86]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map (no flatten ids for this table).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/39) normalize_value: column: center
(1/39) normalize_value: column: subjectID
(2/39) normalize_value: column: uniqueID
(3/39) normalize_value: column: post_HeadSonogram
(4/39) normalize_value: column: post_HeadSonogramResultText
(5/39) normalize_value: column: post_HeadCT
(6/39) normalize_value: column: post_HeadCTResultText
(7/39) normalize_value: column: post_BrainMRI
(8/39) normalize_value: column: post_BrainMRIResultText
(9/39) normalize_value: column: post_HeadSonogramDate
(10/39) normalize_value: column: post_HeadSonogramTime
(11/39) normalize_value: column: post_HeadSonogramResult1
(12/39) normalize_value: column: post_HeadSonogramResult2
(13/39) normalize_value: column: post_HeadSonogramResult3
(14/39) normalize_value: column: post_HeadSonogramResult4
(15/39) normalize_value: column: post_HeadSonogramResult5
(16/39) normalize_value: column: post_HeadSonogramResult6
(17/39) normalize_value: column: post_HeadSonogramResult7
(18/39) normalize_value: column: post_HeadSonogramRes

In [87]:
# Sanity check: list every column of the normalized post-imaging frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)


(0/78) center: (168/0)
(1/78) center.orig: (168/0)
(2/78) subjectID: (168/0)
(3/78) subjectID.orig: (168/0)
(4/78) uniqueID: (168/0)
(5/78) uniqueID.orig: (168/0)
(6/78) post_HeadSonogram: (168/0)
(7/78) post_HeadSonogram.orig: (168/0)
(8/78) post_HeadSonogramDate: (168/0)
(9/78) post_HeadSonogramDate.orig: (14/154)
(10/78) post_HeadSonogramTime: (168/0)
(11/78) post_HeadSonogramTime.orig: (14/154)
(12/78) post_HeadSonogramResult1: (168/0)
(13/78) post_HeadSonogramResult1.orig: (14/154)
(14/78) post_HeadSonogramResult2: (168/0)
(15/78) post_HeadSonogramResult2.orig: (3/165)
(16/78) post_HeadSonogramResult3: (168/0)
(17/78) post_HeadSonogramResult3.orig: (1/167)
(18/78) post_HeadSonogramResult4: (168/0)
(19/78) post_HeadSonogramResult4.orig: (0/168)
(20/78) post_HeadSonogramResult5: (168/0)
(21/78) post_HeadSonogramResult5.orig: (0/168)
(22/78) post_HeadSonogramResult6: (168/0)
(23/78) post_HeadSonogramResult6.orig: (0/168)
(24/78) post_HeadSonogramResult7: (168/0)
(25/78) post_HeadSono

## 03-04-post-intervention-neuro-exam

In [88]:
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
# This table is normalized without flatten ids.
base_filename = '03-04-post-neuro-exam.csv'

In [89]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map (no flatten ids for this table).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/21) normalize_value: column: center
(1/21) normalize_value: column: subjectID
(2/21) normalize_value: column: uniqueID
(3/21) normalize_value: column: post_NeuroExamSeizure
(4/21) normalize_value: column: post_NeuroExamSedate
(5/21) normalize_value: column: post_NeuroExamHypertonia
(6/21) normalize_value: column: post_NeuroExamClonusSustained
(7/21) normalize_value: column: post_NeuroExamFistedHand
(8/21) normalize_value: column: post_NeuroExamAbnormalMovement
(9/21) normalize_value: column: post_NeuroExamGagReflexAbsent
(10/21) normalize_value: column: post_NeuroExamDate
(11/21) normalize_value: column: post_NeuroExamTime
(12/21) normalize_value: column: post_NeuroExamLevelConsciousness
(13/21) normalize_value: column: post_NeuroExamSpontaneousActivity
(14/21) normalize_value: column: post_NeuroExamPosture
(15/21) normalize_value: column: post_NeuroExamTone
(16/21) normalize_value: column: post_NeuroExamSuck
(17/21) normalize_value: column: post_NeuroExamMoro
(18/21) normalize_valu

In [90]:
# Sanity check: list every column of the normalized post-neuro-exam frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/42) center: (163/0)
(1/42) center.orig: (163/0)
(2/42) subjectID: (163/0)
(3/42) subjectID.orig: (163/0)
(4/42) uniqueID: (163/0)
(5/42) uniqueID.orig: (163/0)
(6/42) post_NeuroExamDate: (163/0)
(7/42) post_NeuroExamDate.orig: (152/11)
(8/42) post_NeuroExamTime: (163/0)
(9/42) post_NeuroExamTime.orig: (150/13)
(10/42) post_NeuroExamLevelConsciousness: (163/0)
(11/42) post_NeuroExamLevelConsciousness.orig: (152/11)
(12/42) post_NeuroExamSpontaneousActivity: (163/0)
(13/42) post_NeuroExamSpontaneousActivity.orig: (152/11)
(14/42) post_NeuroExamPosture: (163/0)
(15/42) post_NeuroExamPosture.orig: (152/11)
(16/42) post_NeuroExamTone: (163/0)
(17/42) post_NeuroExamTone.orig: (124/39)
(18/42) post_NeuroExamSuck: (163/0)
(19/42) post_NeuroExamSuck.orig: (149/14)
(20/42) post_NeuroExamMoro: (163/0)
(21/42) post_NeuroExamMoro.orig: (147/16)
(22/42) post_NeuroExamPupils: (163/0)
(23/42) post_NeuroExamPupils.orig: (151/12)
(24/42) post_NeuroExamHeartRate: (163/0)
(25/42) post_NeuroExamHeartRat

## 03-05-mri

In [91]:
# Column(s) handed to normalize_value to build the _flatten_index column.
_FLATTEN_IDS = ['MRIReader']
# CSV name shared by the input file (input_dir) and the normalized output (out_dir).
base_filename = '03-05-mri.csv'

In [92]:
# Read the table with every column as object dtype (no type inference), then
# normalize it with the data-dictionary value map and the flatten ids.
# NOTE(review): this run logs "[WARN] not in order_map" for
# subjectID_with_postfix and subjectID_postfix -- confirm whether those columns
# should be added to the order map.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)


(0/92) normalize_value: column: center
(1/92) normalize_value: column: subjectID
(2/92) normalize_value: column: uniqueID
(3/92) normalize_value: column: MRIReader
(4/92) normalize_value: column: MRIReadDate
(5/92) normalize_value: column: MRIDate
(6/92) normalize_value: column: MRIStrength_T
(7/92) normalize_value: column: MRIStrength_c
(8/92) normalize_value: column: MRIAdequateQuality
(9/92) normalize_value: column: MRIAdequateQuality_c
(10/92) normalize_value: column: MRIT1Axial
(11/92) normalize_value: column: MRIT1Axial_c
(12/92) normalize_value: column: MRIT1Coronal
(13/92) normalize_value: column: MRIT1Coronal_c
(14/92) normalize_value: column: MRIT1Sagittal
(15/92) normalize_value: column: MRIT1Sagittal_c
(16/92) normalize_value: column: MRIT2Axial
(17/92) normalize_value: column: MRIT2Axial_c
(18/92) normalize_value: column: MRIT2Coronal
(19/92) normalize_value: column: MRIT2Coronal_c
(20/92) normalize_value: column: MRIT2Sagittal
(21/92) normalize_value: column: MRIT2Sagitta

flatten_index: flatten_ids: ['MRIReader'] unique_id_map: {'1': '1', '2': '2'} the_type: object
[WARN] not in order_map: subjectID_with_postfix
[WARN] not in order_map: subjectID_postfix
[WARN] not in order_map: subjectID_with_postfix
[WARN] not in order_map: subjectID_postfix


In [93]:
# Persist the normalized frame under the same file name in out_dir (no index column).
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

In [94]:
# Sanity check: list every column of the normalized MRI frame with its counts
# (output looks like "(non-null/null)" -- TODO confirm against column_info).
COMBINE_harmonizer.column_info(df)

(0/185) center: (244/0)
(1/185) center.orig: (244/0)
(2/185) subjectID: (244/0)
(3/185) subjectID.orig: (244/0)
(4/185) uniqueID: (244/0)
(5/185) uniqueID.orig: (244/0)
(6/185) MRI_ID: (244/0)
(7/185) MRI_ID.orig: (244/0)
(8/185) _flatten_index: (244/0)
(9/185) MRIID: (244/0)
(10/185) MRIID.orig: (244/0)
(11/185) MRIReader: (244/0)
(12/185) MRIReader.orig: (244/0)
(13/185) MRIReadDate: (244/0)
(14/185) MRIReadDate.orig: (244/0)
(15/185) MRIStrength_T: (244/0)
(16/185) MRIStrength_T.orig: (244/0)
(17/185) MRIStrength_c: (244/0)
(18/185) MRIStrength_c.orig: (114/130)
(19/185) MRIAdequateQuality: (244/0)
(20/185) MRIAdequateQuality.orig: (239/5)
(21/185) MRIAdequateQuality_c: (244/0)
(22/185) MRIAdequateQuality_c.orig: (244/0)
(23/185) MRIT1Axial: (244/0)
(24/185) MRIT1Axial.orig: (244/0)
(25/185) MRIT1Axial_c: (244/0)
(26/185) MRIT1Axial_c.orig: (172/72)
(27/185) MRIT1Coronal: (244/0)
(28/185) MRIT1Coronal.orig: (244/0)
(29/185) MRIT1Coronal_c: (244/0)
(30/185) MRIT1Coronal_c.orig: (47/1

## 03-05_s-mri

In [95]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '03-05_s-mri.csv'

In [96]:
# Read the MRI summary export with object dtype and normalize it with the value map.
# NOTE(review): the recorded output shows "[WARN] unable to get value" for
# MRINoObtainReason ('no arrangement with satellite site') and
# MRINRNPatternOfInjury ('1A') -- confirm the data dictionary should cover them.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

(0/18) normalize_value: column: center
(1/18) normalize_value: column: subjectID
(2/18) normalize_value: column: uniqueID
(3/18) normalize_value: column: MRIAvailable
(4/18) normalize_value: column: MRIObtain
(5/18) normalize_value: column: MRINoObtainReason
[WARN] unable to float: val (no arrangement with satellite site/<class 'str'>) e: could not convert string to float: 'no arrangement with satellite site'
[WARN] unable to get value: sheet_name: MRINoObtainReason value: no arrangement with satellite site value_float: no arrangement with satellite site value_int: no arrangement with satellite site
(6/18) normalize_value: column: MRIDate
(7/18) normalize_value: column: MRITime
(8/18) normalize_value: column: MRISendRTIDate
(9/18) normalize_value: column: MRIRead
(10/18) normalize_value: column: MRIScore
[WARN] unable to float: val (1A/<class 'str'>) e: could not convert string to float: '1A'
[WARN] unable to get value: sheet_name: MRINRNPatternOfInjury value: 1A value_float: 1A value_

In [97]:
# Persist the normalized frame under the same file name in out_dir,
# leaving the DataFrame index out of the CSV.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

In [98]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/36) center: (168/0)
(1/36) center.orig: (168/0)
(2/36) subjectID: (168/0)
(3/36) subjectID.orig: (168/0)
(4/36) uniqueID: (168/0)
(5/36) uniqueID.orig: (168/0)
(6/36) MRIAvailable: (168/0)
(7/36) MRIAvailable.orig: (150/18)
(8/36) MRIObtain: (168/0)
(9/36) MRIObtain.orig: (148/20)
(10/36) MRISendRTIDate: (168/0)
(11/36) MRISendRTIDate.orig: (146/22)
(12/36) MRINoObtainReason: (168/0)
(13/36) MRINoObtainReason.orig: (1/167)
(14/36) MRIRead: (168/0)
(15/36) MRIRead.orig: (147/21)
(16/36) MRIScore: (168/0)
(17/36) MRIScore.orig: (147/21)
(18/36) MRI2LevelPatternOfInjury: (168/0)
(19/36) MRI2LevelPatternOfInjury.orig: (128/40)
(20/36) MRIAge_day: (168/0)
(21/36) MRIAge_day.orig: (128/40)
(22/36) MRINRNPatternOfInjury: (168/0)
(23/36) MRINRNPatternOfInjury.orig: (128/40)
(24/36) MRINotDone: (168/0)
(25/36) MRINotDone.orig: (21/147)
(26/36) MRIUnread: (168/0)
(27/36) MRIUnread.orig: (19/149)
(28/36) MRIDate: (168/0)
(29/36) MRIDate.orig: (147/21)
(30/36) MRITime: (168/0)
(31/36) MRITime.o

## 02-11-elevated-temperature

In [99]:
# Columns handed to normalize_value as flatten ids for this file
# (presumably episode number plus minute offset identify a reading -- confirm).
_FLATTEN_IDS = ['elevatedTempNumber', 'elevatedTempMin']
# File name shared by the input (input_dir) and the normalized output (out_dir).
base_filename = '02-11-elevated-temperature.csv'

In [100]:
# Read the elevated-temperature export with object dtype, normalize it with the
# value map (flattening on _FLATTEN_IDS), then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/17) normalize_value: column: center
(1/17) normalize_value: column: subjectID
(2/17) normalize_value: column: uniqueID
(3/17) normalize_value: column: elevatedTempBath
(4/17) normalize_value: column: elevatedTempBlanketrol
(5/17) normalize_value: column: elevatedTempNumber
(6/17) normalize_value: column: elevatedTempMin
(7/17) normalize_value: column: elevatedTempDate
(8/17) normalize_value: column: elevatedTempTime
(9/17) normalize_value: column: elevatedTempSkinTemperature_C
(10/17) normalize_value: column: elevatedTempAxillaryTemperature_C
(11/17) normalize_value: column: elevatedTempEsophagealTemperature_C
(12/17) normalize_value: column: elevatedTempServoSet_C
(13/17) normalize_value: column: elevatedTempDevice
(14/17) normalize_value: column: elevatedTempDeviceMode
(15/17) normalize_value: column: elevatedTempAirTemperature_C
(16/17) normalize_value: column: elevatedTempNoBathReason
flatten_index: flatten_ids: ['elevatedTempNumber', 'elevatedTempMin'] unique_id_map: {'1@0': '1

In [101]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/35) center: (445/0)
(1/35) center.orig: (445/0)
(2/35) subjectID: (445/0)
(3/35) subjectID.orig: (445/0)
(4/35) uniqueID: (445/0)
(5/35) uniqueID.orig: (445/0)
(6/35) _flatten_index: (445/0)
(7/35) elevatedTempNumber: (445/0)
(8/35) elevatedTempNumber.orig: (445/0)
(9/35) elevatedTempMin: (445/0)
(10/35) elevatedTempMin.orig: (445/0)
(11/35) elevatedTempDate: (445/0)
(12/35) elevatedTempDate.orig: (407/38)
(13/35) elevatedTempTime: (445/0)
(14/35) elevatedTempTime.orig: (396/49)
(15/35) elevatedTempSkinTemperature_C: (445/0)
(16/35) elevatedTempSkinTemperature_C.orig: (265/180)
(17/35) elevatedTempAxillaryTemperature_C: (445/0)
(18/35) elevatedTempAxillaryTemperature_C.orig: (259/186)
(19/35) elevatedTempEsophagealTemperature_C: (445/0)
(20/35) elevatedTempEsophagealTemperature_C.orig: (299/146)
(21/35) elevatedTempServoSet_C: (445/0)
(22/35) elevatedTempServoSet_C.orig: (225/220)
(23/35) elevatedTempDevice: (445/0)
(24/35) elevatedTempDevice.orig: (334/111)
(25/35) elevatedTempDevi

## 02-12-fluctuated-temperature

In [102]:
# Columns handed to normalize_value as flatten ids for this file
# (presumably episode number plus minute offset identify a reading -- confirm).
_FLATTEN_IDS = ['fluctuateTempNumber', 'fluctuateTempMin']
# File name shared by the input (input_dir) and the normalized output (out_dir).
base_filename = '02-12-fluctuated-temperature.csv'

In [103]:
# Read the fluctuated-temperature export with object dtype, normalize it with the
# value map (flattening on _FLATTEN_IDS), then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: fluctuateTempNumber
(4/12) normalize_value: column: fluctuateTempMin
(5/12) normalize_value: column: fluctuateTempDate
(6/12) normalize_value: column: fluctuateTempTime
(7/12) normalize_value: column: fluctuateTempSkinTemperature_C
(8/12) normalize_value: column: fluctuateTempAxillaryTemperature_C
(9/12) normalize_value: column: fluctuateTempEsophagealTemperature_C
(10/12) normalize_value: column: fluctuateTempBlanketrol_C
(11/12) normalize_value: column: fluctuateTempServoSet_C
flatten_index: flatten_ids: ['fluctuateTempNumber', 'fluctuateTempMin'] unique_id_map: {'1@10': '1@10', '1@20': '1@20', '1@30': '1@30', '1@40': '1@40', '1@50': '1@50', '1@60': '1@60', '2@10': '2@10', '2@20': '2@20', '2@30': '2@30', '2@40': '2@40', '2@50': '2@50', '2@60': '2@60', '3@10': '3@10', '3@20': '3@20', '3@30': '3@30', '3@40': '3@40', '3@50': '3@50', '3

## 02-14-adverse-event

In [104]:
# Column handed to normalize_value as the flatten id for this file
# (presumably it numbers the adverse events of a subject -- confirm).
_FLATTEN_IDS = ['adverseEventNumber']
# File name shared by the input (input_dir) and the normalized output (out_dir).
base_filename = '02-14-adverse-event.csv'


In [105]:
# Read the adverse-event export with object dtype, normalize it with the
# value map (flattening on _FLATTEN_IDS), then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/56) normalize_value: column: center
(1/56) normalize_value: column: subjectID
(2/56) normalize_value: column: uniqueID
(3/56) normalize_value: column: SAECardiacExperienceComment
(4/56) normalize_value: column: SAEMetabolicAcidosisComment
(5/56) normalize_value: column: SAEThrombosisExperienceComment
(6/56) normalize_value: column: SAEBleedingExperienceComment
(7/56) normalize_value: column: SAEAlterationSkinIntegrity
(8/56) normalize_value: column: SAEAlterationSkinIntegrityComment
(9/56) normalize_value: column: SAEDeathComment
(10/56) normalize_value: column: adverseEventNumber
(11/56) normalize_value: column: SAECardiacExperienceOnsetDate
(12/56) normalize_value: column: SAECardiacExperienceOnsetTime
(13/56) normalize_value: column: SAECardiacExperienceResolveDate
(14/56) normalize_value: column: SAECardiacExperienceResolveTime
(15/56) normalize_value: column: SAECardiacExperienceActionTaken
(16/56) normalize_value: column: SAECardiacExperienceOutcome
(17/56) normalize_value: co

In [106]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/113) center: (18/0)
(1/113) center.orig: (18/0)
(2/113) subjectID: (18/0)
(3/113) subjectID.orig: (18/0)
(4/113) uniqueID: (18/0)
(5/113) uniqueID.orig: (18/0)
(6/113) _flatten_index: (18/0)
(7/113) adverseEventNumber: (18/0)
(8/113) adverseEventNumber.orig: (18/0)
(9/113) SAECardiacExperienceOnsetDate: (18/0)
(10/113) SAECardiacExperienceOnsetDate.orig: (0/18)
(11/113) SAECardiacExperienceOnsetTime: (18/0)
(12/113) SAECardiacExperienceOnsetTime.orig: (0/18)
(13/113) SAECardiacExperienceResolveDate: (18/0)
(14/113) SAECardiacExperienceResolveDate.orig: (0/18)
(15/113) SAECardiacExperienceResolveTime: (18/0)
(16/113) SAECardiacExperienceResolveTime.orig: (0/18)
(17/113) SAECardiacExperienceDueToHypothermia: (18/0)
(18/113) SAECardiacExperienceDueToHypothermia.orig: (0/18)
(19/113) SAECardiacExperienceActionTaken: (18/0)
(20/113) SAECardiacExperienceActionTaken.orig: (0/18)
(21/113) SAECardiacExperienceOutcome: (18/0)
(22/113) SAECardiacExperienceOutcome.orig: (0/18)
(23/113) SAECardi

## 02-15-violation

In [107]:
# Column handed to normalize_value as the flatten id for this file
# (presumably it numbers the protocol violations of a subject -- confirm).
_FLATTEN_IDS = ['violationNumber']
# File name shared by the input (input_dir) and the normalized output (out_dir).
base_filename = '02-15-violation.csv'

In [108]:
# Read the violation export with object dtype, normalize it with the
# value map (flattening on _FLATTEN_IDS), then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/10) normalize_value: column: center
(1/10) normalize_value: column: subjectID
(2/10) normalize_value: column: uniqueID
(3/10) normalize_value: column: violationOtherText
(4/10) normalize_value: column: violationOtherCirumstanceText
(5/10) normalize_value: column: violationComment
(6/10) normalize_value: column: violationNumber
(7/10) normalize_value: column: violationDate
(8/10) normalize_value: column: violationNature
(9/10) normalize_value: column: violationCircumstance
flatten_index: flatten_ids: ['violationNumber'] unique_id_map: {np.int64(1): np.int64(1), np.int64(2): np.int64(2), np.int64(3): np.int64(3)} the_type: int64


In [109]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/21) center: (52/0)
(1/21) center.orig: (52/0)
(2/21) subjectID: (52/0)
(3/21) subjectID.orig: (52/0)
(4/21) uniqueID: (52/0)
(5/21) uniqueID.orig: (52/0)
(6/21) _flatten_index: (52/0)
(7/21) violationNumber: (52/0)
(8/21) violationNumber.orig: (52/0)
(9/21) violationDate: (52/0)
(10/21) violationDate.orig: (52/0)
(11/21) violationNature: (52/0)
(12/21) violationNature.orig: (52/0)
(13/21) violationOtherText: (52/0)
(14/21) violationOtherText.orig: (36/16)
(15/21) violationCircumstance: (52/0)
(16/21) violationCircumstance.orig: (52/0)
(17/21) violationOtherCirumstanceText: (52/0)
(18/21) violationOtherCirumstanceText.orig: (41/11)
(19/21) violationComment: (52/0)
(20/21) violationComment.orig: (34/18)


## 02-17-discontinue

In [110]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '02-17-discontinue.csv'

In [111]:
# Read the discontinue export with object dtype, normalize it with the
# value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/13) normalize_value: column: center
(1/13) normalize_value: column: subjectID
(2/13) normalize_value: column: uniqueID
(3/13) normalize_value: column: discontinueBeforeEndPeriod
(4/13) normalize_value: column: discontinueParentsWithdraw
(5/13) normalize_value: column: discontinuePhysicianWithdraw
(6/13) normalize_value: column: discontinueAdverseEvent
(7/13) normalize_value: column: discontinueECMO
(8/13) normalize_value: column: discontinueDeath
(9/13) normalize_value: column: discontinueOther
(10/13) normalize_value: column: discontinueOtherText
(11/13) normalize_value: column: discontinueDate
(12/13) normalize_value: column: discontinueTime


In [112]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/26) center: (168/0)
(1/26) center.orig: (168/0)
(2/26) subjectID: (168/0)
(3/26) subjectID.orig: (168/0)
(4/26) uniqueID: (168/0)
(5/26) uniqueID.orig: (168/0)
(6/26) discontinueDate: (168/0)
(7/26) discontinueDate.orig: (23/145)
(8/26) discontinueTime: (168/0)
(9/26) discontinueTime.orig: (23/145)
(10/26) discontinueBeforeEndPeriod: (168/0)
(11/26) discontinueBeforeEndPeriod.orig: (168/0)
(12/26) discontinueParentsWithdraw: (168/0)
(13/26) discontinueParentsWithdraw.orig: (23/145)
(14/26) discontinuePhysicianWithdraw: (168/0)
(15/26) discontinuePhysicianWithdraw.orig: (23/145)
(16/26) discontinueAdverseEvent: (168/0)
(17/26) discontinueAdverseEvent.orig: (23/145)
(18/26) discontinueECMO: (168/0)
(19/26) discontinueECMO.orig: (23/145)
(20/26) discontinueDeath: (168/0)
(21/26) discontinueDeath.orig: (23/145)
(22/26) discontinueOther: (168/0)
(23/26) discontinueOther.orig: (23/145)
(24/26) discontinueOtherText: (168/0)
(25/26) discontinueOtherText.orig: (9/159)


## 04-16-wdraw-support

In [113]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-16-wdraw-support.csv'

In [114]:
# Read the withdrawal-of-support export with object dtype, normalize it with
# the value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/16) normalize_value: column: center
(1/16) normalize_value: column: subjectID
(2/16) normalize_value: column: uniqueID
(3/16) normalize_value: column: wdrawSupportDiscussedWithFamily
(4/16) normalize_value: column: wdrawSupportNeurologicalExam
(5/16) normalize_value: column: wdrawSupportImagingStudy
(6/16) normalize_value: column: wdrawSupportEEGFinding
(7/16) normalize_value: column: wdrawSupportMultisystemOrganFailureOtherThanCNS
(8/16) normalize_value: column: wdrawSupportBrainBloodFlowScan
(9/16) normalize_value: column: wdrawSupportParentWish
(10/16) normalize_value: column: wdrawSupportOther
(11/16) normalize_value: column: wdrawSupportOtherText
(12/16) normalize_value: column: wdrawSupport
(13/16) normalize_value: column: wdrawSupportRecommendSolelyByClinicalTeam
(14/16) normalize_value: column: wdrawSupportDate
(15/16) normalize_value: column: wdrawSupportTime


In [115]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/32) center: (168/0)
(1/32) center.orig: (168/0)
(2/32) subjectID: (168/0)
(3/32) subjectID.orig: (168/0)
(4/32) uniqueID: (168/0)
(5/32) uniqueID.orig: (168/0)
(6/32) wdrawSupport: (168/0)
(7/32) wdrawSupport.orig: (19/149)
(8/32) wdrawSupportDate: (168/0)
(9/32) wdrawSupportDate.orig: (18/150)
(10/32) wdrawSupportTime: (168/0)
(11/32) wdrawSupportTime.orig: (18/150)
(12/32) wdrawSupportDiscussedWithFamily: (168/0)
(13/32) wdrawSupportDiscussedWithFamily.orig: (168/0)
(14/32) wdrawSupportRecommendSolelyByClinicalTeam: (168/0)
(15/32) wdrawSupportRecommendSolelyByClinicalTeam.orig: (19/149)
(16/32) wdrawSupportNeurologicalExam: (168/0)
(17/32) wdrawSupportNeurologicalExam.orig: (19/149)
(18/32) wdrawSupportImagingStudy: (168/0)
(19/32) wdrawSupportImagingStudy.orig: (19/149)
(20/32) wdrawSupportEEGFinding: (168/0)
(21/32) wdrawSupportEEGFinding.orig: (19/149)
(22/32) wdrawSupportMultisystemOrganFailureOtherThanCNS: (168/0)
(23/32) wdrawSupportMultisystemOrganFailureOtherThanCNS.orig:

## 04-17-limit-care

In [116]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-17-limit-care.csv'

In [117]:
# Read the limitation-of-care export with object dtype, normalize it with
# the value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/14) normalize_value: column: center
(1/14) normalize_value: column: subjectID
(2/14) normalize_value: column: uniqueID
(3/14) normalize_value: column: limitCareDiscussedWithFamily
(4/14) normalize_value: column: limitCareAgreedByFamilyAndCareTeam
(5/14) normalize_value: column: limitCareNoFurtherMechanicalVentilationAndIntubation
(6/14) normalize_value: column: limitCareNoFurtherVentilationWithBagAndMask
(7/14) normalize_value: column: limitCareNoFurtherMedicationsToSupportBP
(8/14) normalize_value: column: limitCareNoFurtherChestCompression
(9/14) normalize_value: column: limitCareNoFurtherEmergencyMedication
(10/14) normalize_value: column: limitCareDNR
(11/14) normalize_value: column: limitCareRecommendSolelyByClinicalTeam
(12/14) normalize_value: column: limitCareDNRDate
(13/14) normalize_value: column: limitCareDNRTime


In [118]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/28) center: (168/0)
(1/28) center.orig: (168/0)
(2/28) subjectID: (168/0)
(3/28) subjectID.orig: (168/0)
(4/28) uniqueID: (168/0)
(5/28) uniqueID.orig: (168/0)
(6/28) limitCareDiscussedWithFamily: (168/0)
(7/28) limitCareDiscussedWithFamily.orig: (168/0)
(8/28) limitCareRecommendSolelyByClinicalTeam: (168/0)
(9/28) limitCareRecommendSolelyByClinicalTeam.orig: (14/154)
(10/28) limitCareAgreedByFamilyAndCareTeam: (168/0)
(11/28) limitCareAgreedByFamilyAndCareTeam.orig: (13/155)
(12/28) limitCareNoFurtherMechanicalVentilationAndIntubation: (168/0)
(13/28) limitCareNoFurtherMechanicalVentilationAndIntubation.orig: (13/155)
(14/28) limitCareNoFurtherVentilationWithBagAndMask: (168/0)
(15/28) limitCareNoFurtherVentilationWithBagAndMask.orig: (13/155)
(16/28) limitCareNoFurtherMedicationsToSupportBP: (168/0)
(17/28) limitCareNoFurtherMedicationsToSupportBP.orig: (13/155)
(18/28) limitCareNoFurtherChestCompression: (168/0)
(19/28) limitCareNoFurtherChestCompression.orig: (13/155)
(20/28) li

## 04-01-status

In [119]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-01-status.csv'

In [120]:
# Read the status export with object dtype, normalize it with the
# value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/31) normalize_value: column: center
(1/31) normalize_value: column: subjectID
(2/31) normalize_value: column: uniqueID
(3/31) normalize_value: column: homeTherapyStatus
(4/31) normalize_value: column: homeTherapyVentilator
(5/31) normalize_value: column: homeTherapyOxygen
(6/31) normalize_value: column: homeTherapyGavageTubeFeed
(7/31) normalize_value: column: homeTherapyGastrostomyTubeFeed
(8/31) normalize_value: column: homeTherapyTemperatureBlanket
(9/31) normalize_value: column: homeTherapyAnticonvulsantMedication
(10/31) normalize_value: column: homeTherapyOther
(11/31) normalize_value: column: homeTherapyOtherText
(12/31) normalize_value: column: deathAutopsy
(13/31) normalize_value: column: deathCauseText
(14/31) normalize_value: column: status
(15/31) normalize_value: column: dischargeDate
(16/31) normalize_value: column: dischargeWeight_g
(17/31) normalize_value: column: dischargeLength_cm
(18/31) normalize_value: column: dischargeHeadCircumference_cm
(19/31) normalize_valu

In [121]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/62) center: (168/0)
(1/62) center.orig: (168/0)
(2/62) subjectID: (168/0)
(3/62) subjectID.orig: (168/0)
(4/62) uniqueID: (168/0)
(5/62) uniqueID.orig: (168/0)
(6/62) status: (168/0)
(7/62) status.orig: (168/0)
(8/62) statusDate: (168/0)
(9/62) statusDate.orig: (168/0)
(10/62) dischargeStatus: (168/0)
(11/62) dischargeStatus.orig: (168/0)
(12/62) dischargeDate: (168/0)
(13/62) dischargeDate.orig: (136/32)
(14/62) dischargeWeight_g: (168/0)
(15/62) dischargeWeight_g.orig: (136/32)
(16/62) dischargeLength_cm: (168/0)
(17/62) dischargeLength_cm.orig: (126/42)
(18/62) dischargeHeadCircumference_cm: (168/0)
(19/62) dischargeHeadCircumference_cm.orig: (128/40)
(20/62) transferReason: (168/0)
(21/62) transferReason.orig: (16/152)
(22/62) transferDate: (168/0)
(23/62) transferDate.orig: (16/152)
(24/62) transferWeight_g: (168/0)
(25/62) transferWeight_g.orig: (16/152)
(26/62) transferLength_cm: (168/0)
(27/62) transferLength_cm.orig: (13/155)
(28/62) transferHeadCircumference_cm: (168/0)
(2

## 04-12-neuro-exam

In [122]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-12-neuro-exam.csv'

In [123]:
# Read the discharge neuro-exam export with object dtype, normalize it with
# the value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/20) normalize_value: column: center
(1/20) normalize_value: column: subjectID
(2/20) normalize_value: column: uniqueID
(3/20) normalize_value: column: dischargeNeuroExamSeizure
(4/20) normalize_value: column: dischargeNeuroExamHypertonia
(5/20) normalize_value: column: dischargeNeuroExamClonusSustained
(6/20) normalize_value: column: dischargeNeuroExamFistedHand
(7/20) normalize_value: column: dischargeNeuroExamAbnormalMovement
(8/20) normalize_value: column: dischargeNeuroExamGagReflexAbsent
(9/20) normalize_value: column: dischargeNeuroExamDate
(10/20) normalize_value: column: dischargeNeuroExamTime
(11/20) normalize_value: column: dischargeNeuroExamLevelConsciousness
(12/20) normalize_value: column: dischargeNeuroExamSpontaneousActivity
(13/20) normalize_value: column: dischargeNeuroExamPosture
(14/20) normalize_value: column: dischargeNeuroExamTone
(15/20) normalize_value: column: dischargeNeuroExamSuck
(16/20) normalize_value: column: dischargeNeuroExamMoro
(17/20) normalize_va

In [124]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/40) center: (168/0)
(1/40) center.orig: (168/0)
(2/40) subjectID: (168/0)
(3/40) subjectID.orig: (168/0)
(4/40) uniqueID: (168/0)
(5/40) uniqueID.orig: (168/0)
(6/40) dischargeNeuroExamDate: (168/0)
(7/40) dischargeNeuroExamDate.orig: (157/11)
(8/40) dischargeNeuroExamTime: (168/0)
(9/40) dischargeNeuroExamTime.orig: (153/15)
(10/40) dischargeNeuroExamLevelConsciousness: (168/0)
(11/40) dischargeNeuroExamLevelConsciousness.orig: (157/11)
(12/40) dischargeNeuroExamSpontaneousActivity: (168/0)
(13/40) dischargeNeuroExamSpontaneousActivity.orig: (157/11)
(14/40) dischargeNeuroExamPosture: (168/0)
(15/40) dischargeNeuroExamPosture.orig: (157/11)
(16/40) dischargeNeuroExamTone: (168/0)
(17/40) dischargeNeuroExamTone.orig: (129/39)
(18/40) dischargeNeuroExamSuck: (168/0)
(19/40) dischargeNeuroExamSuck.orig: (157/11)
(20/40) dischargeNeuroExamMoro: (168/0)
(21/40) dischargeNeuroExamMoro.orig: (155/13)
(22/40) dischargeNeuroExamPupils: (168/0)
(23/40) dischargeNeuroExamPupils.orig: (156/12)

## 04-02-cardiovascular

In [125]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-02-cardiovascular.csv'

In [126]:
# Read the cardiovascular export with object dtype, normalize it with the
# value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/9) normalize_value: column: center
(1/9) normalize_value: column: subjectID
(2/9) normalize_value: column: uniqueID
(3/9) normalize_value: column: dischargeCardiomegaly
(4/9) normalize_value: column: dischargeCardiacFailure
(5/9) normalize_value: column: dischargeCardiacDysfunctionByEcho
(6/9) normalize_value: column: dischargeCardiacIschemiaByEKG
(7/9) normalize_value: column: dischargeHypotension
(8/9) normalize_value: column: dischargeArrhythmia


In [127]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/18) center: (168/0)
(1/18) center.orig: (168/0)
(2/18) subjectID: (168/0)
(3/18) subjectID.orig: (168/0)
(4/18) uniqueID: (168/0)
(5/18) uniqueID.orig: (168/0)
(6/18) dischargeCardiomegaly: (168/0)
(7/18) dischargeCardiomegaly.orig: (168/0)
(8/18) dischargeCardiacFailure: (168/0)
(9/18) dischargeCardiacFailure.orig: (168/0)
(10/18) dischargeCardiacDysfunctionByEcho: (168/0)
(11/18) dischargeCardiacDysfunctionByEcho.orig: (168/0)
(12/18) dischargeCardiacIschemiaByEKG: (168/0)
(13/18) dischargeCardiacIschemiaByEKG.orig: (168/0)
(14/18) dischargeHypotension: (168/0)
(15/18) dischargeHypotension.orig: (168/0)
(16/18) dischargeArrhythmia: (168/0)
(17/18) dischargeArrhythmia.orig: (168/0)


## 04-03-respiratory

In [128]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-03-respiratory.csv'

In [129]:
# Read the respiratory export with object dtype, normalize it with the
# value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/25) normalize_value: column: center
(1/25) normalize_value: column: subjectID
(2/25) normalize_value: column: uniqueID
(3/25) normalize_value: column: dischargeMeconiumAspirationSyndrome
(4/25) normalize_value: column: dischargePPHN
(5/25) normalize_value: column: dischargePulmonaryHemorrhage
(6/25) normalize_value: column: dischargePenumonia
(7/25) normalize_value: column: dischargeChronicLungDisease
(8/25) normalize_value: column: dischargeECMO
(9/25) normalize_value: column: dischargeINO
(10/25) normalize_value: column: dischargeVentilator_day
(11/25) normalize_value: column: dischargeOxygen_day
(12/25) normalize_value: column: dischargeCPAP_day
(13/25) normalize_value: column: dischargePulmonaryStartDate1
(14/25) normalize_value: column: dischargePulmonaryEndDate1
(15/25) normalize_value: column: dischargePulmonaryStartTime1
(16/25) normalize_value: column: dischargePulmonaryEndTime1
(17/25) normalize_value: column: dischargePulmonaryStartDate2
(18/25) normalize_value: column: d

In [130]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/50) center: (168/0)
(1/50) center.orig: (168/0)
(2/50) subjectID: (168/0)
(3/50) subjectID.orig: (168/0)
(4/50) uniqueID: (168/0)
(5/50) uniqueID.orig: (168/0)
(6/50) dischargeMeconiumAspirationSyndrome: (168/0)
(7/50) dischargeMeconiumAspirationSyndrome.orig: (168/0)
(8/50) dischargePPHN: (168/0)
(9/50) dischargePPHN.orig: (168/0)
(10/50) dischargePulmonaryHemorrhage: (168/0)
(11/50) dischargePulmonaryHemorrhage.orig: (168/0)
(12/50) dischargePenumonia: (168/0)
(13/50) dischargePenumonia.orig: (168/0)
(14/50) dischargeChronicLungDisease: (168/0)
(15/50) dischargeChronicLungDisease.orig: (168/0)
(16/50) dischargeECMO: (168/0)
(17/50) dischargeECMO.orig: (168/0)
(18/50) dischargeINO: (168/0)
(19/50) dischargeINO.orig: (168/0)
(20/50) dischargeVentilator_day: (168/0)
(21/50) dischargeVentilator_day.orig: (168/0)
(22/50) dischargeOxygen_day: (168/0)
(23/50) dischargeOxygen_day.orig: (168/0)
(24/50) dischargeCPAP_day: (168/0)
(25/50) dischargeCPAP_day.orig: (168/0)
(26/50) dischargePulm

## 04-04-hematology

In [131]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-04-hematology.csv'

In [132]:
# Read the hematology export with object dtype, normalize it with the
# value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/4) normalize_value: column: center
(1/4) normalize_value: column: subjectID
(2/4) normalize_value: column: uniqueID
(3/4) normalize_value: column: dischargeDIC


In [133]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/8) center: (168/0)
(1/8) center.orig: (168/0)
(2/8) subjectID: (168/0)
(3/8) subjectID.orig: (168/0)
(4/8) uniqueID: (168/0)
(5/8) uniqueID.orig: (168/0)
(6/8) dischargeDIC: (168/0)
(7/8) dischargeDIC.orig: (168/0)


## 04-05-metabolic

In [134]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-05-metabolic.csv'

In [135]:
# Read the metabolic export with object dtype, normalize it with the
# value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/6) normalize_value: column: center
(1/6) normalize_value: column: subjectID
(2/6) normalize_value: column: uniqueID
(3/6) normalize_value: column: dischargeHypoglycemia
(4/6) normalize_value: column: dischargeHypocalcemia
(5/6) normalize_value: column: dischargeHypomagnesemia


In [136]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/12) center: (168/0)
(1/12) center.orig: (168/0)
(2/12) subjectID: (168/0)
(3/12) subjectID.orig: (168/0)
(4/12) uniqueID: (168/0)
(5/12) uniqueID.orig: (168/0)
(6/12) dischargeHypoglycemia: (168/0)
(7/12) dischargeHypoglycemia.orig: (168/0)
(8/12) dischargeHypocalcemia: (168/0)
(9/12) dischargeHypocalcemia.orig: (168/0)
(10/12) dischargeHypomagnesemia: (168/0)
(11/12) dischargeHypomagnesemia.orig: (168/0)


## 04-06-renal

In [137]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-06-renal.csv'

In [138]:
# Read the renal export with object dtype, normalize it with the
# value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/6) normalize_value: column: center
(1/6) normalize_value: column: subjectID
(2/6) normalize_value: column: uniqueID
(3/6) normalize_value: column: dischargeOliguria
(4/6) normalize_value: column: dischargeAnuria
(5/6) normalize_value: column: dischargeDialysis


In [139]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/12) center: (168/0)
(1/12) center.orig: (168/0)
(2/12) subjectID: (168/0)
(3/12) subjectID.orig: (168/0)
(4/12) uniqueID: (168/0)
(5/12) uniqueID.orig: (168/0)
(6/12) dischargeOliguria: (168/0)
(7/12) dischargeOliguria.orig: (168/0)
(8/12) dischargeAnuria: (168/0)
(9/12) dischargeAnuria.orig: (168/0)
(10/12) dischargeDialysis: (168/0)
(11/12) dischargeDialysis.orig: (168/0)


## 04-07-gastrointestinal

In [140]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-07-gastrointestinal.csv'

In [141]:
# Read the gastrointestinal export with object dtype, normalize it with the
# value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/7) normalize_value: column: center
(1/7) normalize_value: column: subjectID
(2/7) normalize_value: column: uniqueID
(3/7) normalize_value: column: dischargeNEC
(4/7) normalize_value: column: dischargeHepaticDysfunction
(5/7) normalize_value: column: dischargeTubeFeedingDuration_day
(6/7) normalize_value: column: dischargeFullNippleFeed_day


In [142]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/14) center: (168/0)
(1/14) center.orig: (168/0)
(2/14) subjectID: (168/0)
(3/14) subjectID.orig: (168/0)
(4/14) uniqueID: (168/0)
(5/14) uniqueID.orig: (168/0)
(6/14) dischargeTubeFeedingDuration_day: (168/0)
(7/14) dischargeTubeFeedingDuration_day.orig: (165/3)
(8/14) dischargeFullNippleFeed_day: (168/0)
(9/14) dischargeFullNippleFeed_day.orig: (137/31)
(10/14) dischargeNEC: (168/0)
(11/14) dischargeNEC.orig: (168/0)
(12/14) dischargeHepaticDysfunction: (168/0)
(13/14) dischargeHepaticDysfunction.orig: (168/0)


## 04-08-skin

In [143]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# NOTE(review): per the recorded output this export holds only the three ID
# columns (center, subjectID, uniqueID) -- confirm that is expected.
base_filename = '04-08-skin.csv'

In [144]:
# Read the skin export with object dtype, normalize it with the
# value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/3) normalize_value: column: center
(1/3) normalize_value: column: subjectID
(2/3) normalize_value: column: uniqueID


In [145]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/6) center: (168/0)
(1/6) center.orig: (168/0)
(2/6) subjectID: (168/0)
(3/6) subjectID.orig: (168/0)
(4/6) uniqueID: (168/0)
(5/6) uniqueID.orig: (168/0)


## 04-09-auditory

In [146]:
# File name shared by the input (input_dir) and the normalized output (out_dir).
# No flatten ids are passed to normalize_value for this file.
base_filename = '04-09-auditory.csv'

In [147]:
# Read the auditory export with object dtype, normalize it with the
# value map, then write the result to out_dir.
filename = input_dir + os.sep + base_filename
df = COMBINE_harmonizer.normalize_value(
    pd.read_csv(filename, dtype='O'),
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Same file name as the input; the DataFrame index is not written.
out_filename = out_dir + os.sep + base_filename
df.to_csv(out_filename, index=False)

(0/5) normalize_value: column: center
(1/5) normalize_value: column: subjectID
(2/5) normalize_value: column: uniqueID
(3/5) normalize_value: column: dischargeHearingTest
(4/5) normalize_value: column: dischargeHearingTestNormal


In [148]:
# Print one summary line per column of the normalized frame. The "(a/b)" pair
# in the output looks like non-null/null counts -- confirm in column_info.
COMBINE_harmonizer.column_info(df)

(0/10) center: (168/0)


(1/10) center.orig: (168/0)
(2/10) subjectID: (168/0)
(3/10) subjectID.orig: (168/0)
(4/10) uniqueID: (168/0)
(5/10) uniqueID.orig: (168/0)
(6/10) dischargeHearingTest: (168/0)
(7/10) dischargeHearingTest.orig: (168/0)
(8/10) dischargeHearingTestNormal: (168/0)
(9/10) dischargeHearingTestNormal.orig: (131/37)


## 04-10-surgery

In [149]:
# CSV for the surgery section; the same base name is used for the input file
# (under input_dir) and the normalized output file (under out_dir).
base_filename = '04-10-surgery.csv'

In [150]:
# Load the section CSV with every column read as object dtype, so values are
# not coerced by pandas before normalization.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O')

# Normalize values against the data-dictionary value map.
df = df.pipe(COMBINE_harmonizer.normalize_value, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized frame under the same base name in out_dir.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/7) normalize_value: column: center
(1/7) normalize_value: column: subjectID
(2/7) normalize_value: column: uniqueID
(3/7) normalize_value: column: dischargeMajorSurgery
(4/7) normalize_value: column: dischargeSurgeryCode1
(5/7) normalize_value: column: dischargeSurgeryCode2
(6/7) normalize_value: column: dischargeSurgeryCode3


In [151]:
# Print a per-column summary of the normalized surgery frame.
# NOTE(review): each "(a/b)" pair looks like non-null/null counts -- confirm
# against COMBINE_harmonizer.column_info.
COMBINE_harmonizer.column_info(df)

(0/14) center: (168/0)
(1/14) center.orig: (168/0)
(2/14) subjectID: (168/0)
(3/14) subjectID.orig: (168/0)
(4/14) uniqueID: (168/0)
(5/14) uniqueID.orig: (168/0)
(6/14) dischargeMajorSurgery: (168/0)
(7/14) dischargeMajorSurgery.orig: (168/0)
(8/14) dischargeSurgeryCode1: (168/0)
(9/14) dischargeSurgeryCode1.orig: (9/159)
(10/14) dischargeSurgeryCode2: (168/0)
(11/14) dischargeSurgeryCode2.orig: (3/165)
(12/14) dischargeSurgeryCode3: (168/0)
(13/14) dischargeSurgeryCode3.orig: (0/168)


## 04-11-infection

In [152]:
# CSV for the infection section; the same base name is used for the input file
# (under input_dir) and the normalized output file (under out_dir).
base_filename = '04-11-infection.csv'

In [153]:
# Load the section CSV with every column read as object dtype, so values are
# not coerced by pandas before normalization.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O')

# Normalize values against the data-dictionary value map.
df = df.pipe(COMBINE_harmonizer.normalize_value, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized frame under the same base name in out_dir.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/11) normalize_value: column: center
(1/11) normalize_value: column: subjectID
(2/11) normalize_value: column: uniqueID
(3/11) normalize_value: column: dischargeSepticemia
(4/11) normalize_value: column: dischargeMeningitisEncephalitis
(5/11) normalize_value: column: dischargeSepticemiaOrganismCode1
(6/11) normalize_value: column: dischargeSepticemiaOrganismCode2
(7/11) normalize_value: column: dischargeSepticemiaOrganismCode3
(8/11) normalize_value: column: dischargeMeningitisOrganismCode1
(9/11) normalize_value: column: dischargeMeningitisOrganismCode2
(10/11) normalize_value: column: dischargeMeningitisOrganismCode3


In [154]:
# Print a per-column summary of the normalized infection frame.
# NOTE(review): each "(a/b)" pair looks like non-null/null counts -- confirm
# against COMBINE_harmonizer.column_info.
COMBINE_harmonizer.column_info(df)

(0/22) center: (168/0)
(1/22) center.orig: (168/0)
(2/22) subjectID: (168/0)
(3/22) subjectID.orig: (168/0)
(4/22) uniqueID: (168/0)
(5/22) uniqueID.orig: (168/0)
(6/22) dischargeSepticemia: (168/0)
(7/22) dischargeSepticemia.orig: (168/0)
(8/22) dischargeSepticemiaOrganismCode1: (168/0)
(9/22) dischargeSepticemiaOrganismCode1.orig: (3/165)
(10/22) dischargeSepticemiaOrganismCode2: (168/0)
(11/22) dischargeSepticemiaOrganismCode2.orig: (1/167)
(12/22) dischargeSepticemiaOrganismCode3: (168/0)
(13/22) dischargeSepticemiaOrganismCode3.orig: (1/167)
(14/22) dischargeMeningitisEncephalitis: (168/0)
(15/22) dischargeMeningitisEncephalitis.orig: (168/0)
(16/22) dischargeMeningitisOrganismCode1: (168/0)
(17/22) dischargeMeningitisOrganismCode1.orig: (2/166)
(18/22) dischargeMeningitisOrganismCode2: (168/0)
(19/22) dischargeMeningitisOrganismCode2.orig: (0/168)
(20/22) dischargeMeningitisOrganismCode3: (168/0)
(21/22) dischargeMeningitisOrganismCode3.orig: (0/168)


## 04-13-seizure

In [155]:
# CSV for the seizure section; the same base name is used for the input file
# (under input_dir) and the normalized output file (under out_dir).
base_filename = '04-13-seizure.csv'

In [156]:
# Load the section CSV with every column read as object dtype, so values are
# not coerced by pandas before normalization.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O')

# Normalize values against the data-dictionary value map.
df = df.pipe(COMBINE_harmonizer.normalize_value, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized frame under the same base name in out_dir.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/17) normalize_value: column: center
(1/17) normalize_value: column: subjectID
(2/17) normalize_value: column: uniqueID
(3/17) normalize_value: column: dischargeSeizure
(4/17) normalize_value: column: dischargeSeizurePreIntervention
(5/17) normalize_value: column: dischargeSeizureMaintenance
(6/17) normalize_value: column: dischargeSeizureRewarming
(7/17) normalize_value: column: dischargeSeizurePostIntervention
(8/17) normalize_value: column: dischargeEEG
(9/17) normalize_value: column: dischargeEEGFindingConsistentWithSeizure
(10/17) normalize_value: column: dischargeEEGAbnormalBackgroundActivity
(11/17) normalize_value: column: dischargeAnticonvulsantsGreater72H
(12/17) normalize_value: column: dischargeEEGFindingConsistentWithSeizureDate
(13/17) normalize_value: column: dischargeEEGFindingConsistentWithSeizureTime
(14/17) normalize_value: column: dischargeEEGAbnormalBackgroundActivityDate
(15/17) normalize_value: column: dischargeEEGAbnormalBackgroundActivityTime
(16/17) normaliz

In [157]:
# Print a per-column summary of the normalized seizure frame.
# NOTE(review): each "(a/b)" pair looks like non-null/null counts -- confirm
# against COMBINE_harmonizer.column_info.
COMBINE_harmonizer.column_info(df)

(0/34) center: (168/0)
(1/34) center.orig: (168/0)
(2/34) subjectID: (168/0)
(3/34) subjectID.orig: (168/0)
(4/34) uniqueID: (168/0)
(5/34) uniqueID.orig: (168/0)
(6/34) dischargeSeizure: (168/0)
(7/34) dischargeSeizure.orig: (168/0)
(8/34) dischargeSeizurePreIntervention: (168/0)
(9/34) dischargeSeizurePreIntervention.orig: (128/40)
(10/34) dischargeSeizureMaintenance: (168/0)
(11/34) dischargeSeizureMaintenance.orig: (128/40)
(12/34) dischargeSeizureRewarming: (168/0)
(13/34) dischargeSeizureRewarming.orig: (128/40)
(14/34) dischargeSeizurePostIntervention: (168/0)
(15/34) dischargeSeizurePostIntervention.orig: (128/40)
(16/34) dischargeEEG: (168/0)
(17/34) dischargeEEG.orig: (168/0)
(18/34) dischargeEEGFindingConsistentWithSeizure: (168/0)
(19/34) dischargeEEGFindingConsistentWithSeizure.orig: (139/29)
(20/34) dischargeEEGFindingConsistentWithSeizureDate: (168/0)
(21/34) dischargeEEGFindingConsistentWithSeizureDate.orig: (136/32)
(22/34) dischargeEEGFindingConsistentWithSeizureTime:

## 04-14-birth-defect

In [158]:
# CSV for the birth-defect section; the same base name is used for the input
# file (under input_dir) and the normalized output file (under out_dir).
base_filename = '04-14-birth-defect.csv'

In [159]:
# Load the section CSV with every column read as object dtype, so values are
# not coerced by pandas before normalization.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O')

# Normalize values against the data-dictionary value map.
df = df.pipe(COMBINE_harmonizer.normalize_value, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized frame under the same base name in out_dir.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/7) normalize_value: column: center
(1/7) normalize_value: column: subjectID
(2/7) normalize_value: column: uniqueID
(3/7) normalize_value: column: dischargeSyndromeMalformation
(4/7) normalize_value: column: dischargeBirthDefectCode1
(5/7) normalize_value: column: dischargeBirthDefectCode2
(6/7) normalize_value: column: dischargeBirthDefectCode3


In [160]:
# Print a per-column summary of the normalized birth-defect frame.
# NOTE(review): each "(a/b)" pair looks like non-null/null counts -- confirm
# against COMBINE_harmonizer.column_info.
COMBINE_harmonizer.column_info(df)

(0/14) center: (168/0)
(1/14) center.orig: (168/0)
(2/14) subjectID: (168/0)
(3/14) subjectID.orig: (168/0)
(4/14) uniqueID: (168/0)
(5/14) uniqueID.orig: (168/0)
(6/14) dischargeSyndromeMalformation: (168/0)
(7/14) dischargeSyndromeMalformation.orig: (168/0)
(8/14) dischargeBirthDefectCode1: (168/0)
(9/14) dischargeBirthDefectCode1.orig: (3/165)
(10/14) dischargeBirthDefectCode2: (168/0)
(11/14) dischargeBirthDefectCode2.orig: (0/168)
(12/14) dischargeBirthDefectCode3: (168/0)
(13/14) dischargeBirthDefectCode3.orig: (0/168)


## 04-15-home-therapy

In [161]:
# CSV for the home-therapy section; the same base name is used for the input
# file (under input_dir) and the normalized output file (under out_dir).
base_filename = '04-15-home-therapy.csv'

In [162]:
# Load the section CSV with every column read as object dtype, so values are
# not coerced by pandas before normalization.
filename = f"{input_dir}{os.sep}{base_filename}"
df = pd.read_csv(filename, dtype='O')

# Normalize values against the data-dictionary value map.
df = df.pipe(COMBINE_harmonizer.normalize_value, _VALUE_MAP, order_map=_ORDER_MAP)

# Write the normalized frame under the same base name in out_dir.
out_filename = f"{out_dir}{os.sep}{base_filename}"
df.to_csv(out_filename, index=False)

(0/11) normalize_value: column: center
(1/11) normalize_value: column: subjectID
(2/11) normalize_value: column: uniqueID
(3/11) normalize_value: column: dischargeHomeTherapyVentilator
(4/11) normalize_value: column: dischargeHomeTherapyOxygen
(5/11) normalize_value: column: dischargeHomeTherapyGavageTubeFeed
(6/11) normalize_value: column: dischargeHomeTherapyGastrostomyTubeFeed
(7/11) normalize_value: column: dischargeHomeTherapyTemperatureBlanket
(8/11) normalize_value: column: dischargeHomeTherapyAnticonvulsantMedication
(9/11) normalize_value: column: dischargeHomeTherapyOther
(10/11) normalize_value: column: dischargeHomeTherapyOtherText


In [163]:
# Print a per-column summary of the normalized home-therapy frame.
# NOTE(review): each "(a/b)" pair looks like non-null/null counts -- confirm
# against COMBINE_harmonizer.column_info.
COMBINE_harmonizer.column_info(df)

(0/22) center: (168/0)
(1/22) center.orig: (168/0)
(2/22) subjectID: (168/0)
(3/22) subjectID.orig: (168/0)
(4/22) uniqueID: (168/0)
(5/22) uniqueID.orig: (168/0)
(6/22) dischargeHomeTherapyVentilator: (168/0)
(7/22) dischargeHomeTherapyVentilator.orig: (140/28)
(8/22) dischargeHomeTherapyOxygen: (168/0)
(9/22) dischargeHomeTherapyOxygen.orig: (140/28)
(10/22) dischargeHomeTherapyGavageTubeFeed: (168/0)
(11/22) dischargeHomeTherapyGavageTubeFeed.orig: (140/28)
(12/22) dischargeHomeTherapyGastrostomyTubeFeed: (168/0)
(13/22) dischargeHomeTherapyGastrostomyTubeFeed.orig: (140/28)
(14/22) dischargeHomeTherapyTemperatureBlanket: (168/0)
(15/22) dischargeHomeTherapyTemperatureBlanket.orig: (140/28)
(16/22) dischargeHomeTherapyAnticonvulsantMedication: (168/0)
(17/22) dischargeHomeTherapyAnticonvulsantMedication.orig: (140/28)
(18/22) dischargeHomeTherapyOther: (168/0)
(19/22) dischargeHomeTherapyOther.orig: (140/28)
(20/22) dischargeHomeTherapyOtherText: (168/0)
(21/22) dischargeHomeTherapy