In [1]:
import pandas as pd
# Turn on pandas copy-on-write mode for the whole session.
pd.options.mode.copy_on_write = True
import warnings
# Keep pandas PerformanceWarning messages out of the cell outputs.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import os

# Project-local harmonization package; `cfg` exposes the loaded configuration (cfg.config is read below).
import COMBINE_harmonizer
from COMBINE_harmonizer import cfg

## 00-0. Variables

In [2]:
# Study and data-dictionary sheet handled by this notebook (constants come from COMBINE_harmonizer).
study_name = COMBINE_harmonizer.STUDY_LH
sheet_name = COMBINE_harmonizer.SHEET_FOLLOW_UP

# Base directory used to locate config.yaml and the data dictionary; relative, so it depends on the working directory.
root_dir = '..'

In [3]:
# Initialize the harmonizer from config.yaml under root_dir.
# Presumably this populates cfg.config, which the next cell reads — confirm in COMBINE_harmonizer.init.
COMBINE_harmonizer.init(f'{root_dir}/config.yaml')

In [4]:
# Directory the per-sheet CSVs are read from: <out_dir>/out-<study_name>.
input_dir = f"{cfg.config['out_dir']}/out-{study_name}"
# Data dictionary file passed to init_mapping, build_value_map and build_variable_order_map below.
data_dict_filename = f"{root_dir}/{COMBINE_harmonizer.DATA_DICTIONARY_EXCEL}"
# Directory the normalized CSVs are written to: <out_dir>/out-<study_name>-normalized.
out_dir = f"{cfg.config['out_dir']}/out-{study_name}-normalized"

# Create the output directory up front; exist_ok=True keeps re-runs from failing when it already exists.
os.makedirs(out_dir, exist_ok=True)

In [5]:
# Load the mapping for this study from the data dictionary, then build the value map for the follow-up sheet.
# _VALUE_MAP is passed to every normalize_value call further down.
# NOTE(review): the captured log shows build_value_map progress indices above the reported total
# (e.g. 1108/377); the counter inside build_value_map may be off — confirm.
COMBINE_harmonizer.init_mapping(data_dict_filename, study_name)
_VALUE_MAP = COMBINE_harmonizer.build_value_map(data_dict_filename, sheet_name)

[INFO] init_mapping (0/125): signOfHIETone
[INFO] init_mapping (1/125): signOfHIELvlOfCons
[INFO] init_mapping (2/125): signOfHIEPosture
[INFO] init_mapping (3/125): signOfHIEMoro
[INFO] init_mapping (4/125): signOfHIESuck
[INFO] init_mapping (5/125): signOfHIERespiratory
[INFO] init_mapping (6/125): signOfHIEHeartRate
[INFO] init_mapping (7/125): signOfHIEPupils
[INFO] init_mapping (8/125): signOfHIESpontaneousActivity
[INFO] init_mapping (9/125): noNeuroExamReason
[INFO] init_mapping (10/125): consentStatus
[INFO] init_mapping (11/125): treatmentAssign
[INFO] init_mapping (12/125): targetTreatmentTemperature
[INFO] init_mapping (13/125): blanketType
[INFO] init_mapping (14/125): encephalopathyLevel
[INFO] init_mapping (15/125): infantAge
[INFO] init_mapping (16/125): infantSex
[INFO] init_mapping (17/125): ethnicity
[INFO] init_mapping (18/125): education
[INFO] init_mapping (19/125): insurance
[INFO] init_mapping (20/125): race
[INFO] init_mapping (21/125): maritalStatus
[INFO] init

build_value_map: (1108/377) variable: followupCenter type: center
build_value_map: (1109/377) variable: siteID type: text
build_value_map: (1110/377) variable: birthDate type: date
build_value_map: (1111/377) variable: visitDate type: date
build_value_map: (1112/377) variable: birthNumber type: int
build_value_map: (1113/377) variable: center type: center
build_value_map: (1114/377) variable: subjectID type: text
build_value_map: (1115/377) variable: followupID type: text
build_value_map: (1116/377) variable: center_orig type: text
build_value_map: (1117/377) variable: SESVisitDate type: date
build_value_map: (1118/377) variable: SESBirthDate type: date
build_value_map: (1119/377) variable: chronologicalAge_mo type: int
build_value_map: (1120/377) variable: correctedAge_mo type: int
build_value_map: (1121/377) variable: underStateSupervision type: bool
build_value_map: (1122/377) variable: primaryCaretaker type: relationship
build_value_map: (1123/377) variable: otherCaretaker type: re

In [6]:
# Build the variable order map for the follow-up sheet; it is passed as order_map to every normalize_value call below.
_ORDER_MAP = COMBINE_harmonizer.build_variable_order_map(data_dict_filename, sheet_name)

## 20-00-follow-up

In [7]:
# File name for this section; the next cell joins it onto input_dir to read and onto out_dir to write.
base_filename = '20-00-follow-up.csv'

In [8]:
# Read the sheet named by base_filename; object dtype keeps pandas from inferring numeric/date types.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Apply the data-dictionary value map (the order map is handed over as order_map).
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# For this sheet uniqueFollowupID is a straight copy of uniqueID.
df['uniqueFollowupID'] = df['uniqueID'].copy()

# Post-process with the identifier columns listed as first_columns.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
        'uniqueFollowupID',
    ],
)

# Write the result under the same file name in the normalized output directory, without the index.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/10) normalize_value: column: center
(1/10) normalize_value: column: subjectID
(2/10) normalize_value: column: uniqueID
(3/10) normalize_value: column: followupCenter
(4/10) normalize_value: column: followupID
(5/10) normalize_value: column: siteID
(6/10) normalize_value: column: birthDate
(7/10) normalize_value: column: visitDate
(8/10) normalize_value: column: birthNumber
(9/10) normalize_value: column: center_orig


In [9]:
# Print a per-column summary of the normalized follow-up frame as a sanity check.
# The printed pair looks like (non-missing/missing) counts — confirm in COMBINE_harmonizer.column_info.
COMBINE_harmonizer.column_info(df)

(0/21) center: (152/0)
(1/21) subjectID: (152/0)
(2/21) uniqueID: (152/0)
(3/21) followupCenter: (152/0)
(4/21) followupID: (152/0)
(5/21) uniqueFollowupID: (152/0)
(6/21) center.orig: (152/0)
(7/21) subjectID.orig: (152/0)
(8/21) uniqueID.orig: (152/0)
(9/21) followupCenter.orig: (152/0)
(10/21) followupID.orig: (152/0)
(11/21) siteID: (152/0)
(12/21) siteID.orig: (152/0)
(13/21) birthDate: (152/0)
(14/21) birthDate.orig: (152/0)
(15/21) visitDate: (152/0)
(16/21) visitDate.orig: (145/7)
(17/21) birthNumber: (152/0)
(18/21) birthNumber.orig: (144/8)
(19/21) center_orig: (152/0)
(20/21) center_orig.orig: (3/149)


## 20-01-ses

In [10]:
# File name of the SES sheet; used for both the input and the normalized output in the next cell.
base_filename = '20-01-ses.csv'

In [11]:
# Load the SES sheet with every column as object dtype (no type inference by pandas).
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Normalize values against the data-dictionary value map.
# NOTE(review): the captured run logs "[WARN] unable to bool" for the value '**' right after the
# inSchoolOtherCaretaker column is announced — the raw data probably needs a look.
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# uniqueFollowupID mirrors uniqueID on this sheet.
df['uniqueFollowupID'] = df['uniqueID'].copy()

# Post-process, naming the identifier columns as first_columns.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
        'uniqueFollowupID',
    ],
)

# Persist the normalized sheet (index omitted) next to the other normalized outputs.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/79) normalize_value: column: center
(1/79) normalize_value: column: subjectID
(2/79) normalize_value: column: uniqueID
(3/79) normalize_value: column: followupCenter
(4/79) normalize_value: column: followupID
(5/79) normalize_value: column: siteID
(6/79) normalize_value: column: underStateSupervision
(7/79) normalize_value: column: otherContributeMoneyToChildHousehold
(8/79) normalize_value: column: workPrimaryCaretaker
(9/79) normalize_value: column: workOtherCaretaker
(10/79) normalize_value: column: inSchoolPrimaryCaretaker
(11/79) normalize_value: column: inSchoolOtherCaretaker
[WARN] unable to bool: val: (**/<class 'str'>)
(12/79) normalize_value: column: primaryLanguageChildOtherText
(13/79) normalize_value: column: isSecondaryLanguageChild
(14/79) normalize_value: column: secondaryLanguageChildOtherText
(15/79) normalize_value: column: visitingNurseNeed
(16/79) normalize_value: column: homeNurseNeed
(17/79) normalize_value: column: otPtNeed
(18/79) normalize_value: column: sp

In [12]:
# Print the per-column summary for the normalized SES frame.
COMBINE_harmonizer.column_info(df)

(0/159) center: (152/0)
(1/159) subjectID: (152/0)
(2/159) uniqueID: (152/0)
(3/159) followupCenter: (152/0)
(4/159) followupID: (152/0)
(5/159) uniqueFollowupID: (152/0)
(6/159) center.orig: (152/0)
(7/159) subjectID.orig: (152/0)
(8/159) uniqueID.orig: (152/0)
(9/159) followupCenter.orig: (152/0)
(10/159) followupID.orig: (152/0)
(11/159) siteID: (152/0)
(12/159) siteID.orig: (152/0)
(13/159) SESVisitDate: (152/0)
(14/159) SESVisitDate.orig: (142/10)
(15/159) SESBirthDate: (152/0)
(16/159) SESBirthDate.orig: (142/10)
(17/159) chronologicalAge_mo: (152/0)
(18/159) chronologicalAge_mo.orig: (142/10)
(19/159) correctedAge_mo: (152/0)
(20/159) correctedAge_mo.orig: (141/11)
(21/159) underStateSupervision: (152/0)
(22/159) underStateSupervision.orig: (142/10)
(23/159) primaryCaretaker: (152/0)
(24/159) primaryCaretaker.orig: (142/10)
(25/159) otherCaretaker: (152/0)
(26/159) otherCaretaker.orig: (128/24)
(27/159) maritalStatusPrimaryCaretaker: (152/0)
(28/159) maritalStatusPrimaryCaretake

## 20-02-medical-history

In [13]:
# File name of the medical-history sheet; read from input_dir and written to out_dir by the next cell.
base_filename = '20-02-medical-history.csv'

In [14]:
# Read the medical-history sheet; dtype=object leaves all values untyped.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Run the value normalization driven by _VALUE_MAP, passing _ORDER_MAP as order_map.
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Duplicate uniqueID into uniqueFollowupID.
df['uniqueFollowupID'] = df['uniqueID'].copy()

# Post-process; the identifier columns are supplied as first_columns.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
        'uniqueFollowupID',
    ],
)

# Store the normalized CSV under the same base name, leaving out the index column.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/68) normalize_value: column: center
(1/68) normalize_value: column: subjectID
(2/68) normalize_value: column: uniqueID
(3/68) normalize_value: column: followupCenter
(4/68) normalize_value: column: followupID
(5/68) normalize_value: column: siteID
(6/68) normalize_value: column: rehospitalize
(7/68) normalize_value: column: operation
(8/68) normalize_value: column: operationTypanostomyTube
(9/68) normalize_value: column: operationTracheostomy
(10/68) normalize_value: column: operationEyeSurgery
(11/68) normalize_value: column: operationHerniaSurgery
(12/68) normalize_value: column: operationGastrostomyTube
(13/68) normalize_value: column: operationFundoplication
(14/68) normalize_value: column: operationShuntForHydrocephalus
(15/68) normalize_value: column: operationReanastomosisOfLargeOrSmallIntenstine
(16/68) normalize_value: column: operationPDALigation
(17/68) normalize_value: column: operationBrochoscopy
(18/68) normalize_value: column: operationHypospadiusRepair
(19/68) normal

In [15]:
# Print the per-column summary for the normalized medical-history frame.
COMBINE_harmonizer.column_info(df)

(0/137) center: (152/0)
(1/137) subjectID: (152/0)
(2/137) uniqueID: (152/0)
(3/137) followupCenter: (152/0)
(4/137) followupID: (152/0)
(5/137) uniqueFollowupID: (152/0)
(6/137) center.orig: (152/0)
(7/137) subjectID.orig: (152/0)
(8/137) uniqueID.orig: (152/0)
(9/137) followupCenter.orig: (152/0)
(10/137) followupID.orig: (152/0)
(11/137) siteID: (152/0)
(12/137) siteID.orig: (152/0)
(13/137) rehospitalize: (152/0)
(14/137) rehospitalize.orig: (143/9)
(15/137) numberRehospitalize: (152/0)
(16/137) numberRehospitalize.orig: (35/117)
(17/137) operation: (152/0)
(18/137) operation.orig: (143/9)
(19/137) operationTypanostomyTube: (152/0)
(20/137) operationTypanostomyTube.orig: (25/127)
(21/137) operationTracheostomy: (152/0)
(22/137) operationTracheostomy.orig: (25/127)
(23/137) operationEyeSurgery: (152/0)
(24/137) operationEyeSurgery.orig: (25/127)
(25/137) operationEyeSurgeryReason: (152/0)
(26/137) operationEyeSurgeryReason.orig: (2/150)
(27/137) operationHerniaSurgery: (152/0)
(28/1

## 20-03-medical-exam

In [16]:
# File name of the medical-exam sheet; shared by the read and write steps in the next cell.
base_filename = '20-03-medical-exam.csv'

In [17]:
# Load the medical-exam sheet as object-typed columns only.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Normalize each column through the value map built from the data dictionary.
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# uniqueFollowupID is filled from uniqueID.
df['uniqueFollowupID'] = df['uniqueID'].copy()

# Post-process with the identifier columns requested first.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
        'uniqueFollowupID',
    ],
)

# Write the normalized sheet to out_dir without the DataFrame index.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/97) normalize_value: column: center
(1/97) normalize_value: column: subjectID
(2/97) normalize_value: column: uniqueID
(3/97) normalize_value: column: followupCenter
(4/97) normalize_value: column: followupID
(5/97) normalize_value: column: siteID
(6/97) normalize_value: column: audiologicAssessment
(7/97) normalize_value: column: audiologicPendingForAssessment
(8/97) normalize_value: column: visualReinforcementAudiometry
(9/97) normalize_value: column: ABR
(10/97) normalize_value: column: hearingTestUnknown
(11/97) normalize_value: column: dysphagia
(12/97) normalize_value: column: aspiration
(13/97) normalize_value: column: abnormalVoice
(14/97) normalize_value: column: drooling
(15/97) normalize_value: column: nothingByMouth
(16/97) normalize_value: column: observedAbnormalMovement
(17/97) normalize_value: column: observedAbnormalMovementShortJerky
(18/97) normalize_value: column: observedAbnormalMovementSlowWrithing
(19/97) normalize_value: column: observedAbnormalMovementTremor

In [18]:
# Print the per-column summary for the normalized medical-exam frame.
COMBINE_harmonizer.column_info(df)

(0/195) center: (152/0)
(1/195) subjectID: (152/0)
(2/195) uniqueID: (152/0)
(3/195) followupCenter: (152/0)
(4/195) followupID: (152/0)
(5/195) uniqueFollowupID: (152/0)
(6/195) center.orig: (152/0)
(7/195) subjectID.orig: (152/0)
(8/195) uniqueID.orig: (152/0)
(9/195) followupCenter.orig: (152/0)
(10/195) followupID.orig: (152/0)
(11/195) siteID: (152/0)
(12/195) siteID.orig: (152/0)
(13/195) weight_kg: (152/0)
(14/195) weight_kg.orig: (141/11)
(15/195) length_cm: (152/0)
(16/195) length_cm.orig: (141/11)
(17/195) headCircumference_cm: (152/0)
(18/195) headCircumference_cm.orig: (141/11)
(19/195) strabismusRight: (152/0)
(20/195) strabismusRight.orig: (141/11)
(21/195) strabismusLeft: (152/0)
(22/195) strabismusLeft.orig: (141/11)
(23/195) nystagmusRight: (152/0)
(24/195) nystagmusRight.orig: (141/11)
(25/195) nystagmusLeft: (152/0)
(26/195) nystagmusLeft.orig: (141/11)
(27/195) rovingEyeMovementRight: (152/0)
(28/195) rovingEyeMovementRight.orig: (141/11)
(29/195) rovingEyeMovementL

## 20-04-bayley-iii

In [19]:
# File name of the Bayley-III sheet; the next cell reads it from input_dir and writes it to out_dir.
base_filename = '20-04-bayley-iii.csv'

In [20]:
# Read the Bayley-III sheet; object dtype avoids any automatic type conversion on load.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Value normalization using _VALUE_MAP, with _ORDER_MAP given as order_map.
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Copy uniqueID into the uniqueFollowupID column.
df['uniqueFollowupID'] = df['uniqueID'].copy()

# Post-process and ask for the identifier columns to come first.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
        'uniqueFollowupID',
    ],
)

# Save the normalized frame as CSV (no index) in the normalized output directory.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/46) normalize_value: column: center
(1/46) normalize_value: column: subjectID
(2/46) normalize_value: column: uniqueID
(3/46) normalize_value: column: followupCenter
(4/46) normalize_value: column: followupID
(5/46) normalize_value: column: siteID
(6/46) normalize_value: column: BayleyIIICognitiveSubtest
(7/46) normalize_value: column: BayleyIIIReasonNoSuccessCognitiveSubtestText
(8/46) normalize_value: column: BayleyIIILanguageReceptiveSubtest
(9/46) normalize_value: column: BayleyIIIReasonNoSuccessLanguageReceptiveSubtestText
(10/46) normalize_value: column: BayleyIIILanguageExpressiveSubtest
(11/46) normalize_value: column: BayleyIIIReasonNoSuccessLanguageExpressiveSubtestText
(12/46) normalize_value: column: BayleyIIIMotorFineSubtest
(13/46) normalize_value: column: BayleyIIIReasonNoSuccessMotorFineSubtestText
(14/46) normalize_value: column: BayleyIIIMotorGrossSubtest
(15/46) normalize_value: column: BayleyIIIReasonNoSuccessMotorGrossSubtestText
(16/46) normalize_value: column:

In [21]:
# Print the per-column summary for the normalized Bayley-III frame.
COMBINE_harmonizer.column_info(df)

(0/93) center: (152/0)
(1/93) subjectID: (152/0)
(2/93) uniqueID: (152/0)
(3/93) followupCenter: (152/0)
(4/93) followupID: (152/0)
(5/93) uniqueFollowupID: (152/0)
(6/93) center.orig: (152/0)
(7/93) subjectID.orig: (152/0)
(8/93) uniqueID.orig: (152/0)
(9/93) followupCenter.orig: (152/0)
(10/93) followupID.orig: (152/0)
(11/93) siteID: (152/0)
(12/93) siteID.orig: (152/0)
(13/93) BayleyIIICognitiveSubtest: (152/0)
(14/93) BayleyIIICognitiveSubtest.orig: (141/11)
(15/93) BayleyIIIReasonNoSuccessCognitiveSubtest: (152/0)
(16/93) BayleyIIIReasonNoSuccessCognitiveSubtest.orig: (9/143)
(17/93) BayleyIIIReasonNoSuccessCognitiveSubtestText: (152/0)
(18/93) BayleyIIIReasonNoSuccessCognitiveSubtestText.orig: (0/152)
(19/93) BayleyIIILanguageReceptiveSubtest: (152/0)
(20/93) BayleyIIILanguageReceptiveSubtest.orig: (141/11)
(21/93) BayleyIIIReasonNoSuccessLanguageReceptiveSubtest: (152/0)
(22/93) BayleyIIIReasonNoSuccessLanguageReceptiveSubtest.orig: (9/143)
(23/93) BayleyIIIReasonNoSuccessLangu

## 20-05. GMFCS

In [22]:
# File name of the GMFCS sheet; used for both reading and writing in the next cell.
base_filename = '20-05-gmfcs.csv'

In [23]:
# Load the GMFCS sheet with all columns as object dtype.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Normalize values via the data-dictionary value map.
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# uniqueFollowupID takes the same values as uniqueID here.
df['uniqueFollowupID'] = df['uniqueID'].copy()

# Post-process; first_columns names the identifier columns.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
        'uniqueFollowupID',
    ],
)

# Emit the normalized CSV into out_dir, dropping the index.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/7) normalize_value: column: center
(1/7) normalize_value: column: subjectID
(2/7) normalize_value: column: uniqueID
(3/7) normalize_value: column: followupCenter
(4/7) normalize_value: column: followupID
(5/7) normalize_value: column: siteID
(6/7) normalize_value: column: grossMotorFunctionLevel


In [24]:
# Print the per-column summary for the normalized GMFCS frame.
COMBINE_harmonizer.column_info(df)

(0/15) center: (152/0)
(1/15) subjectID: (152/0)
(2/15) uniqueID: (152/0)
(3/15) followupCenter: (152/0)
(4/15) followupID: (152/0)
(5/15) uniqueFollowupID: (152/0)
(6/15) center.orig: (152/0)
(7/15) subjectID.orig: (152/0)
(8/15) uniqueID.orig: (152/0)
(9/15) followupCenter.orig: (152/0)
(10/15) followupID.orig: (152/0)
(11/15) siteID: (152/0)
(12/15) siteID.orig: (152/0)
(13/15) grossMotorFunctionLevel: (152/0)
(14/15) grossMotorFunctionLevel.orig: (141/11)


## 20-06. Status

In [25]:
# File name of the status sheet; joined onto input_dir and out_dir in the next cell.
base_filename = '20-06-status.csv'

In [26]:
# Read the status sheet; nothing is type-inferred because dtype is object.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Apply _VALUE_MAP to the frame, supplying _ORDER_MAP as order_map.
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Populate uniqueFollowupID from uniqueID.
df['uniqueFollowupID'] = df['uniqueID'].copy()

# Post-process with the identifier columns as first_columns.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
        'uniqueFollowupID',
    ],
)

# Write the normalized status sheet, index excluded.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/13) normalize_value: column: center
(1/13) normalize_value: column: subjectID
(2/13) normalize_value: column: uniqueID
(3/13) normalize_value: column: followupCenter
(4/13) normalize_value: column: followupID
(5/13) normalize_value: column: siteID
(6/13) normalize_value: column: statusBirthDate
(7/13) normalize_value: column: statusVisitDate
(8/13) normalize_value: column: childFinalStatus
(9/13) normalize_value: column: deathDate
(10/13) normalize_value: column: reasonLossFollowUp
(11/13) normalize_value: column: firstVisitDate
(12/13) normalize_value: column: finalVisitDate


In [27]:
# Print the per-column summary for the normalized status frame.
COMBINE_harmonizer.column_info(df)

(0/27) center: (152/0)
(1/27) subjectID: (152/0)
(2/27) uniqueID: (152/0)
(3/27) followupCenter: (152/0)
(4/27) followupID: (152/0)
(5/27) uniqueFollowupID: (152/0)
(6/27) center.orig: (152/0)
(7/27) subjectID.orig: (152/0)
(8/27) uniqueID.orig: (152/0)
(9/27) followupCenter.orig: (152/0)
(10/27) followupID.orig: (152/0)
(11/27) siteID: (152/0)
(12/27) siteID.orig: (152/0)
(13/27) statusVisitDate: (152/0)
(14/27) statusVisitDate.orig: (0/152)
(15/27) statusBirthDate: (152/0)
(16/27) statusBirthDate.orig: (152/0)
(17/27) childFinalStatus: (152/0)
(18/27) childFinalStatus.orig: (152/0)
(19/27) deathDate: (152/0)
(20/27) deathDate.orig: (2/150)
(21/27) reasonLossFollowUp: (152/0)
(22/27) reasonLossFollowUp.orig: (9/143)
(23/27) firstVisitDate: (152/0)
(24/27) firstVisitDate.orig: (140/12)
(25/27) finalVisitDate: (152/0)
(26/27) finalVisitDate.orig: (140/12)


## 20-08. Lost Follow-up

In [28]:
# File name of the lost-to-follow-up sheet; the next cell uses it for both input and output paths.
base_filename = '20-08-lost.csv'

In [29]:
# Load the lost-to-follow-up sheet, keeping every column as object dtype.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Normalize the values with the map derived from the data dictionary.
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# uniqueFollowupID is set to a copy of uniqueID.
df['uniqueFollowupID'] = df['uniqueID'].copy()

# Post-process, listing the identifier columns to lead the frame.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
        'uniqueFollowupID',
    ],
)

# Save to the normalized directory under the same file name; the index is not written.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/61) normalize_value: column: center
(1/61) normalize_value: column: subjectID
(2/61) normalize_value: column: uniqueID
(3/61) normalize_value: column: followupCenter
(4/61) normalize_value: column: followupID
(5/61) normalize_value: column: siteID
(6/61) normalize_value: column: lostFollowUpInformationAvailableIndirectSrc
(7/61) normalize_value: column: lostFollowUpChildAlive
(8/61) normalize_value: column: lostFollowUpInterview
(9/61) normalize_value: column: lostFollowUpAnyQuestionCompleteChartReview
(10/61) normalize_value: column: interviewWalkAlone
(11/61) normalize_value: column: interviewSittingAlong
(12/61) normalize_value: column: interviewHeadControl
(13/61) normalize_value: column: interviewSee
(14/61) normalize_value: column: interviewEyeExam
(15/61) normalize_value: column: interviewNeedWearGlasses
(16/61) normalize_value: column: interviewHear
(17/61) normalize_value: column: interviewHearExam
(18/61) normalize_value: column: interviewNeedWearHearingAid
(19/61) normali

In [30]:
# Print the per-column summary for the normalized lost-to-follow-up frame.
COMBINE_harmonizer.column_info(df)

(0/123) center: (152/0)
(1/123) subjectID: (152/0)
(2/123) uniqueID: (152/0)
(3/123) followupCenter: (152/0)
(4/123) followupID: (152/0)
(5/123) uniqueFollowupID: (152/0)
(6/123) center.orig: (152/0)
(7/123) subjectID.orig: (152/0)
(8/123) uniqueID.orig: (152/0)
(9/123) followupCenter.orig: (152/0)
(10/123) followupID.orig: (152/0)
(11/123) siteID: (152/0)
(12/123) siteID.orig: (152/0)
(13/123) lostFollowUpInformationAvailableIndirectSrc: (152/0)
(14/123) lostFollowUpInformationAvailableIndirectSrc.orig: (10/142)
(15/123) lostFollowUpLastContactDate: (152/0)
(16/123) lostFollowUpLastContactDate.orig: (3/149)
(17/123) lostFollowUpFormCompleteDate: (152/0)
(18/123) lostFollowUpFormCompleteDate.orig: (3/149)
(19/123) lostFollowUpChildAlive: (152/0)
(20/123) lostFollowUpChildAlive.orig: (7/145)
(21/123) lostFollowUpLastKnownAliveCorrectedAge_mo: (152/0)
(22/123) lostFollowUpLastKnownAliveCorrectedAge_mo.orig: (6/146)
(23/123) lostFollowUpDeathDate: (152/0)
(24/123) lostFollowUpDeathDate.or

## 20-09. Secondary Analysis

In [31]:
# File name of the secondary-analysis sheet; used for reading and writing in the next cell.
base_filename = '20-09-secondary.csv'

In [32]:
# Read the secondary-analysis sheet as untyped (object) columns.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Normalize through _VALUE_MAP; _ORDER_MAP goes in as order_map.
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# Fill uniqueFollowupID with the uniqueID values.
df['uniqueFollowupID'] = df['uniqueID'].copy()

# Post-process with the identifier columns passed as first_columns.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
        'uniqueFollowupID',
    ],
)

# Write the normalized secondary-analysis CSV without an index column.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/12) normalize_value: column: center
(1/12) normalize_value: column: subjectID
(2/12) normalize_value: column: uniqueID
(3/12) normalize_value: column: followupCenter
(4/12) normalize_value: column: blindness
(5/12) normalize_value: column: hearingImpairedLevel
(6/12) normalize_value: column: hearingImpairedWithAid
(7/12) normalize_value: column: grossMotorFunctionLevelSeverity
(8/12) normalize_value: column: cerebralPalsyMerge
(9/12) normalize_value: column: moderateSevereCerebralPalsy
(10/12) normalize_value: column: followupID
(11/12) normalize_value: column: afterDischargeSeizure


In [33]:
# Print the per-column summary for the normalized secondary-analysis frame.
# NOTE(review): the captured output reports 168 per column here, while the 20-00 … 20-08 sheets
# report 152 — confirm the additional rows are expected for this sheet.
COMBINE_harmonizer.column_info(df)

(0/25) center: (168/0)
(1/25) subjectID: (168/0)
(2/25) uniqueID: (168/0)
(3/25) followupCenter: (168/0)
(4/25) followupID: (168/0)
(5/25) uniqueFollowupID: (168/0)
(6/25) center.orig: (168/0)
(7/25) subjectID.orig: (168/0)
(8/25) uniqueID.orig: (168/0)
(9/25) followupCenter.orig: (151/17)
(10/25) followupID.orig: (133/35)
(11/25) blindness: (168/0)
(12/25) blindness.orig: (141/27)
(13/25) moderateSevereCerebralPalsy: (168/0)
(14/25) moderateSevereCerebralPalsy.orig: (141/27)
(15/25) cerebralPalsyMerge: (168/0)
(16/25) cerebralPalsyMerge.orig: (141/27)
(17/25) grossMotorFunctionLevelSeverity: (168/0)
(18/25) grossMotorFunctionLevelSeverity.orig: (141/27)
(19/25) hearingImpairedWithAid: (168/0)
(20/25) hearingImpairedWithAid.orig: (141/27)
(21/25) hearingImpairedLevel: (168/0)
(22/25) hearingImpairedLevel.orig: (141/27)
(23/25) afterDischargeSeizure: (168/0)
(24/25) afterDischargeSeizure.orig: (143/25)


## 20-10. Outcome

In [34]:
# File name of the outcome sheet; the next cell joins it onto input_dir and out_dir.
base_filename = '20-10-outcome.csv'

In [35]:
# Load the outcome sheet; object dtype means values arrive exactly as stored in the CSV.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Run normalize_value with the shared value map and order map.
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    order_map=_ORDER_MAP,
)

# uniqueFollowupID is duplicated from uniqueID.
df['uniqueFollowupID'] = df['uniqueID'].copy()

# Post-process; identifier columns are requested up front via first_columns.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
        'uniqueFollowupID',
    ],
)

# Store the normalized outcome sheet in out_dir, omitting the index.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/13) normalize_value: column: center
(1/13) normalize_value: column: subjectID
(2/13) normalize_value: column: uniqueID
(3/13) normalize_value: column: followupCenter
(4/13) normalize_value: column: BayleyIIICognitive
(5/13) normalize_value: column: deathBeforeFollowup
(6/13) normalize_value: column: normalPrimaryOutcome
(7/13) normalize_value: column: deathBeforeDischarge
(8/13) normalize_value: column: followupID
(9/13) normalize_value: column: moderateSevereDisabilitySurvivor
(10/13) normalize_value: column: disabilityLevelSurvivor
(11/13) normalize_value: column: moderateSevereDisabilityOrDeath
(12/13) normalize_value: column: disabilityLevelDeath4Category


In [36]:
# Print the per-column summary for the normalized outcome frame.
# NOTE(review): as with 20-09, the captured output reports 168 per column versus 152 in the
# 20-00 … 20-08 sheets — confirm this is expected.
COMBINE_harmonizer.column_info(df)

(0/27) center: (168/0)
(1/27) subjectID: (168/0)
(2/27) uniqueID: (168/0)
(3/27) followupCenter: (168/0)
(4/27) followupID: (168/0)
(5/27) uniqueFollowupID: (168/0)
(6/27) center.orig: (168/0)
(7/27) subjectID.orig: (168/0)
(8/27) uniqueID.orig: (168/0)
(9/27) followupCenter.orig: (151/17)
(10/27) followupID.orig: (133/35)
(11/27) normalPrimaryOutcome: (168/0)
(12/27) normalPrimaryOutcome.orig: (139/29)
(13/27) BayleyIIICognitive: (168/0)
(14/27) BayleyIIICognitive.orig: (138/30)
(15/27) deathBeforeFollowup: (168/0)
(16/27) deathBeforeFollowup.orig: (165/3)
(17/27) deathBeforeDischarge: (168/0)
(18/27) deathBeforeDischarge.orig: (167/1)
(19/27) disabilityLevelSurvivor: (168/0)
(20/27) disabilityLevelSurvivor.orig: (139/29)
(21/27) disabilityLevelDeath4Category: (168/0)
(22/27) disabilityLevelDeath4Category.orig: (157/11)
(23/27) moderateSevereDisabilityOrDeath: (168/0)
(24/27) moderateSevereDisabilityOrDeath.orig: (157/11)
(25/27) moderateSevereDisabilitySurvivor: (168/0)
(26/27) moder

## 20-07-readmission

In [37]:
# Column(s) handed to normalize_value as the flatten ids for the readmission sheet
# (the captured log echoes them in its flatten_index line).
_FLATTEN_IDS = ['readmissionNumber']
# File name of the readmission sheet; used for both the input and output paths in the next cell.
base_filename = '20-07-readmission.csv'

In [38]:
# Read the readmission sheet with every column as object dtype.
filename = os.path.join(input_dir, base_filename)
df = pd.read_csv(filename, dtype=object)

# Normalize values; unlike the other sheets, the flatten ids are passed as the third positional argument.
df = COMBINE_harmonizer.normalize_value(
    df,
    _VALUE_MAP,
    _FLATTEN_IDS,
    order_map=_ORDER_MAP,
)

# Post-process with the identifier columns first; no uniqueFollowupID column is added for this sheet.
df = COMBINE_harmonizer.postprocess(
    df,
    first_columns=[
        'center',
        'subjectID',
        'uniqueID',
        'followupCenter',
        'followupID',
    ],
)

# Write the normalized readmission CSV to out_dir without the index.
out_filename = os.path.join(out_dir, base_filename)
df.to_csv(out_filename, index=False)

(0/9) normalize_value: column: center
(1/9) normalize_value: column: subjectID
(2/9) normalize_value: column: uniqueID
(3/9) normalize_value: column: followupCenter
(4/9) normalize_value: column: followupID
(5/9) normalize_value: column: readmissionPrimaryCauseOtherText
(6/9) normalize_value: column: readmissionNumber
(7/9) normalize_value: column: readmissionTimePeriod
(8/9) normalize_value: column: readmissionPrimaryCause
flatten_index: flatten_ids: ['readmissionNumber'] unique_id_map: {np.int64(1): np.int64(1), np.int64(2): np.int64(2), np.int64(3): np.int64(3), np.int64(4): np.int64(4), np.int64(5): np.int64(5), np.int64(6): np.int64(6), np.int64(7): np.int64(7), np.int64(8): np.int64(8), np.int64(9): np.int64(9), np.int64(10): np.int64(10), np.int64(11): np.int64(11)} the_type: int64


In [39]:
# Print the per-column summary for the normalized readmission frame.
# The captured output reports 65 per column and includes a _flatten_index column that the other sheets do not have.
COMBINE_harmonizer.column_info(df)

(0/19) center: (65/0)
(1/19) subjectID: (65/0)
(2/19) uniqueID: (65/0)
(3/19) followupCenter: (65/0)
(4/19) followupID: (65/0)
(5/19) center.orig: (65/0)
(6/19) subjectID.orig: (65/0)
(7/19) uniqueID.orig: (65/0)
(8/19) followupCenter.orig: (65/0)
(9/19) followupID.orig: (65/0)
(10/19) _flatten_index: (65/0)
(11/19) readmissionNumber: (65/0)
(12/19) readmissionNumber.orig: (65/0)
(13/19) readmissionTimePeriod: (65/0)
(14/19) readmissionTimePeriod.orig: (65/0)
(15/19) readmissionPrimaryCause: (65/0)
(16/19) readmissionPrimaryCause.orig: (65/0)
(17/19) readmissionPrimaryCauseOtherText: (65/0)
(18/19) readmissionPrimaryCauseOtherText.orig: (7/58)
