In [1]:
import pandas as pd
pd.options.mode.copy_on_write = True
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import COMBINE_harmonizer
from COMBINE_harmonizer import cfg

## 01. init

In [2]:
# Relative base directory; the config file and data dictionary paths below are built as f'{root_dir}/...'.
root_dir = '../'

In [3]:
# Initialise the harmonizer package from the project's config.yaml.
# NOTE(review): presumably this is what populates `cfg.config` — confirm against COMBINE_harmonizer.
COMBINE_harmonizer.init(f'{root_dir}/config.yaml')

In [4]:
# Path of the data dictionary; the file name itself comes from COMBINE_harmonizer.DATA_DICTIONARY_EXCEL.
data_dict_filename = f"{root_dir}/{COMBINE_harmonizer.DATA_DICTIONARY_EXCEL}"
# Output root taken from the config; later cells read and write under f'{out_dir}/out-merged-normalized'.
out_dir = cfg.config['out_dir']


### 01-01. init numeric value mapping

In [5]:
# Both calls are driven by the data dictionary path defined above.
COMBINE_harmonizer.init_rank_mapping(data_dict_filename)

# Value map built for the main sheet (SHEET_MAIN); it is handed to
# COMBINE_harmonizer.numeric_values when the neuro exam files are scored.
_NUMERIC_VALUE_MAP_MAIN = COMBINE_harmonizer.build_numeric_value_map(data_dict_filename, COMBINE_harmonizer.SHEET_MAIN)

build_numeric_value_map: (0/1108) variable: center type: center
[INFO] _build_numeric_value_map: to inv-text: type: center
build_numeric_value_map: (1/1108) variable: subjectID type: text
[INFO] _build_numeric_value_map: to inv-text: type: text
build_numeric_value_map: (2/1108) variable: siteID type: text
[INFO] _build_numeric_value_map: to inv-text: type: text
build_numeric_value_map: (3/1108) variable: birthDate type: date
[INFO] _build_numeric_value_map: to inv-text: type: date
build_numeric_value_map: (4/1108) variable: birthNumber type: int
build_numeric_value_map: (5/1108) variable: screenComment type: text
[INFO] _build_numeric_value_map: to inv-text: type: text
build_numeric_value_map: (6/1108) variable: coreTempLess32p5CGreaterEq2Hr_e type: bool
build_numeric_value_map: (7/1108) variable: coreTempLess33p5CGreater1Hr_e type: bool
build_numeric_value_map: (8/1108) variable: coreTempLess34CGreater1Hr_e type: bool
build_numeric_value_map: (9/1108) variable: first6HrCoolByClinicalP

## 02-00. Total Modified Sarnat Scores

In [6]:
def _total_modified_sarnat_scores(base_filename, prefix):
    """Build the per-item, composite and total modified Sarnat scores for one exam file.

    Steps:
      1. Read ``{out_dir}/out-merged-normalized/{base_filename}``.
      2. Keep the reserved columns plus the nine ``{prefix}NeuroExam*`` item
         columns and convert them with ``COMBINE_harmonizer.numeric_values``.
      3. Rename every item column ``X`` to ``XScore``.
      4. For rows that have at least one item score, fill the missing item
         scores with 1; rows with no item score at all are left untouched.
      5. Derive ``{prefix}NeuroExamReflexScore`` (from suck and moro) and
         ``{prefix}NeuroExamANSScore`` (from pupils, heart rate and respiration)
         with ``COMBINE_harmonizer.safe_max``.
      6. Add ``{prefix}TotalModifiedSarnatScore`` and run
         ``COMBINE_harmonizer.postprocess``.

    Relies on the notebook globals ``out_dir`` and ``_NUMERIC_VALUE_MAP_MAIN``.

    Args:
        base_filename: CSV file name inside ``out-merged-normalized``.
        prefix: Column-name prefix of the exam, e.g. ``'pre_'``, ``'post_'``
            or ``'discharge'``.

    Returns:
        The data frame returned by ``COMBINE_harmonizer.postprocess``.

    Raises:
        KeyError: if one of the nine item columns is absent after conversion.
    """
    score_suffix = 'Score'
    # Value given to a missing item when the same row has at least one other
    # item scored (1 is the "normal" level per the original author's note).
    default_item_score = 1

    base_core_columns = [
        f'{prefix}NeuroExamLevelConsciousnessScore',
        f'{prefix}NeuroExamSpontaneousActivityScore',
        f'{prefix}NeuroExamPostureScore',
        f'{prefix}NeuroExamToneScore',
    ]
    base_reflex_columns = [
        f'{prefix}NeuroExamSuckScore',
        f'{prefix}NeuroExamMoroScore',
    ]
    base_ans_columns = [
        f'{prefix}NeuroExamPupilsScore',
        f'{prefix}NeuroExamHeartRateScore',
        f'{prefix}NeuroExamRespirationScore',
    ]
    item_score_columns = base_core_columns + base_reflex_columns + base_ans_columns

    # Single source of truth for "raw column -> score column": the raw name is
    # the score name without its trailing 'Score'.
    column_map = {
        score_column[:-len(score_suffix)]: score_column
        for score_column in item_score_columns
    }

    # The total is computed from the four core items plus the two composites.
    total_modified_sarnat_score_columns = base_core_columns + [
        f'{prefix}NeuroExamReflexScore',
        f'{prefix}NeuroExamANSScore',
    ]

    full_filename = f'{out_dir}/out-merged-normalized/{base_filename}'

    df = pd.read_csv(full_filename)

    columns = COMBINE_harmonizer.RESERVED_COLUMNS + list(column_map)

    df = COMBINE_harmonizer.valid_columns(df, columns)

    df_numeric = COMBINE_harmonizer.numeric_values(df, _NUMERIC_VALUE_MAP_MAIN)

    df_numeric = df_numeric.rename(columns=column_map)

    # Snapshot the missing-value masks before any fill so that every column is
    # judged against the data as loaded, not against partially filled rows.
    is_missing = df_numeric[item_score_columns].isnull()
    has_any_score = ~is_missing.all(axis=1)

    for score_column in item_score_columns:
        df_numeric.loc[is_missing[score_column] & has_any_score, score_column] = default_item_score

    df_numeric[f'{prefix}NeuroExamReflexScore'] = df_numeric.apply(lambda x: COMBINE_harmonizer.safe_max([x[column] for column in base_reflex_columns]), axis=1)

    df_numeric[f'{prefix}NeuroExamANSScore'] = df_numeric.apply(lambda x: COMBINE_harmonizer.safe_max([x[column] for column in base_ans_columns]), axis=1)

    df_numeric[f'{prefix}TotalModifiedSarnatScore'] = COMBINE_harmonizer.total_modified_sarnat_score(df_numeric, total_modified_sarnat_score_columns)
    df_numeric = COMBINE_harmonizer.postprocess(df_numeric)

    return df_numeric


## 02-01. Pre-intervention

In [7]:
# Pre-intervention exam: declare the input file, column prefix and output path,
# then score the exam and write the result into the same directory as the input.
prefix = 'pre_'
base_filename = '01-12-neuro-exam.csv'
out_filename = f'{out_dir}/out-merged-normalized/01-12_1-total-modified-sarnat.csv'

df_pre = _total_modified_sarnat_scores(base_filename, prefix)
df_pre.to_csv(out_filename, index=False)

(4/18) MRI_ID not in df
(5/18) followupCenter not in df
(6/18) followupID not in df
(7/18) uniqueFollowupID not in df
(8/18) _flatten_index not in df
[WARN] (0/13) not in numeric_value_map: _study
[INFO] (1/13) to numeric value: column: center var_name: center
[INFO] (2/13) to numeric value: column: subjectID var_name: subjectID
[WARN] (3/13) not in numeric_value_map: uniqueID
[INFO] (4/13) to numeric value: column: pre_NeuroExamLevelConsciousness var_name: pre_NeuroExamLevelConsciousness
[INFO] (5/13) to numeric value: column: pre_NeuroExamSpontaneousActivity var_name: pre_NeuroExamSpontaneousActivity
[INFO] (6/13) to numeric value: column: pre_NeuroExamPosture var_name: pre_NeuroExamPosture
[INFO] (7/13) to numeric value: column: pre_NeuroExamTone var_name: pre_NeuroExamTone
[INFO] (8/13) to numeric value: column: pre_NeuroExamSuck var_name: pre_NeuroExamSuck
[INFO] (9/13) to numeric value: column: pre_NeuroExamMoro var_name: pre_NeuroExamMoro
[INFO] (10/13) to numeric value: column:

## 02-02. Post-normothermia

In [8]:
# Post-normothermia exam: declare the input file, column prefix and output path,
# then score the exam and write the result into the same directory as the input.
prefix = 'post_'
base_filename = '03-04-post-neuro-exam.csv'
out_filename = f'{out_dir}/out-merged-normalized/03-04_1-total-modified-sarnat.csv'

df_post = _total_modified_sarnat_scores(base_filename, prefix)
df_post.to_csv(out_filename, index=False)

(4/18) MRI_ID not in df
(5/18) followupCenter not in df
(6/18) followupID not in df
(7/18) uniqueFollowupID not in df
(8/18) _flatten_index not in df
[WARN] (0/13) not in numeric_value_map: _study
[INFO] (1/13) to numeric value: column: center var_name: center
[INFO] (2/13) to numeric value: column: subjectID var_name: subjectID
[WARN] (3/13) not in numeric_value_map: uniqueID
[INFO] (4/13) to numeric value: column: post_NeuroExamLevelConsciousness var_name: post_NeuroExamLevelConsciousness
[INFO] (5/13) to numeric value: column: post_NeuroExamSpontaneousActivity var_name: post_NeuroExamSpontaneousActivity
[INFO] (6/13) to numeric value: column: post_NeuroExamPosture var_name: post_NeuroExamPosture
[INFO] (7/13) to numeric value: column: post_NeuroExamTone var_name: post_NeuroExamTone
[INFO] (8/13) to numeric value: column: post_NeuroExamSuck var_name: post_NeuroExamSuck
[INFO] (9/13) to numeric value: column: post_NeuroExamMoro var_name: post_NeuroExamMoro
[INFO] (10/13) to numeric va

[INFO] (3/6) column: post_NeuroExamToneScore (object) valid: (460/527)
[INFO] (4/6) column: post_NeuroExamReflexScore (float64) valid: (460/527)
[INFO] (5/6) column: post_NeuroExamANSScore (float64) valid: (460/527)


## 02-03. Discharge

In [9]:
# Discharge exam: declare the input file, column prefix and output path, then
# score the exam and write the result into the same directory as the input.
# The discharge columns use the bare prefix 'discharge' (no trailing underscore).
prefix = 'discharge'
base_filename = '04-12-neuro-exam.csv'
out_filename = f'{out_dir}/out-merged-normalized/04-12_1-total-modified-sarnat.csv'

df_discharge = _total_modified_sarnat_scores(base_filename, prefix)
df_discharge.to_csv(out_filename, index=False)

(4/18) MRI_ID not in df
(5/18) followupCenter not in df
(6/18) followupID not in df
(7/18) uniqueFollowupID not in df
(8/18) _flatten_index not in df
[WARN] (0/13) not in numeric_value_map: _study
[INFO] (1/13) to numeric value: column: center var_name: center
[INFO] (2/13) to numeric value: column: subjectID var_name: subjectID
[WARN] (3/13) not in numeric_value_map: uniqueID
[INFO] (4/13) to numeric value: column: dischargeNeuroExamLevelConsciousness var_name: dischargeNeuroExamLevelConsciousness
[INFO] (5/13) to numeric value: column: dischargeNeuroExamSpontaneousActivity var_name: dischargeNeuroExamSpontaneousActivity
[INFO] (6/13) to numeric value: column: dischargeNeuroExamPosture var_name: dischargeNeuroExamPosture
[INFO] (7/13) to numeric value: column: dischargeNeuroExamTone var_name: dischargeNeuroExamTone
[INFO] (8/13) to numeric value: column: dischargeNeuroExamSuck var_name: dischargeNeuroExamSuck
[INFO] (9/13) to numeric value: column: dischargeNeuroExamMoro var_name: dis