In [1]:
import pandas as pd
pd.options.mode.copy_on_write = True
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import os

import re
import time

import itertools

import COMBINE_harmonizer
from COMBINE_harmonizer import cfg

## 01. init

In [2]:
# Relative path to the directory that contains config.yaml (joined with '/config.yaml' below).
root_dir = '../'

In [3]:
# Initialize the harmonizer package from the project config file.
# NOTE(review): presumably this is what populates `cfg.config` read below — confirm in COMBINE_harmonizer.
COMBINE_harmonizer.init(f'{root_dir}/config.yaml')

# Directory holding the normalized CSVs this notebook reads; the merged
# workbook and flattened CSV are written into the same directory.
out_dir = f"{cfg.config['out_dir']}/out-publish-normalized"

## 02. variables

In [4]:
# Per-file descriptors provided by the harmonizer package. This notebook reads
# each entry's 'name' key and its optional 'is_merge' flag (treated as True when absent).
filename_infos = COMBINE_harmonizer.FILENAME_INFOS

## 03. Functions

### 03-3. process filename info

In [5]:
def _process_filename_info(filename_info):
    """Load one normalized CSV and derive the prefixed frame used for merging.

    Args:
        filename_info: mapping whose 'name' entry is the CSV filename located
            inside the module-level `out_dir`.

    Returns:
        A 2-tuple `(df, df_prefixed)`:
          * `df` - the CSV read with every column as object dtype, missing
            values replaced by '', and columns ending in '.orig' dropped.
          * `df_prefixed` - the output of
            `COMBINE_harmonizer.flatten_columns_with_prefix`, applied to
            `COMBINE_harmonizer.flatten(df)` when
            `COMBINE_harmonizer.FLATTEN_INDEX` is in `df`, otherwise to a
            copy of `df`.
    """
    filename = filename_info['name']

    # Load the file as plain objects and blank out missing cells.
    csv_path = out_dir + os.sep + filename
    raw = pd.read_csv(csv_path, dtype='O').fillna('')

    # Keep every column except the '.orig' ones.
    kept_columns = [name for name in raw.columns if not name.endswith('.orig')]
    df = raw[kept_columns]

    # Column prefix derived from the filename.
    prefix = COMBINE_harmonizer.flatten_filename_prefix(filename)
    print(f'[INFO] _process_filename_info: filename: {filename} prefix: {prefix}')

    # Flatten only when the flatten-index column is present; otherwise hand a
    # copy of the frame to the prefixing step so `df` itself is returned as read.
    if COMBINE_harmonizer.FLATTEN_INDEX in df:
        to_prefix = COMBINE_harmonizer.flatten(df)
    else:
        to_prefix = df.copy()

    return df, COMBINE_harmonizer.flatten_columns_with_prefix(to_prefix, prefix)

## 04. Process

In [6]:
# Build two artifacts in `out_dir`:
#   * zz-merged.xlsx        - one sheet per mergeable file plus a 'flatten' sheet.
#   * zz-merged-flatten.csv - the outer-merge of every file's prefixed frame.
out_filename = os.sep.join([out_dir, 'zz-merged.xlsx'])

df_merge = None
# The context manager guarantees the workbook is closed (and its file handle
# released) even when reading, flattening or merging a file raises mid-loop.
with pd.ExcelWriter(out_filename, engine='xlsxwriter') as excel:
    # NOTE: `idx` is the 0-based position in `filename_infos`, including
    # entries skipped by `is_merge`, so the progress counter tops out at n-1.
    for idx, filename_info in enumerate(filename_infos):
        # Entries opt out of the merge with is_merge=False; default is to merge.
        if not filename_info.get('is_merge', True):
            continue

        # 1. names
        filename = filename_info['name']
        prefix = COMBINE_harmonizer.flatten_filename_prefix(filename)
        sheet_name = COMBINE_harmonizer.flatten_sheet_name(filename)
        print(f"({idx}/{len(filename_infos)}) filename: {filename_info['name']} name: {prefix} sheet_name: {sheet_name}")

        # 2. process file info.
        # df for sheet in excel.
        # df_flatten for merging as the flattened csv.
        df, df_flatten = _process_filename_info(filename_info)

        # 3. to excel.
        print(f'({idx}/{len(filename_infos)}) to df.excel')
        df.to_excel(excel, sheet_name=sheet_name)
        print(f'({idx}/{len(filename_infos)}) after df.excel')

        # 4. to merge: the first frame seeds the result, later ones are outer-merged on the shared key columns.
        print(f'({idx}/{len(filename_infos)}) to merge')
        if df_merge is None:
            df_merge = df_flatten
        else:
            df_merge = df_merge.merge(df_flatten, on=COMBINE_harmonizer.FLATTEN_MERGE_COLUMNS, how='outer')

        # 5. check that unique-id is still unique.
        # Report-only: the number of key groups with more than one row is printed, not enforced.
        df_groupby = df_merge.groupby(COMBINE_harmonizer.FLATTEN_MERGE_COLUMNS).agg(count=('_study', 'count'))
        is_invalid = df_groupby['count'] > 1
        print(f'({idx}/{len(filename_infos)}) after merge: groupby: {len(df_groupby)} is_invalid: {is_invalid.sum()}')

    # Fail with a clear message instead of an AttributeError on None below.
    if df_merge is None:
        raise ValueError('no entry in filename_infos is enabled for merging (is_merge); nothing to write')

    # save to csv.
    out_filename = os.sep.join([out_dir, 'zz-merged-flatten.csv'])
    df_merge.to_csv(out_filename, index=False)

    # merge to excel.
    print(f'merged columns: {len(df_merge.columns)}')
    df_merge.to_excel(excel, sheet_name='flatten')

    # close excel (performed when the `with` block exits).
    print('to excel.close')
print('done')

(2/73) filename: 01-02-screening.csv name: 01-02 sheet_name: 01-02-screening
[INFO] _process_filename_info: filename: 01-02-screening.csv prefix: 01-02
(2/73) to df.excel
(2/73) after df.excel
(2/73) to merge
(2/73) after merge: groupby: 532 is_invalid: 0
(3/73) filename: 01-03-maternal-demographics.csv name: 01-03 sheet_name: 01-03-maternal-demographics


[INFO] _process_filename_info: filename: 01-03-maternal-demographics.csv prefix: 01-03
(3/73) to df.excel
(3/73) after df.excel
(3/73) to merge
(3/73) after merge: groupby: 532 is_invalid: 0
(4/73) filename: 01-04-pregnancy-history.csv name: 01-04 sheet_name: 01-04-pregnancy-history
[INFO] _process_filename_info: filename: 01-04-pregnancy-history.csv prefix: 01-04
(4/73) to df.excel
(4/73) after df.excel
(4/73) to merge
(4/73) after merge: groupby: 532 is_invalid: 0
(5/73) filename: 01-05-labor-delivery.csv name: 01-05 sheet_name: 01-05-labor-delivery
[INFO] _process_filename_info: filename: 01-05-labor-delivery.csv prefix: 01-05
(5/73) to df.excel


(5/73) after df.excel
(5/73) to merge
(5/73) after merge: groupby: 532 is_invalid: 0
(6/73) filename: 01-05_1-pse.csv name: 01-05_1 sheet_name: 01-05_1-pse
[INFO] _process_filename_info: filename: 01-05_1-pse.csv prefix: 01-05_1
(6/73) to df.excel
(6/73) after df.excel
(6/73) to merge
(6/73) after merge: groupby: 532 is_invalid: 0
(7/73) filename: 01-05_2-emergency-csection.csv name: 01-05_2 sheet_name: 01-05_2-emergency-csection
[INFO] _process_filename_info: filename: 01-05_2-emergency-csection.csv prefix: 01-05_2
(7/73) to df.excel
(7/73) after df.excel
(7/73) to merge
(7/73) after merge: groupby: 532 is_invalid: 0
(8/73) filename: 01-06-birth.csv name: 01-06 sheet_name: 01-06-birth
[INFO] _process_filename_info: filename: 01-06-birth.csv prefix: 01-06
(8/73) to df.excel


(8/73) after df.excel
(8/73) to merge
(8/73) after merge: groupby: 532 is_invalid: 0
(9/73) filename: 01-07-pre-temperature.csv name: 01-07 sheet_name: 01-07-pre-temperature
[INFO] _process_filename_info: filename: 01-07-pre-temperature.csv prefix: 01-07
(9/73) to df.excel
(9/73) after df.excel
(9/73) to merge
(9/73) after merge: groupby: 532 is_invalid: 0
(10/73) filename: 01-08-pre-cardiovascular.csv name: 01-08 sheet_name: 01-08-pre-cardiovascular
[INFO] _process_filename_info: filename: 01-08-pre-cardiovascular.csv prefix: 01-08
(10/73) to df.excel
(10/73) after df.excel
(10/73) to merge
(10/73) after merge: groupby: 532 is_invalid: 0
(11/73) filename: 01-09-pre-infection.csv name: 01-09 sheet_name: 01-09-pre-infection


[INFO] _process_filename_info: filename: 01-09-pre-infection.csv prefix: 01-09
(11/73) to df.excel
(11/73) after df.excel
(11/73) to merge
(11/73) after merge: groupby: 532 is_invalid: 0
(12/73) filename: 01-10-pre-other-med.csv name: 01-10 sheet_name: 01-10-pre-other-med
[INFO] _process_filename_info: filename: 01-10-pre-other-med.csv prefix: 01-10
(12/73) to df.excel
(12/73) after df.excel
(12/73) to merge
(12/73) after merge: groupby: 532 is_invalid: 0
(13/73) filename: 01-11-pre-imaging.csv name: 01-11 sheet_name: 01-11-pre-imaging
[INFO] _process_filename_info: filename: 01-11-pre-imaging.csv prefix: 01-11
(13/73) to df.excel


(13/73) after df.excel
(13/73) to merge
(13/73) after merge: groupby: 532 is_invalid: 0
(14/73) filename: 01-12-neuro-exam.csv name: 01-12 sheet_name: 01-12-neuro-exam
[INFO] _process_filename_info: filename: 01-12-neuro-exam.csv prefix: 01-12
(14/73) to df.excel
(14/73) after df.excel
(14/73) to merge
(14/73) after merge: groupby: 532 is_invalid: 0
(15/73) filename: 01-12_1-total-modified-sarnat.csv name: 01-12_1 sheet_name: 01-12_1-total-modified-sarnat
[INFO] _process_filename_info: filename: 01-12_1-total-modified-sarnat.csv prefix: 01-12_1
(15/73) to df.excel
(15/73) after df.excel
(15/73) to merge
(15/73) after merge: groupby: 532 is_invalid: 0
(16/73) filename: 02-01-temperature.csv name: 02-01 sheet_name: 02-01-temperature


[INFO] _process_filename_info: filename: 02-01-temperature.csv prefix: 02-01
_flatten: to pivot_table: df: 24439


_flatten: after pivot_table: time: 4.988666772842407
_flatten: after to_flat_index
(16/73) to df.excel


(16/73) after df.excel
(16/73) to merge
(16/73) after merge: groupby: 532 is_invalid: 0
(17/73) filename: 02-02-cardiovascular.csv name: 02-02 sheet_name: 02-02-cardiovascular
[INFO] _process_filename_info: filename: 02-02-cardiovascular.csv prefix: 02-02
_flatten: to pivot_table: df: 13629


_flatten: after pivot_table: time: 2.5172550678253174
_flatten: after to_flat_index
(17/73) to df.excel


(17/73) after df.excel
(17/73) to merge
(17/73) after merge: groupby: 532 is_invalid: 0
(18/73) filename: 02-03-respiratory.csv name: 02-03 sheet_name: 02-03-respiratory
[INFO] _process_filename_info: filename: 02-03-respiratory.csv prefix: 02-03
_flatten: to pivot_table: df: 2794


_flatten: after pivot_table: time: 0.487382173538208
_flatten: after to_flat_index
(18/73) to df.excel
(18/73) after df.excel
(18/73) to merge
(18/73) after merge: groupby: 532 is_invalid: 0
(19/73) filename: 02-04-blood-gas.csv name: 02-04 sheet_name: 02-04-blood-gas


[INFO] _process_filename_info: filename: 02-04-blood-gas.csv prefix: 02-04
_flatten: to pivot_table: df: 10768


_flatten: after pivot_table: time: 2.780421018600464
_flatten: after to_flat_index
(19/73) to df.excel


(19/73) after df.excel
(19/73) to merge
(19/73) after merge: groupby: 532 is_invalid: 0
(20/73) filename: 02-05-hematology.csv name: 02-05 sheet_name: 02-05-hematology
[INFO] _process_filename_info: filename: 02-05-hematology.csv prefix: 02-05
_flatten: to pivot_table: df: 2742


_flatten: after pivot_table: time: 0.6123089790344238
_flatten: after to_flat_index
(20/73) to df.excel
(20/73) after df.excel
(20/73) to merge
(20/73) after merge: groupby: 532 is_invalid: 0
(21/73) filename: 02-05_s-hematology.csv name: 02-05_s sheet_name: 02-05_s-hematology


[INFO] _process_filename_info: filename: 02-05_s-hematology.csv prefix: 02-05_s
(21/73) to df.excel
(21/73) after df.excel
(21/73) to merge
(21/73) after merge: groupby: 532 is_invalid: 0
(22/73) filename: 02-06_s-blood-value.csv name: 02-06_s sheet_name: 02-06_s-blood-value
[INFO] _process_filename_info: filename: 02-06_s-blood-value.csv prefix: 02-06_s
(22/73) to df.excel
(22/73) after df.excel
(22/73) to merge
(22/73) after merge: groupby: 532 is_invalid: 0
(23/73) filename: 02-07-infection.csv name: 02-07 sheet_name: 02-07-infection


[INFO] _process_filename_info: filename: 02-07-infection.csv prefix: 02-07
_flatten: to pivot_table: df: 532
_flatten: after pivot_table: time: 0.15987014770507812
_flatten: after to_flat_index
(23/73) to df.excel
(23/73) after df.excel
(23/73) to merge


(23/73) after merge: groupby: 532 is_invalid: 0
(24/73) filename: 02-08-other-med.csv name: 02-08 sheet_name: 02-08-other-med
[INFO] _process_filename_info: filename: 02-08-other-med.csv prefix: 02-08
_flatten: to pivot_table: df: 2757


_flatten: after pivot_table: time: 1.1092228889465332
_flatten: after to_flat_index
(24/73) to df.excel


(24/73) after df.excel
(24/73) to merge
(24/73) after merge: groupby: 532 is_invalid: 0
(25/73) filename: 02-09-imaging.csv name: 02-09 sheet_name: 02-09-imaging
[INFO] _process_filename_info: filename: 02-09-imaging.csv prefix: 02-09
_flatten: to pivot_table: df: 525


_flatten: after pivot_table: time: 0.40982604026794434
_flatten: after to_flat_index
(25/73) to df.excel
(25/73) after df.excel
(25/73) to merge
(25/73) after merge: groupby: 532 is_invalid: 0
(26/73) filename: 02-11-elevated-temperature.csv name: 02-11 sheet_name: 02-11-elevated-temperature
[INFO] _process_filename_info: filename: 02-11-elevated-temperature.csv prefix: 02-11
_flatten: to pivot_table: df: 445


_flatten: after pivot_table: time: 0.15220284461975098
_flatten: after to_flat_index
(26/73) to df.excel
(26/73) after df.excel
(26/73) to merge
(26/73) after merge: groupby: 532 is_invalid: 0
(27/73) filename: 02-12-fluctuated-temperature.csv name: 02-12 sheet_name: 02-12-fluctuated-temperature
[INFO] _process_filename_info: filename: 02-12-fluctuated-temperature.csv prefix: 02-12
_flatten: to pivot_table: df: 54
_flatten: after pivot_table: time: 0.012452840805053711
_flatten: after to_flat_index
(27/73) to df.excel
(27/73) after df.excel
(27/73) to merge
(27/73) after merge: groupby: 532 is_invalid: 0
(28/73) filename: 02-13-bradycardia.csv name: 02-13 sheet_name: 02-13-bradycardia
[INFO] _process_filename_info: filename: 02-13-bradycardia.csv prefix: 02-13
_flatten: to pivot_table: df: 210
_flatten: after pivot_table: time: 0.04180598258972168
_flatten: after to_flat_index
(28/73) to df.excel
(28/73) after df.excel
(28/73) to merge
(28/73) after merge: groupby: 532 is_invalid: 0
(29/73) filename: 02-14-adverse-event.csv name: 02-14 sheet_name: 02-14-adverse-event

[INFO] _process_filename_info: filename: 02-14-adverse-event.csv prefix: 02-14
_flatten: to pivot_table: df: 111
_flatten: after pivot_table: time: 0.11860871315002441
_flatten: after to_flat_index
(29/73) to df.excel
(29/73) after df.excel
(29/73) to merge
(29/73) after merge: groupby: 532 is_invalid: 0
(30/73) filename: 02-15-violation.csv name: 02-15 sheet_name: 02-15-violation
[INFO] _process_filename_info: filename: 02-15-violation.csv prefix: 02-15
_flatten: to pivot_table: df: 156
_flatten: after pivot_table: time: 0.030316829681396484
_flatten: after to_flat_index
(30/73) to df.excel


(30/73) after df.excel
(30/73) to merge
(30/73) after merge: groupby: 532 is_invalid: 0
(31/73) filename: 02-16-interrupt.csv name: 02-16 sheet_name: 02-16-interrupt
[INFO] _process_filename_info: filename: 02-16-interrupt.csv prefix: 02-16
_flatten: to pivot_table: df: 249
_flatten: after pivot_table: time: 0.04612994194030762
_flatten: after to_flat_index
(31/73) to df.excel
(31/73) after df.excel
(31/73) to merge
(31/73) after merge: groupby: 532 is_invalid: 0
(32/73) filename: 02-17-discontinue.csv name: 02-17 sheet_name: 02-17-discontinue
[INFO] _process_filename_info: filename: 02-17-discontinue.csv prefix: 02-17
(32/73) to df.excel
(32/73) after df.excel
(32/73) to merge
(32/73) after merge: groupby: 532 is_invalid: 0
(33/73) filename: 03-01-post-temperature.csv name: 03-01 sheet_name: 03-01-post-temperature


[INFO] _process_filename_info: filename: 03-01-post-temperature.csv prefix: 03-01
_flatten: to pivot_table: df: 2333


_flatten: after pivot_table: time: 0.3058178424835205
_flatten: after to_flat_index
(33/73) to df.excel
(33/73) after df.excel
(33/73) to merge
(33/73) after merge: groupby: 532 is_invalid: 0
(34/73) filename: 03-01_s-post-temperature.csv name: 03-01_s sheet_name: 03-01_s-post-temperature
[INFO] _process_filename_info: filename: 03-01_s-post-temperature.csv prefix: 03-01_s
(34/73) to df.excel
(34/73) after df.excel
(34/73) to merge
(34/73) after merge: groupby: 532 is_invalid: 0
(35/73) filename: 03-02-post-blood-value.csv name: 03-02 sheet_name: 03-02-post-blood-value


[INFO] _process_filename_info: filename: 03-02-post-blood-value.csv prefix: 03-02
(35/73) to df.excel
(35/73) after df.excel
(35/73) to merge
(35/73) after merge: groupby: 532 is_invalid: 0
(36/73) filename: 03-03-post-imaging.csv name: 03-03 sheet_name: 03-03-post-imaging
[INFO] _process_filename_info: filename: 03-03-post-imaging.csv prefix: 03-03
(36/73) to df.excel
(36/73) after df.excel
(36/73) to merge
(36/73) after merge: groupby: 532 is_invalid: 0
(37/73) filename: 03-04-post-neuro-exam.csv name: 03-04 sheet_name: 03-04-post-neuro-exam
[INFO] _process_filename_info: filename: 03-04-post-neuro-exam.csv prefix: 03-04
(37/73) to df.excel


(37/73) after df.excel
(37/73) to merge
(37/73) after merge: groupby: 532 is_invalid: 0
(38/73) filename: 03-04_1-total-modified-sarnat.csv name: 03-04_1 sheet_name: 03-04_1-total-modified-sarnat
[INFO] _process_filename_info: filename: 03-04_1-total-modified-sarnat.csv prefix: 03-04_1
(38/73) to df.excel
(38/73) after df.excel
(38/73) to merge
(38/73) after merge: groupby: 532 is_invalid: 0
(39/73) filename: 03-05-mri.csv name: 03-05 sheet_name: 03-05-mri
[INFO] _process_filename_info: filename: 03-05-mri.csv prefix: 03-05
_flatten: to pivot_table: df: 915


_flatten: after pivot_table: time: 1.830894947052002
_flatten: after to_flat_index
(39/73) to df.excel


(39/73) after df.excel
(39/73) to merge
(39/73) after merge: groupby: 532 is_invalid: 0
(40/73) filename: 03-05_s-mri.csv name: 03-05_s sheet_name: 03-05_s-mri
[INFO] _process_filename_info: filename: 03-05_s-mri.csv prefix: 03-05_s
(40/73) to df.excel


(40/73) after df.excel
(40/73) to merge
(40/73) after merge: groupby: 532 is_invalid: 0
(41/73) filename: 03-05_s1-mri.csv name: 03-05_s1 sheet_name: 03-05_s1-mri
[INFO] _process_filename_info: filename: 03-05_s1-mri.csv prefix: 03-05_s1
(41/73) to df.excel
(41/73) after df.excel
(41/73) to merge
(41/73) after merge: groupby: 532 is_invalid: 0
(42/73) filename: 04-01-status.csv name: 04-01 sheet_name: 04-01-status
[INFO] _process_filename_info: filename: 04-01-status.csv prefix: 04-01
(42/73) to df.excel
(42/73) after df.excel
(42/73) to merge
(42/73) after merge: groupby: 532 is_invalid: 0
(43/73) filename: 04-01_1-length-of-stay.csv name: 04-01_1 sheet_name: 04-01_1-length-of-stay


[INFO] _process_filename_info: filename: 04-01_1-length-of-stay.csv prefix: 04-01_1
(43/73) to df.excel
(43/73) after df.excel
(43/73) to merge
(43/73) after merge: groupby: 532 is_invalid: 0
(44/73) filename: 04-02-cardiovascular.csv name: 04-02 sheet_name: 04-02-cardiovascular
[INFO] _process_filename_info: filename: 04-02-cardiovascular.csv prefix: 04-02
(44/73) to df.excel
(44/73) after df.excel
(44/73) to merge
(44/73) after merge: groupby: 532 is_invalid: 0
(45/73) filename: 04-03-respiratory.csv name: 04-03 sheet_name: 04-03-respiratory
[INFO] _process_filename_info: filename: 04-03-respiratory.csv prefix: 04-03
(45/73) to df.excel
(45/73) after df.excel
(45/73) to merge
(45/73) after merge: groupby: 532 is_invalid: 0
(46/73) filename: 04-04-hematology.csv name: 04-04 sheet_name: 04-04-hematology


[INFO] _process_filename_info: filename: 04-04-hematology.csv prefix: 04-04
(46/73) to df.excel
(46/73) after df.excel
(46/73) to merge
(46/73) after merge: groupby: 532 is_invalid: 0
(47/73) filename: 04-05-metabolic.csv name: 04-05 sheet_name: 04-05-metabolic
[INFO] _process_filename_info: filename: 04-05-metabolic.csv prefix: 04-05
(47/73) to df.excel
(47/73) after df.excel
(47/73) to merge
(47/73) after merge: groupby: 532 is_invalid: 0
(48/73) filename: 04-06-renal.csv name: 04-06 sheet_name: 04-06-renal
[INFO] _process_filename_info: filename: 04-06-renal.csv prefix: 04-06
(48/73) to df.excel
(48/73) after df.excel
(48/73) to merge
(48/73) after merge: groupby: 532 is_invalid: 0
(49/73) filename: 04-07-gastrointestinal.csv name: 04-07 sheet_name: 04-07-gastrointestinal
[INFO] _process_filename_info: filename: 04-07-gastrointestinal.csv prefix: 04-07
(49/73) to df.excel
(49/73) after df.excel
(49/73) to merge
(49/73) after merge: groupby: 532 is_invalid: 0
(50/73) filename: 04-08-skin.csv name: 04-08 sheet_name: 04-08-skin

[INFO] _process_filename_info: filename: 04-08-skin.csv prefix: 04-08
(50/73) to df.excel
(50/73) after df.excel
(50/73) to merge
(50/73) after merge: groupby: 532 is_invalid: 0
(51/73) filename: 04-09-auditory.csv name: 04-09 sheet_name: 04-09-auditory
[INFO] _process_filename_info: filename: 04-09-auditory.csv prefix: 04-09
(51/73) to df.excel
(51/73) after df.excel
(51/73) to merge
(51/73) after merge: groupby: 532 is_invalid: 0
(52/73) filename: 04-10-surgery.csv name: 04-10 sheet_name: 04-10-surgery
[INFO] _process_filename_info: filename: 04-10-surgery.csv prefix: 04-10
(52/73) to df.excel
(52/73) after df.excel
(52/73) to merge
(52/73) after merge: groupby: 532 is_invalid: 0
(53/73) filename: 04-11-infection.csv name: 04-11 sheet_name: 04-11-infection
[INFO] _process_filename_info: filename: 04-11-infection.csv prefix: 04-11
(53/73) to df.excel


(53/73) after df.excel
(53/73) to merge
(53/73) after merge: groupby: 532 is_invalid: 0
(54/73) filename: 04-12-neuro-exam.csv name: 04-12 sheet_name: 04-12-neuro-exam
[INFO] _process_filename_info: filename: 04-12-neuro-exam.csv prefix: 04-12
(54/73) to df.excel
(54/73) after df.excel
(54/73) to merge
(54/73) after merge: groupby: 532 is_invalid: 0
(55/73) filename: 04-12_1-total-modified-sarnat.csv name: 04-12_1 sheet_name: 04-12_1-total-modified-sarnat
[INFO] _process_filename_info: filename: 04-12_1-total-modified-sarnat.csv prefix: 04-12_1
(55/73) to df.excel
(55/73) after df.excel
(55/73) to merge
(55/73) after merge: groupby: 532 is_invalid: 0
(56/73) filename: 04-13-seizure.csv name: 04-13 sheet_name: 04-13-seizure


[INFO] _process_filename_info: filename: 04-13-seizure.csv prefix: 04-13
(56/73) to df.excel
(56/73) after df.excel
(56/73) to merge
(56/73) after merge: groupby: 532 is_invalid: 0
(57/73) filename: 04-14-birth-defect.csv name: 04-14 sheet_name: 04-14-birth-defect
[INFO] _process_filename_info: filename: 04-14-birth-defect.csv prefix: 04-14
(57/73) to df.excel
(57/73) after df.excel
(57/73) to merge
(57/73) after merge: groupby: 532 is_invalid: 0
(58/73) filename: 04-15-home-therapy.csv name: 04-15 sheet_name: 04-15-home-therapy
[INFO] _process_filename_info: filename: 04-15-home-therapy.csv prefix: 04-15
(58/73) to df.excel
(58/73) after df.excel
(58/73) to merge
(58/73) after merge: groupby: 532 is_invalid: 0
(59/73) filename: 04-16-wdraw-support.csv name: 04-16 sheet_name: 04-16-wdraw-support


[INFO] _process_filename_info: filename: 04-16-wdraw-support.csv prefix: 04-16
(59/73) to df.excel
(59/73) after df.excel
(59/73) to merge
(59/73) after merge: groupby: 532 is_invalid: 0
(60/73) filename: 04-17-limit-care.csv name: 04-17 sheet_name: 04-17-limit-care
[INFO] _process_filename_info: filename: 04-17-limit-care.csv prefix: 04-17
(60/73) to df.excel
(60/73) after df.excel
(60/73) to merge
(60/73) after merge: groupby: 532 is_invalid: 0
(61/73) filename: 20-00-follow-up.csv name: 20-00 sheet_name: 20-00-follow-up
[INFO] _process_filename_info: filename: 20-00-follow-up.csv prefix: 20-00
(61/73) to df.excel
(61/73) after df.excel
(61/73) to merge
(61/73) after merge: groupby: 532 is_invalid: 0
(62/73) filename: 20-01-ses.csv name: 20-01 sheet_name: 20-01-ses


[INFO] _process_filename_info: filename: 20-01-ses.csv prefix: 20-01
(62/73) to df.excel
(62/73) after df.excel
(62/73) to merge
(62/73) after merge: groupby: 532 is_invalid: 0
(63/73) filename: 20-02-medical-history.csv name: 20-02 sheet_name: 20-02-medical-history
[INFO] _process_filename_info: filename: 20-02-medical-history.csv prefix: 20-02
(63/73) to df.excel


(63/73) after df.excel
(63/73) to merge
(63/73) after merge: groupby: 532 is_invalid: 0
(64/73) filename: 20-03-medical-exam.csv name: 20-03 sheet_name: 20-03-medical-exam
[INFO] _process_filename_info: filename: 20-03-medical-exam.csv prefix: 20-03
(64/73) to df.excel


(64/73) after df.excel
(64/73) to merge
(64/73) after merge: groupby: 532 is_invalid: 0
(65/73) filename: 20-04-bayley-iii.csv name: 20-04 sheet_name: 20-04-bayley-iii
[INFO] _process_filename_info: filename: 20-04-bayley-iii.csv prefix: 20-04
(65/73) to df.excel
(65/73) after df.excel
(65/73) to merge
(65/73) after merge: groupby: 532 is_invalid: 0
(66/73) filename: 20-05-gmfcs.csv name: 20-05 sheet_name: 20-05-gmfcs
[INFO] _process_filename_info: filename: 20-05-gmfcs.csv prefix: 20-05
(66/73) to df.excel
(66/73) after df.excel
(66/73) to merge
(66/73) after merge: groupby: 532 is_invalid: 0
(67/73) filename: 20-06-status.csv name: 20-06 sheet_name: 20-06-status


[INFO] _process_filename_info: filename: 20-06-status.csv prefix: 20-06
(67/73) to df.excel
(67/73) after df.excel
(67/73) to merge
(67/73) after merge: groupby: 532 is_invalid: 0
(68/73) filename: 20-07-readmission.csv name: 20-07 sheet_name: 20-07-readmission
[INFO] _process_filename_info: filename: 20-07-readmission.csv prefix: 20-07
_flatten: to pivot_table: df: 212
_flatten: after pivot_table: time: 0.03120899200439453
_flatten: after to_flat_index
(68/73) to df.excel
(68/73) after df.excel
(68/73) to merge
(68/73) after merge: groupby: 532 is_invalid: 0
(69/73) filename: 20-08-lost.csv name: 20-08 sheet_name: 20-08-lost
[INFO] _process_filename_info: filename: 20-08-lost.csv prefix: 20-08
(69/73) to df.excel


(69/73) after df.excel
(69/73) to merge
(69/73) after merge: groupby: 532 is_invalid: 0
(70/73) filename: 20-09-secondary.csv name: 20-09 sheet_name: 20-09-secondary
[INFO] _process_filename_info: filename: 20-09-secondary.csv prefix: 20-09
(70/73) to df.excel
(70/73) after df.excel
(70/73) to merge
(70/73) after merge: groupby: 532 is_invalid: 0
(71/73) filename: 20-10-outcome.csv name: 20-10 sheet_name: 20-10-outcome


[INFO] _process_filename_info: filename: 20-10-outcome.csv prefix: 20-10
(71/73) to df.excel
(71/73) after df.excel
(71/73) to merge
(71/73) after merge: groupby: 532 is_invalid: 0
(72/73) filename: 20-10_1-disability-level-death.csv name: 20-10_1 sheet_name: 20-10_1-disability-level-death
[INFO] _process_filename_info: filename: 20-10_1-disability-level-death.csv prefix: 20-10_1
(72/73) to df.excel
(72/73) after df.excel
(72/73) to merge
(72/73) after merge: groupby: 532 is_invalid: 0


merged columns: 6119


to excel.close


done
