In [1]:
import pandas as pd
pd.options.mode.copy_on_write = True
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import os

import re
import time

import itertools

import COMBINE_harmonizer
from COMBINE_harmonizer import cfg

## 01. init

In [2]:
# Project root relative to this notebook; config.yaml is looked up under it.
root_dir = '../'

In [3]:
# Initialise the harmonizer from the project config file.
# NOTE(review): presumably this is what populates cfg.config — it has to run
# before out_dir is derived below; confirm against COMBINE_harmonizer.init.
COMBINE_harmonizer.init(f'{root_dir}/config.yaml')

# Directory the per-file CSVs are read from and the merged xlsx/csv outputs
# are written to.
out_dir = f"{cfg.config['out_dir']}/out-publish-normalized"

## 02. variables

In [4]:
# Per-file descriptors iterated by the processing cell. Each entry is read as a
# dict with a 'name' key (CSV filename) and an optional 'is_merge' flag that
# defaults to True when absent.
filename_infos = COMBINE_harmonizer.FILENAME_INFOS

## 03. Functions

### 03-3. process filename info

In [5]:
def _process_filename_info(filename_info):
    """Load one normalized CSV and build its prefixed frame for merging.

    Args:
        filename_info: dict describing the file; only ``filename_info['name']``
            (the CSV filename inside ``out_dir``) is read here.

    Returns:
        A ``(df, df_prefixed)`` tuple:

        * ``df`` -- the CSV read with every column as object dtype, missing
          values replaced by ``''`` and columns ending in ``.orig`` removed.
          The caller writes this frame to its own Excel sheet.
        * ``df_prefixed`` -- the frame handed to the merge step: the result of
          ``COMBINE_harmonizer.flatten(df)`` when the frame contains the
          ``COMBINE_harmonizer.FLATTEN_INDEX`` column, otherwise a copy of
          ``df``; in both cases passed through
          ``COMBINE_harmonizer.flatten_columns_with_prefix`` with this file's
          prefix.
    """
    filename = filename_info['name']
    csv_path = os.sep.join([out_dir, filename])

    # 1. read everything as object dtype, blank out missing values and drop
    #    the '.orig' columns.
    df = pd.read_csv(csv_path, dtype='O').fillna('')
    df = df[[column for column in df.columns if not column.endswith('.orig')]]

    # 2. prefix derived from the filename.
    prefix = COMBINE_harmonizer.flatten_filename_prefix(filename)
    print(f'[INFO] _process_filename_info: filename: {filename} prefix: {prefix}')

    # 3. flatten only the frames that carry the flatten-index column; the
    #    others are copied so that prefixing cannot touch the frame returned
    #    as `df`.
    if COMBINE_harmonizer.FLATTEN_INDEX in df:
        df_to_prefix = COMBINE_harmonizer.flatten(df)
    else:
        df_to_prefix = df.copy()

    # 4. apply the filename prefix to the columns.
    df_prefixed = COMBINE_harmonizer.flatten_columns_with_prefix(df_to_prefix, prefix)

    return df, df_prefixed

## 04. Process

In [6]:
# Build two artefacts from the normalized CSVs in out_dir:
#   * zz-merged.xlsx        -- one sheet per source file plus a 'flatten' sheet
#   * zz-merged-flatten.csv -- all files outer-merged on the flatten merge columns
out_filename = os.sep.join([out_dir, 'zz-merged.xlsx'])
total = len(filename_infos)

df_merge = None
# The context manager closes the workbook on exit, including when a file fails
# part-way through the loop, so the writer's file handle is never leaked.
with pd.ExcelWriter(out_filename, engine='xlsxwriter') as excel:
    for idx, filename_info in enumerate(filename_infos):
        # entries explicitly flagged is_merge=False are left out entirely.
        if not filename_info.get('is_merge', True):
            continue

        progress = f'({idx}/{total})'

        # 1. names
        filename = filename_info['name']
        prefix = COMBINE_harmonizer.flatten_filename_prefix(filename)
        sheet_name = COMBINE_harmonizer.flatten_sheet_name(filename)
        print(f"{progress} filename: {filename_info['name']} name: {prefix} sheet_name: {sheet_name}")

        # 2. process file info.
        # df for sheet in excel.
        # df_flatten for merging as the flattened csv.
        df, df_flatten = _process_filename_info(filename_info)

        # 3. to excel.
        print(f'{progress} to df.excel')
        df.to_excel(excel, sheet_name=sheet_name)
        print(f'{progress} after df.excel')

        # 4. to merge.
        print(f'{progress} to merge')
        if df_merge is None:
            df_merge = df_flatten
        else:
            df_merge = df_merge.merge(df_flatten, on=COMBINE_harmonizer.FLATTEN_MERGE_COLUMNS, how='outer')

        # 5. check that unique-id is still unique.
        df_groupby = df_merge.groupby(COMBINE_harmonizer.FLATTEN_MERGE_COLUMNS).agg(count=('_study', 'count'))
        is_invalid = df_groupby['count'] > 1
        print(f'{progress} after merge: groupby: {len(df_groupby)} is_invalid: {is_invalid.sum()}')
        if is_invalid.any():
            # surface the problem instead of leaving it buried in the progress log.
            warnings.warn(f'{filename}: {is_invalid.sum()} merge key(s) are duplicated after merge')

    # fail with a clear message instead of an AttributeError on None below.
    if df_merge is None:
        raise ValueError('no entry in filename_infos is enabled for merge; nothing to write')

    # save to csv.
    out_filename = os.sep.join([out_dir, 'zz-merged-flatten.csv'])
    df_merge.to_csv(out_filename, index=False)

    # merge to excel.
    print(f'merged columns: {len(df_merge.columns)}')
    df_merge.to_excel(excel, sheet_name='flatten')

    # close excel (performed by leaving the with-block).
    print('to excel.close')

print('done')

(2/72) filename: 01-02-screening.csv name: 01-02 sheet_name: 01-02-screening
[INFO] _process_filename_info: filename: 01-02-screening.csv prefix: 01-02
(2/72) to df.excel
(2/72) after df.excel
(2/72) to merge
(2/72) after merge: groupby: 532 is_invalid: 0
(3/72) filename: 01-03-maternal-demographics.csv name: 01-03 sheet_name: 01-03-maternal-demographics
[INFO] _process_filename_info: filename: 01-03-maternal-demographics.csv prefix: 01-03
(3/72) to df.excel
(3/72) after df.excel
(3/72) to merge


(3/72) after merge: groupby: 532 is_invalid: 0
(4/72) filename: 01-04-pregnancy-history.csv name: 01-04 sheet_name: 01-04-pregnancy-history
[INFO] _process_filename_info: filename: 01-04-pregnancy-history.csv prefix: 01-04
(4/72) to df.excel
(4/72) after df.excel
(4/72) to merge
(4/72) after merge: groupby: 532 is_invalid: 0
(5/72) filename: 01-05-labor-delivery.csv name: 01-05 sheet_name: 01-05-labor-delivery
[INFO] _process_filename_info: filename: 01-05-labor-delivery.csv prefix: 01-05
(5/72) to df.excel
(5/72) after df.excel
(5/72) to merge
(5/72) after merge: groupby: 532 is_invalid: 0
(6/72) filename: 01-06-birth.csv name: 01-06 sheet_name: 01-06-birth
[INFO] _process_filename_info: filename: 01-06-birth.csv prefix: 01-06
(6/72) to df.excel


(6/72) after df.excel
(6/72) to merge
(6/72) after merge: groupby: 532 is_invalid: 0
(7/72) filename: 01-07-pre-temperature.csv name: 01-07 sheet_name: 01-07-pre-temperature
[INFO] _process_filename_info: filename: 01-07-pre-temperature.csv prefix: 01-07
(7/72) to df.excel
(7/72) after df.excel
(7/72) to merge
(7/72) after merge: groupby: 532 is_invalid: 0
(8/72) filename: 01-08-pre-cardiovascular.csv name: 01-08 sheet_name: 01-08-pre-cardiovascular
[INFO] _process_filename_info: filename: 01-08-pre-cardiovascular.csv prefix: 01-08
(8/72) to df.excel
(8/72) after df.excel
(8/72) to merge
(8/72) after merge: groupby: 532 is_invalid: 0
(9/72) filename: 01-09-pre-infection.csv name: 01-09 sheet_name: 01-09-pre-infection
[INFO] _process_filename_info: filename: 01-09-pre-infection.csv prefix: 01-09
(9/72) to df.excel
(9/72) after df.excel
(9/72) to merge
(9/72) after merge: groupby: 532 is_invalid: 0
(10/72) filename: 01-10-pre-other-med.csv name: 01-10 sheet_name: 01-10-pre-other-med


[INFO] _process_filename_info: filename: 01-10-pre-other-med.csv prefix: 01-10
(10/72) to df.excel
(10/72) after df.excel
(10/72) to merge
(10/72) after merge: groupby: 532 is_invalid: 0
(11/72) filename: 01-11-pre-imaging.csv name: 01-11 sheet_name: 01-11-pre-imaging
[INFO] _process_filename_info: filename: 01-11-pre-imaging.csv prefix: 01-11
(11/72) to df.excel
(11/72) after df.excel
(11/72) to merge
(11/72) after merge: groupby: 532 is_invalid: 0
(12/72) filename: 01-12-neuro-exam.csv name: 01-12 sheet_name: 01-12-neuro-exam
[INFO] _process_filename_info: filename: 01-12-neuro-exam.csv prefix: 01-12
(12/72) to df.excel
(12/72) after df.excel
(12/72) to merge


(12/72) after merge: groupby: 532 is_invalid: 0
(13/72) filename: 02-01-temperature.csv name: 02-01 sheet_name: 02-01-temperature
[INFO] _process_filename_info: filename: 02-01-temperature.csv prefix: 02-01
_flatten: to pivot_table: df: 24439


_flatten: after pivot_table: time: 5.083479166030884
_flatten: after to_flat_index
(13/72) to df.excel


(13/72) after df.excel
(13/72) to merge
(13/72) after merge: groupby: 532 is_invalid: 0
(14/72) filename: 02-02-cardiovascular.csv name: 02-02 sheet_name: 02-02-cardiovascular
[INFO] _process_filename_info: filename: 02-02-cardiovascular.csv prefix: 02-02
_flatten: to pivot_table: df: 13629


_flatten: after pivot_table: time: 2.5332798957824707
_flatten: after to_flat_index
(14/72) to df.excel


(14/72) after df.excel
(14/72) to merge
(14/72) after merge: groupby: 532 is_invalid: 0
(15/72) filename: 02-03-respiratory.csv name: 02-03 sheet_name: 02-03-respiratory
[INFO] _process_filename_info: filename: 02-03-respiratory.csv prefix: 02-03
_flatten: to pivot_table: df: 2794


_flatten: after pivot_table: time: 0.47173500061035156
_flatten: after to_flat_index
(15/72) to df.excel
(15/72) after df.excel
(15/72) to merge
(15/72) after merge: groupby: 532 is_invalid: 0
(16/72) filename: 02-04-blood-gas.csv name: 02-04 sheet_name: 02-04-blood-gas
[INFO] _process_filename_info: filename: 02-04-blood-gas.csv prefix: 02-04
_flatten: to pivot_table: df: 10768


_flatten: after pivot_table: time: 2.8220341205596924
_flatten: after to_flat_index
(16/72) to df.excel


(16/72) after df.excel
(16/72) to merge
(16/72) after merge: groupby: 532 is_invalid: 0
(17/72) filename: 02-05-hematology.csv name: 02-05 sheet_name: 02-05-hematology
[INFO] _process_filename_info: filename: 02-05-hematology.csv prefix: 02-05
_flatten: to pivot_table: df: 2742


_flatten: after pivot_table: time: 0.611903190612793
_flatten: after to_flat_index
(17/72) to df.excel


(17/72) after df.excel
(17/72) to merge
(17/72) after merge: groupby: 532 is_invalid: 0
(18/72) filename: 02-05_s-hematology.csv name: 02-05_s sheet_name: 02-05_s-hematology
[INFO] _process_filename_info: filename: 02-05_s-hematology.csv prefix: 02-05_s
(18/72) to df.excel
(18/72) after df.excel
(18/72) to merge
(18/72) after merge: groupby: 532 is_invalid: 0
(19/72) filename: 02-06_s-blood-value.csv name: 02-06_s sheet_name: 02-06_s-blood-value
[INFO] _process_filename_info: filename: 02-06_s-blood-value.csv prefix: 02-06_s
(19/72) to df.excel
(19/72) after df.excel
(19/72) to merge
(19/72) after merge: groupby: 532 is_invalid: 0
(20/72) filename: 02-07-infection.csv name: 02-07 sheet_name: 02-07-infection


[INFO] _process_filename_info: filename: 02-07-infection.csv prefix: 02-07
_flatten: to pivot_table: df: 532
_flatten: after pivot_table: time: 0.159027099609375
_flatten: after to_flat_index
(20/72) to df.excel
(20/72) after df.excel
(20/72) to merge
(20/72) after merge: groupby: 532 is_invalid: 0
(21/72) filename: 02-08-other-med.csv name: 02-08 sheet_name: 02-08-other-med


[INFO] _process_filename_info: filename: 02-08-other-med.csv prefix: 02-08
_flatten: to pivot_table: df: 2757


_flatten: after pivot_table: time: 1.0726828575134277
_flatten: after to_flat_index
(21/72) to df.excel


(21/72) after df.excel
(21/72) to merge
(21/72) after merge: groupby: 532 is_invalid: 0
(22/72) filename: 02-09-imaging.csv name: 02-09 sheet_name: 02-09-imaging
[INFO] _process_filename_info: filename: 02-09-imaging.csv prefix: 02-09
_flatten: to pivot_table: df: 525


_flatten: after pivot_table: time: 0.36118173599243164
_flatten: after to_flat_index
(22/72) to df.excel
(22/72) after df.excel
(22/72) to merge
(22/72) after merge: groupby: 532 is_invalid: 0
(23/72) filename: 02-11-elevated-temperature.csv name: 02-11 sheet_name: 02-11-elevated-temperature
[INFO] _process_filename_info: filename: 02-11-elevated-temperature.csv prefix: 02-11
_flatten: to pivot_table: df: 445


_flatten: after pivot_table: time: 0.12108683586120605
_flatten: after to_flat_index
(23/72) to df.excel
(23/72) after df.excel
(23/72) to merge
(23/72) after merge: groupby: 532 is_invalid: 0
(24/72) filename: 02-12-fluctuated-temperature.csv name: 02-12 sheet_name: 02-12-fluctuated-temperature
[INFO] _process_filename_info: filename: 02-12-fluctuated-temperature.csv prefix: 02-12
_flatten: to pivot_table: df: 54
_flatten: after pivot_table: time: 0.011566162109375
_flatten: after to_flat_index
(24/72) to df.excel
(24/72) after df.excel
(24/72) to merge
(24/72) after merge: groupby: 532 is_invalid: 0
(25/72) filename: 02-13-bradycardia.csv name: 02-13 sheet_name: 02-13-bradycardia
[INFO] _process_filename_info: filename: 02-13-bradycardia.csv prefix: 02-13
_flatten: to pivot_table: df: 210
_flatten: after pivot_table: time: 0.0409541130065918
_flatten: after to_flat_index
(25/72) to df.excel
(25/72) after df.excel
(25/72) to merge
(25/72) after merge: groupby: 532 is_invalid: 0
(26/72

(26/72) to df.excel
(26/72) after df.excel
(26/72) to merge
(26/72) after merge: groupby: 532 is_invalid: 0
(27/72) filename: 02-15-violation.csv name: 02-15 sheet_name: 02-15-violation
[INFO] _process_filename_info: filename: 02-15-violation.csv prefix: 02-15
_flatten: to pivot_table: df: 156
_flatten: after pivot_table: time: 0.027667999267578125
_flatten: after to_flat_index
(27/72) to df.excel
(27/72) after df.excel
(27/72) to merge
(27/72) after merge: groupby: 532 is_invalid: 0
(28/72) filename: 02-16-interrupt.csv name: 02-16 sheet_name: 02-16-interrupt
[INFO] _process_filename_info: filename: 02-16-interrupt.csv prefix: 02-16
_flatten: to pivot_table: df: 249
_flatten: after pivot_table: time: 0.04288196563720703
_flatten: after to_flat_index
(28/72) to df.excel
(28/72) after df.excel
(28/72) to merge
(28/72) after merge: groupby: 532 is_invalid: 0
(29/72) filename: 02-17-discontinue.csv name: 02-17 sheet_name: 02-17-discontinue
[INFO] _process_filename_info: filename: 02-17-di

(29/72) after df.excel
(29/72) to merge
(29/72) after merge: groupby: 532 is_invalid: 0
(30/72) filename: 03-01-post-temperature.csv name: 03-01 sheet_name: 03-01-post-temperature
[INFO] _process_filename_info: filename: 03-01-post-temperature.csv prefix: 03-01
_flatten: to pivot_table: df: 2333


_flatten: after pivot_table: time: 0.32451725006103516
_flatten: after to_flat_index
(30/72) to df.excel
(30/72) after df.excel
(30/72) to merge
(30/72) after merge: groupby: 532 is_invalid: 0
(31/72) filename: 03-01_s-post-temperature.csv name: 03-01_s sheet_name: 03-01_s-post-temperature
[INFO] _process_filename_info: filename: 03-01_s-post-temperature.csv prefix: 03-01_s
(31/72) to df.excel
(31/72) after df.excel
(31/72) to merge
(31/72) after merge: groupby: 532 is_invalid: 0
(32/72) filename: 03-02-post-blood-value.csv name: 03-02 sheet_name: 03-02-post-blood-value
[INFO] _process_filename_info: filename: 03-02-post-blood-value.csv prefix: 03-02
(32/72) to df.excel
(32/72) after df.excel
(32/72) to merge
(32/72) after merge: groupby: 532 is_invalid: 0
(33/72) filename: 03-03-post-imaging.csv name: 03-03 sheet_name: 03-03-post-imaging


[INFO] _process_filename_info: filename: 03-03-post-imaging.csv prefix: 03-03
(33/72) to df.excel
(33/72) after df.excel
(33/72) to merge
(33/72) after merge: groupby: 532 is_invalid: 0
(34/72) filename: 03-04-post-neuro-exam.csv name: 03-04 sheet_name: 03-04-post-neuro-exam
[INFO] _process_filename_info: filename: 03-04-post-neuro-exam.csv prefix: 03-04
(34/72) to df.excel
(34/72) after df.excel
(34/72) to merge
(34/72) after merge: groupby: 532 is_invalid: 0
(35/72) filename: 03-05-mri.csv name: 03-05 sheet_name: 03-05-mri
[INFO] _process_filename_info: filename: 03-05-mri.csv prefix: 03-05
_flatten: to pivot_table: df: 915


_flatten: after pivot_table: time: 1.7250490188598633
_flatten: after to_flat_index
(35/72) to df.excel


(35/72) after df.excel
(35/72) to merge
(35/72) after merge: groupby: 532 is_invalid: 0
(36/72) filename: 03-05_s-mri.csv name: 03-05_s sheet_name: 03-05_s-mri
[INFO] _process_filename_info: filename: 03-05_s-mri.csv prefix: 03-05_s
(36/72) to df.excel
(36/72) after df.excel
(36/72) to merge
(36/72) after merge: groupby: 532 is_invalid: 0
(37/72) filename: 04-01-status.csv name: 04-01 sheet_name: 04-01-status
[INFO] _process_filename_info: filename: 04-01-status.csv prefix: 04-01
(37/72) to df.excel
(37/72) after df.excel
(37/72) to merge
(37/72) after merge: groupby: 532 is_invalid: 0
(38/72) filename: 04-03-cardiovascular.csv name: 04-03 sheet_name: 04-03-cardiovascular
[INFO] _process_filename_info: filename: 04-03-cardiovascular.csv prefix: 04-03
(38/72) to df.excel


(38/72) after df.excel
(38/72) to merge
(38/72) after merge: groupby: 532 is_invalid: 0
(39/72) filename: 04-04-respiratory.csv name: 04-04 sheet_name: 04-04-respiratory
[INFO] _process_filename_info: filename: 04-04-respiratory.csv prefix: 04-04
(39/72) to df.excel
(39/72) after df.excel
(39/72) to merge
(39/72) after merge: groupby: 532 is_invalid: 0
(40/72) filename: 04-05-hematology.csv name: 04-05 sheet_name: 04-05-hematology
[INFO] _process_filename_info: filename: 04-05-hematology.csv prefix: 04-05
(40/72) to df.excel
(40/72) after df.excel
(40/72) to merge
(40/72) after merge: groupby: 532 is_invalid: 0
(41/72) filename: 04-06-metabolic.csv name: 04-06 sheet_name: 04-06-metabolic
[INFO] _process_filename_info: filename: 04-06-metabolic.csv prefix: 04-06
(41/72) to df.excel
(41/72) after df.excel
(41/72) to merge
(41/72) after merge: groupby: 532 is_invalid: 0
(42/72) filename: 04-07-renal.csv name: 04-07 sheet_name: 04-07-renal
[INFO] _process_filename_info: filename: 04-07-ren

[INFO] _process_filename_info: filename: 04-08-gastrointestinal.csv prefix: 04-08
(43/72) to df.excel
(43/72) after df.excel
(43/72) to merge
(43/72) after merge: groupby: 532 is_invalid: 0
(44/72) filename: 04-09-skin.csv name: 04-09 sheet_name: 04-09-skin
[INFO] _process_filename_info: filename: 04-09-skin.csv prefix: 04-09
(44/72) to df.excel
(44/72) after df.excel
(44/72) to merge
(44/72) after merge: groupby: 532 is_invalid: 0
(45/72) filename: 04-10-auditory.csv name: 04-10 sheet_name: 04-10-auditory
[INFO] _process_filename_info: filename: 04-10-auditory.csv prefix: 04-10
(45/72) to df.excel
(45/72) after df.excel
(45/72) to merge
(45/72) after merge: groupby: 532 is_invalid: 0
(46/72) filename: 04-11-surgery.csv name: 04-11 sheet_name: 04-11-surgery
[INFO] _process_filename_info: filename: 04-11-surgery.csv prefix: 04-11
(46/72) to df.excel
(46/72) after df.excel
(46/72) to merge
(46/72) after merge: groupby: 532 is_invalid: 0
(47/72) filename: 04-12-infection.csv name: 04-12 s

(47/72) after df.excel
(47/72) to merge
(47/72) after merge: groupby: 532 is_invalid: 0
(48/72) filename: 04-02-neuro-exam.csv name: 04-02 sheet_name: 04-02-neuro-exam
[INFO] _process_filename_info: filename: 04-02-neuro-exam.csv prefix: 04-02
(48/72) to df.excel
(48/72) after df.excel
(48/72) to merge
(48/72) after merge: groupby: 532 is_invalid: 0
(49/72) filename: 04-13-seizure.csv name: 04-13 sheet_name: 04-13-seizure
[INFO] _process_filename_info: filename: 04-13-seizure.csv prefix: 04-13
(49/72) to df.excel
(49/72) after df.excel
(49/72) to merge
(49/72) after merge: groupby: 532 is_invalid: 0
(50/72) filename: 04-14-birth-defect.csv name: 04-14 sheet_name: 04-14-birth-defect
[INFO] _process_filename_info: filename: 04-14-birth-defect.csv prefix: 04-14
(50/72) to df.excel
(50/72) after df.excel
(50/72) to merge
(50/72) after merge: groupby: 532 is_invalid: 0
(51/72) filename: 04-15-home-therapy.csv name: 04-15 sheet_name: 04-15-home-therapy


[INFO] _process_filename_info: filename: 04-15-home-therapy.csv prefix: 04-15
(51/72) to df.excel
(51/72) after df.excel
(51/72) to merge
(51/72) after merge: groupby: 532 is_invalid: 0
(52/72) filename: 04-16-wdraw-support.csv name: 04-16 sheet_name: 04-16-wdraw-support
[INFO] _process_filename_info: filename: 04-16-wdraw-support.csv prefix: 04-16
(52/72) to df.excel
(52/72) after df.excel
(52/72) to merge
(52/72) after merge: groupby: 532 is_invalid: 0
(53/72) filename: 04-17-limit-care.csv name: 04-17 sheet_name: 04-17-limit-care
[INFO] _process_filename_info: filename: 04-17-limit-care.csv prefix: 04-17
(53/72) to df.excel
(53/72) after df.excel
(53/72) to merge
(53/72) after merge: groupby: 532 is_invalid: 0
(54/72) filename: 20-00-follow-up.csv name: 20-00 sheet_name: 20-00-follow-up
[INFO] _process_filename_info: filename: 20-00-follow-up.csv prefix: 20-00
(54/72) to df.excel
(54/72) after df.excel
(54/72) to merge
(54/72) after merge: groupby: 532 is_invalid: 0
(55/72) filename

[INFO] _process_filename_info: filename: 20-01-ses.csv prefix: 20-01
(55/72) to df.excel
(55/72) after df.excel
(55/72) to merge
(55/72) after merge: groupby: 532 is_invalid: 0
(56/72) filename: 20-02-medical-history.csv name: 20-02 sheet_name: 20-02-medical-history
[INFO] _process_filename_info: filename: 20-02-medical-history.csv prefix: 20-02
(56/72) to df.excel


(56/72) after df.excel
(56/72) to merge
(56/72) after merge: groupby: 532 is_invalid: 0
(57/72) filename: 20-03-medical-exam.csv name: 20-03 sheet_name: 20-03-medical-exam
[INFO] _process_filename_info: filename: 20-03-medical-exam.csv prefix: 20-03
(57/72) to df.excel
(57/72) after df.excel
(57/72) to merge


(57/72) after merge: groupby: 532 is_invalid: 0
(58/72) filename: 20-04-bayley-iii.csv name: 20-04 sheet_name: 20-04-bayley-iii
[INFO] _process_filename_info: filename: 20-04-bayley-iii.csv prefix: 20-04
(58/72) to df.excel
(58/72) after df.excel
(58/72) to merge
(58/72) after merge: groupby: 532 is_invalid: 0
(59/72) filename: 20-05-gmfcs.csv name: 20-05 sheet_name: 20-05-gmfcs
[INFO] _process_filename_info: filename: 20-05-gmfcs.csv prefix: 20-05
(59/72) to df.excel
(59/72) after df.excel
(59/72) to merge
(59/72) after merge: groupby: 532 is_invalid: 0
(60/72) filename: 20-06-status.csv name: 20-06 sheet_name: 20-06-status
[INFO] _process_filename_info: filename: 20-06-status.csv prefix: 20-06
(60/72) to df.excel
(60/72) after df.excel
(60/72) to merge


(60/72) after merge: groupby: 532 is_invalid: 0
(61/72) filename: 20-07-readmission.csv name: 20-07 sheet_name: 20-07-readmission
[INFO] _process_filename_info: filename: 20-07-readmission.csv prefix: 20-07
_flatten: to pivot_table: df: 212
_flatten: after pivot_table: time: 0.030686140060424805
_flatten: after to_flat_index
(61/72) to df.excel
(61/72) after df.excel
(61/72) to merge
(61/72) after merge: groupby: 532 is_invalid: 0
(62/72) filename: 20-08-lost.csv name: 20-08 sheet_name: 20-08-lost
[INFO] _process_filename_info: filename: 20-08-lost.csv prefix: 20-08
(62/72) to df.excel
(62/72) after df.excel
(62/72) to merge
(62/72) after merge: groupby: 532 is_invalid: 0
(63/72) filename: 30-01-secondary.csv name: 30-01 sheet_name: 30-01-secondary


[INFO] _process_filename_info: filename: 30-01-secondary.csv prefix: 30-01
(63/72) to df.excel
(63/72) after df.excel
(63/72) to merge
(63/72) after merge: groupby: 532 is_invalid: 0
(64/72) filename: 30-02-outcome.csv name: 30-02 sheet_name: 30-02-outcome
[INFO] _process_filename_info: filename: 30-02-outcome.csv prefix: 30-02
(64/72) to df.excel
(64/72) after df.excel
(64/72) to merge
(64/72) after merge: groupby: 532 is_invalid: 0
(65/72) filename: 30-03-mri.csv name: 30-03 sheet_name: 30-03-mri
[INFO] _process_filename_info: filename: 30-03-mri.csv prefix: 30-03
(65/72) to df.excel


(65/72) after df.excel
(65/72) to merge
(65/72) after merge: groupby: 532 is_invalid: 0
(66/72) filename: 31-02-total-modified-sarnat.csv name: 31-02 sheet_name: 31-02-total-modified-sarnat
[INFO] _process_filename_info: filename: 31-02-total-modified-sarnat.csv prefix: 31-02
(66/72) to df.excel
(66/72) after df.excel
(66/72) to merge
(66/72) after merge: groupby: 532 is_invalid: 0
(67/72) filename: 31-03-mri.csv name: 31-03 sheet_name: 31-03-mri
[INFO] _process_filename_info: filename: 31-03-mri.csv prefix: 31-03
(67/72) to df.excel
(67/72) after df.excel
(67/72) to merge
(67/72) after merge: groupby: 532 is_invalid: 0
(68/72) filename: 31-04-pse.csv name: 31-04 sheet_name: 31-04-pse
[INFO] _process_filename_info: filename: 31-04-pse.csv prefix: 31-04
(68/72) to df.excel


(68/72) after df.excel
(68/72) to merge
(68/72) after merge: groupby: 532 is_invalid: 0
(69/72) filename: 31-05-disability-level-death.csv name: 31-05 sheet_name: 31-05-disability-level-death
[INFO] _process_filename_info: filename: 31-05-disability-level-death.csv prefix: 31-05
(69/72) to df.excel
(69/72) after df.excel
(69/72) to merge
(69/72) after merge: groupby: 532 is_invalid: 0
(70/72) filename: 31-06-emergency-csection.csv name: 31-06 sheet_name: 31-06-emergency-csection
[INFO] _process_filename_info: filename: 31-06-emergency-csection.csv prefix: 31-06
(70/72) to df.excel
(70/72) after df.excel
(70/72) to merge
(70/72) after merge: groupby: 532 is_invalid: 0
(71/72) filename: 31-07-length-of-stay.csv name: 31-07 sheet_name: 31-07-length-of-stay


[INFO] _process_filename_info: filename: 31-07-length-of-stay.csv prefix: 31-07
(71/72) to df.excel
(71/72) after df.excel
(71/72) to merge
(71/72) after merge: groupby: 532 is_invalid: 0


merged columns: 6131


to excel.close


done
