# Merge Multiple Pandas DataFrames from .csv Files into a Single .csv File

## Default Values for Papermill Parameters

In [1]:
# Papermill parameter defaults; they can be overridden per notebook run.
# Directory that holds the input .csv files (passed through
# util.prepend_experiment_output_path before it is used).
PARAM_DF_DIR = "../outputs"
# Filenames (relative to PARAM_DF_DIR) of the .csv files to merge.
# Set to None to merge every .csv file found in PARAM_DF_DIR.
PARAM_DF_FILENAMES = ["sd_result_set_average_ranking_loss.csv"]
# Filename of the merged .csv; it is written into STAGE_OUTPUT_PATH.
PARAM_MERGE_RESULT_FILENAME = "merged_result_set.csv"

## Import and Set Parameters

In [2]:
from subroc import util
import pandas as pd
import os
import re

# Resolve the input directory through the project helper.
# NOTE(review): presumably this prefixes an experiment output root taken from
# the environment -- confirm against subroc.util.
PARAM_DF_DIR = util.prepend_experiment_output_path(PARAM_DF_DIR)

if PARAM_DF_FILENAMES is None:
    # No explicit file list was given: use every regular file in PARAM_DF_DIR
    # whose name ends in ".csv". The result is sorted because os.listdir()
    # returns entries in arbitrary order, which would otherwise make the row
    # order of the merged result vary between runs.
    # The pattern is a raw string so that "\." is not an invalid string escape.
    PARAM_DF_FILENAMES = sorted(
        filename for filename in os.listdir(PARAM_DF_DIR)
        if os.path.isfile(os.path.join(PARAM_DF_DIR, filename))
        and re.match(r".*\.csv$", filename)
    )

# Directory the merged result is written to; falls back to '../outputs' when
# the STAGE_OUTPUT_PATH environment variable is not set.
STAGE_OUTPUT_PATH = os.environ.get('STAGE_OUTPUT_PATH', '../outputs')

## Read the DataFrames

In [3]:
# Load every configured .csv file into its own DataFrame and record, in the
# extra column "pre_merge_filename", which file each row came from.
dfs = []

for df_filename in PARAM_DF_FILENAMES:
    df = pd.read_csv(os.path.join(PARAM_DF_DIR, df_filename))
    # Assigning a scalar broadcasts it to every row, so there is no need to
    # build a Python list of len(df) copies of the filename first.
    df["pre_merge_filename"] = df_filename
    dfs.append(df)

## Merge the DataFrames

In [4]:
# pd.concat raises an unspecific "No objects to concatenate" ValueError for an
# empty list; fail early with the same exception type but a message that
# names the actual cause.
if not dfs:
    raise ValueError(
        f"No .csv files to merge: PARAM_DF_FILENAMES is empty for directory {PARAM_DF_DIR!r}"
    )

# Stack all loaded frames row-wise; ignore_index=True discards the per-file
# indices and gives the result a fresh consecutive index.
merged_df = pd.concat(dfs, ignore_index=True)

## Write the Merge Result

In [5]:
# Write the merged frame into the stage output directory. The path is built
# with os.path.join, like the other paths in this notebook, instead of a
# hard-coded "/" separator. index=False keeps the synthetic row index out of
# the file.
merged_df.to_csv(os.path.join(STAGE_OUTPUT_PATH, PARAM_MERGE_RESULT_FILENAME), index=False)