In [23]:
import pandas as pd
import numpy as np
import re
import os

In [24]:
result_dir = 'Results/'
data_dir = 'data/'

# Define parameters
dataset_name = 'epic_ce_ms'
methods = ['gso_1', 'gso_2', 'validation_score', 'one_sem_grd', 'one_sem']
output_excel = f'{dataset_name}_concat_results.xlsx'

list_of_csv = ['epic_ce_ms.csv','epic_lc_ms_neg.csv','epic_lc_ms_pos']

In [25]:
def concatenate_csv_files(directory, dataset_name, methods, output_excel):
    # List all files in the directory
    files = os.listdir(directory)
    
    # Filter for the relevant CSV files
    csv_files = [f for f in files if f.endswith('.csv') and dataset_name in f]
    
    # Initialize an empty list to hold DataFrames
    df_list = []
    
    # Loop over each CSV file
    for csv_file in csv_files:
        # Determine the method from the file name
        for method in methods:
            if method in csv_file:
                # Read the CSV file into a DataFrame
                df = pd.read_csv(os.path.join(directory, csv_file))
                # Add a column for the method
                df['inner_method'] = method
                # Append the DataFrame to the list
                df_list.append(df)
                break
    
    # Concatenate all DataFrames in the list
    concatenated_df = pd.concat(df_list, ignore_index=True)
    
    # Save the concatenated DataFrame to an Excel file
    concatenated_df.to_excel(output_excel, index=False)
    
    return concatenated_df

In [26]:
final_df = concatenate_csv_files(result_dir, dataset_name, methods, output_excel)

In [27]:
final_df.head()

Unnamed: 0,Estimator,Classifier,Outer_matthews_corrcoef,Max,Std,SEM,Median,Hyperparameters,Selected_Features,Numbers_of_Features,Way_of_Selection,Samples_classification_rates,roc_auc,accuracy,balanced_accuracy,recall,precision,f1,inner_method,Inner_Selection
0,CatBoostClassifier,CatBoostClassifier,"[0.8125, 0.8125, 0.6875, 0.8666666666666667, 0...",1.0,0.120862,0.017266,0.75,"[{'n_estimators': 242, 'learning_rate': 0.0942...",,329,full,"[1.0, 0.9, 0.2, 1.0, 0.9, 0.7, 1.0, 0.9, 1.0, ...",[0.828125 0.828125 0.71875 0.98214286 0...,[0.8125 0.8125 0.6875 0.86666667 0...,[0.8125 0.8125 0.6875 0.85714286 0...,[0.875 0.75 0.875 0.71428571 0...,[0.77777778 0.85714286 0.63636364 1. 0...,[0.82352941 0.8 0.73684211 0.83333333 0...,one_sem,
1,ElasticNet,ElasticNet,"[0.6875, 0.875, 0.625, 0.8, 0.6666666666666666...",0.9375,0.112394,0.016056,0.75,"[{'penalty': 'elasticnet', 'C': 0.709989172159...",,329,full,"[1.0, 1.0, 0.0, 1.0, 0.9, 0.9, 1.0, 0.5, 1.0, ...",[0.796875 0.890625 0.671875 0.94642857 0...,[0.6875 0.875 0.625 0.8 0...,[0.6875 0.875 0.625 0.79464286 0...,[0.875 1. 0.75 0.71428571 0...,[0.63636364 0.8 0.6 0.83333333 0...,[0.73684211 0.88888889 0.66666667 0.76923077 0...,one_sem,
2,GaussianNB,GaussianNB,"[0.5625, 0.6875, 0.5, 0.8, 0.4666666666666667,...",0.875,0.12507,0.017867,0.5,[{'var_smoothing': 4.5041112769241066e-08}\n {...,,329,full,"[0.9, 1.0, 0.1, 0.9, 0.0, 1.0, 1.0, 0.1, 0.7, ...",[0.578125 0.609375 0.59375 0.85714286 0...,[0.5625 0.6875 0.5 0.8 0...,[0.5625 0.6875 0.5 0.80357143 0...,[0.625 0.875 0.625 0.85714286 0...,[0.55555556 0.63636364 0.5 0.75 0...,[0.58823529 0.73684211 0.55555556 0.8 0...,one_sem,
3,GaussianProcessClassifier,GaussianProcessClassifier,"[0.6875, 0.5625, 0.5, 0.6, 0.5333333333333333,...",0.8,0.103405,0.014772,0.5,"[{'optimizer': None, 'max_iter_predict': 149, ...",,329,full,"[1.0, 0.8, 0.5, 0.3, 0.8, 0.7, 1.0, 0.0, 0.1, ...",[0.78125 0.65625 0.609375 0.71428571 0...,[0.6875 0.5625 0.5 0.6 0...,[0.6875 0.5625 0.5 0.61607143 0...,[0.75 0.5 0.375 0.85714286 0...,[0.66666667 0.57142857 0.5 0.54545455 0...,[0.70588235 0.53333333 0.42857143 0.66666667 0...,one_sem,
4,GradientBoostingClassifier,GradientBoostingClassifier,"[0.75, 0.75, 0.6875, 0.9333333333333333, 0.866...",1.0,0.105511,0.015073,0.75,"[{'loss': 'log_loss', 'learning_rate': 0.07865...",,329,full,"[0.9, 0.9, 0.2, 0.9, 0.9, 0.8, 1.0, 0.9, 1.0, ...",[0.875 0.875 0.8203125 0.96428571 0...,[0.75 0.75 0.6875 0.93333333 0...,[0.75 0.75 0.6875 0.9375 0...,[0.75 0.625 0.625 1. 0...,[0.75 0.83333333 0.71428571 0.875 0...,[0.75 0.71428571 0.66666667 0.93333333 0...,one_sem,


In [28]:
final_df.inner_method.unique()

array(['one_sem', 'validation_score', 'gso_1', 'one_sem_grd', 'gso_2'],
      dtype=object)

In [29]:
type(final_df)

pandas.core.frame.DataFrame