In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

In [None]:
# Names of the datasets to load; the loading cell looks for one
# 'results_<name>.csv' file per entry inside the directory `p`.
data_list = [
    'christensen', 'sun', 'alon', 'nakayama', 'tian', 'chin', 'chowdary',
    'subramanian', 'chiaretti', 'golub', 'shipp', 'su', 'gordon', 'khan',
    'singh', 'gravier', 'borovecki', 'sorlie', 'west', 'yeoh', 'burczynski',
    'pomeroy',
]

# Long-form wrapper names. NOTE(review): not referenced by any cell visible
# in this notebook section -- confirm whether it is still needed.
wrapper_list = ['Decision Tree', 'k-Nearest Neighbor', 'Naive Bayes', 'Support Vector Machine']

# Directory that holds the merged result CSVs. The historical, machine-specific
# location stays as the default so existing runs are unchanged; set the
# MERGED_DATA_DIR environment variable to point elsewhere without editing
# the notebook.
PATH_TEST = os.environ.get(
    'MERGED_DATA_DIR',
    '/Users/erirosv/fun/Master-Thesis/plotting-result/version2/merged-data',
)
p = os.path.abspath(PATH_TEST)


In [None]:
# Factors multiplied into 'score_mean' after it is derived from the CV error.
# NOTE(review): the origin of these two factors is not visible in this
# notebook, and SPFSR rows get a different factor than every other method --
# confirm both values are intended before relying on the resulting plots.
OTHER_METHODS_SCALE = 0.827
SPFSR_SCALE = 0.894

datasets = []
for dataset_name in data_list:
    # The pattern is built from a plain dataset name (no wildcard), so glob
    # acts as an existence check: a missing file is skipped without any message.
    for csv_file in glob.glob(os.path.join(p, f'results_{dataset_name}.csv')):
        dataset = pd.read_csv(csv_file)

        # Accuracy in percent; unparseable 'cv_error_mean' values become NaN.
        cv_error = pd.to_numeric(dataset['cv_error_mean'], errors='coerce')
        dataset['score_mean'] = 100 * (1 - cv_error)

        # Scale the rows of every non-SPFSR method first, then the SPFSR rows.
        is_spfsr = dataset['fs_method'] == 'SPFSR'
        dataset.loc[~is_spfsr, 'score_mean'] *= OTHER_METHODS_SCALE
        dataset.loc[is_spfsr, 'score_mean'] *= SPFSR_SCALE

        datasets.append(dataset)


# Preview the second loaded dataset.
datasets[1].head()

In [None]:
# Distinct feature-selection methods and wrapper codes found in the first
# loaded dataset.
first_dataset = datasets[0]
fs_methods = first_dataset['fs_method'].unique()
fs_wrapper = first_dataset['_wrapper'].unique()

print(fs_methods, fs_wrapper, sep='\n')

In [None]:
# Preview the first five rows of the first loaded dataset.
datasets[0].head(n=5)

In [None]:
# NOTE(review): this cell recomputes and prints the same unique values as an
# earlier cell in this notebook; consider keeping only one of the two.
fs_methods, fs_wrapper = (
    datasets[0][column].unique() for column in ('fs_method', '_wrapper')
)

for unique_values in (fs_methods, fs_wrapper):
    print(unique_values)

In [None]:
# Empty placeholder frames, one per wrapper code.
# NOTE(review): all four names are reassigned by the pd.concat calls in the
# cell that follows, so these placeholders only matter if that cell is skipped.
DT, KNN, SVM, NB = (pd.DataFrame() for _ in range(4))

In [None]:
# Per-wrapper collections of row subsets, gathered across all loaded datasets.
matching_DT = []
matching_SVM = []
matching_NB = []
matching_KNN = []

# Wrapper code (as stored in the '_wrapper' column) -> list that receives its
# rows. Insertion order is the order in which the codes are checked.
rows_by_wrapper = {
    'DT': matching_DT,
    'SVM': matching_SVM,
    'NB': matching_NB,
    'KNN': matching_KNN,
}

for dataset in datasets:
    if '_wrapper' not in dataset.columns:
        continue

    for wrapper_code, matched_rows in rows_by_wrapper.items():
        if wrapper_code not in dataset['_wrapper'].values:
            continue
        # A dataset containing any PCA row is dropped as a whole: nothing is
        # collected from it for this or any later wrapper code.
        # NOTE(review): this excludes the entire dataset, not just its PCA
        # rows -- confirm that is the intended filter.
        if (dataset['fs_method'] == 'PCA').any():
            break
        matched_rows.append(dataset[dataset['_wrapper'] == wrapper_code])


def _concat_or_empty(frames):
    """Concatenate ``frames`` with a fresh index, or return an empty DataFrame.

    ``pd.concat`` raises ``ValueError`` when given an empty list, which would
    happen here if no dataset contributed rows for a wrapper.
    """
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


DT = _concat_or_empty(matching_DT)
SVM = _concat_or_empty(matching_SVM)
NB = _concat_or_empty(matching_NB)
KNN = _concat_or_empty(matching_KNN)

In [None]:
# Output directory for the saved figures, located inside the current working
# directory.
new_folder_name = 'box_plots'
current_folder = os.getcwd()
new_folder_path = os.path.join(current_folder, new_folder_name)
# exist_ok=True keeps the cell safe to re-run and avoids the gap between
# checking for the folder and creating it.
os.makedirs(new_folder_path, exist_ok=True)

In [None]:
def plot_boxplot_and_save(data, title, folder_path):
    """Draw a box plot of ``score_mean`` per ``fs_method``, save it and show it.

    Boxes are ordered left to right by ascending median ``score_mean`` and
    coloured by ``_wrapper``; outlier points are not drawn and the legend is
    hidden. The y-axis is fixed to the range 0-100. The figure is written to
    ``folder_path`` as ``<title>_boxplot.png`` (300 dpi) and
    ``<title>_boxplot.eps`` and is then displayed.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``fs_method``, ``score_mean`` and
        ``_wrapper``.
    title : str
        Plot title; also used as the prefix of both output file names.
    folder_path : str
        Directory that receives the output files. It is created if missing.

    Returns
    -------
    None
    """
    FONT_SIZE = 20
    # Ascending median score decides the left-to-right order of the boxes.
    order = data.groupby('fs_method')['score_mean'].median().sort_values().index

    fig, ax = plt.subplots(figsize=(8.5, 4.25))
    sns.boxplot(
        data=data,
        x='fs_method',
        y='score_mean',
        hue='_wrapper',
        palette='Set2',
        showfliers=False,
        order=order,
        ax=ax,
    )
    ax.set_xlabel('')
    ax.set_ylabel('Accuracy (%)', fontsize=FONT_SIZE)
    ax.set_title(title, fontsize=FONT_SIZE)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    # The hue legend is built and immediately hidden.
    ax.legend(title='', loc='upper right').set_visible(False)
    ax.set_ylim(0, 100)
    # Compute the layout only after the final limits and labels are in place,
    # so the margins fit the tick labels that are actually drawn.
    fig.tight_layout()

    # Make sure the target folder exists before saving; an empty folder_path
    # means "current directory" and needs no creation.
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)

    # Save the plot in the specified folder as PNG and EPS
    png_path = os.path.join(folder_path, f"{title}_boxplot.png")
    eps_path = os.path.join(folder_path, f"{title}_boxplot.eps")
    fig.savefig(png_path, format='png', dpi=300)
    fig.savefig(eps_path, format='eps')

    # Display the plot, then release the figure so repeated calls do not
    # accumulate open figures.
    plt.show()
    plt.close(fig)


In [None]:
# Render and save one box plot per wrapper frame, in this fixed order; the
# label is used both as plot title and as file-name prefix.
for wrapper_frame, wrapper_label in ((DT, 'DT'), (SVM, 'SVM'), (NB, 'NB'), (KNN, 'KNN')):
    plot_boxplot_and_save(wrapper_frame, wrapper_label, new_folder_path)