In [1]:
# Libraries
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [50]:
# Datasets for which result files can be selected
datasets = [
    'aids',
    'students',
    'malware',
]

# Machine Learning models whose results are loaded and compared
models = [
    'LogisticRegression',
    'SVC',
    'RandomForestClassifier',
    'GradientBoostingClassifier',
    'DeepNeuralNetwork',
]

# Experiment options.
# These stay as the strings 'False'/'True' (not booleans): they are
# interpolated into the result file names and compared as strings.
subsampling_options = ['False', 'True']
feature_reduction_options = ['False', 'True']

# Inputs: the dataset to inspect and the CSV column (metric) to plot
DATASET = datasets[0]
METRIC = 'log_loss'


In [47]:
def get_experiment_name(ss_opt, fr_opt):
    """Return the short label for an experiment configuration.

    Args:
        ss_opt: Subsampling option; only the exact string 'True' counts
            as enabled.
        fr_opt: Feature-reduction option; only the exact string 'True'
            counts as enabled.

    Returns:
        'SS & FR' when both options are enabled, 'SS' or 'FR' when only
        one of them is, and 'Full' when neither is.
    """
    # Keyed by (subsampling enabled, feature reduction enabled).
    labels = {
        (True, True): 'SS & FR',
        (True, False): 'SS',
        (False, True): 'FR',
        (False, False): 'Full',
    }
    return labels[(ss_opt == 'True', fr_opt == 'True')]

Read CSVs

In [52]:
# Collect the METRIC column of every result file, keyed by the file's base
# name: <DATASET>_<subsampling>_<feature reduction>_<model>.
results = dict()
for model in models:
    for ss_opt in subsampling_options:
        for fr_opt in feature_reduction_options:
            filename = f'{DATASET}_{ss_opt}_{fr_opt}_{model}'
            # One CSV per (model, subsampling, feature-reduction) combination.
            path = f'../results/{filename}.csv'
            df = pd.read_csv(path)
            # NOTE(review): if METRIC selects a single column (a Series),
            # the transpose leaves it unchanged.
            results[filename] = df[METRIC].T


Plot

In [55]:
# Grouped box plot of METRIC: one trace per experiment configuration
# (subsampling x feature reduction), one box per model within each trace.
fig = go.Figure()

for ss_opt in subsampling_options:
    for fr_opt in feature_reduction_options:
        x, y = [], []
        for model in models:
            filename = f'{DATASET}_{ss_opt}_{fr_opt}_{model}'
            values = list(results[filename])
            # Label every value with its model using this result's own
            # length. This avoids relying on a `filename` left over from
            # another cell and keeps x and y aligned even when result
            # files contain different numbers of rows.
            x.extend([model] * len(values))
            y.extend(values)
        fig.add_trace(go.Box(y=y, x=x, name=get_experiment_name(ss_opt, fr_opt)))

fig.update_layout(
    title=METRIC,
    yaxis_title=METRIC,
    boxmode='group'  # place the boxes of the different traces side by side per model
)
fig.show()