In [1]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


def load_dataframes(dir="datasets/bests/"):
    datasets = os.listdir(dir)
    datasets = sorted(
        [d for d in datasets if d.endswith(".csv") and d.startswith("CNN")]
    )

    dfs = []
    for dataset in datasets:
        df = pd.read_csv(dir + dataset, encoding="utf-8")
        df.head()
        dfs.append({"file": dataset, "dataframe": df})

    return dfs


def split_data_from_dataframe(df):
    df = df["dataframe"]
    df_without_class = df.drop(["classe"], axis=1)
    X = df_without_class
    y = df.classe

    # Standardize the features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    return X, y


def split_data_to_train_and_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    return X_train, X_test, y_train, y_test

In [2]:
from sklearn.neural_network import MLPClassifier

mlp_1 = MLPClassifier(
    hidden_layer_sizes=(258, 128),
    activation="tanh",
    solver="adam",
    learning_rate=0.0001,
    max_iter=500,
)
mlp_2 = MLPClassifier(
    hidden_layer_sizes=516,
    activation="relu",
    solver="adam",
    learning_rate=0.0001,
    max_iter=500,
)
mlp_3 = MLPClassifier(
    hidden_layer_sizes=(258, 128),
    activation="logistic",
    solver="sgd",
    learning_rate=0.01,
    max_iter=500,
)
mlp_4 = MLPClassifier(
    hidden_layer_sizes=516,
    activation="relu",
    solver="adam",
    learning_rate=0.0001,
    max_iter=500,
)
mlp_5 = MLPClassifier(
    hidden_layer_sizes=258,
    activation="identity",
    solver="sgd",
    learning_rate=0.0001,
    max_iter=1000,
)
mlp_6 = MLPClassifier(
    hidden_layer_sizes=(258, 128),
    activation="logistic",
    solver="sgd",
    learning_rate=0.1,
    max_iter=500,
)
mlp_7 = MLPClassifier(
    hidden_layer_sizes=516,
    activation="logistic",
    solver="sgd",
    learning_rate=0.01,
    max_iter=500,
)
mlp_8 = MLPClassifier(
    hidden_layer_sizes=(258, 128),
    activation="logistic",
    solver="sgd",
    learning_rate=0.1,
    max_iter=500,
)
mlp_9 = MLPClassifier(
    hidden_layer_sizes=516,
    activation="relu",
    solver="sgd",
    learning_rate=0.01,
    max_iter=500,
)
mlp_10 = MLPClassifier(
    hidden_layer_sizes=258,
    activation="relu",
    solver="sgd",
    learning_rate=0.1,
    max_iter=500,
)

mlps = [mlp_1, mlp_2, mlp_3, mlp_4, mlp_5, mlp_6, mlp_7, mlp_8, mlp_9, mlp_10]

In [3]:
from sklearn import metrics

dfs = load_dataframes()

for df in dfs:
    dataset_name = df["file"].replace(".csv", "")
    print(dataset_name)
    X, y = split_data_from_dataframe(df)
    X_train_70, X_test_30, y_train_70, y_test_30 = split_data_to_train_and_test(X, y)

    results = []
    for mlp in mlps:
        mlp.fit(X_train_70, y_train_70)
        y_pred = mlp.predict(X_test_30)
        acuracia = metrics.accuracy_score(y_test_30, y_pred)
        results.append(f"{acuracia:.3f}".replace(".", ","))

    results_df = pd.DataFrame(
        [results],
        columns=[
            "Config. 1",
            "Config. 2",
            "Config. 3",
            "Config. 4",
            "Config. 5",
            "Config. 6",
            "Config. 7",
            "Config. 8",
            "Config. 9",
            "Config. 10",
        ],
    )
    results_df.to_csv(f"results/MLP_holdout_{dataset_name}.csv", index=False)
    print(results_df)

CNN_16_128_AVG
  Config. 1 Config. 2 Config. 3 Config. 4 Config. 5 Config. 6 Config. 7  \
0     0,854     0,838     0,850     0,858     0,829     0,871     0,863   

  Config. 8 Config. 9 Config. 10 Config. 11 Config. 12  
0     0,858     0,867      0,854      0,846      0,867  
CNN_16_128_MAX
  Config. 1 Config. 2 Config. 3 Config. 4 Config. 5 Config. 6 Config. 7  \
0     0,842     0,825     0,871     0,858     0,817     0,871     0,858   

  Config. 8 Config. 9 Config. 10 Config. 11 Config. 12  
0     0,867     0,854      0,863      0,850      0,854  
CNN_16_256_AVG
  Config. 1 Config. 2 Config. 3 Config. 4 Config. 5 Config. 6 Config. 7  \
0     0,912     0,933     0,938     0,921     0,904     0,942     0,933   

  Config. 8 Config. 9 Config. 10 Config. 11 Config. 12  
0     0,933     0,938      0,917      0,950      0,938  
CNN_16_256_MAX
  Config. 1 Config. 2 Config. 3 Config. 4 Config. 5 Config. 6 Config. 7  \
0     0,904     0,879     0,908     0,896     0,908     0,925     0,91