In [1]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


def load_dataframes(dir="datasets/bests/"):
    datasets = os.listdir(dir)
    datasets = sorted([d for d in datasets if d.endswith(".csv")])

    dfs = []
    for dataset in datasets:
        df = pd.read_csv(dir + dataset, encoding="utf-8")
        df.head()
        dfs.append({"file": dataset, "dataframe": df})

    return dfs


def split_data_from_dataframe(df):
    df = df["dataframe"]
    df_without_class = df.drop(["classe"], axis=1)
    X = df_without_class
    y = df.classe

    # Standardize the features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    return X, y


def split_data_to_train_and_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    return X_train, X_test, y_train, y_test

In [9]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
import time

mlp = MLPClassifier(random_state=42, verbose=0)

# 7 = (10 + 4)/2 (PCA fair)
# 258 = (512 + 4)/2
# 516 = 512 + 4

param_grid = {
    "hidden_layer_sizes": [(7), (258), (258, 128), (516)],
    "activation": ["identity", "logistic", "tanh", "relu"],
    "solver": ["sgd", "adam"],
    "learning_rate_init": [0.0001, 0.01, 0.1],
    "max_iter": [500, 1000],
}

grid_search = GridSearchCV(
    mlp, param_grid, scoring="accuracy", cv=3, n_jobs=-1, verbose=0
)

In [11]:
for df in load_dataframes():
    dataset_name = df["file"].replace('.csv', '')
    print(dataset_name)
    X, y = split_data_from_dataframe(df)
    X_train, X_test, y_train, y_test = split_data_to_train_and_test(X, y)

    start = time.time()
    grid_search.fit(X_train, y_train)
    end = time.time()

    print(f"Time: {end - start}")
    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best score: {grid_search.best_score_}")

    print("")

CNN_16_128_AVG




Time: 648.2262303829193
Best parameters: {'activation': 'logistic', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.01, 'max_iter': 500, 'solver': 'sgd'}
Best score: 0.8637992831541218

CNN_16_128_MAX




Time: 624.8424825668335
Best parameters: {'activation': 'relu', 'hidden_layer_sizes': 258, 'learning_rate_init': 0.1, 'max_iter': 500, 'solver': 'sgd'}
Best score: 0.8691756272401433

CNN_16_256_AVG




Time: 598.2593805789948
Best parameters: {'activation': 'relu', 'hidden_layer_sizes': 516, 'learning_rate_init': 0.01, 'max_iter': 500, 'solver': 'sgd'}
Best score: 0.9139784946236559

CNN_16_256_MAX




Time: 588.1984164714813
Best parameters: {'activation': 'logistic', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.1, 'max_iter': 500, 'solver': 'sgd'}
Best score: 0.9086021505376344

CNN_19_256_AVG




Time: 591.063994884491
Best parameters: {'activation': 'logistic', 'hidden_layer_sizes': 516, 'learning_rate_init': 0.01, 'max_iter': 500, 'solver': 'sgd'}
Best score: 0.924731182795699

CNN_19_256_MAX




Time: 575.0118935108185
Best parameters: {'activation': 'logistic', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.1, 'max_iter': 500, 'solver': 'sgd'}
Best score: 0.9265232974910393

PCA_10_CNN_16_128_AVG




Time: 204.33915424346924
Best parameters: {'activation': 'logistic', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.01, 'max_iter': 500, 'solver': 'sgd'}
Best score: 0.8566308243727598

PCA_10_CNN_16_128_MAX




Time: 193.88879776000977
Best parameters: {'activation': 'identity', 'hidden_layer_sizes': 258, 'learning_rate_init': 0.0001, 'max_iter': 1000, 'solver': 'sgd'}
Best score: 0.8512544802867383

PCA_10_CNN_16_256_AVG




Time: 185.4173982143402
Best parameters: {'activation': 'relu', 'hidden_layer_sizes': 516, 'learning_rate_init': 0.0001, 'max_iter': 500, 'solver': 'adam'}
Best score: 0.913978494623656

PCA_10_CNN_16_256_MAX




Time: 185.12433981895447
Best parameters: {'activation': 'logistic', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.01, 'max_iter': 500, 'solver': 'sgd'}
Best score: 0.9121863799283155

PCA_10_CNN_19_256_AVG




Time: 186.73154616355896
Best parameters: {'activation': 'relu', 'hidden_layer_sizes': 516, 'learning_rate_init': 0.0001, 'max_iter': 500, 'solver': 'adam'}
Best score: 0.9336917562724015

PCA_10_CNN_19_256_MAX




Time: 185.37961435317993
Best parameters: {'activation': 'tanh', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.0001, 'max_iter': 500, 'solver': 'adam'}
Best score: 0.9265232974910393



In [12]:
# print current time with date
from datetime import datetime
print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

15/12/2024 23:23:48


{'activation': 'tanh', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.0001, 'max_iter': 500, 'solver': 'adam'}
{'activation': 'relu', 'hidden_layer_sizes': 516, 'learning_rate_init': 0.0001, 'max_iter': 500, 'solver': 'adam'}
{'activation': 'logistic', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.01, 'max_iter': 500, 'solver': 'sgd'}
{'activation': 'relu', 'hidden_layer_sizes': 516, 'learning_rate_init': 0.0001, 'max_iter': 500, 'solver': 'adam'}
{'activation': 'identity', 'hidden_layer_sizes': 258, 'learning_rate_init': 0.0001, 'max_iter': 1000, 'solver': 'sgd'}
{'activation': 'logistic', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.01, 'max_iter': 500, 'solver': 'sgd'}
{'activation': 'logistic', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.1, 'max_iter': 500, 'solver': 'sgd'}
{'activation': 'logistic', 'hidden_layer_sizes': 516, 'learning_rate_init': 0.01, 'max_iter': 500, 'solver': 'sgd'}
{'activation': 'logistic', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.1, 'max_iter': 500, 'solver': 'sgd'}
{'activation': 'relu', 'hidden_layer_sizes': 516, 'learning_rate_init': 0.01, 'max_iter': 500, 'solver': 'sgd'}
{'activation': 'relu', 'hidden_layer_sizes': 258, 'learning_rate_init': 0.1, 'max_iter': 500, 'solver': 'sgd'}
{'activation': 'logistic', 'hidden_layer_sizes': (258, 128), 'learning_rate_init': 0.01, 'max_iter': 500, 'solver': 'sgd'}



In [1]:
mlp_1 = MLPClassifier(
    hidden_layer_sizes=(258, 128),
    activation="tanh",
    solver="adam",
    learning_rate=0.0001,
    max_iter=500,
)
mlp_2 = MLPClassifier(
    hidden_layer_sizes=516,
    activation="relu",
    solver="adam",
    learning_rate=0.0001,
    max_iter=500,
)
mlp_3 = MLPClassifier(
    hidden_layer_sizes=(258, 128),
    activation="logistic",
    solver="sgd",
    learning_rate=0.01,
    max_iter=500,
)
mlp_4 = MLPClassifier(
    hidden_layer_sizes=516,
    activation="relu",
    solver="adam",
    learning_rate=0.0001,
    max_iter=500,
)
mlp_5 = MLPClassifier(
    hidden_layer_sizes=258,
    activation="identity",
    solver="sgd",
    learning_rate=0.0001,
    max_iter=1000,
)
mlp_6 = MLPClassifier(
    hidden_layer_sizes=(258, 128),
    activation="logistic",
    solver="sgd",
    learning_rate=0.1,
    max_iter=500,
)
mlp_7 = MLPClassifier(
    hidden_layer_sizes=516,
    activation="logistic",
    solver="sgd",
    learning_rate=0.01,
    max_iter=500,
)
mlp_8 = MLPClassifier(
    hidden_layer_sizes=(258, 128),
    activation="logistic",
    solver="sgd",
    learning_rate=0.1,
    max_iter=500,
)
mlp_9 = MLPClassifier(
    hidden_layer_sizes=516,
    activation="relu",
    solver="sgd",
    learning_rate=0.01,
    max_iter=500,
)
mlp_10 = MLPClassifier(
    hidden_layer_sizes=258,
    activation="relu",
    solver="sgd",
    learning_rate=0.1,
    max_iter=500,
)

mlps = [mlp_1, mlp_2, mlp_3, mlp_4, mlp_5, mlp_6, mlp_7, mlp_8, mlp_9, mlp_10]

NameError: name 'MLPClassifier' is not defined