# Deep Learning Classifier

In [1]:
import pandas as pd

train = pd.read_csv('../train.csv')
test = pd.read_csv('../test.csv')

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input
import optuna

2024-05-08 15:52:04.729820: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Escalar características
scaler = MinMaxScaler()
X_train = scaler.fit_transform(train.drop('nobeyesdad', axis=1))
X_test = scaler.transform(test.drop('nobeyesdad', axis=1))

# Codificar la variable objetivo
y_train = to_categorical(train['nobeyesdad'].astype('category').cat.codes)
y_test = to_categorical(test['nobeyesdad'].astype('category').cat.codes)

In [4]:
from tensorflow.keras.layers import InputLayer

def create_model(trial):
    n_layers = trial.suggest_int("n_layers", 1, 20)
    model = Sequential()
    model.add(InputLayer(input_shape=(X_train.shape[1],)))  # Define solo la forma de entrada
    for i in range(n_layers):
        num_units = trial.suggest_int(f"n_units_l{i}", 5, 200)
        activation = trial.suggest_categorical(f"activation_l{i}", ["relu", "tanh", "sigmoid"])
        dropout = trial.suggest_float(f"dropout_l{i}", 0.1, 0.5)
        model.add(Dense(num_units, activation=activation))
        model.add(Dropout(dropout))
    model.add(Dense(y_train.shape[1], activation="softmax"))
    return model

def objective(trial):
    model = create_model(trial)
    optimizer_options = trial.suggest_categorical("optimizer", ["adam", "sgd", "rmsprop"])
    epochs = trial.suggest_int("epochs", 10, 100)  # Número de épocas
    batch_size = trial.suggest_int("batch_size", 16, 128)  # Tamaño del lote

    model.compile(optimizer=optimizer_options,
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    
    model.fit(X_train, y_train, verbose=0, epochs=epochs, batch_size=batch_size, validation_split=0.2)
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    return accuracy

In [5]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=200)

print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


[I 2024-05-08 15:52:28,644] A new study created in memory with name: no-name-415cd868-d990-4e00-81a0-a3f0302994b4
[I 2024-05-08 15:52:38,230] Trial 0 finished with value: 0.7967289686203003 and parameters: {'n_layers': 5, 'n_units_l0': 86, 'activation_l0': 'relu', 'dropout_l0': 0.40740638356106107, 'n_units_l1': 10, 'activation_l1': 'sigmoid', 'dropout_l1': 0.4873714808319738, 'n_units_l2': 140, 'activation_l2': 'tanh', 'dropout_l2': 0.44311471028566995, 'n_units_l3': 86, 'activation_l3': 'tanh', 'dropout_l3': 0.3750824940585862, 'n_units_l4': 148, 'activation_l4': 'sigmoid', 'dropout_l4': 0.155617946715346, 'optimizer': 'adam', 'epochs': 55, 'batch_size': 20}. Best is trial 0 with value: 0.7967289686203003.
[I 2024-05-08 15:52:46,968] Trial 1 finished with value: 0.17056074738502502 and parameters: {'n_layers': 11, 'n_units_l0': 7, 'activation_l0': 'tanh', 'dropout_l0': 0.24779985951587352, 'n_units_l1': 27, 'activation_l1': 'sigmoid', 'dropout_l1': 0.18399132431600462, 'n_units_l2': 

Best trial:
  Value: 0.9649532437324524
  Params: 
    n_layers: 3
    n_units_l0: 104
    activation_l0: relu
    dropout_l0: 0.12370056120386413
    n_units_l1: 141
    activation_l1: relu
    dropout_l1: 0.12914085872761777
    n_units_l2: 75
    activation_l2: sigmoid
    dropout_l2: 0.17797692797438575
    optimizer: adam
    epochs: 98
    batch_size: 52


In [6]:
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Usar el mejor modelo para predecir las etiquetas del conjunto de test
best_model = create_model(study.best_trial)
best_model.compile(optimizer=study.best_trial.params["optimizer"],
                   loss="categorical_crossentropy",
                   metrics=["accuracy"])
best_model.fit(X_train, y_train, verbose=0, epochs=study.best_trial.params["epochs"],
               batch_size=study.best_trial.params["batch_size"], validation_split=0.2)

# Realizar predicciones
predictions = best_model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

# Obtener etiquetas verdaderas del conjunto de test
true_labels = np.argmax(y_test, axis=1)

# Calcular y mostrar las métricas
print("Accuracy:", accuracy_score(true_labels, predicted_labels))
print("Confusion Matrix:\n", confusion_matrix(true_labels, predicted_labels))
print("Classification Report:\n", classification_report(true_labels, predicted_labels))

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Accuracy: 0.9462616822429907
Confusion Matrix:
 [[63  3  0  0  0  0  0]
 [ 3 52  0  0  0  6  0]
 [ 0  0 71  0  0  0  2]
 [ 0  0  2 61  0  0  0]
 [ 0  0  0  0 54  0  0]
 [ 0  0  0  0  0 51  0]
 [ 0  0  1  0  0  6 53]]
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.95      0.95        66
           1       0.95      0.85      0.90        61
           2       0.96      0.97      0.97        73
           3       1.00      0.97      0.98        63
           4       1.00      1.00      1.00        54
           5       0.81      1.00      0.89        51
           6       0.96      0.88      0.92        60

    accuracy                           0.95       428
   macro avg       0.95      0.95      0.95       428
weighted avg       0.95      0.95      0.95       428

