# Deep Learning Classifier

In [9]:
import pandas as pd

train = pd.read_csv('../train.csv')
test = pd.read_csv('../test.csv')

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input
import optuna

In [11]:
train['height'] = train['height'] / 100 #pasar_height a cm
test['height'] = test['height'] / 100 #pasar_height a cm

# Ahora calcula el BMI:
train['bmi'] = train['weight'] / (train['height']**2)
test['bmi'] = test['weight'] / (test['height']**2)

In [12]:
# Escalar características
scaler = MinMaxScaler()
X_train = scaler.fit_transform(train.drop('nobeyesdad', axis=1))
X_test = scaler.transform(test.drop('nobeyesdad', axis=1))

# Codificar la variable objetivo
y_train = to_categorical(train['nobeyesdad'].astype('category').cat.codes)
y_test = to_categorical(test['nobeyesdad'].astype('category').cat.codes)

In [13]:
from tensorflow.keras.layers import InputLayer

def create_model(trial):
    n_layers = trial.suggest_int("n_layers", 1, 20)
    model = Sequential()
    model.add(InputLayer(input_shape=(X_train.shape[1],)))  # Define solo la forma de entrada
    for i in range(n_layers):
        num_units = trial.suggest_int(f"n_units_l{i}", 5, 200)
        activation = trial.suggest_categorical(f"activation_l{i}", ["relu", "tanh", "sigmoid"])
        dropout = trial.suggest_float(f"dropout_l{i}", 0.1, 0.5)
        model.add(Dense(num_units, activation=activation))
        model.add(Dropout(dropout))
    model.add(Dense(y_train.shape[1], activation="softmax"))
    return model

def objective(trial):
    model = create_model(trial)
    optimizer_options = trial.suggest_categorical("optimizer", ["adam", "sgd", "rmsprop"])
    epochs = trial.suggest_int("epochs", 10, 100)  # Número de épocas
    batch_size = trial.suggest_int("batch_size", 16, 128)  # Tamaño del lote

    model.compile(optimizer=optimizer_options,
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    
    model.fit(X_train, y_train, verbose=0, epochs=epochs, batch_size=batch_size, validation_split=0.2)
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    return accuracy

In [14]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=200)

print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


[I 2024-06-01 13:00:47,506] A new study created in memory with name: no-name-cfc7f64a-0c1b-4327-b8c2-6f89e9db1b93
[I 2024-06-01 13:00:53,473] Trial 0 finished with value: 0.21962617337703705 and parameters: {'n_layers': 4, 'n_units_l0': 40, 'activation_l0': 'relu', 'dropout_l0': 0.24155776764766537, 'n_units_l1': 97, 'activation_l1': 'tanh', 'dropout_l1': 0.1264340857510159, 'n_units_l2': 93, 'activation_l2': 'sigmoid', 'dropout_l2': 0.24739336889133112, 'n_units_l3': 55, 'activation_l3': 'relu', 'dropout_l3': 0.1420851910724497, 'optimizer': 'sgd', 'epochs': 90, 'batch_size': 108}. Best is trial 0 with value: 0.21962617337703705.
[I 2024-06-01 13:01:03,907] Trial 1 finished with value: 0.17056074738502502 and parameters: {'n_layers': 19, 'n_units_l0': 94, 'activation_l0': 'sigmoid', 'dropout_l0': 0.4540031890236138, 'n_units_l1': 76, 'activation_l1': 'relu', 'dropout_l1': 0.13651817161751167, 'n_units_l2': 36, 'activation_l2': 'sigmoid', 'dropout_l2': 0.12303243730099137, 'n_units_l3'

Best trial:
  Value: 0.9696261882781982
  Params: 
    n_layers: 2
    n_units_l0: 186
    activation_l0: tanh
    dropout_l0: 0.47815409456046987
    n_units_l1: 192
    activation_l1: tanh
    dropout_l1: 0.23242473851478274
    optimizer: rmsprop
    epochs: 98
    batch_size: 38


In [16]:
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Usar el mejor modelo para predecir las etiquetas del conjunto de test
best_model = create_model(study.best_trial)
best_model.compile(optimizer=study.best_trial.params["optimizer"],
                   loss="categorical_crossentropy",
                   metrics=["accuracy"])
best_model.fit(X_train, y_train, verbose=0, epochs=study.best_trial.params["epochs"],
               batch_size=study.best_trial.params["batch_size"], validation_split=0.2)

# Realizar predicciones
predictions = best_model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

# Obtener etiquetas verdaderas del conjunto de test
true_labels = np.argmax(y_test, axis=1)

# Calcular y mostrar las métricas
print("Accuracy:", accuracy_score(true_labels, predicted_labels))
print("Confusion Matrix:\n", confusion_matrix(true_labels, predicted_labels))
print("Classification Report:\n", classification_report(true_labels, predicted_labels))



[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Accuracy: 0.9462616822429907
Confusion Matrix:
 [[64  2  0  0  0  0  0]
 [ 5 51  0  0  0  5  0]
 [ 0  0 69  4  0  0  0]
 [ 0  0  0 63  0  0  0]
 [ 0  0  0  0 54  0  0]
 [ 0  0  0  0  0 50  1]
 [ 0  0  2  0  0  4 54]]
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.97      0.95        66
           1       0.96      0.84      0.89        61
           2       0.97      0.95      0.96        73
           3       0.94      1.00      0.97        63
           4       1.00      1.00      1.00        54
           5       0.85      0.98      0.91        51
           6       0.98      0.90      0.94        60

    accuracy                           0.95       428
   macro avg       0.95      0.95      0.95       428
weighted avg       0.95      0.95      0.95       428

