# Optimización de redes neuronales con Adam estocástico

In [14]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

## Consideramos un conjunto de datos con 1000 muestras (Instances) de 5 features y un label

In [15]:
X, y = make_classification(n_samples=1000, n_features=5, 
                           n_informative=2, n_redundant=1, random_state=1)


## Construimos la red neuronal tipo Perceptrón

In [16]:
def sigma(s):
    if s >= 0:
        return 1
    else:
        return 0

In [17]:
def activate(row, weights):
    activation = weights[-1]
    for i in range(len(row)):
        activation += weights[i] * row[i]
    return activation

def predict_row(row, weights):
    activation = activate(row, weights)
    return sigma(activation)

In [18]:
def predict_dataset(X, weights):
    yhats = []
    for row in X:
        yhat = predict_row(row, weights)
        yhats.append(yhat)
    return yhats

In [19]:
n_weights = X.shape[1] + 1
weights = np.random.rand(n_weights)
yhats = predict_dataset(X, weights)
score = accuracy_score(y, yhats)
print(f"Precisicón (Jaccard metrics) del modelo: {score}")

mse = (1/X.shape[0]) * np.linalg.norm(y - yhats) ** 2
print(f"Precisión (error cuadrático medio) del modelo: {mse}")

Precisicón (Jaccard metrics) del modelo: 0.666
Precisión (error cuadrático medio) del modelo: 0.3340000000000001


## Es hora de optimizar (training process)

In [20]:
def random_batch(X, y, batch_size=32):
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]

random_batch(X, y)

(array([[ 0.85283342,  0.97680326, -2.29980571, -1.07105259,  2.72587341],
        [ 0.65601518,  0.75226986, -0.80441364, -1.49485955,  1.7467252 ],
        [-0.85388107,  0.0327583 , -1.210862  ,  0.24224614,  0.864312  ],
        [-1.05060936,  0.70009952, -0.02585751, -1.20942325,  0.87856393],
        [ 2.13640549,  2.14640218,  0.62083926, -0.71464532, -0.02503918],
        [-1.41245885,  0.75116708, -0.26538431,  0.2473977 ,  0.0518357 ],
        [-0.58466839,  0.59794564,  2.80418983,  0.23882885, -2.56802641],
        [-0.0441608 , -0.87724686, -1.83910137, -0.54948664,  1.9624116 ],
        [ 0.54311372,  1.59055673, -1.44675634,  1.40807869,  0.24053746],
        [ 1.23166934,  1.07511569, -0.59402708,  1.18428417, -0.33046892],
        [-0.89094654,  0.11289115,  1.33174619, -0.29216092, -0.93237809],
        [ 1.89128734,  1.81677792,  0.00386983, -0.14695989,  0.10075797],
        [-1.2671073 ,  0.79244077,  1.3736892 , -1.05242305, -0.42988544],
        [ 0.36087787,  0.

In [21]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [22]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.activations import hard_sigmoid
from tensorflow.keras.regularizers import l2

In [23]:
model = Sequential()
model.add(Dense(units=1, input_shape=(5, ), activation=hard_sigmoid, 
                kernel_initializer='glorot_uniform', kernel_regularizer=l2(0.05))
          )

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
from tensorflow import keras

n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Adam(learning_rate=0.01)
loss_fun = keras.losses.MeanSquaredError()
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.MeanAbsoluteError()]

In [25]:
def print_status_bar(iteration, total, loss, metrics=None):
    metrics = " - ".join(["{}: {:.4f}".format(m.name, m.result()) 
                          for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + metrics, end=end)

In [26]:
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train, y_train, batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)
            main_loss = tf.reduce_mean(loss_fun(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    for metric in [mean_loss] + metrics:
        metric.reset_state()
        

Epoch 1/5
800/800 - mean: 0.3361 - mean_absolute_error: 0.4851
800/800 - mean: 0.3361 - mean_absolute_error: 0.4851
Epoch 2/5
800/800 - mean: 0.2358 - mean_absolute_error: 0.4220
800/800 - mean: 0.2358 - mean_absolute_error: 0.4220
Epoch 3/5
800/800 - mean: 0.1975 - mean_absolute_error: 0.3818
800/800 - mean: 0.1975 - mean_absolute_error: 0.3818
Epoch 4/5
800/800 - mean: 0.1866 - mean_absolute_error: 0.3638
800/800 - mean: 0.1866 - mean_absolute_error: 0.3638
Epoch 5/5
800/800 - mean: 0.1726 - mean_absolute_error: 0.3452
800/800 - mean: 0.1726 - mean_absolute_error: 0.3452


In [27]:
print(f"Parámetros del modelo optimizado {model.weights}")

Parámetros del modelo optimizado [<KerasVariable shape=(5, 1), dtype=float32, path=sequential_1/dense_1/kernel>, <KerasVariable shape=(1,), dtype=float32, path=sequential_1/dense_1/bias>]
