In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from datetime import datetime
from IPython.display import clear_output
from itertools import product
from src.data.handlers import DataLoader
from src.models.regression import MeanSquaredErrorWithSoftConstraint, NeuralNet
from src.utils import denorm

In [2]:
def convert_K_to_XY(outputs, inputs):
    """Turn per-column ratios plus a trailing fraction into two arrays ``(X, Y)``.

    The last column of ``outputs`` is treated as a fraction ``V`` and all
    preceding columns as ratios ``K``; every column of ``inputs`` except the
    last two is taken as ``Z``. The result is ``X = Z / ((1 - V) + V * K)`` and
    ``Y = K * X``, evaluated element-wise with ``V`` broadcast across columns.

    NOTE(review): the names suggest K-values, vapour fraction and feed
    composition of a flash calculation -- confirm against the data pipeline.

    Parameters
    ----------
    outputs : 2-D array-like supporting ``[:, slice]`` indexing
        Columns ``[:-1]`` are ``K``; column ``[-1:]`` is ``V``.
    inputs : 2-D array-like supporting ``[:, slice]`` indexing
        Columns ``[:-2]`` are ``Z``; the last two columns are ignored here.

    Returns
    -------
    tuple
        ``(X, Y)``, each with the same shape as ``K``.
    """
    k_vals, v_frac = outputs[:, :-1], outputs[:, -1:]
    z_cols = inputs[:, :-2]

    # Shared denominator; (1 - V) is the complementary fraction.
    denominator = (1 - v_frac) + v_frac * k_vals
    x_out = z_cols / denominator
    return x_out, k_vals * x_out


def train_model(datasets, minmax, log=False, **kwargs):
    """Train a ``NeuralNet`` on the first cross-validation fold and summarise it.

    Only the first ``(train, valid, minmax)`` triple is used; the remaining
    folds are ignored (the previous implementation looped over the folds but
    unconditionally broke out after the first one).

    Training is a custom ``tf.GradientTape`` loop with fixed settings:
    ReLU activation, batch size 32, 200 epochs, Adam with learning rate 1e-3.
    The loss is ``MeanSquaredErrorWithSoftConstraint(lambda_=kwargs["lambda"])``.

    Parameters
    ----------
    datasets : mapping
        Must provide ``datasets["train"]`` and ``datasets["valid"]``: parallel
        iterables of folds, each fold indexable by ``"features"`` and
        ``"targets"``.
    minmax : iterable
        One ``(min_vals, max_vals)`` pair per fold; forwarded to the loss and
        to ``denorm``.
    log : bool, default False
        When true, print train/valid loss every 100 epochs.
    **kwargs
        Must contain ``"hidden_units"`` (forwarded to ``NeuralNet``) and
        ``"lambda"`` (soft-constraint weight). All kwargs are echoed back in
        the returned dict.

    Returns
    -------
    dict
        ``kwargs`` plus ``"train_losses"`` / ``"valid_losses"`` (one float per
        epoch) and ``"summ_xi_hat"`` / ``"summ_yi_hat"`` (mean over validation
        rows of the row-wise sums of the two arrays from ``convert_K_to_XY``).

    Raises
    ------
    KeyError
        If ``"hidden_units"`` or ``"lambda"`` is missing from ``kwargs``.
    ValueError
        If there is no fold to train on.
    """
    hidden_units = kwargs["hidden_units"]
    lambda_ = kwargs["lambda"]

    activation = "relu"
    batch_size = 32
    epochs = 200
    lr = 0.001

    # NOTE (translated from Portuguese): "10% of the epochs with the constraint".
    # That schedule is not implemented here: the same lambda_ is used every epoch.

    train_log = "Epoch: {:04d}, train loss: {:.5f}, valid loss: {:.5f}"

    # Only the first fold is trained; fail loudly if there is none instead of
    # hitting an unbound local further down.
    first_fold = next(zip(datasets["train"], datasets["valid"], minmax), None)
    if first_fold is None:
        raise ValueError("train_model needs at least one cross-validation fold")
    train, valid, (min_vals, max_vals) = first_fold

    min_vals = tf.convert_to_tensor(min_vals, dtype=tf.float32)
    max_vals = tf.convert_to_tensor(max_vals, dtype=tf.float32)
    x_train = tf.convert_to_tensor(train["features"], dtype=tf.float32)
    y_train = tf.convert_to_tensor(train["targets"], dtype=tf.float32)
    x_valid = tf.convert_to_tensor(valid["features"], dtype=tf.float32)
    y_valid = tf.convert_to_tensor(valid["targets"], dtype=tf.float32)

    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)

    model = NeuralNet(hidden_units, activation)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    loss_func = MeanSquaredErrorWithSoftConstraint(lambda_=lambda_)

    train_losses, valid_losses = [], []
    for epoch in range(epochs):
        for x_batch_train, y_batch_train in train_dataset:
            # Record the forward pass so the tape can differentiate the loss.
            with tf.GradientTape() as tape:
                y_hat = model(x_batch_train, training=True)
                loss_val = loss_func(y_batch_train, y_hat, x_batch_train, min_vals, max_vals)

            # d(loss)/d(weights), then one optimizer step.
            grads = tape.gradient(loss_val, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Full-set losses after the epoch's updates.
        train_loss = float(loss_func(y_train, model(x_train), x_train, min_vals, max_vals))
        valid_loss = float(loss_func(y_valid, model(x_valid), x_valid, min_vals, max_vals))
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)

        if log and (epoch + 1) % 100 == 0:
            print(train_log.format(epoch + 1, train_loss, valid_loss))

    y_hat_valid = model(x_valid)

    # NOTE(review): predictions go through `denorm` but x_valid is passed as-is;
    # confirm the features are already on the scale convert_K_to_XY expects.
    y_pred = denorm(y_hat_valid, min_vals, max_vals)
    xi_pred, yi_pred = convert_K_to_XY(y_pred, x_valid)

    return {
        **kwargs,
        "train_losses": train_losses,
        "valid_losses": valid_losses,
        "summ_xi_hat": xi_pred.numpy().sum(axis=-1).mean(),
        "summ_yi_hat": yi_pred.numpy().sum(axis=-1).mean(),
    }

In [None]:
# Load the cross-validation folds once; every configuration below reuses them.
dl = DataLoader()
datasets, minmax = dl.load_cross_validation_datasets(problem="regression", samples_per_composition=30)

# Search grid. Key order matters: tuples are (hidden_layers, hidden_units, lambda).
params = {"hidden_layers": [3, 4, 5, 6, 7], "hidden_units": [128, 256, 512], "lambda": [0.0, 1e-3, 1e-2, 1e-1]}

hyperparameters = list(product(*params.values()))
results = []
# One row per configuration: column 0 holds "summ_xi_hat", column 1 "summ_yi_hat".
xy = np.zeros((len(hyperparameters), 2))

for i, (n_layers, n_units, lambda_) in enumerate(hyperparameters):
    # train_model expects one width per hidden layer.
    hparams = {"hidden_units": [n_units] * n_layers, "lambda": lambda_}

    start = datetime.now()
    run_summary = train_model(datasets, minmax, **hparams)
    end = datetime.now()

    results.append(run_summary)
    xy[i] = run_summary["summ_xi_hat"], run_summary["summ_yi_hat"]

    # wait=True defers the clear until the next output, so the latest line stays visible.
    print(f"training model: {i+1}/{len(hyperparameters)}, elapsed time: {end - start}")
    clear_output(wait=True)

In [None]:
# Order the runs by lambda (column 2), then layer count (column 0), then layer
# width (column 1). The sorted frame keeps the original positional index, which
# is what `xy` and `results` are addressed by.
hyperparameters = pd.DataFrame(hyperparameters).sort_values([2, 0, 1])
sorted_idx = hyperparameters.index
positions = np.arange(len(hyperparameters))

# Final-epoch validation loss of every run.
losses = np.array([r["valid_losses"][-1] for r in results])

# Contiguous blocks of equal lambda along the sorted x-axis, derived from the
# data so the shading and labels always match the grid that was actually run.
lambda_sorted = hyperparameters[2].to_numpy()
block_starts = np.flatnonzero(np.r_[True, lambda_sorted[1:] != lambda_sorted[:-1]])
block_ends = np.r_[block_starts[1:], len(lambda_sorted)]

f, axs = plt.subplots(2, 1, figsize=(12, 5), sharex=True)

# Raw strings: "\s" and "\w" are not valid Python escape sequences.
axs[0].plot(positions, xy[sorted_idx, 0], "o-", label=r"$\sum \widehat{x_i}$")
axs[0].plot(positions, xy[sorted_idx, 1], "o-", label=r"$\sum \widehat{y_i}$")
axs[0].axhline(1.0, ls="--")
axs[0].set_ylabel("mean row sum")

for block, (start, end) in enumerate(zip(block_starts, block_ends)):
    # Shade every other lambda block on both panels to separate the groups.
    if block % 2 == 1:
        for ax in axs:
            ax.axvspan(start - 0.5, end - 0.5, alpha=0.2)
    # x in data coordinates, y as a fraction of the axes height, so the label
    # stays inside the panel whatever the y-range turns out to be.
    axs[0].text(
        start - 0.2,
        0.05,
        rf"$\lambda = {lambda_sorted[start]:g}$",
        fontsize=14,
        transform=axs[0].get_xaxis_transform(),
    )
axs[0].legend()

axs[1].plot(positions, losses[sorted_idx], "o-", label="loss")
axs[1].set_xticks(positions, hyperparameters.to_records(index=False), rotation="vertical")
axs[1].set_xlabel("(hidden layers, units per layer, lambda)")
axs[1].set_ylabel("final valid loss")
axs[1].legend()

plt.subplots_adjust(hspace=0.1)
plt.show()

In [None]:
# Learning curves of a single grid-search run. `model_idx` indexes `results`
# (negative values count from the end of the list).
model_idx = -3
train_losses = results[model_idx]["train_losses"]
valid_losses = results[model_idx]["valid_losses"]

# `plt` and `np` come from the import cell at the top of the notebook.
f, ax = plt.subplots(figsize=(6, 4))
ax.plot(np.arange(len(train_losses)), train_losses, label="train")
ax.plot(np.arange(len(valid_losses)), valid_losses, label="valid")
ax.set_xlabel("epoch")
ax.set_ylabel("loss")
ax.set_title("Training history")
ax.grid()

ax.legend()
plt.show()