In [1]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras import Model
from tensorflow.keras.datasets import mnist
import tensorflow.keras
import tensorflow as tf
import tqdm

In [2]:
%%capture
!pip install wandb

In [20]:
import wandb
from wandb.keras import WandbCallback

wandb.login()

True

In [4]:
#cargamos el dataset y lo partimos en entranamiento y prueba
(x_entrenamiento, y_entrenamiento), (x_prueba, y_prueba) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
#calculamos el numero de clases, deben de ser 10
num_clases = len(np.unique(y_entrenamiento))

In [6]:
#convertimos la variable de las categorias a un vector One Hot
y_entrenamiento = to_categorical(y_entrenamiento)
y_prueba = to_categorical(y_prueba)

In [7]:
# dimensiones de la imagen de entrada
tamanio_imagen = x_entrenamiento.shape[1]

In [8]:
#Hacemos un reescalamiento y normalizamos
x_entrenamiento = np.reshape(x_entrenamiento,[-1, tamanio_imagen, tamanio_imagen, 1])
x_prueba = np.reshape(x_prueba,[-1, tamanio_imagen, tamanio_imagen, 1])
x_entrenamiento = x_entrenamiento.astype('float32') / 255
x_prueba = x_prueba.astype('float32') / 255

In [None]:
#define el modelo

In [11]:
def cnn_model():
    #parametros del modelo
    input_shape = (tamanio_imagen, tamanio_imagen, 1)
    #batch_size  = 128
    kernel_size = 3
    pool_size   = 2
    filters     = 64
    dropout     = 0.2

    inputs = tensorflow.keras.Input(shape=input_shape, name="digits")
    
    x1 = Conv2D(filters=filters,
                kernel_size=kernel_size,
                activation='relu')(inputs)
    
    x2 = MaxPooling2D(pool_size=pool_size)(x1)

    x3 = Conv2D(filters=filters,
                kernel_size=kernel_size,
                activation='relu')(x2)
    
    x4 = MaxPooling2D(pool_size=pool_size)(x3)

    x5 = Conv2D(filters=filters,
                kernel_size=kernel_size,
                activation='relu')(x4)

    x6 = Flatten()(x5)

    x7 = Dropout(dropout)(x6)

    x8 = Dense(num_clases)(x7)

    outputs = Activation('softmax')(x8)

    return Model(inputs=inputs, outputs=outputs)

In [13]:
def train_step(x, y, model, optimizer, loss_fn, train_acc_metric):
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss_value = loss_fn(y, logits)

    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    train_acc_metric.update_state(y, logits)

    return loss_value

In [14]:
def test_step(x, y, model, loss_fn, val_acc_metric):
    val_logits = model(x, training=False)
    loss_value = loss_fn(y, val_logits)
    val_acc_metric.update_state(y, val_logits)

    return loss_value

In [15]:
def train(train_dataset,val_dataset, model,optimizer,loss_fn,train_acc_metric,val_acc_metric,
          epochs=10, 
          log_step=200, 
          val_log_step=50):
  
    for epoch in range(epochs):
        print("\nStart of epoch %d" % (epoch,))

        train_loss = []   
        val_loss = []

        # Iterate over the batches of the dataset
        for step, (x_batch_train, y_batch_train) in tqdm.tqdm(enumerate(train_dataset), total=len(train_dataset)):
            loss_value = train_step(x_batch_train, y_batch_train, 
                                    model, optimizer, 
                                    loss_fn, train_acc_metric)
            train_loss.append(float(loss_value))

        # Run a validation loop at the end of each epoch
        for step, (x_batch_val, y_batch_val) in enumerate(val_dataset):
            val_loss_value = test_step(x_batch_val, y_batch_val, 
                                       model, loss_fn, 
                                       val_acc_metric)
            val_loss.append(float(val_loss_value))
            
        # Display metrics at the end of each epoch
        train_acc = train_acc_metric.result()
        print("Training acc over epoch: %.4f" % (float(train_acc),))

        val_acc = val_acc_metric.result()
        print("Validation acc: %.4f" % (float(val_acc),))

        # Reset metrics at the end of each epoch
        train_acc_metric.reset_states()
        val_acc_metric.reset_states()

        # 3️⃣ log metrics using wandb.log
        wandb.log({'epochs': epoch,
                   'loss': np.mean(train_loss),
                   'acc': float(train_acc), 
                   'val_loss': np.mean(val_loss),
                   'val_acc':float(val_acc)})

In [16]:
sweep_config = {
  'method': 'random', 
  'metric': {
      'name': 'val_loss',
      'goal': 'minimize'
  },
  'early_terminate':{
      'type': 'hyperband',
      'min_iter': 5
  },
  'parameters': {
      'batch_size': {
          'values': [32, 64, 128, 256]
      },
      'learning_rate':{
          'values': [0.01, 0.005, 0.001, 0.0005, 0.0001]
      }
  }
}

In [17]:
def sweep_train(config_defaults=None):
    # Set default values
    config_defaults = {
        "batch_size": 64,
        "learning_rate": 0.01
    }
    # Initialize wandb with a sample project name
    wandb.init(config=config_defaults)  # this gets over-written in the Sweep

    # Specify the other hyperparameters to the configuration, if any
    wandb.config.epochs = 2
    wandb.config.log_step = 20
    wandb.config.val_log_step = 50
    wandb.config.architecture_name = "MLP"
    wandb.config.dataset_name = "MNIST"

    # build input pipeline using tf.data
    train_dataset = tf.data.Dataset.from_tensor_slices((x_entrenamiento, y_entrenamiento))
    train_dataset = (train_dataset.shuffle(buffer_size=1024)
                                  .batch(wandb.config.batch_size)
                                  .prefetch(buffer_size=tf.data.AUTOTUNE))

    val_dataset = tf.data.Dataset.from_tensor_slices((x_prueba, y_prueba))
    val_dataset = (val_dataset.batch(wandb.config.batch_size)
                              .prefetch(buffer_size=tf.data.AUTOTUNE))

    # initialize model
    model = cnn_model()

    # Instantiate an optimizer to train the model.
    optimizer = tensorflow.keras.optimizers.SGD(learning_rate=wandb.config.learning_rate)
    # Instantiate a loss function.
    loss_fn = tensorflow.keras.losses.CategoricalCrossentropy(from_logits=True)

    # Prepare the metrics.
    train_acc_metric = tensorflow.keras.metrics.CategoricalAccuracy()
    val_acc_metric   = tensorflow.keras.metrics.CategoricalAccuracy()

    train(train_dataset,
          val_dataset, 
          model,
          optimizer,
          loss_fn,
          train_acc_metric,
          val_acc_metric,
          epochs=wandb.config.epochs, 
          log_step=wandb.config.log_step, 
          val_log_step=wandb.config.val_log_step)

In [18]:
sweep_id = wandb.sweep(sweep_config, project="sweeps-tensorflow")

Create sweep with ID: w57jat9c
Sweep URL: https://wandb.ai/caoba/sweeps-tensorflow/sweeps/w57jat9c


In [19]:
wandb.agent(sweep_id, function=sweep_train, count=10)

[34m[1mwandb[0m: Agent Starting Run: hchr900o with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: Currently logged in as: [33mcaoba[0m (use `wandb login --relogin` to force relogin)



Start of epoch 0


  return dispatch_target(*args, **kwargs)
100%|██████████| 469/469 [01:28<00:00,  5.30it/s]


Training acc over epoch: 0.1184
Validation acc: 0.1579

Start of epoch 1


100%|██████████| 469/469 [01:29<00:00,  5.22it/s]


Training acc over epoch: 0.1874
Validation acc: 0.2503


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁█
epochs,▁█
loss,█▁
val_acc,▁█
val_loss,█▁

0,1
acc,0.18742
epochs,1.0
loss,2.27137
val_acc,0.2503
val_loss,2.26101


[34m[1mwandb[0m: Agent Starting Run: ap7ztwtt with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001



Start of epoch 0


100%|██████████| 1875/1875 [01:58<00:00, 15.79it/s]


Training acc over epoch: 0.1077
Validation acc: 0.1380

Start of epoch 1


100%|██████████| 1875/1875 [01:56<00:00, 16.13it/s]


Training acc over epoch: 0.1766
Validation acc: 0.2742


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁█
epochs,▁█
loss,█▁
val_acc,▁█
val_loss,█▁

0,1
acc,0.1766
epochs,1.0
loss,2.28823
val_acc,0.2742
val_loss,2.28024


[34m[1mwandb[0m: Agent Starting Run: d0p6akv2 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001



Start of epoch 0


100%|██████████| 235/235 [01:21<00:00,  2.87it/s]


Training acc over epoch: 0.0860
Validation acc: 0.0821

Start of epoch 1


100%|██████████| 235/235 [01:22<00:00,  2.86it/s]


Training acc over epoch: 0.0903
Validation acc: 0.0916


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁█
epochs,▁█
loss,█▁
val_acc,▁█
val_loss,█▁

0,1
acc,0.09027
epochs,1.0
loss,2.305
val_acc,0.0916
val_loss,2.30413


[34m[1mwandb[0m: Agent Starting Run: 1lc0nolr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001



Start of epoch 0


100%|██████████| 1875/1875 [01:54<00:00, 16.32it/s]


Training acc over epoch: 0.1154
Validation acc: 0.1254

Start of epoch 1


100%|██████████| 1875/1875 [01:54<00:00, 16.35it/s]


Training acc over epoch: 0.1346
Validation acc: 0.1415


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁█
epochs,▁█
loss,█▁
val_acc,▁█
val_loss,█▁

0,1
acc,0.13463
epochs,1.0
loss,2.28554
val_acc,0.1415
val_loss,2.28021


[34m[1mwandb[0m: Agent Starting Run: q1o5k4s7 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001



Start of epoch 0


100%|██████████| 469/469 [01:28<00:00,  5.30it/s]


Training acc over epoch: 0.1625
Validation acc: 0.2870

Start of epoch 1


100%|██████████| 469/469 [01:30<00:00,  5.20it/s]


Training acc over epoch: 0.2561
Validation acc: 0.3928


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁█
epochs,▁█
loss,█▁
val_acc,▁█
val_loss,█▁

0,1
acc,0.25613
epochs,1.0
loss,2.27261
val_acc,0.3928
val_loss,2.25652


[34m[1mwandb[0m: Agent Starting Run: xl2t0d4l with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001



Start of epoch 0


100%|██████████| 938/938 [01:38<00:00,  9.48it/s]


Training acc over epoch: 0.0791
Validation acc: 0.0754

Start of epoch 1


100%|██████████| 938/938 [01:41<00:00,  9.25it/s]


Training acc over epoch: 0.0962
Validation acc: 0.0964


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁█
epochs,▁█
loss,█▁
val_acc,▁█
val_loss,█▁

0,1
acc,0.09622
epochs,1.0
loss,2.2983
val_acc,0.0964
val_loss,2.29445


[34m[1mwandb[0m: Agent Starting Run: xkvqcl6w with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.005



Start of epoch 0


100%|██████████| 469/469 [01:29<00:00,  5.23it/s]


Training acc over epoch: 0.3889
Validation acc: 0.7388

Start of epoch 1


100%|██████████| 469/469 [01:30<00:00,  5.17it/s]


Training acc over epoch: 0.7613
Validation acc: 0.8725


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁█
epochs,▁█
loss,█▁
val_acc,▁█
val_loss,█▁

0,1
acc,0.76135
epochs,1.0
loss,0.81002
val_acc,0.8725
val_loss,0.44278


[34m[1mwandb[0m: Agent Starting Run: 7xjji89p with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001



Start of epoch 0


100%|██████████| 469/469 [01:29<00:00,  5.23it/s]


Training acc over epoch: 0.0820
Validation acc: 0.0731

Start of epoch 1


100%|██████████| 469/469 [01:28<00:00,  5.31it/s]


Training acc over epoch: 0.0904
Validation acc: 0.0868


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁█
epochs,▁█
loss,█▁
val_acc,▁█
val_loss,█▁

0,1
acc,0.09038
epochs,1.0
loss,2.29944
val_acc,0.0868
val_loss,2.29776


[34m[1mwandb[0m: Agent Starting Run: mjldwf56 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005



Start of epoch 0


100%|██████████| 469/469 [01:31<00:00,  5.11it/s]


Training acc over epoch: 0.1074
Validation acc: 0.1193

Start of epoch 1


100%|██████████| 469/469 [01:31<00:00,  5.13it/s]


Training acc over epoch: 0.1466
Validation acc: 0.1788


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁█
epochs,▁█
loss,█▁
val_acc,▁█
val_loss,█▁

0,1
acc,0.14657
epochs,1.0
loss,2.28053
val_acc,0.1788
val_loss,2.26982


[34m[1mwandb[0m: Agent Starting Run: vfttimv0 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.005



Start of epoch 0


100%|██████████| 1875/1875 [01:58<00:00, 15.83it/s]


Training acc over epoch: 0.7244
Validation acc: 0.9187

Start of epoch 1


100%|██████████| 1875/1875 [01:57<00:00, 16.02it/s]


Training acc over epoch: 0.9181
Validation acc: 0.9504


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
acc,▁█
epochs,▁█
loss,█▁
val_acc,▁█
val_loss,█▁

0,1
acc,0.91813
epochs,1.0
loss,0.26714
val_acc,0.9504
val_loss,0.16608
