In [1]:
import tensorflow as tf
import keras
import random
from pathlib import Path 
from time import strftime 

2026-02-05 22:16:17.640763: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-02-05 22:16:17.649450: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2026-02-05 22:16:17.660265: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2026-02-05 22:16:17.663769: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2026-02-05 22:16:17.672008: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# Load the dataset
(X_train_all, y_train_all), (X_test, y_test) = keras.datasets.cifar10.load_data()

X_valid, y_valid = X_train_all[-5000:], y_train_all[-5000:]
X_train, y_train = X_train_all[:-5000], y_train_all[:-5000]


In [None]:
X_train.shape

In [None]:
print(X_train[0, 0:32, 0, 0]) # integers between zero and 255 

In [3]:
X_train = (X_train / 255.)
X_valid = (X_valid / 255.) 
X_test = (X_test / 255.)

In [4]:
param_dict = {
    'learning_rate' : [0.0001, 0.0005, 0.001, 0.005, 0.01], 
    'batch_size' : [16, 32, 64, 128], 
    'epochs' : [50, 100, 150, 200], 
    'num_layers' : [2, 4, 6, 8, 10],
    'neurons' : [200, 400, 600],
    'optimizer' : ['adam', 'sgd'],
    'lr_sched' : ['exp', 'poly']
}

In [5]:
## this is the function for the random model
def random_model(neurons=128, num_layers=2):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=[32, 32, 3]))
    model.add(tf.keras.layers.Flatten())
    for _ in range(num_layers):
        model.add(tf.keras.layers.Dense(neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))
    return model

In [None]:
def other_model(neurons):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=[32, 32, 3]))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))
    return model

In [None]:
results2 = []
num_trials = 10
for trial in range(num_trials):

    lr_og = 0.0005
    
    end_learning_rate = 0.01
    decay_steps = 10000
    lr_schedule = keras.optimizers.schedules.PolynomialDecay(
        lr_og,
        decay_steps,
        end_learning_rate,
        power=0.5)
    
    model = other_model(neurons=600) # creating the  model

    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

    
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    
    run_logdir = Path("my_logs/manual2") / f"trial_{trial}"
    tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=100)
    
    history = model.fit(
        X_train, y_train, 
        epochs=200,
        validation_data=(X_valid, y_valid), 
        callbacks=[tensorboard_cb, early_stopping_cb],
        batch_size=128
    )
    
    results2.append({
        'final_val_acc' : max(history.history['val_accuracy']),
        'final_train_acc' : max(history.history['accuracy']),
        'run_id' : trial
    })

In [None]:
results2

In [6]:
def get_hyperparams(param_dict):
    list = []
    for key, values in param_dict.items():
        dict = {key : random.choice(values) for key, values in param_dict.items()}
        return dict

In [None]:
def get_run_logdir(root_logdir="my_logs"):
    return Path(root_logdir) / strftime("run_%Y_%m_%d_%H_%M_%S")

run_logdir = get_run_logdir()

In [7]:
results = []
num_trials = 10
for trial in range(num_trials):
    hps = get_hyperparams(param_dict)
    print(f"Hyperparams for trial {trial}: {hps}")

    lr_og = hps["learning_rate"] # original learning rate for lr schedulers
    lr_sched = hps["lr_sched"] # picking a random lr scheduler
    
    if lr_sched == "exp": 
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
            lr_og,
            decay_steps=100000,
            decay_rate=0.96,
            staircase=True)
    else:
        end_learning_rate = 0.01
        decay_steps = 10000
        lr_schedule = keras.optimizers.schedules.PolynomialDecay(
            lr_og,
            decay_steps,
            end_learning_rate,
            power=0.5)
    
    model = random_model(neurons=hps['neurons'], num_layers=hps["num_layers"]) # creating the random model

    opt = hps["optimizer"] # picking the random optimizer
    if opt == "adam":
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    else:
        optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
    
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    
    run_logdir = Path("my_logs/manual3") / f"trial_{trial}"
    tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=30)
    
    history = model.fit(
        X_train, y_train, 
        epochs=hps['epochs'],
        validation_data=(X_valid, y_valid), 
        callbacks=[tensorboard_cb, early_stopping_cb],
        batch_size=hps['batch_size'],
    )
    
    results.append({
        'hyperparams' : hps,
        'final_val_acc' : max(history.history['val_accuracy']),
        'final_train_acc' : max(history.history['accuracy']),
        'run_id' : trial
    })

Hyperparams for trial 0: {'learning_rate': 0.01, 'batch_size': 64, 'epochs': 150, 'num_layers': 4, 'neurons': 400, 'optimizer': 'sgd', 'lr_sched': 'poly'}


I0000 00:00:1770347806.029380  642763 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1770347806.076564  642763 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1770347806.080371  642763 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1770347806.087598  642763 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

Epoch 1/150


I0000 00:00:1770347807.906991  643367 service.cc:146] XLA service 0x7381ac004800 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1770347807.907012  643367 service.cc:154]   StreamExecutor device (0): NVIDIA RTX A400, Compute Capability 8.6
2026-02-05 22:16:47.918279: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2026-02-05 22:16:47.951941: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 91700





[1m 92/704[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 2ms/step - accuracy: 0.1789 - loss: 2.1887

I0000 00:00:1770347810.457929  643367 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.2790 - loss: 1.9701 - val_accuracy: 0.3570 - val_loss: 1.7794
Epoch 2/150
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3886 - loss: 1.6996 - val_accuracy: 0.4016 - val_loss: 1.6373
Epoch 3/150
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4229 - loss: 1.5960 - val_accuracy: 0.4200 - val_loss: 1.6184
Epoch 4/150
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4419 - loss: 1.5584 - val_accuracy: 0.4306 - val_loss: 1.5653
Epoch 5/150
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4679 - loss: 1.4808 - val_accuracy: 0.4542 - val_loss: 1.5216
Epoch 6/150
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4764 - loss: 1.4522 - val_accuracy: 0.4724 - val_loss: 1.4911
Epoch 7/150
[1m704/704[0m [32m━





[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.1835 - loss: 2.1403 - val_accuracy: 0.3212 - val_loss: 1.8630
Epoch 2/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3346 - loss: 1.8205 - val_accuracy: 0.3380 - val_loss: 1.8600
Epoch 3/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.3825 - loss: 1.7074 - val_accuracy: 0.3884 - val_loss: 1.6776
Epoch 4/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4064 - loss: 1.6306 - val_accuracy: 0.4270 - val_loss: 1.6103
Epoch 5/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4324 - loss: 1.5743 - val_accuracy: 0.4350 - val_loss: 1.5749
Epoch 6/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4457 - loss: 1.5450 - val_accuracy: 0.4476 - val_loss: 1.5359
Epoch 7/100
[1m704/704[0m [32m━




[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.2999 - loss: 1.9336 - val_accuracy: 0.3892 - val_loss: 1.7011
Epoch 2/50
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.4155 - loss: 1.6433 - val_accuracy: 0.4408 - val_loss: 1.5974
Epoch 3/50
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.4482 - loss: 1.5509 - val_accuracy: 0.4590 - val_loss: 1.5274
Epoch 4/50
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.4775 - loss: 1.4801 - val_accuracy: 0.4760 - val_loss: 1.4824
Epoch 5/50
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.4927 - loss: 1.4258 - val_accuracy: 0.4842 - val_loss: 1.4612
Epoch 6/50
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.5110 - loss: 1.3856 - val_accuracy: 0.4810 - val_loss: 1.4817
Epoch 7/50
[1m2813/2813[






[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.2376 - loss: 2.0800 - val_accuracy: 0.3586 - val_loss: 1.7749
Epoch 2/150
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.3739 - loss: 1.7346 - val_accuracy: 0.4002 - val_loss: 1.6696
Epoch 3/150
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.4099 - loss: 1.6381 - val_accuracy: 0.4154 - val_loss: 1.6322
Epoch 4/150
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.4338 - loss: 1.5739 - val_accuracy: 0.4482 - val_loss: 1.5383
Epoch 5/150
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.4504 - loss: 1.5330 - val_accuracy: 0.4294 - val_loss: 1.5767
Epoch 6/150
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.4564 - loss: 1.5176 - val_accuracy: 0.4256 - val_loss: 1.6300
Epoch 7/150
[1m1407/1






[1m345/352[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.1310 - loss: 2.7985









[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 14ms/step - accuracy: 0.1320 - loss: 2.7859 - val_accuracy: 0.2200 - val_loss: 2.0204
Epoch 2/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.2590 - loss: 1.9647 - val_accuracy: 0.3028 - val_loss: 1.8552
Epoch 3/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3084 - loss: 1.8579 - val_accuracy: 0.3232 - val_loss: 1.8335
Epoch 4/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3320 - loss: 1.8099 - val_accuracy: 0.3558 - val_loss: 1.7739
Epoch 5/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3602 - loss: 1.7592 - val_accuracy: 0.3280 - val_loss: 1.8213
Epoch 6/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3609 - loss: 1.7497 - val_accuracy: 0.3858 - val_loss: 1.7376
Epoch 7/50
[1m352/352[0m [32m━━━━━━




[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - accuracy: 0.2156 - loss: 2.1634 - val_accuracy: 0.3228 - val_loss: 1.9110
Epoch 2/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3423 - loss: 1.8670 - val_accuracy: 0.3696 - val_loss: 1.8174
Epoch 3/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3758 - loss: 1.7771 - val_accuracy: 0.3902 - val_loss: 1.7464
Epoch 4/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3979 - loss: 1.7122 - val_accuracy: 0.4042 - val_loss: 1.6971
Epoch 5/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4179 - loss: 1.6568 - val_accuracy: 0.4214 - val_loss: 1.6627
Epoch 6/50
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4318 - loss: 1.6255 - val_accuracy: 0.4250 - val_loss: 1.6224
Epoch 7/50
[1m352/352[0m [32m━━━━━━








[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.2617 - loss: 2.0103 - val_accuracy: 0.3548 - val_loss: 1.8003
Epoch 2/150
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.3887 - loss: 1.6897 - val_accuracy: 0.4076 - val_loss: 1.6653
Epoch 3/150
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4257 - loss: 1.5859 - val_accuracy: 0.4440 - val_loss: 1.5530
Epoch 4/150
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4569 - loss: 1.5236 - val_accuracy: 0.4392 - val_loss: 1.5476
Epoch 5/150
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4693 - loss: 1.4737 - val_accuracy: 0.4570 - val_loss: 1.5129
Epoch 6/150
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4880 - loss: 1.4222 - val_accuracy: 0.4864 - val_loss: 1.4336
Epoch 7/150
[1m704/704[0m [32m━

In [10]:
%load_ext tensorboard
%tensorboard --logdir=./my_logs/manual2

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [8]:
results

[{'hyperparams': {'learning_rate': 0.01,
   'batch_size': 64,
   'epochs': 150,
   'num_layers': 4,
   'neurons': 400,
   'optimizer': 'sgd',
   'lr_sched': 'poly'},
  'final_val_acc': 0.5307999849319458,
  'final_train_acc': 0.8512444496154785,
  'run_id': 0},
 {'hyperparams': {'learning_rate': 0.005,
   'batch_size': 64,
   'epochs': 100,
   'num_layers': 10,
   'neurons': 200,
   'optimizer': 'sgd',
   'lr_sched': 'exp'},
  'final_val_acc': 0.5325999855995178,
  'final_train_acc': 0.731844425201416,
  'run_id': 1},
 {'hyperparams': {'learning_rate': 0.0001,
   'batch_size': 16,
   'epochs': 50,
   'num_layers': 2,
   'neurons': 600,
   'optimizer': 'adam',
   'lr_sched': 'exp'},
  'final_val_acc': 0.5540000200271606,
  'final_train_acc': 0.8779777884483337,
  'run_id': 2},
 {'hyperparams': {'learning_rate': 0.0005,
   'batch_size': 64,
   'epochs': 200,
   'num_layers': 6,
   'neurons': 400,
   'optimizer': 'sgd',
   'lr_sched': 'exp'},
  'final_val_acc': 0.5436000227928162,
  'fina