In [42]:
import tensorflow as tf
from sklearn.datasets import fetch_california_housing, make_friedman1
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import copy

In [None]:
class CustomScheduler(tf.keras.callbacks.Callback):
    def __init__(
        self,
        monitor="val_loss",
        factor=0.5,
        patience=5,
        num_reductions=2,
        min_lr=1e-6,
        restore_best_weights=True,
        verbose=1,
    ):
        super().__init__()
        self.monitor = monitor
        self.factor = factor
        self.patience = patience
        self.num_reductions = num_reductions
        self.min_lr = min_lr
        self.restore_best_weights = restore_best_weights
        self.verbose = verbose

    def on_train_begin(self, logs=None):
        self.wait = 0
        self.best = float("inf")
        self.reduction_count = 0
        self.best_weights = None

    def on_epoch_end(self, epoch, logs=None):
        print("LR ------>", float(tf.keras.backend.get_value(self.model.optimizer.learning_rate)))
        current = logs.get(self.monitor)
        if current is None and self.monitor != "loss":
            current = logs.get("loss")
            if self.verbose:
                print(
                    f"\nEpoch {epoch + 1}: "
                    f"Metric '{self.monitor}' not found. "
                    f"Falling back to 'loss'."
                )

        if current is None:
            return

        if current < self.best:
            self.best = current
            self.wait = 0
            if self.restore_best_weights:
                self.best_weights = copy.deepcopy(self.model.get_weights())
            return

        self.wait += 1

        if self.wait < self.patience:
            return

        self.wait = 0

        if self.reduction_count < self.num_reductions:
            lr = self.model.optimizer.learning_rate
            if hasattr(lr, "assign"):
                old_lr = float(tf.keras.backend.get_value(lr))
                new_lr = max(old_lr * self.factor, self.min_lr)
                lr.assign(new_lr)

            # Case 2: learning_rate is a float / int
            elif isinstance(lr, (float, int)):
                old_lr = float(lr)
                new_lr = max(old_lr * self.factor, self.min_lr)
                self.model.optimizer.learning_rate = new_lr

            # Case 3: learning rate schedule → cannot be reduced
            elif isinstance(lr, tf.keras.optimizers.schedules.LearningRateSchedule):
                if self.verbose:
                    print(
                        "\nLearning rate is a schedule; "
                        "ReduceLROnPlateau behavior is disabled."
                    )
                return

            else:
                raise TypeError(
                    f"Unsupported learning_rate type: {type(lr)}"
                )

            self.reduction_count += 1

            if self.verbose:
                print(
                    f"\nEpoch {epoch + 1}: "
                    f"ReduceLROnPlateau {self.reduction_count}/{self.num_reductions} "
                    f"— LR {old_lr:.3e} → {new_lr:.3e}"
                )

        else:
            if self.verbose:
                print(
                    f"\nEpoch {epoch + 1}: "
                    f"Early stopping triggered after {self.num_reductions} LR reductions."
                )

            if self.restore_best_weights and self.best_weights is not None:
                if self.verbose:
                    print("Restoring best model weights.")
                self.model.set_weights(self.best_weights)

            self.model.stop_training = True

In [44]:
data = fetch_california_housing()
X = data.data
y = data.target

In [45]:
X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))


In [46]:
# X, y = make_friedman1(n_samples=5000, n_features=10, noise=1.0, random_state=0)

In [47]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [48]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(100, activation='relu', kernel_initializer="he_normal"),
    tf.keras.layers.Dense(100, activation='relu', kernel_initializer="he_normal"),
    tf.keras.layers.Dense(100, activation='relu', kernel_initializer="he_normal"),
    tf.keras.layers.Dense(1)
])

In [49]:
X.shape, y.shape

((20640, 8), (20640,))

In [50]:
OPTIMIZERS = {
    'adam': tf.keras.optimizers.Adam(learning_rate=1e-3),
    'adamw': tf.keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-4),
    'nadam': tf.keras.optimizers.Nadam(learning_rate=1e-3),
    'rmsprop': tf.keras.optimizers.RMSprop(learning_rate=5e-4),
}

In [51]:
# optimizer = OPTIMIZERS['nadam'] # Epoch 32/200 loss: 1.0644 - mae: 0.8193 - val_loss: 1.0346 - val_mae: 0.8113
optimizer = OPTIMIZERS['adam'] # Epoch 29/200 loss: 1.2096 - mae: 0.8827 - val_loss: 1.0778 - val_mae: 0.8260
# optimizer = OPTIMIZERS['adamw'] # Epoch 39/200 loss: 1.0883 - mae: 0.8289 - val_loss: 1.1170 - val_mae: 0.8352
# optimizer = OPTIMIZERS['rmsprop'] # Epoch 37/200 loss: 1.3758 - mae: 0.9286 - val_loss: 1.8918 - val_mae: 1.1354

In [52]:
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

In [53]:
es = CustomScheduler(patience=2)

In [None]:
# es = tf.keras.callbacks.EarlyStopping(patience=5)
model.fit(X, y, validation_data=(X_test, y_test), epochs=200, callbacks=[es])

Epoch 1/200


[1m593/645[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 769us/step - loss: 1.0061 - mae: 0.7297LR ------> <KerasVariable shape=(), dtype=float32, path=adam/learning_rate>
[1m645/645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.9782 - mae: 0.7186 - val_loss: 0.5351 - val_mae: 0.5063
Epoch 2/200
[1m613/645[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 742us/step - loss: 0.5106 - mae: 0.5172LR ------> <KerasVariable shape=(), dtype=float32, path=adam/learning_rate>
[1m645/645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 924us/step - loss: 0.5102 - mae: 0.5168 - val_loss: 0.5004 - val_mae: 0.4771
Epoch 3/200
[1m596/645[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 768us/step - loss: 0.4737 - mae: 0.4896LR ------> <KerasVariable shape=(), dtype=float32, path=adam/learning_rate>
[1m645/645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 913us/step - loss: 0.4730 - mae: 0.4894 - val_loss: 0.4525 - val_mae: 0.4533
Epoch 