In [5]:
import tensorflow as tf
from sklearn.datasets import fetch_california_housing, make_friedman1
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import copy
import pandas as pd

In [None]:
class CustomScheduler(tf.keras.callbacks.Callback):
    """Reduce the learning rate on a plateau, then stop training.

    The monitored metric is treated as "lower is better". Each time it fails
    to improve for ``patience`` consecutive epochs, the optimizer's learning
    rate is multiplied by ``factor`` (never going below ``min_lr``). After
    ``num_reductions`` such reductions, the next plateau stops training and,
    if requested, puts the best weights seen so far back on the model.

    Args:
        monitor: Key looked up in the epoch ``logs``. When it is missing, the
            ``"loss"`` entry is used instead.
        factor: Multiplier applied to the learning rate on each reduction.
        patience: Number of consecutive non-improving epochs that counts as a
            plateau.
        num_reductions: How many reductions are allowed before a plateau
            triggers early stopping.
        min_lr: Lower bound for the reduced learning rate.
        restore_best_weights: Whether to snapshot the weights on every
            improvement and restore them when training is stopped.
        verbose: Truthy to print progress messages, falsy to stay silent.
    """

    def __init__(
        self,
        monitor="val_loss",
        factor=0.5,
        patience=5,
        num_reductions=2,
        min_lr=1e-6,
        restore_best_weights=True,
        verbose=1,
    ):
        super().__init__()
        self.monitor = monitor
        self.factor = factor
        self.patience = patience
        self.num_reductions = num_reductions
        self.min_lr = min_lr
        self.restore_best_weights = restore_best_weights
        self.verbose = verbose

    def on_train_begin(self, logs=None):
        """Reset all plateau-tracking state so the callback can be reused."""
        self.wait = 0
        self.best = float("inf")
        self.reduction_count = 0
        self.best_weights = None

    def _read_lr(self):
        """Return the optimizer's learning rate as a float, or None for a schedule."""
        lr = self.model.optimizer.learning_rate
        # A schedule object is not a number; converting it would raise before
        # the graceful schedule handling in `_apply_reduction` is reached.
        if isinstance(lr, tf.keras.optimizers.schedules.LearningRateSchedule):
            return None
        return float(tf.keras.backend.get_value(lr))

    def _apply_reduction(self):
        """Scale the learning rate by ``factor``, floored at ``min_lr``.

        Returns:
            ``(old_lr, new_lr)`` after updating the optimizer, or ``None``
            when the learning rate is a schedule and cannot be reduced.

        Raises:
            TypeError: If the learning rate is of an unsupported type.
        """
        lr = self.model.optimizer.learning_rate

        # Case 1: learning_rate is a variable-like object that can be assigned
        if hasattr(lr, "assign"):
            old_lr = float(tf.keras.backend.get_value(lr))
            new_lr = max(old_lr * self.factor, self.min_lr)
            lr.assign(new_lr)
            return old_lr, new_lr

        # Case 2: learning_rate is a float / int
        if isinstance(lr, (float, int)):
            old_lr = float(lr)
            new_lr = max(old_lr * self.factor, self.min_lr)
            self.model.optimizer.learning_rate = new_lr
            return old_lr, new_lr

        # Case 3: learning rate schedule → cannot be reduced
        if isinstance(lr, tf.keras.optimizers.schedules.LearningRateSchedule):
            return None

        raise TypeError(
            f"Unsupported learning_rate type: {type(lr)}"
        )

    def on_epoch_end(self, epoch, logs=None):
        """Update plateau tracking and reduce the LR or stop training as needed."""
        # `logs` defaults to None in the signature; treat that as "no metrics".
        logs = logs or {}

        # Per-epoch LR report: honours `verbose` and is skipped for schedules.
        if self.verbose:
            current_lr = self._read_lr()
            if current_lr is not None:
                print("LR ------>", current_lr)

        current = logs.get(self.monitor)
        if current is None and self.monitor != "loss":
            current = logs.get("loss")
            if self.verbose:
                print(
                    f"\nEpoch {epoch + 1}: "
                    f"Metric '{self.monitor}' not found. "
                    f"Falling back to 'loss'."
                )

        if current is None:
            return

        # Improvement: remember it and restart the patience counter.
        if current < self.best:
            self.best = current
            self.wait = 0
            if self.restore_best_weights:
                self.best_weights = copy.deepcopy(self.model.get_weights())
            return

        self.wait += 1
        if self.wait < self.patience:
            return

        # Plateau reached: restart the counter for the next plateau window.
        self.wait = 0

        if self.reduction_count < self.num_reductions:
            change = self._apply_reduction()
            if change is None:
                if self.verbose:
                    print(
                        "\nLearning rate is a schedule; "
                        "ReduceLROnPlateau behavior is disabled."
                    )
                return

            old_lr, new_lr = change
            self.reduction_count += 1

            if self.verbose:
                print(
                    f"\nEpoch {epoch + 1}: "
                    f"ReduceLROnPlateau {self.reduction_count}/{self.num_reductions} "
                    f"— LR {old_lr:.3e} → {new_lr:.3e}"
                )
            return

        # All allowed reductions are used up: stop training.
        if self.verbose:
            print(
                f"\nEpoch {epoch + 1}: "
                f"Early stopping triggered after {self.num_reductions} LR reductions."
            )

        if self.restore_best_weights and self.best_weights is not None:
            if self.verbose:
                print("Restoring best model weights.")
            self.model.set_weights(self.best_weights)

        self.model.stop_training = True

In [17]:
# Fetch the California housing bundle and unpack its feature matrix and target vector.
data = fetch_california_housing()
X, y = data.data, data.target

In [18]:
# Wrap the features in a DataFrame, prefix every feature column with "x_",
# then attach the target as column "y".
df = pd.DataFrame(X).rename(columns=lambda c: f"x_{c}")
df['y'] = y


In [22]:
# Pull the target and the predictors back out of the combined frame.
y = df['y']
X = df.drop(columns='y')

In [None]:
# Display the target column (pandas truncates the middle of long Series).
y

0        4.526
1        3.585
2        3.521
3        3.413
4        3.422
         ...  
20635    0.781
20636    0.771
20637    0.923
20638    0.847
20639    0.894
Name: y, Length: 20640, dtype: float64

In [20]:
# Persist the combined feature/target frame to "data.parquet" in the working directory.
df.to_parquet("data.parquet")

In [None]:
# Min-max scale every feature column to the [0, 1] range.
# NOTE(review): the min/max are taken over all rows, i.e. before the
# train/test split performed later in this notebook, so the held-out rows
# influence the scaling statistics. Consider fitting them on the training
# split only. A constant column would also make the denominator zero.
X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

In [None]:
# X, y = make_friedman1(n_samples=5000, n_features=10, noise=1.0, random_state=0)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Three identical 100-unit ReLU hidden layers (He-normal init) followed by a
# single linear output unit.
model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal")
        for _ in range(3)
    ]
    + [tf.keras.layers.Dense(1)]
)

In [None]:
# Sanity check: show the shapes of the feature matrix and the target.
X.shape, y.shape

In [None]:
# Candidate optimizers keyed by short name. Each instance is created once,
# here, when this cell runs.
OPTIMIZERS = dict(
    adam=tf.keras.optimizers.Adam(learning_rate=1e-3),
    adamw=tf.keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-4),
    nadam=tf.keras.optimizers.Nadam(learning_rate=1e-3),
    rmsprop=tf.keras.optimizers.RMSprop(learning_rate=5e-4),
)

In [None]:
# Choose which optimizer from OPTIMIZERS to compile with; exactly one line is active.
# The trailing comments look like the metrics logged at the last epoch of an
# earlier run with each option — TODO confirm.
# optimizer = OPTIMIZERS['nadam'] # Epoch 32/200 loss: 1.0644 - mae: 0.8193 - val_loss: 1.0346 - val_mae: 0.8113
optimizer = OPTIMIZERS['adam'] # Epoch 29/200 loss: 1.2096 - mae: 0.8827 - val_loss: 1.0778 - val_mae: 0.8260
# optimizer = OPTIMIZERS['adamw'] # Epoch 39/200 loss: 1.0883 - mae: 0.8289 - val_loss: 1.1170 - val_mae: 0.8352
# optimizer = OPTIMIZERS['rmsprop'] # Epoch 37/200 loss: 1.3758 - mae: 0.9286 - val_loss: 1.8918 - val_mae: 1.1354

In [None]:
# Regression setup: optimise mean squared error and also report mean absolute error.
model.compile(
    optimizer=optimizer,
    loss="mse",
    metrics=["mae"],
)

In [None]:
# Plateau callback with a 2-epoch patience; every other setting keeps the
# class default (monitor="val_loss", factor=0.5, num_reductions=2, min_lr=1e-6).
es = CustomScheduler(patience=2)

In [None]:
# es = tf.keras.callbacks.EarlyStopping(patience=5)
# Train on the training split only. X_test / y_test are the held-out rows used
# for validation, so they must not be part of the fitted data; otherwise
# val_loss (the metric the callback monitors) is measured on rows the model
# has already been trained on.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=200, callbacks=[es])