In [1]:
# Auto-reload imported modules before each cell executes, so edits to local
# package code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Load environment variables from a .env file. Presumably this supplies the
# credentials for the LM used later in the notebook — TODO confirm.
%load_ext dotenv
%dotenv


In [2]:
import os
# Select the JAX backend for Keras. The variable is set before the centimators
# imports below because Keras reads it when it is first imported (assumes those
# modules are what pull Keras in — verify).
os.environ["KERAS_BACKEND"] = "jax"

import polars as pl
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from centimators.model_estimators import MLPRegressor
from centimators.keras_cortex import KerasCortex

In [3]:
# Synthetic regression task: 2000 rows x 20 features with a little label noise.
features_np, target_np = make_regression(n_samples=2000, n_features=20, noise=0.1, random_state=42)

# Wrap the generated arrays in polars containers.
X = pl.DataFrame(features_np)
y = pl.Series(target_np)

# 60 / 20 / 20 split: hold out 40% first, then halve that into val and test.
X_train, X_tmp, y_train, y_tmp = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(X_tmp, y_tmp, test_size=0.5, random_state=42)

# Report the shape of each feature partition on a single line.
print(*(part.shape for part in (X_train, X_val, X_test)))

(1200, 20) (400, 20) (400, 20)


In [4]:
# Baseline network: two hidden layers (64 then 32 units) with 10% dropout.
base_mlp = MLPRegressor(hidden_units=(64, 32), dropout_rate=0.1)

# Wrap the baseline in KerasCortex, configured for 5 iterations with the given
# LM. Judging by the logged output, each iteration has the LM reason about
# changes to the model.
cortex = KerasCortex(
    base_estimator=base_mlp,
    lm="openai/gpt-4o-mini",
    n_iterations=5,
    verbose=True,
)

In [5]:
# Fit on the training split, passing the validation split along; each training
# run uses 5 epochs with batches of 256 rows.
validation_split = (X_val, y_val)
cortex.fit(X_train, y_train, validation_data=validation_split, epochs=5, batch_size=256)



Epoch 1/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 178ms/step - loss: 30056.2285 - mse: 30056.2285
Epoch 2/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 30656.7637 - mse: 30656.7637  
Epoch 3/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 31089.9023 - mse: 31089.9023
Epoch 4/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 29419.2422 - mse: 29419.2422
Epoch 5/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 29960.8262 - mse: 29960.8262
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step

--- Iteration 1 ---
Reasoning: 
To improve validation scores, we can consider several modifications to the model architecture and training process. One effective approach is to add batch normalization layers after the dense layers. This can help stabilize and accelerate training by normalizing the inputs to each layer. Additionally