In [1]:
%cd ../..

/Users/aflamant/Documents/courses/2024-2025/mémoire/03-code


In [2]:
RANDOM_STATE = 42
import numpy as np
import pickle
import optuna
from torch.utils.data import random_split
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from dataset import (TenBarsCantileverTrussSingleEADataset,
                     TwoBarsTrussSingleEADataset,
                     BiSupportedTrussBeamSingleEADataset)

np.random.seed(RANDOM_STATE)

# Data

In [3]:
names = ['beam', 'cantilever', 'triangle']

path = {
    'beam': 'data/dataset/beam/data.hdf5',
    'cantilever': 'data/dataset/cantilever/data.hdf5',
    'triangle': 'data/dataset/triangle/data.hdf5'
}

_dataset = {
    'beam': BiSupportedTrussBeamSingleEADataset(path['beam']),
    'cantilever': TenBarsCantileverTrussSingleEADataset(path['cantilever']),
    'triangle': TwoBarsTrussSingleEADataset(path['triangle'])
}

In [4]:
_x_train = {}
_y_train = {}
_x_test = {}
_y_test = {}

for key in _dataset.keys():
    ds = _dataset[key]
    train, test = random_split(ds, (.8, .2))

    x_train = []
    y_train = []
    for x, y, _, _, _ in train:
        x_train.append(x.cpu().detach().numpy())
        y_train.append(y.cpu().detach().numpy())
    _x_train[key] = np.vstack(x_train)
    _y_train[key] = np.vstack(y_train)

    x_test = []
    y_test = []
    for x, y, _, _, _ in test:
        x_test.append(x.cpu().detach().numpy())
        y_test.append(y.cpu().detach().numpy())
    _x_test[key] = np.vstack(x_test)
    _y_test[key] = np.vstack(y_test)

del x_train, y_train, x_test, y_test

In [5]:
models_scores = {}
best_models = {}
scaler_x = {key: StandardScaler().fit(_x_train[key]) for key in _x_train.keys()}
scaler_y = {key: StandardScaler().fit(_y_train[key]) for key in _y_train.keys()}
studies = {}

# Linear Regression

In [40]:
def eval_model(model, key, scale=False):
    x = _x_test[key]
    if scale: x = scaler_x[key].transform(x)

    y_pred = best_models[model][key].predict(x)
    if scale: y_pred = scaler_y[key].inverse_transform(y_pred.reshape(-1, 1))

    if model not in models_scores:
        models_scores[model] = {}
    models_scores[model][key] = {
        'MAER': np.abs(((y_pred - _y_test[key]) / _y_test[key]).mean()),
        'MAE': np.abs((y_pred - _y_test[key]).mean()),
        'MSE': ((y_pred - _y_test[key]) ** 2).mean(),
    }

    print(f"Validation score \"{key}\":")
    print(f"\tMAER: {models_scores[model][key]['MAER'] * 100:.4f} %")
    print(f"\tMAE: {models_scores[model][key]['MAE'] * 1e-6:.4f} MN")
    print(f"\tMSE: {models_scores[model][key]['MSE'] * 1e-12:.4f} MN^2")
    print()

In [41]:
best_models['linear regression'] = {}

for key in names:
    best_models['linear regression'][key] = LinearRegression().fit(_x_train[key], _y_train[key])
    eval_model('linear regression', key)

Validation score "beam":
	MAER: 3.5539 %
	MAE: 1.9012 MN
	MSE: 55413.1125 MN^2

Validation score "cantilever":
	MAER: 9.1481 %
	MAE: 1.2233 MN
	MSE: 200621.7956 MN^2

Validation score "triangle":
	MAER: 13.9590 %
	MAE: 1.8480 MN
	MSE: 254521.4763 MN^2



# KNN Regressor

In [26]:
def objective(trial, key):
    x, y = _x_train[key][:15000], _y_train[key][:15000] # Subsampling because it's otherwise way too long to train

    n_neighbors = trial.suggest_int('n_neighbors', 1, 100)
    weights = trial.suggest_categorical('weights', ['distance'])
    metric = trial.suggest_categorical('metric', ['minkowski', 'l1', 'l2', 'cosine'])
    p = trial.suggest_float('p', 1, 20) if metric == 'minkowski' else None

    params = (n_neighbors, weights, metric, p)
    if params in trial_history:
        raise optuna.exceptions.TrialPruned()

    trial_history.add(params)

    scores = []
    kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
    for train_index, test_index in kf.split(x):
        x_train, x_validation = x[train_index], x[test_index]
        y_train, y_validation = y[train_index], y[test_index]

        regressor = KNeighborsRegressor(
            n_neighbors=n_neighbors,
            weights=weights,
            metric=metric,
            p=p,
            n_jobs=-1,

        )

        scaler_x = StandardScaler().fit(x_train)
        scaler_y = StandardScaler().fit(y_train)

        x_train_scaled = scaler_x.transform(x_train)
        x_validation_scaled = scaler_x.transform(x_validation)

        y_train_scaled = scaler_y.transform(y_train).flatten()

        regressor.fit(x_train_scaled, y_train_scaled)

        y_pred = regressor.predict(x_validation_scaled).reshape(-1, 1)
        y_pred = scaler_y.inverse_transform(y_pred)

        scores.append(((y_pred - y_validation) ** 2).mean() * 1e-12)

    return np.mean(scores)



# Optuna study
studies['knn'] = {}
for key in names:
    trial_history = set()
    print(f"Study with \"{key}\":", flush=True)
    studies['knn'][key] = optuna.create_study(study_name=f"knn {key}", direction='minimize')
    studies['knn'][key].optimize(lambda trial: objective(trial, key), n_jobs=1, n_trials=200, timeout=None)
    print('')

del trial_history

Study with "beam":


[I 2025-01-14 12:16:11,745] A new study created in memory with name: knn beam
[I 2025-01-14 12:16:12,237] Trial 0 finished with value: 67276.26652562307 and parameters: {'n_neighbors': 94, 'weights': 'distance', 'metric': 'l2'}. Best is trial 0 with value: 67276.26652562307.
[I 2025-01-14 12:16:12,528] Trial 1 finished with value: 75928.98933904273 and parameters: {'n_neighbors': 25, 'weights': 'distance', 'metric': 'l1'}. Best is trial 0 with value: 67276.26652562307.
[I 2025-01-14 12:16:12,937] Trial 2 finished with value: 87930.0806596023 and parameters: {'n_neighbors': 74, 'weights': 'distance', 'metric': 'l1'}. Best is trial 0 with value: 67276.26652562307.
[I 2025-01-14 12:16:13,331] Trial 3 finished with value: 86398.51240508494 and parameters: {'n_neighbors': 65, 'weights': 'distance', 'metric': 'l1'}. Best is trial 0 with value: 67276.26652562307.
[I 2025-01-14 12:16:16,240] Trial 4 finished with value: 55484.60363741832 and parameters: {'n_neighbors': 86, 'weights': 'distance


Study with "cantilever":


[I 2025-01-14 12:19:22,852] A new study created in memory with name: knn cantilever
[I 2025-01-14 12:19:23,195] Trial 0 finished with value: 199842.3174261623 and parameters: {'n_neighbors': 59, 'weights': 'distance', 'metric': 'l2'}. Best is trial 0 with value: 199842.3174261623.
[I 2025-01-14 12:19:26,781] Trial 1 finished with value: 225974.07233952484 and parameters: {'n_neighbors': 83, 'weights': 'distance', 'metric': 'minkowski', 'p': 11.135651874889291}. Best is trial 0 with value: 199842.3174261623.
[I 2025-01-14 12:19:30,372] Trial 2 finished with value: 204102.0396377317 and parameters: {'n_neighbors': 63, 'weights': 'distance', 'metric': 'minkowski', 'p': 2.4581911420538507}. Best is trial 0 with value: 199842.3174261623.
[I 2025-01-14 12:19:30,687] Trial 3 finished with value: 222522.95204682034 and parameters: {'n_neighbors': 50, 'weights': 'distance', 'metric': 'l1'}. Best is trial 0 with value: 199842.3174261623.
[I 2025-01-14 12:19:30,688] Trial 4 pruned. 
[I 2025-01-14


Study with "triangle":


[I 2025-01-14 12:22:20,929] A new study created in memory with name: knn triangle
[I 2025-01-14 12:22:21,086] Trial 0 finished with value: 176123.0898103108 and parameters: {'n_neighbors': 57, 'weights': 'distance', 'metric': 'l1'}. Best is trial 0 with value: 176123.0898103108.
[I 2025-01-14 12:22:21,250] Trial 1 finished with value: 139771.11639663897 and parameters: {'n_neighbors': 34, 'weights': 'distance', 'metric': 'l2'}. Best is trial 1 with value: 139771.11639663897.
[I 2025-01-14 12:22:21,475] Trial 2 finished with value: 185438.52864725576 and parameters: {'n_neighbors': 80, 'weights': 'distance', 'metric': 'minkowski', 'p': 9.06896006377505}. Best is trial 1 with value: 139771.11639663897.
[I 2025-01-14 12:22:21,562] Trial 3 finished with value: 113326.41507263554 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'metric': 'minkowski', 'p': 18.521938223905146}. Best is trial 3 with value: 113326.41507263554.
[I 2025-01-14 12:22:24,835] Trial 4 finished with value: 14




In [35]:
"""
Get the 3 best trials and print their characteristics.
Then select the best one and fit its model.
"""
def compute_results_and_models(model_key, model_class, n_best = 3):
    best_models[model_key] = {}

    for typology, study in studies[model_key].items():
        top_trials = sorted(
            study.trials,
            key=lambda t: t.value if t.value is not None else np.inf,
            reverse=False
        )[:n_best]

        print(f"{typology}:")
        for i, trial in enumerate(top_trials):
            print(f"  Rank {i + 1}:")
            print(f"    Trial ID: {trial.number}")
            print(f"    Value: {trial.value}")
            print(f"    Params: {trial.params}")
            print()

        best_models[model_key][typology] = model_class(**top_trials[0].params)
        best_models[model_key][typology].fit(scaler_x[typology].transform(_x_train[typology]),
                                             scaler_y[typology].transform(_y_train[typology]))

compute_results_and_models('knn', KNeighborsRegressor)

beam:
  Rank 1:
    Trial ID: 23
    Value: 37369.96947928687
    Params: {'n_neighbors': 7, 'weights': 'distance', 'metric': 'cosine'}

  Rank 2:
    Trial ID: 48
    Value: 37423.16371815774
    Params: {'n_neighbors': 6, 'weights': 'distance', 'metric': 'cosine'}

  Rank 3:
    Trial ID: 10
    Value: 37508.60797839287
    Params: {'n_neighbors': 8, 'weights': 'distance', 'metric': 'cosine'}

cantilever:
  Rank 1:
    Trial ID: 43
    Value: 138091.78591943943
    Params: {'n_neighbors': 7, 'weights': 'distance', 'metric': 'cosine'}

  Rank 2:
    Trial ID: 31
    Value: 138245.8951038196
    Params: {'n_neighbors': 8, 'weights': 'distance', 'metric': 'cosine'}

  Rank 3:
    Trial ID: 30
    Value: 138345.86716880143
    Params: {'n_neighbors': 9, 'weights': 'distance', 'metric': 'cosine'}

triangle:
  Rank 1:
    Trial ID: 189
    Value: 100225.0525637901
    Params: {'n_neighbors': 4, 'weights': 'distance', 'metric': 'minkowski', 'p': 1.8833518363462791}

  Rank 2:
    Trial ID: 

In [42]:
model_key = 'knn'
for key in best_models[model_key].keys():
            eval_model(model_key, key, scale=True)

Validation score "beam":
	MAER: 3.2590 %
	MAE: 7.6875 MN
	MSE: 21721.0835 MN^2

Validation score "cantilever":
	MAER: 6.3499 %
	MAE: 2.1820 MN
	MSE: 73988.9132 MN^2

Validation score "triangle":
	MAER: 3.1839 %
	MAE: 13.7134 MN
	MSE: 46190.1886 MN^2



# AdaBost with Linear Regressor

In [33]:
def objective(trial, key):
    x, y = _x_train[key][:15000], _y_train[key][:15000]

    n_estimators = trial.suggest_int('n_estimators', 1, 100)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 4, log=True)
    loss = trial.suggest_categorical('loss', ['square', 'linear', 'exponential'])

    params = (n_estimators, learning_rate, loss)
    if params in trial_history:
        raise optuna.exceptions.TrialPruned()

    scores = []
    kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
    for train_index, test_index in kf.split(x):
        x_train, x_validation = x[train_index], x[test_index]
        y_train, y_validation = y[train_index], y[test_index]

        regressor = AdaBoostRegressor(LinearRegression(),
                                      n_estimators=n_estimators,
                                      learning_rate=learning_rate,
                                      loss=loss,
                                      random_state=RANDOM_STATE)

        scaler_x = StandardScaler().fit(x_train)
        scaler_y = StandardScaler().fit(y_train)

        x_train_scaled = scaler_x.transform(x_train)
        x_validation_scaled = scaler_x.transform(x_validation)

        y_train_scaled = scaler_y.transform(y_train)

        regressor.fit(x_train_scaled, y_train_scaled.ravel())

        y_pred = regressor.predict(x_validation_scaled).reshape(-1, 1)
        y_pred = scaler_y.inverse_transform(y_pred)

        scores.append(((y_pred - y_validation) ** 2).mean() * 1e-12)

    return np.mean(scores)


# Optuna study
studies['adaboost regression'] = {}
for key in names:
    trial_history = set()
    print(f"Study with \"{key}\":", flush=True)
    studies['adaboost regression'][key] = optuna.create_study(study_name=f"AdaBoost Regression {key}",
                                                              direction='minimize')
    studies['adaboost regression'][key].optimize(lambda trial: objective(trial, key), n_jobs=1, n_trials=200,
                                                 timeout=None)
    print('')
del trial_history

Study with "beam":


[I 2025-01-14 13:17:56,941] A new study created in memory with name: AdaBoost Regression beam
[I 2025-01-14 13:18:03,858] Trial 0 finished with value: 79013.35809239733 and parameters: {'n_estimators': 36, 'learning_rate': 0.1449356173816538, 'loss': 'square'}. Best is trial 0 with value: 79013.35809239733.
[I 2025-01-14 13:18:06,311] Trial 1 finished with value: 69879.93849657653 and parameters: {'n_estimators': 69, 'learning_rate': 0.5168654575435286, 'loss': 'linear'}. Best is trial 1 with value: 69879.93849657653.
[I 2025-01-14 13:18:06,868] Trial 2 finished with value: 64644.9840132759 and parameters: {'n_estimators': 40, 'learning_rate': 1.393138531249109, 'loss': 'linear'}. Best is trial 2 with value: 64644.9840132759.
[I 2025-01-14 13:18:10,594] Trial 3 finished with value: 67828.46923255274 and parameters: {'n_estimators': 82, 'learning_rate': 4.4419041575510303e-05, 'loss': 'linear'}. Best is trial 2 with value: 64644.9840132759.
[I 2025-01-14 13:18:12,019] Trial 4 finished w


Study with "cantilever":


[I 2025-01-14 13:20:45,840] A new study created in memory with name: AdaBoost Regression cantilever
[I 2025-01-14 13:20:48,304] Trial 0 finished with value: 287212.59150690795 and parameters: {'n_estimators': 81, 'learning_rate': 0.0008760662083146893, 'loss': 'linear'}. Best is trial 0 with value: 287212.59150690795.
[I 2025-01-14 13:20:49,880] Trial 1 finished with value: 281067.7320458426 and parameters: {'n_estimators': 52, 'learning_rate': 2.0750240959378165e-05, 'loss': 'square'}. Best is trial 1 with value: 281067.7320458426.
[I 2025-01-14 13:20:52,335] Trial 2 finished with value: 219624.53853642195 and parameters: {'n_estimators': 81, 'learning_rate': 0.04020443050437757, 'loss': 'square'}. Best is trial 2 with value: 219624.53853642195.
[I 2025-01-14 13:20:55,143] Trial 3 finished with value: 222012.6714382097 and parameters: {'n_estimators': 92, 'learning_rate': 0.044781093942178725, 'loss': 'square'}. Best is trial 2 with value: 219624.53853642195.
[I 2025-01-14 13:20:55,62


Study with "triangle":


[I 2025-01-14 13:26:46,335] A new study created in memory with name: AdaBoost Regression triangle
[I 2025-01-14 13:26:46,907] Trial 0 finished with value: 255760.20440668133 and parameters: {'n_estimators': 45, 'learning_rate': 3.3350538323732803e-05, 'loss': 'exponential'}. Best is trial 0 with value: 255760.20440668133.
[I 2025-01-14 13:26:47,380] Trial 1 finished with value: 255741.93734935118 and parameters: {'n_estimators': 36, 'learning_rate': 0.0008726981091322249, 'loss': 'linear'}. Best is trial 1 with value: 255741.93734935118.
[I 2025-01-14 13:26:47,826] Trial 2 finished with value: 284583.9546423788 and parameters: {'n_estimators': 35, 'learning_rate': 0.21171260408633696, 'loss': 'square'}. Best is trial 1 with value: 255741.93734935118.
[I 2025-01-14 13:26:48,011] Trial 3 finished with value: 255877.38942563068 and parameters: {'n_estimators': 15, 'learning_rate': 0.0001514383867383576, 'loss': 'exponential'}. Best is trial 1 with value: 255741.93734935118.
[I 2025-01-14 




In [37]:
compute_results_and_models('adaboost regression', lambda **params: AdaBoostRegressor(LinearRegression(), **params))

beam:
  Rank 1:
    Trial ID: 181
    Value: 61050.27908070411
    Params: {'n_estimators': 10, 'learning_rate': 4.779325279915494e-05, 'loss': 'linear'}

  Rank 2:
    Trial ID: 151
    Value: 61177.058908820465
    Params: {'n_estimators': 10, 'learning_rate': 4.477049789579449e-05, 'loss': 'linear'}

  Rank 3:
    Trial ID: 174
    Value: 61202.45678592981
    Params: {'n_estimators': 10, 'learning_rate': 4.382321896398301e-05, 'loss': 'linear'}



  y = column_or_1d(y, warn=True)


cantilever:
  Rank 1:
    Trial ID: 149
    Value: 212137.8830858383
    Params: {'n_estimators': 25, 'learning_rate': 0.07676266357634506, 'loss': 'square'}

  Rank 2:
    Trial ID: 152
    Value: 212158.2441812465
    Params: {'n_estimators': 24, 'learning_rate': 0.08406323298388911, 'loss': 'square'}

  Rank 3:
    Trial ID: 151
    Value: 212615.62401510082
    Params: {'n_estimators': 33, 'learning_rate': 0.06750868785732621, 'loss': 'square'}



  y = column_or_1d(y, warn=True)


triangle:
  Rank 1:
    Trial ID: 159
    Value: 254818.3245838622
    Params: {'n_estimators': 8, 'learning_rate': 0.00011636762045759228, 'loss': 'exponential'}

  Rank 2:
    Trial ID: 111
    Value: 254861.3920514228
    Params: {'n_estimators': 8, 'learning_rate': 9.189372025643519e-05, 'loss': 'linear'}

  Rank 3:
    Trial ID: 83
    Value: 254986.1573738469
    Params: {'n_estimators': 8, 'learning_rate': 7.239079743882965e-05, 'loss': 'exponential'}



  y = column_or_1d(y, warn=True)


In [43]:
model_key = 'adaboost regression'
for key in best_models[model_key].keys():
            eval_model(model_key, key, scale=True)

Validation score "beam":
	MAER: 3.5401 %
	MAE: 1.9260 MN
	MSE: 55892.9535 MN^2

Validation score "cantilever":
	MAER: 11.1646 %
	MAE: 13.5280 MN
	MSE: 202271.0064 MN^2

Validation score "triangle":
	MAER: 13.8642 %
	MAE: 2.3221 MN
	MSE: 254759.3920 MN^2



# Random Forest Regressor

In [50]:
def objective(trial, key):
    x, y = _x_train[key][:50000], _y_train[key][:50000]

    n_estimators = trial.suggest_int('n_estimators', 1, 50)
    max_depth = trial.suggest_int('max_depth', 1, 100)
    min_samples_split = trial.suggest_float('min_samples_split', 1e-5, 1, log=True)
    min_samples_leaf = trial.suggest_float('min_samples_leaf', 1e-5, 1, log=True)
    min_weight_fraction_leaf = trial.suggest_float('min_weight_fraction_leaf', 0, .5)
    max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2', 1])

    scores = []
    kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
    for train_index, test_index in kf.split(x):
        x_train, x_validation = x[train_index], x[test_index]
        y_train, y_validation = y[train_index], y[test_index]

        regressor = RandomForestRegressor(
            n_estimators=n_estimators,
            max_depth=max_depth, min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            n_jobs=-1,
            random_state=RANDOM_STATE)

        scaler_x = StandardScaler().fit(x_train)
        scaler_y = StandardScaler().fit(y_train)

        x_train_scaled = scaler_x.transform(x_train)
        x_validation_scaled = scaler_x.transform(x_validation)

        y_train_scaled = scaler_y.transform(y_train)

        regressor.fit(x_train_scaled, y_train_scaled.ravel())

        y_pred = regressor.predict(x_validation_scaled).reshape(-1, 1)
        y_pred = scaler_y.inverse_transform(y_pred)

        scores.append(((y_pred - y_validation) ** 2).mean() * 1e-12)

    return np.mean(scores)


# Optuna study
model_key = 'random forest'
studies[model_key] = {}
for key in names:
    trial_history = set()
    print(f"Study with \"{key}\":", flush=True)
    studies[model_key][key] = optuna.create_study(study_name=f"{model_key} {key}",
                                                              direction='minimize')
    studies[model_key][key].optimize(lambda trial: objective(trial, key), n_jobs=1, n_trials=200,
                                                 timeout=None)
    print('')
del trial_history

Study with "beam":


[I 2025-01-14 13:49:07,472] A new study created in memory with name: random forest beam
[I 2025-01-14 13:49:08,516] Trial 0 finished with value: 117263.7982349594 and parameters: {'n_estimators': 42, 'max_depth': 15, 'min_samples_split': 0.00013092996057956493, 'min_samples_leaf': 0.00010001680952204811, 'min_weight_fraction_leaf': 0.10255421372959478, 'max_features': 'sqrt'}. Best is trial 0 with value: 117263.7982349594.
[I 2025-01-14 13:49:09,255] Trial 1 finished with value: 131361.82871883843 and parameters: {'n_estimators': 41, 'max_depth': 6, 'min_samples_split': 0.0003919124889528594, 'min_samples_leaf': 0.05844891617079429, 'min_weight_fraction_leaf': 0.19943270073396852, 'max_features': 'sqrt'}. Best is trial 0 with value: 117263.7982349594.
[I 2025-01-14 13:49:09,547] Trial 2 finished with value: 138118.71674984242 and parameters: {'n_estimators': 8, 'max_depth': 1, 'min_samples_split': 0.00024915398150206864, 'min_samples_leaf': 4.014560738877507e-05, 'min_weight_fraction_l


Study with "cantilever":


[I 2025-01-14 13:51:41,421] A new study created in memory with name: random forest cantilever
[I 2025-01-14 13:51:41,810] Trial 0 finished with value: 549209.8617739382 and parameters: {'n_estimators': 30, 'max_depth': 67, 'min_samples_split': 0.0036703685139379922, 'min_samples_leaf': 0.0010373126680153718, 'min_weight_fraction_leaf': 0.4033486108960421, 'max_features': 'sqrt'}. Best is trial 0 with value: 549209.8617739382.
[I 2025-01-14 13:51:42,315] Trial 1 finished with value: 391298.82507153536 and parameters: {'n_estimators': 23, 'max_depth': 36, 'min_samples_split': 0.045368771008539886, 'min_samples_leaf': 0.08899283696570033, 'min_weight_fraction_leaf': 0.15738230007608894, 'max_features': 'sqrt'}. Best is trial 1 with value: 391298.82507153536.
[I 2025-01-14 13:51:42,721] Trial 2 finished with value: 563337.3456658742 and parameters: {'n_estimators': 45, 'max_depth': 35, 'min_samples_split': 0.36482449445940357, 'min_samples_leaf': 0.00033337508384208644, 'min_weight_fractio


Study with "triangle":


[I 2025-01-14 13:53:43,215] A new study created in memory with name: random forest triangle
[I 2025-01-14 13:53:43,445] Trial 0 finished with value: 608182.5495903163 and parameters: {'n_estimators': 36, 'max_depth': 10, 'min_samples_split': 0.7213525254963488, 'min_samples_leaf': 0.0005537092113021687, 'min_weight_fraction_leaf': 0.4231517975000525, 'max_features': 'log2'}. Best is trial 0 with value: 608182.5495903163.
[I 2025-01-14 13:53:43,942] Trial 1 finished with value: 397438.0997136886 and parameters: {'n_estimators': 50, 'max_depth': 100, 'min_samples_split': 0.00011348205473477882, 'min_samples_leaf': 2.4082351585664063e-05, 'min_weight_fraction_leaf': 0.16002163549163667, 'max_features': 'sqrt'}. Best is trial 1 with value: 397438.0997136886.
[I 2025-01-14 13:53:44,211] Trial 2 finished with value: 608181.9617360623 and parameters: {'n_estimators': 49, 'max_depth': 98, 'min_samples_split': 0.7843400326181905, 'min_samples_leaf': 0.0019140843645442413, 'min_weight_fraction_l




In [51]:
model_key = 'random forest'
compute_results_and_models(model_key, RandomForestRegressor)

  return fit_method(estimator, *args, **kwargs)


beam:
  Rank 1:
    Trial ID: 160
    Value: 19649.322836679286
    Params: {'n_estimators': 16, 'max_depth': 52, 'min_samples_split': 5.345888722815002e-05, 'min_samples_leaf': 9.331258339299405e-05, 'min_weight_fraction_leaf': 3.485519120469008e-05, 'max_features': 'sqrt'}

  Rank 2:
    Trial ID: 156
    Value: 19937.862783849556
    Params: {'n_estimators': 15, 'max_depth': 50, 'min_samples_split': 4.6109083781054734e-05, 'min_samples_leaf': 9.512942743846597e-05, 'min_weight_fraction_leaf': 5.516767113038917e-05, 'max_features': 'sqrt'}

  Rank 3:
    Trial ID: 146
    Value: 23465.414952601393
    Params: {'n_estimators': 16, 'max_depth': 48, 'min_samples_split': 0.00010351573512313833, 'min_samples_leaf': 0.0001993552342160317, 'min_weight_fraction_leaf': 0.00017642234262870012, 'max_features': 'sqrt'}

cantilever:
  Rank 1:
    Trial ID: 179
    Value: 54388.53847912075
    Params: {'n_estimators': 13, 'max_depth': 97, 'min_samples_split': 0.00012199442761992017, 'min_samples_l

  return fit_method(estimator, *args, **kwargs)


triangle:
  Rank 1:
    Trial ID: 164
    Value: 27437.765293107008
    Params: {'n_estimators': 23, 'max_depth': 21, 'min_samples_split': 0.00017374499114546608, 'min_samples_leaf': 4.477695044374399e-05, 'min_weight_fraction_leaf': 1.7173009103979824e-06, 'max_features': 'log2'}

  Rank 2:
    Trial ID: 180
    Value: 29658.160535880772
    Params: {'n_estimators': 40, 'max_depth': 25, 'min_samples_split': 0.00011368841948007307, 'min_samples_leaf': 8.063325483946518e-05, 'min_weight_fraction_leaf': 0.00019734804515797626, 'max_features': 'sqrt'}

  Rank 3:
    Trial ID: 186
    Value: 29760.358498567617
    Params: {'n_estimators': 41, 'max_depth': 26, 'min_samples_split': 0.00015164940576537807, 'min_samples_leaf': 6.646263594651405e-05, 'min_weight_fraction_leaf': 0.00019488458004443245, 'max_features': 'sqrt'}



  return fit_method(estimator, *args, **kwargs)


In [52]:
model_key = 'random forest'
for key in best_models[model_key].keys():
            eval_model(model_key, key, scale=True)

Validation score "beam":
	MAER: 3.9976 %
	MAE: 1.9687 MN
	MSE: 19236.3528 MN^2

Validation score "cantilever":
	MAER: 8.4507 %
	MAE: 1.2627 MN
	MSE: 41450.4004 MN^2

Validation score "triangle":
	MAER: 3.9966 %
	MAE: 3.6668 MN
	MSE: 22083.5147 MN^2



In [60]:
save = False
if save:
    with open('experiments/01-single_ea/1-models.pickle', 'wb') as handle:
        pickle.dump(best_models, handle, protocol=pickle.HIGHEST_PROTOCOL)

    with open('experiments/01-single_ea/1-test_scores.pickle', 'wb') as handle:
        pickle.dump(models_scores, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [56]:
load = False
if load:
    with open('experiments/01-single_ea/model.pickle', 'rb') as handle:
        best_models = pickle.load(handle)

    with open('experiments/01-single_ea/test_metrics.pickle', 'rb') as handle:
        models_scores = pickle.load(handle)