In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from torch.utils.data import DataLoader, TensorDataset
from xgboost import XGBRegressor
from tensorflow import keras
from scipy.interpolate import interpn
from tqdm import tqdm
from joblib import dump, load
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import tensorflow as tf

In [14]:
from load_data import process_steel_data

full_path = 'data/'
path = 'data/MDC_Data_Descriptions_MeCoMeP-r-value.xlsx'
correlation_rate = 0.2
dvl_line = 1

df = process_steel_data(full_path, path, correlation_rate, dvl_line, model_output=True)
df = pd.get_dummies(df, columns=['steel_family'], prefix='steel').drop(['steel_grade'], axis=1)

Dropped 25 columns


  warn(msg)


In [15]:
def train_model_with_cv_gridsearch(df, model, param_grid=None, n_splits=5, random_state=42, use_grid_search=True, model_params=None):
    """
    Train a model with optional grid search and cross-validation
    
    Parameters:
    -----------
    df : pandas.DataFrame
        Input dataframe
    model : estimator object
        Machine learning model to train
    param_grid : dict, optional
        Parameter grid for grid search (used if use_grid_search=True)
    n_splits : int, optional
        Number of cross-validation splits (default: 5)
    random_state : int, optional
        Random state for reproducibility (default: 42)
    use_grid_search : bool, optional
        Whether to perform grid search (default: True)
    model_params : dict, optional
        Direct model parameters to use if use_grid_search=False
    
    Returns:
    --------
    dict containing model results and performance metrics including tol90
    """
    # Prepare X and y
    X = df.drop(['r_value'], axis=1)
    y = df['r_value']
    
    # Initialize cross-validation
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    
    # Initialize metrics storage
    cv_scores = {
        'mae': [],
        'mse': [],
        'r2': [],
        'tol90': []  # Add tol90 metric
    }
    
    # Determine model parameters
    if use_grid_search:
        if param_grid is None:
            raise ValueError("param_grid must be provided when use_grid_search is True")
        
        # Initialize GridSearchCV
        grid_search = GridSearchCV(
            estimator=model,
            param_grid=param_grid,
            cv=n_splits,
            scoring='neg_mean_absolute_error',
            n_jobs=-1,
            verbose=0
        )
        
        # Fit GridSearchCV
        print("Performing GridSearch...")
        grid_search.fit(X, y)
        print(f"\nBest parameters: {grid_search.best_params_}")
        best_model = grid_search.best_estimator_
    else:
        # Use directly specified parameters or default model
        if model_params:
            best_model = type(model)(**model_params)
        else:
            best_model = model
        
        grid_search = None
    
    # Perform cross-validation
    print("\nPerforming cross-validation...")
    pbar = tqdm(enumerate(kf.split(X), 1),
                total=n_splits,
                desc="Cross-validation",
                leave=True)
    
    for fold, (train_idx, val_idx) in pbar:
        # Split data
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        
        # Train model
        best_model.fit(X_train, y_train)
        
        # Make predictions
        y_pred = best_model.predict(X_val)
        
        # Calculate metrics
        mae = mean_absolute_error(y_val, y_pred)
        mse = mean_squared_error(y_val, y_pred)
        r2 = r2_score(y_val, y_pred)
        
        # Calculate tol90 (90th percentile of absolute errors)
        abs_errors = np.abs(y_val - y_pred)
        tol90 = np.percentile(abs_errors, 90)
        
        cv_scores['mae'].append(mae)
        cv_scores['mse'].append(mse)
        cv_scores['r2'].append(r2)
        cv_scores['tol90'].append(tol90)
        
        # Update progress bar description
        pbar.set_description(
            f"Fold {fold} - MAE: {mae:.4f}, MSE: {mse:.4f}, R2: {r2:.4f}, TOL90: {tol90:.4f}"
        )
    
    # Prepare results
    results = {
        'model': best_model,
        'best_params': grid_search.best_params_ if use_grid_search else model_params or {},
        'avg_mae': np.mean(cv_scores['mae']),
        'std_mae': np.std(cv_scores['mae']),
        'avg_mse': np.mean(cv_scores['mse']),
        'std_mse': np.std(cv_scores['mse']),
        'avg_r2': np.mean(cv_scores['r2']),
        'std_r2': np.std(cv_scores['r2']),
        'avg_tol90': np.mean(cv_scores['tol90']),  # Add average tol90
        'std_tol90': np.std(cv_scores['tol90']),   # Add std of tol90
        'cv_scores': cv_scores,
        'grid_search_results': grid_search.cv_results_ if use_grid_search else None
    }
    
    return results

def report_cv_results(results):
    print("\nCross-Validation Results:")
    print("-" * 50)
    print(f"Best Parameters: {results['best_params']}")
    print(f"Average MAE: {results['avg_mae']:.4f} ± {results['std_mae']:.4f}")
    print(f"Average MSE: {results['avg_mse']:.4f} ± {results['std_mse']:.4f}")
    print(f"Average R2: {results['avg_r2']:.4f} ± {results['std_r2']:.4f}")
    print(f"Average TOL90: {results['avg_tol90']:.4f} ± {results['std_tol90']:.4f}")

In [16]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

train_scaled_df, scaler = scale_data(train_df)
binary_columns = [col for col in test_df.columns if col.startswith('steel_')]
columns_to_scale = [col for col in test_df.columns if col not in binary_columns + ['r_value']]
scaled_test_data = scaler.transform(test_df[columns_to_scale])
test_scaled_df = pd.DataFrame(scaled_test_data, columns=columns_to_scale)
for col in binary_columns:
    test_scaled_df[col] = test_df[col].values
if 'r_value' in test_df.columns:
    test_scaled_df['r_value'] = test_df['r_value'].values

In [17]:
rfr = RandomForestRegressor(random_state=42)
rfr_param_grid = {
    'n_estimators': [350]
}

rfr_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=rfr,
    param_grid=rfr_param_grid,
    n_splits=5
)

Performing GridSearch...

Best parameters: {'n_estimators': 350}

Performing cross-validation...


Fold 5 - MAE: 0.0860, MSE: 0.0145, R2: 0.9573, TOL90: 0.1982: 100%|██████████| 5/5 [26:46<00:00, 321.38s/it]


In [18]:
xgb_model = XGBRegressor(random_state=42)

xgb_param_grid = {
    'eta': [0.01, 0.05, 0.1, 0.2, 0.3, 0.4],
    'lambda': [0, 0.01, 0.1, 1, 10, 50],
    'max_depth': [3, 4, 5, 6, 7, 8]
}

xgb_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=xgb_model,
    param_grid=xgb_param_grid,
    n_splits=5
)

Performing GridSearch...

Best parameters: {'eta': 0.1, 'lambda': 0.1, 'max_depth': 8}

Performing cross-validation...


Fold 5 - MAE: 0.0873, MSE: 0.0146, R2: 0.9571, TOL90: 0.1984: 100%|██████████| 5/5 [00:05<00:00,  1.09s/it]


In [19]:
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
from sklearn.gaussian_process import GaussianProcessRegressor

results_without_grid = train_model_with_cv_gridsearch(
    df=train_scaled_df, 
    model=GaussianProcessRegressor(), 
    use_grid_search=False,
    model_params={'kernel': 1**2 * Matern(length_scale=1, nu=1.5) + WhiteKernel(noise_level=1)}
)


Performing cross-validation...


Fold 5 - MAE: 0.0940, MSE: 0.0163, R2: 0.9520, TOL90: 0.2071: 100%|██████████| 5/5 [56:41<00:00, 680.34s/it]


In [20]:
from sklearn.neighbors import KNeighborsRegressor

knn_model = KNeighborsRegressor()

knn_param_grid = {
    'n_neighbors': list(range(2, 15)),
    'leaf_size': [20, 30, 40, 50],
    'weights': ['uniform', 'distance']
}

knn_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=knn_model,
    param_grid=knn_param_grid
)

Performing GridSearch...

Best parameters: {'leaf_size': 20, 'n_neighbors': 10, 'weights': 'distance'}

Performing cross-validation...


Fold 5 - MAE: 0.1056, MSE: 0.0219, R2: 0.9356, TOL90: 0.2345: 100%|██████████| 5/5 [00:00<00:00,  5.85it/s]


In [21]:
def cv_saved_model_architecture(saved_model_path, df, target_column='r_value', n_splits=5, 
                               epochs=100, batch_size=32, random_state=42):
    """
    Perform cross-validation using architecture and parameters from a saved model
    
    Parameters:
    -----------
    saved_model_path : str
        Path to saved .h5 model file
    df : pandas DataFrame
        Input data
    target_column : str
        Name of target column
    n_splits : int
        Number of CV folds
    epochs : int
        Number of training epochs
    batch_size : int
        Batch size for training
    random_state : int
        Random seed for reproducibility
        
    Returns:
    --------
    dict containing CV metrics
    """
    # Load saved model to get architecture and parameters
    base_model = tf.keras.models.load_model(saved_model_path)
    
            # Get learning rate from saved model and convert to Python float
    learning_rate = float(base_model.optimizer.learning_rate.numpy())
    
    # Prepare data
    X = df.drop([target_column], axis=1)
    y = df[target_column]
    
    # Initialize KFold
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    
    # Storage for CV metrics
    cv_scores = {
        'mae': [],
        'mse': [],
        'r2': []
    }
    
    # Perform cross-validation
    print("\nPerforming cross-validation...")
    for fold, (train_idx, val_idx) in enumerate(kf.split(X), 1):
        print(f"\nFold {fold}/{n_splits}")
        
        # Clear previous model from memory
        tf.keras.backend.clear_session()
        
        # Split data
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        
        # Convert to float32
        X_train = np.array(X_train, dtype=np.float32)
        y_train = np.array(y_train, dtype=np.float32)
        X_val = np.array(X_val, dtype=np.float32)
        y_val = np.array(y_val, dtype=np.float32)
        
        # Create new model with same architecture
        model = tf.keras.models.clone_model(base_model)
        
        # Compile with same optimizer type and learning rate
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        model.compile(
            optimizer=optimizer,
            loss='mean_absolute_error',
            metrics=['mae', 'mse']
        )
        
        # Early stopping
        early_stopping = keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            min_delta=1e-4
        )
        
        # Create TF datasets
        train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
        train_dataset = (train_dataset
            .batch(batch_size, drop_remainder=True)
            .repeat())
        
        val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
        val_dataset = val_dataset.batch(batch_size)
        
        steps_per_epoch = len(X_train) // batch_size
        
        # Train model
        history = model.fit(
            train_dataset,
            validation_data=val_dataset,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            callbacks=[early_stopping],
            verbose=1
        )
        
        # Make predictions
        y_pred = model.predict(X_val, verbose=0)
        
        # Calculate metrics
        mae = mean_absolute_error(y_val, y_pred)
        mse = mean_squared_error(y_val, y_pred)
        r2 = r2_score(y_val, y_pred)
        
        cv_scores['mae'].append(mae)
        cv_scores['mse'].append(mse)
        cv_scores['r2'].append(r2)
        
        print(f"Fold {fold} - MAE: {mae:.4f}, MSE: {mse:.4f}, R2: {r2:.4f}")
    
    # Calculate average metrics
    results = {
        'avg_mae': np.mean(cv_scores['mae']),
        'std_mae': np.std(cv_scores['mae']),
        'avg_mse': np.mean(cv_scores['mse']),
        'std_mse': np.std(cv_scores['mse']),
        'avg_r2': np.mean(cv_scores['r2']),
        'std_r2': np.std(cv_scores['r2']),
        'cv_scores': cv_scores
    }
    
    return results

In [22]:
class SteelPropertiesANN:
    def __init__(self, input_dim, target_column):
        self.input_dim = input_dim
        self.target_column = target_column
        self.best_model = None
        self.best_params = None
        self.best_score = float('inf')

    def build_model(self, config):
        hidden_layers = config['layers']
        learning_rate = config['learning_rate']
        l2_strength = config['l2_regularization']
        
        model = keras.Sequential()
        model.add(keras.layers.Input(shape=(self.input_dim,)))
        
        for units, activation in hidden_layers:
            model.add(keras.layers.Dense(
                units=units,
                activation=activation,
                kernel_regularizer=keras.regularizers.l2(l2_strength)
            ))
        
        model.add(keras.layers.Dense(1))
        
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=learning_rate,
            decay_steps=100,
            decay_rate=0.9,
            staircase=True
        )
        
        model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=1.0),
            loss='mean_absolute_error',
            metrics=['mae', 'mse']
        )
        return model

    def generate_grid_configs(self, 
        layer_options=[(64, 'relu'), (128, 'relu'), (256, 'relu')],
        layer_depths=[2, 3, 4],
        learning_rates=[1e-2, 1e-3, 1e-4],
        l2_regularization=[1e-3, 1e-4, 1e-5],
        batch_sizes=[16, 32, 64]
    ):
        from itertools import product
        grid_configs = []
        
        for depth in layer_depths:
            for lr in learning_rates:
                for l2_reg in l2_regularization:
                    for batch_size in batch_sizes:
                        layer_combinations = list(product(layer_options, repeat=depth))
                        for layers in layer_combinations:
                            config = {
                                'layers': layers,
                                'learning_rate': lr,
                                'l2_regularization': l2_reg,
                                'batch_size': batch_size
                            }
                            grid_configs.append(config)
        
        return grid_configs

    def grid_search(self, train_scaled_df, grid_configs=None, epochs=100, max_configs=None):
        # Split training data into training and validation sets
        train_data, val_data = train_test_split(train_scaled_df, test_size=0.2, random_state=42)
    
        if grid_configs is None:
            grid_configs = self.generate_grid_configs()
        
        X_train = train_data.drop([self.target_column], axis=1)
        y_train = train_data[self.target_column]
        X_val = val_data.drop([self.target_column], axis=1)
        y_val = val_data[self.target_column]
        
        if max_configs:
            grid_configs = grid_configs[:max_configs]
        
        results = []
        for config in tqdm(grid_configs, desc="Training models"):
            tf.keras.backend.clear_session()
            model = self.build_model(config)
            batch_size = min(config['batch_size'], len(X_train))
            
            early_stopping = keras.callbacks.EarlyStopping(
                monitor='val_loss', 
                patience=10, 
                restore_best_weights=True,
                min_delta=1e-4
            )
            
            try:
                X_train = np.array(X_train, dtype=np.float32)
                y_train = np.array(y_train, dtype=np.float32)
                X_val = np.array(X_val, dtype=np.float32)
                y_val = np.array(y_val, dtype=np.float32)

                train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
                train_dataset = (train_dataset
                    .batch(batch_size, drop_remainder=True)
                    .repeat())
                
                val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
                val_dataset = val_dataset.batch(batch_size)
                
                steps_per_epoch = len(X_train) // batch_size
                
                history = model.fit(
                    train_dataset,
                    validation_data=val_dataset,
                    epochs=epochs,
                    steps_per_epoch=steps_per_epoch,
                    callbacks=[early_stopping],
                    verbose=0
                )
                
                val_loss = model.evaluate(X_val, y_val, verbose=0)[0]
                
                result_entry = config.copy()
                result_entry.update({'val_loss': val_loss})
                results.append(result_entry)
                
                if val_loss < self.best_score:
                    self.best_score = val_loss
                    self.best_model = model
                    self.best_params = config
                
            except Exception as e:
                print(f"Error with config {config}: {str(e)}")
                continue
        
        return self.best_model, self.best_params

In [23]:
cv_results = cv_saved_model_architecture(
    saved_model_path='model_mecomep2.h5',
    df=train_scaled_df,
    target_column='r_value',
    n_splits=5,
    epochs=100,
    batch_size=32
)

# Print CV results
print("\nCross-Validation Results:")
print("-" * 50)
print(f"Average MAE: {cv_results['avg_mae']:.4f} ± {cv_results['std_mae']:.4f}")
print(f"Average MSE: {cv_results['avg_mse']:.4f} ± {cv_results['std_mse']:.4f}")
print(f"Average R2: {cv_results['avg_r2']:.4f} ± {cv_results['std_r2']:.4f}")


Performing cross-validation...

Fold 1/5
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Fold 1 - MAE: 0.0945, MSE: 0.0173, R2: 0.9478

Fold 2/5
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/1

In [24]:
from sklearn.svm import SVR

svr_param_grid = {
    'C': [0.01, 0.1, 1, 10, 100, 1000],
    'epsilon': [0.001, 0.01, 0.1, 0.5, 1, 2]
}

svr_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=SVR(),
    param_grid=svr_param_grid
)

Performing GridSearch...

Best parameters: {'C': 1, 'epsilon': 0.01}

Performing cross-validation...


Fold 5 - MAE: 0.0956, MSE: 0.0176, R2: 0.9482, TOL90: 0.2191: 100%|██████████| 5/5 [00:44<00:00,  8.94s/it]


In [25]:
from sklearn.linear_model import Ridge

ridge_param_grid = {
    'alpha': [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
}

ridge_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=Ridge(random_state=42),
    param_grid=ridge_param_grid
)

Performing GridSearch...

Best parameters: {'alpha': 0.1}

Performing cross-validation...


Fold 5 - MAE: 0.1208, MSE: 0.0250, R2: 0.9264, TOL90: 0.2566: 100%|██████████| 5/5 [00:00<00:00, 45.18it/s]


In [26]:
def load_and_evaluate_model(data_df, features_dict, model_path, n_splits=5, batch_size=32):
    """
    Load saved model and perform cross validation
    
    Args:
        data_df: Pandas DataFrame containing the data
        features_dict: Dictionary of features by category
        model_path: Path to saved model file
        n_splits: Number of CV folds
        batch_size: Batch size for evaluation
    
    Returns:
        Dictionary of metrics averaged across folds, including tol90
    """
    # Load the saved model
    model = torch.load(model_path)
    model.eval()
    
    # Initialize feature arrays and dimensions
    feature_arrays = {}
    feature_dims = {}
    
    # Process each feature category
    for category in ['chemical', 'time', 'process', 'model']:
        available_features = [col for col in features_dict[category] 
                            if col in data_df.columns]
        
        if available_features:
            feature_arrays[category] = data_df[available_features].values.astype(np.float32)
            feature_dims[category] = len(available_features)
        else:
            feature_arrays[category] = np.zeros((len(data_df), 0), dtype=np.float32)
            feature_dims[category] = 0
    
    # Prepare targets
    targets = data_df['r_value'].values
    
    # Initialize KFold
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    # Lists to store metrics for each fold
    fold_metrics = {
        'mae': [],
        'mse': [],
        'rmse': [],
        'r2': [],
        'tol90': []  # Add tol90 to metrics
    }
    
    # Cross validation loop
    for fold, (train_idx, val_idx) in enumerate(kfold.split(targets)):
        # Prepare validation tensors for this fold
        val_tensors = {
            'chemical': torch.FloatTensor(feature_arrays['chemical'][val_idx]),
            'time': torch.FloatTensor(feature_arrays['time'][val_idx]),
            'process': torch.FloatTensor(feature_arrays['process'][val_idx]),
            'model': torch.FloatTensor(feature_arrays['model'][val_idx])
        }
        
        val_targets = torch.FloatTensor(targets[val_idx])
        
        # Create validation DataLoader
        val_dataset = TensorDataset(
            val_tensors['chemical'],
            val_tensors['time'],
            val_tensors['process'],
            val_tensors['model'],
            val_targets
        )
        val_loader = DataLoader(val_dataset, batch_size=batch_size)
        
        # Evaluation for this fold
        predictions = []
        actuals = []
        
        with torch.no_grad():
            for batch_chem, batch_time, batch_proc, batch_model, batch_targets in val_loader:
                outputs = model(batch_chem, batch_time, batch_proc, batch_model)
                predictions.extend(outputs.numpy().flatten())
                actuals.extend(batch_targets.numpy().flatten())
        
        # Calculate metrics for this fold
        predictions = np.array(predictions)
        actuals = np.array(actuals)
        
        mae = mean_absolute_error(actuals, predictions)
        mse = mean_squared_error(actuals, predictions)
        rmse = np.sqrt(mse)
        r2 = r2_score(actuals, predictions)
        
        # Calculate tol90 (90th percentile of absolute errors)
        abs_errors = np.abs(actuals - predictions)
        tol90 = np.percentile(abs_errors, 90)
        
        fold_metrics['mae'].append(mae)
        fold_metrics['mse'].append(mse)
        fold_metrics['rmse'].append(rmse)
        fold_metrics['r2'].append(r2)
        fold_metrics['tol90'].append(tol90)
        
        print(f"Fold {fold+1} - MAE: {mae:.4f}, MSE: {mse:.4f}, RMSE: {rmse:.4f}, R2: {r2:.4f}, TOL90: {tol90:.4f}")
    
    # Calculate and return average metrics
    avg_metrics = {
        'mae': np.mean(fold_metrics['mae']),
        'mae_std': np.std(fold_metrics['mae']),
        'mse': np.mean(fold_metrics['mse']),
        'mse_std': np.std(fold_metrics['mse']), 
        'rmse': np.mean(fold_metrics['rmse']),
        'rmse_std': np.std(fold_metrics['rmse']),
        'r2': np.mean(fold_metrics['r2']),
        'r2_std': np.std(fold_metrics['r2']),
        'tol90': np.mean(fold_metrics['tol90']),  # Add average tol90
        'tol90_std': np.std(fold_metrics['tol90'])  # Add tol90 standard deviation
    }
    
    print("\nAverage Metrics across folds:")
    print(f"MAE: {avg_metrics['mae']:.4f} ± {avg_metrics['mae_std']:.4f}")
    print(f"MSE: {avg_metrics['mse']:.4f} ± {avg_metrics['mse_std']:.4f}")
    print(f"RMSE: {avg_metrics['rmse']:.4f} ± {avg_metrics['rmse_std']:.4f}")
    print(f"R2: {avg_metrics['r2']:.4f} ± {avg_metrics['r2_std']:.4f}")
    print(f"TOL90: {avg_metrics['tol90']:.4f} ± {avg_metrics['tol90_std']:.4f}")
    
    return avg_metrics

In [27]:
# labeling the features for each branch
features = [col for col in df.columns if col not in ['r_value', 'steel_family', 'steel_grade']]
features_dict = {
   'time': [col for col in features if 'time' in col.lower()], 
   'chemical': ['pct_al', 'pct_b', 'pct_c', 'pct_cr', 'pct_mn', 'pct_n', 'pct_nb', 'pct_si', 'pct_ti', 'pct_v', 'mfia_coil_frac_fer', 'mfia_et1_frac_fer', 'mfia_et2_frac_fer'],
   'model': ["rm", "ag", "a80", "n_value"]
}
features_dict['process'] = [col for col in features if col not in features_dict['time'] and col not in features_dict['chemical']]

In [28]:
class MultiBranchSteelRegressor(nn.Module):
    def __init__(self, chemical_dim, time_dim, process_dim, model_dim, hidden_units=64, dropout_rate=0.2):
        super().__init__()
        # Track which branches are active
        self.has_chemical = chemical_dim > 0
        self.has_time = time_dim > 0
        self.has_process = process_dim > 0
        self.has_model = model_dim > 0
        
        # Count active branches
        self.active_branches = sum([self.has_chemical, self.has_time, self.has_process, self.has_model])
        
        # Adjust hidden units for each branch
        self.branch_hidden = min(hidden_units, max(16, hidden_units // 2))
        
        # Creating branch
        def create_branch(input_dim):
            return nn.Sequential(
                nn.Linear(input_dim, self.branch_hidden),
                nn.BatchNorm1d(self.branch_hidden),
                nn.ReLU(),
                nn.Dropout(dropout_rate)
            )
        
        # Only create branches that have features
        if self.has_chemical:
            self.chemical_branch = create_branch(chemical_dim)
        if self.has_time:
            self.time_branch = create_branch(time_dim)
        if self.has_process:
            self.process_branch = create_branch(process_dim)
        if self.has_model:
            self.model_branch = create_branch(model_dim)
        
        # Combined input dimension based on active branches only
        combined_dim = self.branch_hidden * self.active_branches
        
        # Final layers after concatenation
        self.final_layers = nn.Sequential(
            nn.Linear(combined_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 1)
        )
    
    def forward(self, chemical, time, process, model):
        features = []
        # Only process branches that have features
        if self.has_chemical:
            if chemical.dim() == 1:
                chemical = chemical.unsqueeze(0)
            features.append(self.chemical_branch(chemical))
        
        if self.has_time:
            if time.dim() == 1:
                time = time.unsqueeze(0)
            features.append(self.time_branch(time))
        
        if self.has_process:
            if process.dim() == 1:
                process = process.unsqueeze(0)
            features.append(self.process_branch(process))
        
        if self.has_model:
            if model.dim() == 1:
                model = model.unsqueeze(0)
            features.append(self.model_branch(model))
        
        # Concatenate only active features
        combined = torch.cat(features, dim=1) if len(features) > 1 else features[0]
        return self.final_layers(combined)

In [29]:
def train_model_regular(df, features_dict, num_epochs, hyperparameters, use_l2=False):
    batch_size = hyperparameters['batch_size']
    
    # Initialize feature arrays and dimensions
    feature_arrays = {}
    feature_dims = {}
    
    # Process each feature category
    for category in ['chemical', 'time', 'process', 'model']:
        available_features = [col for col in features_dict[category] 
                            if col in df.columns]
        
        if available_features:
            feature_arrays[category] = df[available_features].values.astype(np.float32)
            feature_dims[category] = len(available_features)
        else:
            feature_arrays[category] = np.zeros((len(df), 0), dtype=np.float32)
            feature_dims[category] = 0
    
    # Prepare targets
    targets = df['r_value'].values
    
    # Split data
    split_data = train_test_split(
        feature_arrays['chemical'],
        feature_arrays['time'],
        feature_arrays['process'],
        feature_arrays['model'],
        targets,
        test_size=0.2,
        random_state=42
    )
    
    (X_train_chem, X_test_chem, X_train_time, X_test_time, 
     X_train_proc, X_test_proc, X_train_model, X_test_model, 
     y_train, y_test) = split_data
    
    # Convert to tensors
    train_tensors = {
        'chemical': torch.FloatTensor(X_train_chem),
        'time': torch.FloatTensor(X_train_time),
        'process': torch.FloatTensor(X_train_proc),
        'model': torch.FloatTensor(X_train_model)
    }
    
    test_tensors = {
        'chemical': torch.FloatTensor(X_test_chem),
        'time': torch.FloatTensor(X_test_time),
        'process': torch.FloatTensor(X_test_proc),
        'model': torch.FloatTensor(X_test_model)
    }
    
    y_train_tensor = torch.FloatTensor(y_train)
    y_test_tensor = torch.FloatTensor(y_test)
    
    # Create DataLoader
    train_dataset = TensorDataset(
        train_tensors['chemical'],
        train_tensors['time'],
        train_tensors['process'],
        train_tensors['model'],
        y_train_tensor
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    
    # Initialize model
    model = MultiBranchSteelRegressor(
        chemical_dim=feature_dims['chemical'],
        time_dim=feature_dims['time'],
        process_dim=feature_dims['process'],
        model_dim=feature_dims['model'],
        hidden_units=hyperparameters['hidden_units'],
        dropout_rate=hyperparameters['dropout_rate']
    )
    
    if use_l2:
        weight_decay = 0.001
    else:
        weight_decay = 0.0
    
    optimizer = torch.optim.AdamW(model.parameters(), weight_decay=weight_decay)
    criterion = nn.L1Loss()
    
    # Training loop
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for batch_chem, batch_time, batch_proc, batch_model, batch_targets in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_chem, batch_time, batch_proc, batch_model)
            loss = criterion(outputs, batch_targets.unsqueeze(1))
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")
    
    # Evaluation
    model.eval()
    with torch.no_grad():
        y_pred = model(
            test_tensors['chemical'],
            test_tensors['time'],
            test_tensors['process'],
            test_tensors['model']
        )
        test_loss = criterion(y_pred, y_test_tensor.unsqueeze(1)).item()
        y_pred_np = y_pred.numpy().flatten()
        r2 = r2_score(y_test, y_pred_np)
        mae = mean_absolute_error(y_test, y_pred_np)
        mse = mean_squared_error(y_test, y_pred_np)
        
        metrics = {
            'r2_score': r2,
            'mae': mae,
            'mse': mse,
            'test_loss': test_loss
        }
        print(f"Evaluation - Test Loss: {test_loss:.4f}, R2: {r2:.4f}")
    
    return model, metrics

In [30]:
from sklearn.model_selection import ParameterGrid

param_grid = {
    'learning_rate': [0.1, 0.01, 1e-3],
    'batch_size': [16, 32, 64],
    'hidden_units': [64, 128, 256],
    'dropout_rate': [0, 0.2]
}
grid = ParameterGrid(param_grid)

In [31]:
num_epochs = 100
best_params = None
best_results = {'mae': float('inf')}

for params in tqdm(grid, desc="Grid Search Progress", leave=True):
    print(f"Evaluating hyperparameters: {params}")
    
    model, metrics = train_model_regular(train_scaled_df, features_dict, num_epochs, params)
    mae = metrics['mae']
    
    if best_params is None or mae < best_results['mae']:
        best_results = {
            'mae': mae,
            'metrics': metrics
        }
        best_params = params

print(f"Best parameters found: {best_params}")
print(f"Best MAE: {best_results['mae']:.4f}")


Grid Search Progress:   0%|          | 0/54 [00:00<?, ?it/s]

Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1149
Epoch [20/100], Loss: 0.1068
Epoch [30/100], Loss: 0.0995
Epoch [40/100], Loss: 0.0983
Epoch [50/100], Loss: 0.0949
Epoch [60/100], Loss: 0.0948
Epoch [70/100], Loss: 0.0930
Epoch [80/100], Loss: 0.0909
Epoch [90/100], Loss: 0.0912


Grid Search Progress:   2%|▏         | 1/54 [03:36<3:11:39, 216.97s/it]

Epoch [100/100], Loss: 0.0891
Evaluation - Test Loss: 0.0910, R2: 0.9505
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1158
Epoch [20/100], Loss: 0.1055
Epoch [30/100], Loss: 0.0999
Epoch [40/100], Loss: 0.0975
Epoch [50/100], Loss: 0.0946
Epoch [60/100], Loss: 0.0931
Epoch [70/100], Loss: 0.0909
Epoch [80/100], Loss: 0.0920
Epoch [90/100], Loss: 0.0904


Grid Search Progress:   4%|▎         | 2/54 [06:50<2:56:07, 203.22s/it]

Epoch [100/100], Loss: 0.0887
Evaluation - Test Loss: 0.0890, R2: 0.9522
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1161
Epoch [20/100], Loss: 0.1073
Epoch [30/100], Loss: 0.0997
Epoch [40/100], Loss: 0.0970
Epoch [50/100], Loss: 0.0967
Epoch [60/100], Loss: 0.0938
Epoch [70/100], Loss: 0.0934
Epoch [80/100], Loss: 0.0911
Epoch [90/100], Loss: 0.0907


Grid Search Progress:   6%|▌         | 3/54 [10:03<2:48:52, 198.68s/it]

Epoch [100/100], Loss: 0.0887
Evaluation - Test Loss: 0.0897, R2: 0.9533
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1149
Epoch [20/100], Loss: 0.1036
Epoch [30/100], Loss: 0.1002
Epoch [40/100], Loss: 0.0965
Epoch [50/100], Loss: 0.0934
Epoch [60/100], Loss: 0.0924
Epoch [70/100], Loss: 0.0897
Epoch [80/100], Loss: 0.0901
Epoch [90/100], Loss: 0.0879


Grid Search Progress:   7%|▋         | 4/54 [13:22<2:45:25, 198.51s/it]

Epoch [100/100], Loss: 0.0864
Evaluation - Test Loss: 0.0904, R2: 0.9519
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1159
Epoch [20/100], Loss: 0.1044
Epoch [30/100], Loss: 0.1000
Epoch [40/100], Loss: 0.0962
Epoch [50/100], Loss: 0.0928
Epoch [60/100], Loss: 0.0917
Epoch [70/100], Loss: 0.0896
Epoch [80/100], Loss: 0.0897
Epoch [90/100], Loss: 0.0887


Grid Search Progress:   9%|▉         | 5/54 [16:57<2:47:00, 204.49s/it]

Epoch [100/100], Loss: 0.0866
Evaluation - Test Loss: 0.0923, R2: 0.9492
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1156
Epoch [20/100], Loss: 0.1045
Epoch [30/100], Loss: 0.1004
Epoch [40/100], Loss: 0.0959
Epoch [50/100], Loss: 0.0945
Epoch [60/100], Loss: 0.0917
Epoch [70/100], Loss: 0.0902
Epoch [80/100], Loss: 0.0899
Epoch [90/100], Loss: 0.0875


Grid Search Progress:  11%|█         | 6/54 [20:18<2:42:48, 203.52s/it]

Epoch [100/100], Loss: 0.0865
Evaluation - Test Loss: 0.0943, R2: 0.9473
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1164
Epoch [20/100], Loss: 0.1035
Epoch [30/100], Loss: 0.0985
Epoch [40/100], Loss: 0.0951
Epoch [50/100], Loss: 0.0930
Epoch [60/100], Loss: 0.0909
Epoch [70/100], Loss: 0.0895
Epoch [80/100], Loss: 0.0886
Epoch [90/100], Loss: 0.0863


Grid Search Progress:  13%|█▎        | 7/54 [23:51<2:41:44, 206.47s/it]

Epoch [100/100], Loss: 0.0860
Evaluation - Test Loss: 0.0898, R2: 0.9498
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1148
Epoch [20/100], Loss: 0.1047
Epoch [30/100], Loss: 0.0991
Epoch [40/100], Loss: 0.0960
Epoch [50/100], Loss: 0.0931
Epoch [60/100], Loss: 0.0912
Epoch [70/100], Loss: 0.0892
Epoch [80/100], Loss: 0.0897
Epoch [90/100], Loss: 0.0857


Grid Search Progress:  15%|█▍        | 8/54 [27:28<2:40:53, 209.86s/it]

Epoch [100/100], Loss: 0.0863
Evaluation - Test Loss: 0.0903, R2: 0.9513
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1139
Epoch [20/100], Loss: 0.1038
Epoch [30/100], Loss: 0.0977
Epoch [40/100], Loss: 0.0951
Epoch [50/100], Loss: 0.0923
Epoch [60/100], Loss: 0.0920
Epoch [70/100], Loss: 0.0888
Epoch [80/100], Loss: 0.0887
Epoch [90/100], Loss: 0.0848


Grid Search Progress:  17%|█▋        | 9/54 [31:04<2:38:50, 211.79s/it]

Epoch [100/100], Loss: 0.0846
Evaluation - Test Loss: 0.0904, R2: 0.9496
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1555
Epoch [20/100], Loss: 0.1427
Epoch [30/100], Loss: 0.1378
Epoch [40/100], Loss: 0.1350
Epoch [50/100], Loss: 0.1306
Epoch [60/100], Loss: 0.1288
Epoch [70/100], Loss: 0.1274
Epoch [80/100], Loss: 0.1261
Epoch [90/100], Loss: 0.1271


Grid Search Progress:  19%|█▊        | 10/54 [34:35<2:35:13, 211.66s/it]

Epoch [100/100], Loss: 0.1239
Evaluation - Test Loss: 0.1087, R2: 0.9356
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1559
Epoch [20/100], Loss: 0.1443
Epoch [30/100], Loss: 0.1367
Epoch [40/100], Loss: 0.1364
Epoch [50/100], Loss: 0.1343
Epoch [60/100], Loss: 0.1303
Epoch [70/100], Loss: 0.1293
Epoch [80/100], Loss: 0.1301
Epoch [90/100], Loss: 0.1292


Grid Search Progress:  20%|██        | 11/54 [38:00<2:30:05, 209.42s/it]

Epoch [100/100], Loss: 0.1253
Evaluation - Test Loss: 0.1119, R2: 0.9306
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1557
Epoch [20/100], Loss: 0.1440
Epoch [30/100], Loss: 0.1380
Epoch [40/100], Loss: 0.1349
Epoch [50/100], Loss: 0.1334
Epoch [60/100], Loss: 0.1310
Epoch [70/100], Loss: 0.1299
Epoch [80/100], Loss: 0.1279
Epoch [90/100], Loss: 0.1265


Grid Search Progress:  22%|██▏       | 12/54 [41:22<2:25:06, 207.30s/it]

Epoch [100/100], Loss: 0.1277
Evaluation - Test Loss: 0.1153, R2: 0.9281
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1544
Epoch [20/100], Loss: 0.1400
Epoch [30/100], Loss: 0.1362
Epoch [40/100], Loss: 0.1314
Epoch [50/100], Loss: 0.1311
Epoch [60/100], Loss: 0.1296
Epoch [70/100], Loss: 0.1267
Epoch [80/100], Loss: 0.1255
Epoch [90/100], Loss: 0.1243


Grid Search Progress:  24%|██▍       | 13/54 [44:52<2:22:08, 208.02s/it]

Epoch [100/100], Loss: 0.1242
Evaluation - Test Loss: 0.0948, R2: 0.9487
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1531
Epoch [20/100], Loss: 0.1394
Epoch [30/100], Loss: 0.1326
Epoch [40/100], Loss: 0.1318
Epoch [50/100], Loss: 0.1289
Epoch [60/100], Loss: 0.1271
Epoch [70/100], Loss: 0.1267
Epoch [80/100], Loss: 0.1239
Epoch [90/100], Loss: 0.1242


Grid Search Progress:  26%|██▌       | 14/54 [48:22<2:19:05, 208.63s/it]

Epoch [100/100], Loss: 0.1226
Evaluation - Test Loss: 0.1021, R2: 0.9418
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1553
Epoch [20/100], Loss: 0.1390
Epoch [30/100], Loss: 0.1357
Epoch [40/100], Loss: 0.1316
Epoch [50/100], Loss: 0.1295
Epoch [60/100], Loss: 0.1286
Epoch [70/100], Loss: 0.1272
Epoch [80/100], Loss: 0.1241
Epoch [90/100], Loss: 0.1239


Grid Search Progress:  28%|██▊       | 15/54 [51:50<2:15:34, 208.58s/it]

Epoch [100/100], Loss: 0.1219
Evaluation - Test Loss: 0.1063, R2: 0.9360
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1511
Epoch [20/100], Loss: 0.1389
Epoch [30/100], Loss: 0.1330
Epoch [40/100], Loss: 0.1283
Epoch [50/100], Loss: 0.1258
Epoch [60/100], Loss: 0.1241
Epoch [70/100], Loss: 0.1224
Epoch [80/100], Loss: 0.1209
Epoch [90/100], Loss: 0.1202


Grid Search Progress:  30%|██▉       | 16/54 [55:29<2:14:00, 211.59s/it]

Epoch [100/100], Loss: 0.1193
Evaluation - Test Loss: 0.1005, R2: 0.9432
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1553
Epoch [20/100], Loss: 0.1387
Epoch [30/100], Loss: 0.1332
Epoch [40/100], Loss: 0.1288
Epoch [50/100], Loss: 0.1252
Epoch [60/100], Loss: 0.1273
Epoch [70/100], Loss: 0.1240
Epoch [80/100], Loss: 0.1213
Epoch [90/100], Loss: 0.1216


Grid Search Progress:  31%|███▏      | 17/54 [59:07<2:11:46, 213.68s/it]

Epoch [100/100], Loss: 0.1220
Evaluation - Test Loss: 0.0993, R2: 0.9445
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1517
Epoch [20/100], Loss: 0.1361
Epoch [30/100], Loss: 0.1323
Epoch [40/100], Loss: 0.1293
Epoch [50/100], Loss: 0.1284
Epoch [60/100], Loss: 0.1258
Epoch [70/100], Loss: 0.1262
Epoch [80/100], Loss: 0.1239
Epoch [90/100], Loss: 0.1220


Grid Search Progress:  33%|███▎      | 18/54 [1:02:47<2:09:11, 215.33s/it]

Epoch [100/100], Loss: 0.1203
Evaluation - Test Loss: 0.1086, R2: 0.9336
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1111
Epoch [20/100], Loss: 0.1022
Epoch [30/100], Loss: 0.0964
Epoch [40/100], Loss: 0.0925
Epoch [50/100], Loss: 0.0902
Epoch [60/100], Loss: 0.0887
Epoch [70/100], Loss: 0.0863
Epoch [80/100], Loss: 0.0836
Epoch [90/100], Loss: 0.0824


Grid Search Progress:  35%|███▌      | 19/54 [1:04:32<1:46:25, 182.43s/it]

Epoch [100/100], Loss: 0.0809
Evaluation - Test Loss: 0.0889, R2: 0.9534
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1096
Epoch [20/100], Loss: 0.1005
Epoch [30/100], Loss: 0.0964
Epoch [40/100], Loss: 0.0937
Epoch [50/100], Loss: 0.0904
Epoch [60/100], Loss: 0.0876
Epoch [70/100], Loss: 0.0865
Epoch [80/100], Loss: 0.0840
Epoch [90/100], Loss: 0.0832


Grid Search Progress:  37%|███▋      | 20/54 [1:06:19<1:30:25, 159.56s/it]

Epoch [100/100], Loss: 0.0820
Evaluation - Test Loss: 0.0921, R2: 0.9515
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1106
Epoch [20/100], Loss: 0.1021
Epoch [30/100], Loss: 0.0950
Epoch [40/100], Loss: 0.0922
Epoch [50/100], Loss: 0.0902
Epoch [60/100], Loss: 0.0875
Epoch [70/100], Loss: 0.0864
Epoch [80/100], Loss: 0.0843
Epoch [90/100], Loss: 0.0815


Grid Search Progress:  39%|███▉      | 21/54 [1:08:05<1:18:56, 143.54s/it]

Epoch [100/100], Loss: 0.0818
Evaluation - Test Loss: 0.0920, R2: 0.9504
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1089
Epoch [20/100], Loss: 0.0991
Epoch [30/100], Loss: 0.0932
Epoch [40/100], Loss: 0.0894
Epoch [50/100], Loss: 0.0879
Epoch [60/100], Loss: 0.0847
Epoch [70/100], Loss: 0.0823
Epoch [80/100], Loss: 0.0821
Epoch [90/100], Loss: 0.0805


Grid Search Progress:  41%|████      | 22/54 [1:09:57<1:11:27, 133.97s/it]

Epoch [100/100], Loss: 0.0787
Evaluation - Test Loss: 0.0907, R2: 0.9496
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1126
Epoch [20/100], Loss: 0.0999
Epoch [30/100], Loss: 0.0956
Epoch [40/100], Loss: 0.0906
Epoch [50/100], Loss: 0.0865
Epoch [60/100], Loss: 0.0855
Epoch [70/100], Loss: 0.0830
Epoch [80/100], Loss: 0.0803
Epoch [90/100], Loss: 0.0798


Grid Search Progress:  43%|████▎     | 23/54 [1:11:47<1:05:32, 126.85s/it]

Epoch [100/100], Loss: 0.0781
Evaluation - Test Loss: 0.0913, R2: 0.9482
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1079
Epoch [20/100], Loss: 0.0990
Epoch [30/100], Loss: 0.0935
Epoch [40/100], Loss: 0.0904
Epoch [50/100], Loss: 0.0868
Epoch [60/100], Loss: 0.0871
Epoch [70/100], Loss: 0.0840
Epoch [80/100], Loss: 0.0811
Epoch [90/100], Loss: 0.0789


Grid Search Progress:  44%|████▍     | 24/54 [1:13:36<1:00:47, 121.57s/it]

Epoch [100/100], Loss: 0.0781
Evaluation - Test Loss: 0.0946, R2: 0.9462
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1068
Epoch [20/100], Loss: 0.0986
Epoch [30/100], Loss: 0.0928
Epoch [40/100], Loss: 0.0890
Epoch [50/100], Loss: 0.0872
Epoch [60/100], Loss: 0.0832
Epoch [70/100], Loss: 0.0810
Epoch [80/100], Loss: 0.0789
Epoch [90/100], Loss: 0.0773


Grid Search Progress:  46%|████▋     | 25/54 [1:15:32<57:56, 119.87s/it]  

Epoch [100/100], Loss: 0.0778
Evaluation - Test Loss: 0.0900, R2: 0.9520
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1093
Epoch [20/100], Loss: 0.0982
Epoch [30/100], Loss: 0.0956
Epoch [40/100], Loss: 0.0895
Epoch [50/100], Loss: 0.0872
Epoch [60/100], Loss: 0.0840
Epoch [70/100], Loss: 0.0825
Epoch [80/100], Loss: 0.0812
Epoch [90/100], Loss: 0.0787


Grid Search Progress:  48%|████▊     | 26/54 [1:17:27<55:13, 118.34s/it]

Epoch [100/100], Loss: 0.0766
Evaluation - Test Loss: 0.0924, R2: 0.9507
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1090
Epoch [20/100], Loss: 0.0976
Epoch [30/100], Loss: 0.0929
Epoch [40/100], Loss: 0.0905
Epoch [50/100], Loss: 0.0853
Epoch [60/100], Loss: 0.0826
Epoch [70/100], Loss: 0.0828
Epoch [80/100], Loss: 0.0788
Epoch [90/100], Loss: 0.0793


Grid Search Progress:  50%|█████     | 27/54 [1:19:22<52:51, 117.46s/it]

Epoch [100/100], Loss: 0.0781
Evaluation - Test Loss: 0.0931, R2: 0.9486
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1545
Epoch [20/100], Loss: 0.1379
Epoch [30/100], Loss: 0.1280
Epoch [40/100], Loss: 0.1241
Epoch [50/100], Loss: 0.1258
Epoch [60/100], Loss: 0.1212
Epoch [70/100], Loss: 0.1204
Epoch [80/100], Loss: 0.1187
Epoch [90/100], Loss: 0.1176


Grid Search Progress:  52%|█████▏    | 28/54 [1:21:14<50:13, 115.91s/it]

Epoch [100/100], Loss: 0.1161
Evaluation - Test Loss: 0.0974, R2: 0.9470
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1550
Epoch [20/100], Loss: 0.1396
Epoch [30/100], Loss: 0.1309
Epoch [40/100], Loss: 0.1266
Epoch [50/100], Loss: 0.1259
Epoch [60/100], Loss: 0.1236
Epoch [70/100], Loss: 0.1203
Epoch [80/100], Loss: 0.1205
Epoch [90/100], Loss: 0.1192


Grid Search Progress:  54%|█████▎    | 29/54 [1:23:07<47:53, 114.93s/it]

Epoch [100/100], Loss: 0.1174
Evaluation - Test Loss: 0.0937, R2: 0.9507
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1579
Epoch [20/100], Loss: 0.1410
Epoch [30/100], Loss: 0.1305
Epoch [40/100], Loss: 0.1267
Epoch [50/100], Loss: 0.1263
Epoch [60/100], Loss: 0.1231
Epoch [70/100], Loss: 0.1220
Epoch [80/100], Loss: 0.1202
Epoch [90/100], Loss: 0.1206


Grid Search Progress:  56%|█████▌    | 30/54 [1:25:00<45:42, 114.26s/it]

Epoch [100/100], Loss: 0.1157
Evaluation - Test Loss: 0.0932, R2: 0.9503
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1555
Epoch [20/100], Loss: 0.1359
Epoch [30/100], Loss: 0.1298
Epoch [40/100], Loss: 0.1240
Epoch [50/100], Loss: 0.1230
Epoch [60/100], Loss: 0.1193
Epoch [70/100], Loss: 0.1182
Epoch [80/100], Loss: 0.1166
Epoch [90/100], Loss: 0.1164


Grid Search Progress:  57%|█████▋    | 31/54 [1:26:56<44:01, 114.86s/it]

Epoch [100/100], Loss: 0.1137
Evaluation - Test Loss: 0.0926, R2: 0.9513
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1542
Epoch [20/100], Loss: 0.1377
Epoch [30/100], Loss: 0.1285
Epoch [40/100], Loss: 0.1230
Epoch [50/100], Loss: 0.1202
Epoch [60/100], Loss: 0.1192
Epoch [70/100], Loss: 0.1145
Epoch [80/100], Loss: 0.1147
Epoch [90/100], Loss: 0.1141


Grid Search Progress:  59%|█████▉    | 32/54 [1:28:52<42:12, 115.13s/it]

Epoch [100/100], Loss: 0.1141
Evaluation - Test Loss: 0.0928, R2: 0.9494
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1490
Epoch [20/100], Loss: 0.1348
Epoch [30/100], Loss: 0.1277
Epoch [40/100], Loss: 0.1257
Epoch [50/100], Loss: 0.1219
Epoch [60/100], Loss: 0.1198
Epoch [70/100], Loss: 0.1173
Epoch [80/100], Loss: 0.1163
Epoch [90/100], Loss: 0.1143


Grid Search Progress:  61%|██████    | 33/54 [1:30:49<40:29, 115.69s/it]

Epoch [100/100], Loss: 0.1139
Evaluation - Test Loss: 0.0963, R2: 0.9465
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1525
Epoch [20/100], Loss: 0.1317
Epoch [30/100], Loss: 0.1247
Epoch [40/100], Loss: 0.1225
Epoch [50/100], Loss: 0.1173
Epoch [60/100], Loss: 0.1156
Epoch [70/100], Loss: 0.1137
Epoch [80/100], Loss: 0.1126
Epoch [90/100], Loss: 0.1125


Grid Search Progress:  63%|██████▎   | 34/54 [1:32:52<39:20, 118.02s/it]

Epoch [100/100], Loss: 0.1118
Evaluation - Test Loss: 0.0910, R2: 0.9521
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1512
Epoch [20/100], Loss: 0.1326
Epoch [30/100], Loss: 0.1256
Epoch [40/100], Loss: 0.1208
Epoch [50/100], Loss: 0.1184
Epoch [60/100], Loss: 0.1163
Epoch [70/100], Loss: 0.1138
Epoch [80/100], Loss: 0.1105
Epoch [90/100], Loss: 0.1119


Grid Search Progress:  65%|██████▍   | 35/54 [1:34:57<38:00, 120.03s/it]

Epoch [100/100], Loss: 0.1109
Evaluation - Test Loss: 0.0990, R2: 0.9429
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1514
Epoch [20/100], Loss: 0.1352
Epoch [30/100], Loss: 0.1251
Epoch [40/100], Loss: 0.1226
Epoch [50/100], Loss: 0.1202
Epoch [60/100], Loss: 0.1164
Epoch [70/100], Loss: 0.1141
Epoch [80/100], Loss: 0.1128
Epoch [90/100], Loss: 0.1131


Grid Search Progress:  67%|██████▋   | 36/54 [1:37:01<36:19, 121.10s/it]

Epoch [100/100], Loss: 0.1110
Evaluation - Test Loss: 0.0931, R2: 0.9521
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1033
Epoch [20/100], Loss: 0.0951
Epoch [30/100], Loss: 0.0904
Epoch [40/100], Loss: 0.0867
Epoch [50/100], Loss: 0.0845
Epoch [60/100], Loss: 0.0834
Epoch [70/100], Loss: 0.0797
Epoch [80/100], Loss: 0.0785
Epoch [90/100], Loss: 0.0765


Grid Search Progress:  69%|██████▊   | 37/54 [1:38:03<29:18, 103.47s/it]

Epoch [100/100], Loss: 0.0752
Evaluation - Test Loss: 0.0906, R2: 0.9511
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1021
Epoch [20/100], Loss: 0.0963
Epoch [30/100], Loss: 0.0908
Epoch [40/100], Loss: 0.0888
Epoch [50/100], Loss: 0.0844
Epoch [60/100], Loss: 0.0811
Epoch [70/100], Loss: 0.0794
Epoch [80/100], Loss: 0.0780
Epoch [90/100], Loss: 0.0751


Grid Search Progress:  70%|███████   | 38/54 [1:39:06<24:19, 91.22s/it] 

Epoch [100/100], Loss: 0.0749
Evaluation - Test Loss: 0.0924, R2: 0.9507
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1025
Epoch [20/100], Loss: 0.0955
Epoch [30/100], Loss: 0.0902
Epoch [40/100], Loss: 0.0864
Epoch [50/100], Loss: 0.0837
Epoch [60/100], Loss: 0.0807
Epoch [70/100], Loss: 0.0793
Epoch [80/100], Loss: 0.0782
Epoch [90/100], Loss: 0.0757


Grid Search Progress:  72%|███████▏  | 39/54 [1:40:08<20:39, 82.64s/it]

Epoch [100/100], Loss: 0.0746
Evaluation - Test Loss: 0.0970, R2: 0.9480
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1048
Epoch [20/100], Loss: 0.0956
Epoch [30/100], Loss: 0.0905
Epoch [40/100], Loss: 0.0876
Epoch [50/100], Loss: 0.0850
Epoch [60/100], Loss: 0.0797
Epoch [70/100], Loss: 0.0778
Epoch [80/100], Loss: 0.0758
Epoch [90/100], Loss: 0.0739


Grid Search Progress:  74%|███████▍  | 40/54 [1:41:13<18:01, 77.26s/it]

Epoch [100/100], Loss: 0.0719
Evaluation - Test Loss: 0.0988, R2: 0.9418
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1044
Epoch [20/100], Loss: 0.0922
Epoch [30/100], Loss: 0.0887
Epoch [40/100], Loss: 0.0865
Epoch [50/100], Loss: 0.0832
Epoch [60/100], Loss: 0.0788
Epoch [70/100], Loss: 0.0774
Epoch [80/100], Loss: 0.0743
Epoch [90/100], Loss: 0.0737


Grid Search Progress:  76%|███████▌  | 41/54 [1:42:17<15:53, 73.32s/it]

Epoch [100/100], Loss: 0.0716
Evaluation - Test Loss: 0.0965, R2: 0.9466
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1038
Epoch [20/100], Loss: 0.0976
Epoch [30/100], Loss: 0.0903
Epoch [40/100], Loss: 0.0852
Epoch [50/100], Loss: 0.0847
Epoch [60/100], Loss: 0.0798
Epoch [70/100], Loss: 0.0783
Epoch [80/100], Loss: 0.0782
Epoch [90/100], Loss: 0.0744


Grid Search Progress:  78%|███████▊  | 42/54 [1:43:21<14:05, 70.46s/it]

Epoch [100/100], Loss: 0.0725
Evaluation - Test Loss: 0.0940, R2: 0.9496
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.0999
Epoch [20/100], Loss: 0.0943
Epoch [30/100], Loss: 0.0893
Epoch [40/100], Loss: 0.0842
Epoch [50/100], Loss: 0.0816
Epoch [60/100], Loss: 0.0805
Epoch [70/100], Loss: 0.0761
Epoch [80/100], Loss: 0.0752
Epoch [90/100], Loss: 0.0719


Grid Search Progress:  80%|███████▉  | 43/54 [1:44:31<12:55, 70.50s/it]

Epoch [100/100], Loss: 0.0713
Evaluation - Test Loss: 0.0939, R2: 0.9492
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1024
Epoch [20/100], Loss: 0.0938
Epoch [30/100], Loss: 0.0887
Epoch [40/100], Loss: 0.0866
Epoch [50/100], Loss: 0.0817
Epoch [60/100], Loss: 0.0799
Epoch [70/100], Loss: 0.0780
Epoch [80/100], Loss: 0.0743
Epoch [90/100], Loss: 0.0723


Grid Search Progress:  81%|████████▏ | 44/54 [1:45:43<11:48, 70.83s/it]

Epoch [100/100], Loss: 0.0699
Evaluation - Test Loss: 0.0912, R2: 0.9495
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1003
Epoch [20/100], Loss: 0.0942
Epoch [30/100], Loss: 0.0878
Epoch [40/100], Loss: 0.0856
Epoch [50/100], Loss: 0.0817
Epoch [60/100], Loss: 0.0769
Epoch [70/100], Loss: 0.0762
Epoch [80/100], Loss: 0.0741
Epoch [90/100], Loss: 0.0716


Grid Search Progress:  83%|████████▎ | 45/54 [1:46:52<10:31, 70.16s/it]

Epoch [100/100], Loss: 0.0700
Evaluation - Test Loss: 0.0912, R2: 0.9518
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1569
Epoch [20/100], Loss: 0.1386
Epoch [30/100], Loss: 0.1269
Epoch [40/100], Loss: 0.1242
Epoch [50/100], Loss: 0.1186
Epoch [60/100], Loss: 0.1176
Epoch [70/100], Loss: 0.1164
Epoch [80/100], Loss: 0.1151
Epoch [90/100], Loss: 0.1128


Grid Search Progress:  85%|████████▌ | 46/54 [1:47:56<09:08, 68.54s/it]

Epoch [100/100], Loss: 0.1139
Evaluation - Test Loss: 0.0909, R2: 0.9523
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1545
Epoch [20/100], Loss: 0.1359
Epoch [30/100], Loss: 0.1261
Epoch [40/100], Loss: 0.1227
Epoch [50/100], Loss: 0.1214
Epoch [60/100], Loss: 0.1211
Epoch [70/100], Loss: 0.1174
Epoch [80/100], Loss: 0.1160
Epoch [90/100], Loss: 0.1180


Grid Search Progress:  87%|████████▋ | 47/54 [1:49:02<07:53, 67.62s/it]

Epoch [100/100], Loss: 0.1150
Evaluation - Test Loss: 0.0937, R2: 0.9502
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1563
Epoch [20/100], Loss: 0.1365
Epoch [30/100], Loss: 0.1278
Epoch [40/100], Loss: 0.1250
Epoch [50/100], Loss: 0.1183
Epoch [60/100], Loss: 0.1165
Epoch [70/100], Loss: 0.1151
Epoch [80/100], Loss: 0.1149
Epoch [90/100], Loss: 0.1118


Grid Search Progress:  89%|████████▉ | 48/54 [1:50:07<06:41, 66.91s/it]

Epoch [100/100], Loss: 0.1128
Evaluation - Test Loss: 0.0924, R2: 0.9522
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1580
Epoch [20/100], Loss: 0.1367
Epoch [30/100], Loss: 0.1234
Epoch [40/100], Loss: 0.1199
Epoch [50/100], Loss: 0.1192
Epoch [60/100], Loss: 0.1153
Epoch [70/100], Loss: 0.1130
Epoch [80/100], Loss: 0.1125
Epoch [90/100], Loss: 0.1119


Grid Search Progress:  91%|█████████ | 49/54 [1:51:16<05:37, 67.44s/it]

Epoch [100/100], Loss: 0.1092
Evaluation - Test Loss: 0.0941, R2: 0.9495
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1515
Epoch [20/100], Loss: 0.1331
Epoch [30/100], Loss: 0.1250
Epoch [40/100], Loss: 0.1195
Epoch [50/100], Loss: 0.1155
Epoch [60/100], Loss: 0.1134
Epoch [70/100], Loss: 0.1135
Epoch [80/100], Loss: 0.1114
Epoch [90/100], Loss: 0.1109


Grid Search Progress:  93%|█████████▎| 50/54 [1:52:25<04:31, 68.00s/it]

Epoch [100/100], Loss: 0.1093
Evaluation - Test Loss: 0.0894, R2: 0.9528
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1572
Epoch [20/100], Loss: 0.1392
Epoch [30/100], Loss: 0.1278
Epoch [40/100], Loss: 0.1221
Epoch [50/100], Loss: 0.1182
Epoch [60/100], Loss: 0.1131
Epoch [70/100], Loss: 0.1118
Epoch [80/100], Loss: 0.1129
Epoch [90/100], Loss: 0.1101


Grid Search Progress:  94%|█████████▍| 51/54 [1:53:34<03:24, 68.23s/it]

Epoch [100/100], Loss: 0.1090
Evaluation - Test Loss: 0.0891, R2: 0.9541
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1524
Epoch [20/100], Loss: 0.1367
Epoch [30/100], Loss: 0.1227
Epoch [40/100], Loss: 0.1182
Epoch [50/100], Loss: 0.1145
Epoch [60/100], Loss: 0.1127
Epoch [70/100], Loss: 0.1088
Epoch [80/100], Loss: 0.1090
Epoch [90/100], Loss: 0.1075


Grid Search Progress:  96%|█████████▋| 52/54 [1:54:48<02:19, 69.96s/it]

Epoch [100/100], Loss: 0.1047
Evaluation - Test Loss: 0.0920, R2: 0.9510
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1567
Epoch [20/100], Loss: 0.1324
Epoch [30/100], Loss: 0.1239
Epoch [40/100], Loss: 0.1192
Epoch [50/100], Loss: 0.1121
Epoch [60/100], Loss: 0.1131
Epoch [70/100], Loss: 0.1089
Epoch [80/100], Loss: 0.1095
Epoch [90/100], Loss: 0.1077


Grid Search Progress:  98%|█████████▊| 53/54 [1:56:03<01:11, 71.51s/it]

Epoch [100/100], Loss: 0.1078
Evaluation - Test Loss: 0.0941, R2: 0.9507
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1476
Epoch [20/100], Loss: 0.1319
Epoch [30/100], Loss: 0.1223
Epoch [40/100], Loss: 0.1159
Epoch [50/100], Loss: 0.1152
Epoch [60/100], Loss: 0.1132
Epoch [70/100], Loss: 0.1114
Epoch [80/100], Loss: 0.1086
Epoch [90/100], Loss: 0.1066


Grid Search Progress: 100%|██████████| 54/54 [1:57:19<00:00, 130.36s/it]

Epoch [100/100], Loss: 0.1060
Evaluation - Test Loss: 0.0911, R2: 0.9521
Best parameters found: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.1}
Best MAE: 0.0889





In [32]:
# # Perform cross validation
# metrics = load_and_evaluate_model(
#     data_df=train_scaled_df,
#     features_dict=features_dict,
#     model_path='branchmlp.pth',
#     n_splits=5,
#     batch_size=32
# )