In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from torch.utils.data import DataLoader, TensorDataset
from xgboost import XGBRegressor
from tensorflow import keras
from scipy.interpolate import interpn
from tqdm import tqdm
from joblib import dump, load
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import tensorflow as tf

In [4]:
from load_data import process_steel_data

full_path = 'data/'
path = 'data/MDC_Data_Descriptions_MeCoMeP-r-value.xlsx'
correlation_rate = 0.2
dvl_line = 1

df = process_steel_data(full_path, path, correlation_rate, dvl_line, model_output=False)
df = pd.get_dummies(df, columns=['steel_family'], prefix='steel').drop(['steel_grade'], axis=1)

Dropped 25 columns


  for idx, row in parser.parse():


In [5]:
def scale_data(df, binary_prefix='steel_'):

    # Identify binary columns
    binary_columns = [col for col in df.columns if col.startswith(binary_prefix)]
    
    # Identify columns to scale (non-binary columns)
    columns_to_scale = [col for col in df.columns if col not in binary_columns + ['r_value']]
    
    # Scale numerical features
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(df[columns_to_scale])
    
    # Create new dataframe with scaled data
    scaled_df = pd.DataFrame(scaled_data, columns=columns_to_scale)
    
    # Add back binary columns
    for col in binary_columns:
        scaled_df[col] = df[col].values
    
    # Add target variable if present
    if 'r_value' in df.columns:
        scaled_df['r_value'] = df['r_value'].values
    
    return scaled_df, scaler

def train_model_with_cv_gridsearch(df, model, param_grid=None, n_splits=5, random_state=42, use_grid_search=True, model_params=None):
    """
    Train a model with optional grid search and cross-validation
    
    Parameters:
    -----------
    df : pandas.DataFrame
        Input dataframe
    model : estimator object
        Machine learning model to train
    param_grid : dict, optional
        Parameter grid for grid search (used if use_grid_search=True)
    n_splits : int, optional
        Number of cross-validation splits (default: 5)
    random_state : int, optional
        Random state for reproducibility (default: 42)
    use_grid_search : bool, optional
        Whether to perform grid search (default: True)
    model_params : dict, optional
        Direct model parameters to use if use_grid_search=False
    
    Returns:
    --------
    dict containing model results and performance metrics including tol90
    """
    # Prepare X and y
    X = df.drop(['r_value'], axis=1)
    y = df['r_value']
    
    # Initialize cross-validation
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    
    # Initialize metrics storage
    cv_scores = {
        'mae': [],
        'mse': [],
        'r2': [],
        'tol90': []  # Add tol90 metric
    }
    
    # Determine model parameters
    if use_grid_search:
        if param_grid is None:
            raise ValueError("param_grid must be provided when use_grid_search is True")
        
        # Initialize GridSearchCV
        grid_search = GridSearchCV(
            estimator=model,
            param_grid=param_grid,
            cv=n_splits,
            scoring='neg_mean_absolute_error',
            n_jobs=-1,
            verbose=0
        )
        
        # Fit GridSearchCV
        print("Performing GridSearch...")
        grid_search.fit(X, y)
        print(f"\nBest parameters: {grid_search.best_params_}")
        best_model = grid_search.best_estimator_
    else:
        # Use directly specified parameters or default model
        if model_params:
            best_model = type(model)(**model_params)
        else:
            best_model = model
        
        grid_search = None
    
    # Perform cross-validation
    print("\nPerforming cross-validation...")
    pbar = tqdm(enumerate(kf.split(X), 1),
                total=n_splits,
                desc="Cross-validation",
                leave=True)
    
    for fold, (train_idx, val_idx) in pbar:
        # Split data
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        
        # Train model
        best_model.fit(X_train, y_train)
        
        # Make predictions
        y_pred = best_model.predict(X_val)
        
        # Calculate metrics
        mae = mean_absolute_error(y_val, y_pred)
        mse = mean_squared_error(y_val, y_pred)
        r2 = r2_score(y_val, y_pred)
        
        # Calculate tol90 (90th percentile of absolute errors)
        abs_errors = np.abs(y_val - y_pred)
        tol90 = np.percentile(abs_errors, 90)
        
        cv_scores['mae'].append(mae)
        cv_scores['mse'].append(mse)
        cv_scores['r2'].append(r2)
        cv_scores['tol90'].append(tol90)
        
        # Update progress bar description
        pbar.set_description(
            f"Fold {fold} - MAE: {mae:.4f}, MSE: {mse:.4f}, R2: {r2:.4f}, TOL90: {tol90:.4f}"
        )
    
    # Prepare results
    results = {
        'model': best_model,
        'best_params': grid_search.best_params_ if use_grid_search else model_params or {},
        'avg_mae': np.mean(cv_scores['mae']),
        'std_mae': np.std(cv_scores['mae']),
        'avg_mse': np.mean(cv_scores['mse']),
        'std_mse': np.std(cv_scores['mse']),
        'avg_r2': np.mean(cv_scores['r2']),
        'std_r2': np.std(cv_scores['r2']),
        'avg_tol90': np.mean(cv_scores['tol90']),  # Add average tol90
        'std_tol90': np.std(cv_scores['tol90']),   # Add std of tol90
        'cv_scores': cv_scores,
        'grid_search_results': grid_search.cv_results_ if use_grid_search else None
    }
    
    return results

def report_cv_results(results):
    print("\nCross-Validation Results:")
    print("-" * 50)
    print(f"Best Parameters: {results['best_params']}")
    print(f"Average MAE: {results['avg_mae']:.4f} ± {results['std_mae']:.4f}")
    print(f"Average MSE: {results['avg_mse']:.4f} ± {results['std_mse']:.4f}")
    print(f"Average R2: {results['avg_r2']:.4f} ± {results['std_r2']:.4f}")
    print(f"Average TOL90: {results['avg_tol90']:.4f} ± {results['std_tol90']:.4f}")

def report_cv_results(results):
    print("\nCross-Validation Results:")
    print("-" * 50)
    print(f"Best Parameters: {results['best_params']}")
    print(f"Average MAE: {results['avg_mae']:.4f} ± {results['std_mae']:.4f}")
    print(f"Average MSE: {results['avg_mse']:.4f} ± {results['std_mse']:.4f}")
    print(f"Average R2: {results['avg_r2']:.4f} ± {results['std_r2']:.4f}")

In [6]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

train_scaled_df, scaler = scale_data(train_df)
binary_columns = [col for col in test_df.columns if col.startswith('steel_')]
columns_to_scale = [col for col in test_df.columns if col not in binary_columns + ['r_value']]
scaled_test_data = scaler.transform(test_df[columns_to_scale])
test_scaled_df = pd.DataFrame(scaled_test_data, columns=columns_to_scale)
for col in binary_columns:
    test_scaled_df[col] = test_df[col].values
if 'r_value' in test_df.columns:
    test_scaled_df['r_value'] = test_df['r_value'].values

In [46]:
rfr = RandomForestRegressor(random_state=42)
rfr_param_grid = {
    'n_estimators': [350]
}

rfr_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=rfr,
    param_grid=rfr_param_grid,
    n_splits=5
)

Performing GridSearch...

Best parameters: {'n_estimators': 350}

Performing cross-validation...


Fold 5 - MAE: 0.0941, MSE: 0.0180, R2: 0.9468, TOL90: 0.2190: 100%|██████████| 5/5 [04:18<00:00, 51.79s/it]


In [47]:
xgb_model = XGBRegressor(random_state=42)

xgb_param_grid = {
    'eta': [0.01, 0.05, 0.1, 0.2, 0.3, 0.4],
    'lambda': [0, 0.01, 0.1, 1, 10, 50],
    'max_depth': [3, 4, 5, 6, 7, 8]
}

xgb_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=xgb_model,
    param_grid=xgb_param_grid,
    n_splits=5
)

Performing GridSearch...

Best parameters: {'eta': 0.1, 'lambda': 1, 'max_depth': 8}

Performing cross-validation...


Fold 5 - MAE: 0.0969, MSE: 0.0189, R2: 0.9443, TOL90: 0.2258: 100%|██████████| 5/5 [00:02<00:00,  1.81it/s]


In [48]:
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
from sklearn.gaussian_process import GaussianProcessRegressor

results_without_grid = train_model_with_cv_gridsearch(
    df=train_scaled_df, 
    model=GaussianProcessRegressor(), 
    use_grid_search=False,
    model_params={'kernel': 1**2 * Matern(length_scale=1, nu=1.5) + WhiteKernel(noise_level=1)}
)


Performing cross-validation...


Cross-validation:   0%|          | 0/5 [00:00<?, ?it/s]

Fold 5 - MAE: 0.1037, MSE: 0.0204, R2: 0.9399, TOL90: 0.2299: 100%|██████████| 5/5 [28:29<00:00, 341.91s/it]


In [49]:
from sklearn.neighbors import KNeighborsRegressor

knn_model = KNeighborsRegressor()

knn_param_grid = {
    'n_neighbors': list(range(2, 15)),
    'leaf_size': [20, 30, 40, 50],
    'weights': ['uniform', 'distance']
}

knn_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=knn_model,
    param_grid=knn_param_grid
)

Performing GridSearch...

Best parameters: {'leaf_size': 20, 'n_neighbors': 9, 'weights': 'distance'}

Performing cross-validation...


Fold 5 - MAE: 0.1075, MSE: 0.0224, R2: 0.9338, TOL90: 0.2421: 100%|██████████| 5/5 [00:00<00:00, 12.02it/s]


In [17]:
def cv_saved_model_architecture(saved_model_path, df, target_column='r_value', n_splits=5, 
                               epochs=100, batch_size=32, random_state=42):

    # Load saved model to get architecture and parameters
    base_model = tf.keras.models.load_model(saved_model_path)
    
    # Get learning rate from saved model and convert to Python float
    learning_rate = float(base_model.optimizer.learning_rate.numpy())
    
    # Prepare data
    X = df.drop([target_column], axis=1)
    y = df[target_column]
    
    # Initialize KFold
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    
    # Storage for CV metrics
    cv_scores = {
        'mae': [],
        'mse': [],
        'r2': [],
        'tol90': []  # Add tol90 metric
    }
    
    # Perform cross-validation
    print("\nPerforming cross-validation...")
    for fold, (train_idx, val_idx) in enumerate(kf.split(X), 1):
        print(f"\nFold {fold}/{n_splits}")
        
        # Clear previous model from memory
        tf.keras.backend.clear_session()
        
        # Split data
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        
        # Convert to float32
        X_train = np.array(X_train, dtype=np.float32)
        y_train = np.array(y_train, dtype=np.float32)
        X_val = np.array(X_val, dtype=np.float32)
        y_val = np.array(y_val, dtype=np.float32)
        
        # Create new model with same architecture
        model = tf.keras.models.clone_model(base_model)
        
        # Compile with same optimizer type and learning rate
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        model.compile(
            optimizer=optimizer,
            loss='mean_absolute_error',
            metrics=['mae', 'mse']
        )
        
        # Early stopping
        early_stopping = keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            min_delta=1e-4
        )
        
        # Create TF datasets
        train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
        train_dataset = (train_dataset
            .batch(batch_size, drop_remainder=True)
            .repeat())
        
        val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
        val_dataset = val_dataset.batch(batch_size)
        
        steps_per_epoch = len(X_train) // batch_size
        
        # Train model
        history = model.fit(
            train_dataset,
            validation_data=val_dataset,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            callbacks=[early_stopping],
            verbose=1
        )
        
        # Make predictions
        y_pred = model.predict(X_val, verbose=0)
        
        # Calculate metrics
        mae = mean_absolute_error(y_val, y_pred)
        mse = mean_squared_error(y_val, y_pred)
        r2 = r2_score(y_val, y_pred)
        
        # Calculate tol90 (90th percentile of absolute errors)
        abs_errors = np.abs(y_val - y_pred)
        tol90 = np.percentile(abs_errors, 90)
        
        cv_scores['mae'].append(mae)
        cv_scores['mse'].append(mse)
        cv_scores['r2'].append(r2)
        cv_scores['tol90'].append(tol90)
        
        print(f"Fold {fold} - MAE: {mae:.4f}, MSE: {mse:.4f}, R2: {r2:.4f}, TOL90: {tol90:.4f}")
    
    # Calculate average metrics
    results = {
        'avg_mae': np.mean(cv_scores['mae']),
        'std_mae': np.std(cv_scores['mae']),
        'avg_mse': np.mean(cv_scores['mse']),
        'std_mse': np.std(cv_scores['mse']),
        'avg_r2': np.mean(cv_scores['r2']),
        'std_r2': np.std(cv_scores['r2']),
        'avg_tol90': np.mean(cv_scores['tol90']),
        'std_tol90': np.std(cv_scores['tol90']),
        'cv_scores': cv_scores
    }
    
    return results

In [11]:
class SteelPropertiesANN:
    def __init__(self, input_dim, target_column):
        self.input_dim = input_dim
        self.target_column = target_column
        self.best_model = None
        self.best_params = None
        self.best_score = float('inf')

    def build_model(self, config):
        hidden_layers = config['layers']
        learning_rate = config['learning_rate']
        l2_strength = config['l2_regularization']
        
        model = keras.Sequential()
        model.add(keras.layers.Input(shape=(self.input_dim,)))
        
        for units, activation in hidden_layers:
            model.add(keras.layers.Dense(
                units=units,
                activation=activation,
                kernel_regularizer=keras.regularizers.l2(l2_strength)
            ))
        
        model.add(keras.layers.Dense(1))
        
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=learning_rate,
            decay_steps=100,
            decay_rate=0.9,
            staircase=True
        )
        
        model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=1.0),
            loss='mean_absolute_error',
            metrics=['mae', 'mse']
        )
        return model

    def generate_grid_configs(self, 
        layer_options=[(64, 'relu'), (128, 'relu'), (256, 'relu')],
        layer_depths=[2, 3, 4],
        learning_rates=[1e-2, 1e-3, 1e-4],
        l2_regularization=[1e-3, 1e-4, 1e-5],
        batch_sizes=[16, 32, 64]
    ):
        from itertools import product
        grid_configs = []
        
        for depth in layer_depths:
            for lr in learning_rates:
                for l2_reg in l2_regularization:
                    for batch_size in batch_sizes:
                        layer_combinations = list(product(layer_options, repeat=depth))
                        for layers in layer_combinations:
                            config = {
                                'layers': layers,
                                'learning_rate': lr,
                                'l2_regularization': l2_reg,
                                'batch_size': batch_size
                            }
                            grid_configs.append(config)
        
        return grid_configs

    def grid_search(self, train_scaled_df, grid_configs=None, epochs=100, max_configs=None):
        # Split training data into training and validation sets
        train_data, val_data = train_test_split(train_scaled_df, test_size=0.2, random_state=42)
    
        if grid_configs is None:
            grid_configs = self.generate_grid_configs()
        
        X_train = train_data.drop([self.target_column], axis=1)
        y_train = train_data[self.target_column]
        X_val = val_data.drop([self.target_column], axis=1)
        y_val = val_data[self.target_column]
        
        if max_configs:
            grid_configs = grid_configs[:max_configs]
        
        results = []
        for config in tqdm(grid_configs, desc="Training models"):
            tf.keras.backend.clear_session()
            model = self.build_model(config)
            batch_size = min(config['batch_size'], len(X_train))
            
            early_stopping = keras.callbacks.EarlyStopping(
                monitor='val_loss', 
                patience=10, 
                restore_best_weights=True,
                min_delta=1e-4
            )
            
            try:
                X_train = np.array(X_train, dtype=np.float32)
                y_train = np.array(y_train, dtype=np.float32)
                X_val = np.array(X_val, dtype=np.float32)
                y_val = np.array(y_val, dtype=np.float32)

                train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
                train_dataset = (train_dataset
                    .batch(batch_size, drop_remainder=True)
                    .repeat())
                
                val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
                val_dataset = val_dataset.batch(batch_size)
                
                steps_per_epoch = len(X_train) // batch_size
                
                history = model.fit(
                    train_dataset,
                    validation_data=val_dataset,
                    epochs=epochs,
                    steps_per_epoch=steps_per_epoch,
                    callbacks=[early_stopping],
                    verbose=0
                )
                
                val_loss = model.evaluate(X_val, y_val, verbose=0)[0]
                
                result_entry = config.copy()
                result_entry.update({'val_loss': val_loss})
                results.append(result_entry)
                
                if val_loss < self.best_score:
                    self.best_score = val_loss
                    self.best_model = model
                    self.best_params = config
                
            except Exception as e:
                print(f"Error with config {config}: {str(e)}")
                continue
        
        return self.best_model, self.best_params

In [15]:
import tensorflow as tf

# Assuming you have the best_params from grid search
best_params = {
    'layers': ((256, 'relu'), (64, 'relu')),
    'learning_rate': 0.001,
    'l2_regularization': 0.0001,
    'batch_size': 64
}

train_data, val_data = train_test_split(train_scaled_df, test_size=0.2, random_state=42)

# Instantiate the ANN class
ann = SteelPropertiesANN(input_dim=train_scaled_df.drop(['r_value'], axis=1).shape[1], target_column='r_value')

# Build the model using best_params
best_model = ann.build_model(config=best_params)

# Prepare data for training
X_train = train_data.drop(['r_value'], axis=1)
y_train = train_data['r_value']

X_val = val_data.drop(['r_value'], axis=1)
y_val = val_data['r_value']

# Convert to NumPy arrays
X_train = np.array(X_train, dtype=np.float32)
y_train = np.array(y_train, dtype=np.float32)
X_val = np.array(X_val, dtype=np.float32)
y_val = np.array(y_val, dtype=np.float32)

# Create datasets
batch_size = best_params['batch_size']
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size).repeat()
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size)

# Define early stopping
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True, min_delta=1e-4
)

# Train the model
steps_per_epoch = len(X_train) // batch_size
history = best_model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=100,
    steps_per_epoch=steps_per_epoch,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate the model on validation data
val_loss = best_model.evaluate(X_val, y_val, verbose=0)
print(f"Validation Loss: {val_loss}")

# Save the model
best_model.save('best_model.h5')


Epoch 1/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.3760 - mae: 0.3589 - mse: 0.3522 - val_loss: 0.1554 - val_mae: 0.1387 - val_mse: 0.0360
Epoch 2/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1555 - mae: 0.1389 - mse: 0.0340 - val_loss: 0.1448 - val_mae: 0.1286 - val_mse: 0.0308
Epoch 3/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 880us/step - loss: 0.1407 - mae: 0.1246 - mse: 0.0277 - val_loss: 0.1367 - val_mae: 0.1210 - val_mse: 0.0282
Epoch 4/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 821us/step - loss: 0.1380 - mae: 0.1223 - mse: 0.0269 - val_loss: 0.1348 - val_mae: 0.1195 - val_mse: 0.0270
Epoch 5/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 819us/step - loss: 0.1311 - mae: 0.1159 - mse: 0.0243 - val_loss: 0.1326 - val_mae: 0.1176 - val_mse: 0.0261
Epoch 6/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [



Validation Loss: [0.12338341772556305, 0.1106322780251503, 0.02351290173828602]


In [18]:
cv_results = cv_saved_model_architecture(
    saved_model_path='best_model.h5',
    df=train_scaled_df,
    target_column='r_value',
    n_splits=5,
    epochs=100,
    batch_size=32
)

# Print CV results
print("\nCross-Validation Results:")
print("-" * 50)
print(f"Average MAE: {cv_results['avg_mae']:.4f} ± {cv_results['std_mae']:.4f}")
print(f"Average MSE: {cv_results['avg_mse']:.4f} ± {cv_results['std_mse']:.4f}")
print(f"Average R2: {cv_results['avg_r2']:.4f} ± {cv_results['std_r2']:.4f}")




Performing cross-validation...

Fold 1/5
Epoch 1/100
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 994us/step - loss: 0.2652 - mae: 0.2482 - mse: 0.1450 - val_loss: 0.1579 - val_mae: 0.1418 - val_mse: 0.0351
Epoch 2/100
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 929us/step - loss: 0.1544 - mae: 0.1385 - mse: 0.0335 - val_loss: 0.1519 - val_mae: 0.1367 - val_mse: 0.0325
Epoch 3/100
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 863us/step - loss: 0.1411 - mae: 0.1262 - mse: 0.0286 - val_loss: 0.1424 - val_mae: 0.1281 - val_mse: 0.0309
Epoch 4/100
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 689us/step - loss: 0.1381 - mae: 0.1240 - mse: 0.0278 - val_loss: 0.1316 - val_mae: 0.1180 - val_mse: 0.0257
Epoch 5/100
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 693us/step - loss: 0.1300 - mae: 0.1166 - mse: 0.0251 - val_loss: 0.1336 - val_mae: 0.1206 - val_mse: 0.0255
Epoch 6/100
[1m308/308

In [19]:
cv_results

{'avg_mae': 0.109099224,
 'std_mae': 0.003003221,
 'avg_mse': 0.022822645,
 'std_mse': 0.0016381636,
 'avg_r2': 0.9322552306664029,
 'std_r2': 0.004790306009933363,
 'avg_tol90': 1.3353492259979247,
 'std_tol90': 0.020452509554258386,
 'cv_scores': {'mae': [0.10957691,
   0.10651408,
   0.10750166,
   0.11474446,
   0.10715896],
  'mse': [0.022413583, 0.021979269, 0.021708783, 0.026067216, 0.021944378],
  'r2': [0.932395797492249,
   0.9348832884736349,
   0.935731378504229,
   0.9229592508228391,
   0.9353064380390621],
  'tol90': [1.3217065334320068,
   1.3192760944366455,
   1.3668313026428223,
   1.352554440498352,
   1.3163777589797974]}}

In [53]:
from sklearn.svm import SVR

svr_param_grid = {
    'C': [0.01, 0.1, 1, 10, 100, 1000],
    'epsilon': [0.001, 0.01, 0.1, 0.5, 1, 2]
}

svr_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=SVR(),
    param_grid=svr_param_grid
)

Performing GridSearch...





Best parameters: {'C': 1, 'epsilon': 0.01}

Performing cross-validation...


Fold 5 - MAE: 0.1044, MSE: 0.0216, R2: 0.9362, TOL90: 0.2364: 100%|██████████| 5/5 [00:28<00:00,  5.73s/it]


In [54]:
from sklearn.linear_model import Ridge

ridge_param_grid = {
    'alpha': [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
}

ridge_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=Ridge(random_state=42),
    param_grid=ridge_param_grid
)

Performing GridSearch...

Best parameters: {'alpha': 0.001}

Performing cross-validation...


Fold 5 - MAE: 0.1217, MSE: 0.0266, R2: 0.9216, TOL90: 0.2564: 100%|██████████| 5/5 [00:00<00:00, 39.98it/s]


In [40]:
def load_and_evaluate_model(data_df, features_dict, model_path, n_splits=5, batch_size=32):
    """
    Load saved model and perform cross validation
    
    Args:
        data_df: Pandas DataFrame containing the data
        features_dict: Dictionary of features by category
        model_path: Path to saved model file
        n_splits: Number of CV folds
        batch_size: Batch size for evaluation
    
    Returns:
        Dictionary of metrics averaged across folds, including tol90
    """
    # Load the saved model
    model = torch.load(model_path)
    model.eval()
    
    # Initialize feature arrays and dimensions
    feature_arrays = {}
    feature_dims = {}
    
    # Process each feature category
    for category in ['chemical', 'time', 'process', 'model']:
        available_features = [col for col in features_dict[category] 
                            if col in data_df.columns]
        
        if available_features:
            feature_arrays[category] = data_df[available_features].values.astype(np.float32)
            feature_dims[category] = len(available_features)
        else:
            feature_arrays[category] = np.zeros((len(data_df), 0), dtype=np.float32)
            feature_dims[category] = 0
    
    # Prepare targets
    targets = data_df['r_value'].values
    
    # Initialize KFold
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    # Lists to store metrics for each fold
    fold_metrics = {
        'mae': [],
        'mse': [],
        'rmse': [],
        'r2': [],
        'tol90': []  # Add tol90 to metrics
    }
    
    # Cross validation loop
    for fold, (train_idx, val_idx) in enumerate(kfold.split(targets)):
        # Prepare validation tensors for this fold
        val_tensors = {
            'chemical': torch.FloatTensor(feature_arrays['chemical'][val_idx]),
            'time': torch.FloatTensor(feature_arrays['time'][val_idx]),
            'process': torch.FloatTensor(feature_arrays['process'][val_idx]),
            'model': torch.FloatTensor(feature_arrays['model'][val_idx])
        }
        
        val_targets = torch.FloatTensor(targets[val_idx])
        
        # Create validation DataLoader
        val_dataset = TensorDataset(
            val_tensors['chemical'],
            val_tensors['time'],
            val_tensors['process'],
            val_tensors['model'],
            val_targets
        )
        val_loader = DataLoader(val_dataset, batch_size=batch_size)
        
        # Evaluation for this fold
        predictions = []
        actuals = []
        
        with torch.no_grad():
            for batch_chem, batch_time, batch_proc, batch_model, batch_targets in val_loader:
                outputs = model(batch_chem, batch_time, batch_proc, batch_model)
                predictions.extend(outputs.numpy().flatten())
                actuals.extend(batch_targets.numpy().flatten())
        
        # Calculate metrics for this fold
        predictions = np.array(predictions)
        actuals = np.array(actuals)
        
        mae = mean_absolute_error(actuals, predictions)
        mse = mean_squared_error(actuals, predictions)
        rmse = np.sqrt(mse)
        r2 = r2_score(actuals, predictions)
        
        # Calculate tol90 (90th percentile of absolute errors)
        abs_errors = np.abs(actuals - predictions)
        tol90 = np.percentile(abs_errors, 90)
        
        fold_metrics['mae'].append(mae)
        fold_metrics['mse'].append(mse)
        fold_metrics['rmse'].append(rmse)
        fold_metrics['r2'].append(r2)
        fold_metrics['tol90'].append(tol90)
        
        print(f"Fold {fold+1} - MAE: {mae:.4f}, MSE: {mse:.4f}, RMSE: {rmse:.4f}, R2: {r2:.4f}, TOL90: {tol90:.4f}")
    
    # Calculate and return average metrics
    avg_metrics = {
        'mae': np.mean(fold_metrics['mae']),
        'mae_std': np.std(fold_metrics['mae']),
        'mse': np.mean(fold_metrics['mse']),
        'mse_std': np.std(fold_metrics['mse']), 
        'rmse': np.mean(fold_metrics['rmse']),
        'rmse_std': np.std(fold_metrics['rmse']),
        'r2': np.mean(fold_metrics['r2']),
        'r2_std': np.std(fold_metrics['r2']),
        'tol90': np.mean(fold_metrics['tol90']),  # Add average tol90
        'tol90_std': np.std(fold_metrics['tol90'])  # Add tol90 standard deviation
    }
    
    print("\nAverage Metrics across folds:")
    print(f"MAE: {avg_metrics['mae']:.4f} ± {avg_metrics['mae_std']:.4f}")
    print(f"MSE: {avg_metrics['mse']:.4f} ± {avg_metrics['mse_std']:.4f}")
    print(f"RMSE: {avg_metrics['rmse']:.4f} ± {avg_metrics['rmse_std']:.4f}")
    print(f"R2: {avg_metrics['r2']:.4f} ± {avg_metrics['r2_std']:.4f}")
    print(f"TOL90: {avg_metrics['tol90']:.4f} ± {avg_metrics['tol90_std']:.4f}")
    
    return avg_metrics

In [15]:
# labeling the features for each branch
features = [col for col in df.columns if col not in ['r_value', 'steel_family', 'steel_grade']]
features_dict = {
   'time': [col for col in features if 'time' in col.lower()], 
   'chemical': ['pct_al', 'pct_b', 'pct_c', 'pct_cr', 'pct_mn', 'pct_n', 'pct_nb', 'pct_si', 'pct_ti', 'pct_v', 'mfia_coil_frac_fer', 'mfia_et1_frac_fer', 'mfia_et2_frac_fer'],
   'model': ["rm", "ag", "a80", "n_value"]
}
features_dict['process'] = [col for col in features if col not in features_dict['time'] and col not in features_dict['chemical']]

In [16]:
class MultiBranchSteelRegressor(nn.Module):
    def __init__(self, chemical_dim, time_dim, process_dim, model_dim, hidden_units=64, dropout_rate=0.2):
        super().__init__()
        # Track which branches are active
        self.has_chemical = chemical_dim > 0
        self.has_time = time_dim > 0
        self.has_process = process_dim > 0
        self.has_model = model_dim > 0
        
        # Count active branches
        self.active_branches = sum([self.has_chemical, self.has_time, self.has_process, self.has_model])
        
        # Adjust hidden units for each branch
        self.branch_hidden = min(hidden_units, max(16, hidden_units // 2))
        
        # Creating branch
        def create_branch(input_dim):
            return nn.Sequential(
                nn.Linear(input_dim, self.branch_hidden),
                nn.BatchNorm1d(self.branch_hidden),
                nn.ReLU(),
                nn.Dropout(dropout_rate)
            )
        
        # Only create branches that have features
        if self.has_chemical:
            self.chemical_branch = create_branch(chemical_dim)
        if self.has_time:
            self.time_branch = create_branch(time_dim)
        if self.has_process:
            self.process_branch = create_branch(process_dim)
        if self.has_model:
            self.model_branch = create_branch(model_dim)
        
        # Combined input dimension based on active branches only
        combined_dim = self.branch_hidden * self.active_branches
        
        # Final layers after concatenation
        self.final_layers = nn.Sequential(
            nn.Linear(combined_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 1)
        )
    
    def forward(self, chemical, time, process, model):
        features = []
        # Only process branches that have features
        if self.has_chemical:
            if chemical.dim() == 1:
                chemical = chemical.unsqueeze(0)
            features.append(self.chemical_branch(chemical))
        
        if self.has_time:
            if time.dim() == 1:
                time = time.unsqueeze(0)
            features.append(self.time_branch(time))
        
        if self.has_process:
            if process.dim() == 1:
                process = process.unsqueeze(0)
            features.append(self.process_branch(process))
        
        if self.has_model:
            if model.dim() == 1:
                model = model.unsqueeze(0)
            features.append(self.model_branch(model))
        
        # Concatenate only active features
        combined = torch.cat(features, dim=1) if len(features) > 1 else features[0]
        return self.final_layers(combined)

In [17]:
def train_model_regular(df, features_dict, num_epochs, hyperparameters, use_l2=False):
    batch_size = hyperparameters['batch_size']
    
    # Initialize feature arrays and dimensions
    feature_arrays = {}
    feature_dims = {}
    
    # Process each feature category
    for category in ['chemical', 'time', 'process', 'model']:
        available_features = [col for col in features_dict[category] 
                            if col in df.columns]
        
        if available_features:
            feature_arrays[category] = df[available_features].values.astype(np.float32)
            feature_dims[category] = len(available_features)
        else:
            feature_arrays[category] = np.zeros((len(df), 0), dtype=np.float32)
            feature_dims[category] = 0
    
    # Prepare targets
    targets = df['r_value'].values
    
    # Split data
    split_data = train_test_split(
        feature_arrays['chemical'],
        feature_arrays['time'],
        feature_arrays['process'],
        feature_arrays['model'],
        targets,
        test_size=0.2,
        random_state=42
    )
    
    (X_train_chem, X_test_chem, X_train_time, X_test_time, 
     X_train_proc, X_test_proc, X_train_model, X_test_model, 
     y_train, y_test) = split_data
    
    # Convert to tensors
    train_tensors = {
        'chemical': torch.FloatTensor(X_train_chem),
        'time': torch.FloatTensor(X_train_time),
        'process': torch.FloatTensor(X_train_proc),
        'model': torch.FloatTensor(X_train_model)
    }
    
    test_tensors = {
        'chemical': torch.FloatTensor(X_test_chem),
        'time': torch.FloatTensor(X_test_time),
        'process': torch.FloatTensor(X_test_proc),
        'model': torch.FloatTensor(X_test_model)
    }
    
    y_train_tensor = torch.FloatTensor(y_train)
    y_test_tensor = torch.FloatTensor(y_test)
    
    # Create DataLoader
    train_dataset = TensorDataset(
        train_tensors['chemical'],
        train_tensors['time'],
        train_tensors['process'],
        train_tensors['model'],
        y_train_tensor
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    
    # Initialize model
    model = MultiBranchSteelRegressor(
        chemical_dim=feature_dims['chemical'],
        time_dim=feature_dims['time'],
        process_dim=feature_dims['process'],
        model_dim=feature_dims['model'],
        hidden_units=hyperparameters['hidden_units'],
        dropout_rate=hyperparameters['dropout_rate']
    )
    
    if use_l2:
        weight_decay = 0.001
    else:
        weight_decay = 0.0
    
    optimizer = torch.optim.AdamW(model.parameters(), weight_decay=weight_decay)
    criterion = nn.L1Loss()
    
    # Training loop
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for batch_chem, batch_time, batch_proc, batch_model, batch_targets in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_chem, batch_time, batch_proc, batch_model)
            loss = criterion(outputs, batch_targets.unsqueeze(1))
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")
    
    # Evaluation
    model.eval()
    with torch.no_grad():
        y_pred = model(
            test_tensors['chemical'],
            test_tensors['time'],
            test_tensors['process'],
            test_tensors['model']
        )
        test_loss = criterion(y_pred, y_test_tensor.unsqueeze(1)).item()
        y_pred_np = y_pred.numpy().flatten()
        r2 = r2_score(y_test, y_pred_np)
        mae = mean_absolute_error(y_test, y_pred_np)
        mse = mean_squared_error(y_test, y_pred_np)
        
        metrics = {
            'r2_score': r2,
            'mae': mae,
            'mse': mse,
            'test_loss': test_loss
        }
        print(f"Evaluation - Test Loss: {test_loss:.4f}, R2: {r2:.4f}")
    
    return model, metrics

In [18]:
from sklearn.model_selection import ParameterGrid

param_grid = {
    'learning_rate': [0.1, 0.01, 1e-3],
    'batch_size': [16, 32, 64],
    'hidden_units': [64, 128, 256],
    'dropout_rate': [0, 0.2]
}
grid = ParameterGrid(param_grid)

In [19]:
# num_epochs = 100
# best_params = None
# best_results = {'mae': float('inf')}

# for params in tqdm(grid, desc="Grid Search Progress", leave=True):
#     print(f"Evaluating hyperparameters: {params}")
    
#     model, metrics = train_model_regular(train_scaled_df, features_dict, num_epochs, params)
#     mae = metrics['mae']
    
#     if best_params is None or mae < best_results['mae']:
#         best_results = {
#             'mae': mae,
#             'metrics': metrics
#         }
#         best_params = params

# print(f"Best parameters found: {best_params}")
# print(f"Best MAE: {best_results['mae']:.4f}")


Grid Search Progress:   0%|          | 0/54 [00:00<?, ?it/s]

Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1243
Epoch [20/100], Loss: 0.1143
Epoch [30/100], Loss: 0.1085
Epoch [40/100], Loss: 0.1073
Epoch [50/100], Loss: 0.1052
Epoch [60/100], Loss: 0.1029
Epoch [70/100], Loss: 0.1026
Epoch [80/100], Loss: 0.1008
Epoch [90/100], Loss: 0.0996


Grid Search Progress:   2%|▏         | 1/54 [04:20<3:49:50, 260.20s/it]

Epoch [100/100], Loss: 0.0991
Evaluation - Test Loss: 0.1047, R2: 0.9341
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1231
Epoch [20/100], Loss: 0.1148
Epoch [30/100], Loss: 0.1100
Epoch [40/100], Loss: 0.1080
Epoch [50/100], Loss: 0.1048
Epoch [60/100], Loss: 0.1030
Epoch [70/100], Loss: 0.1015
Epoch [80/100], Loss: 0.1001
Epoch [90/100], Loss: 0.1000


Grid Search Progress:   4%|▎         | 2/54 [08:27<3:38:49, 252.49s/it]

Epoch [100/100], Loss: 0.0999
Evaluation - Test Loss: 0.1021, R2: 0.9362
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1238
Epoch [20/100], Loss: 0.1142
Epoch [30/100], Loss: 0.1084
Epoch [40/100], Loss: 0.1069
Epoch [50/100], Loss: 0.1049
Epoch [60/100], Loss: 0.1041
Epoch [70/100], Loss: 0.1025
Epoch [80/100], Loss: 0.1010
Epoch [90/100], Loss: 0.0998


Grid Search Progress:   6%|▌         | 3/54 [11:38<3:10:46, 224.44s/it]

Epoch [100/100], Loss: 0.1004
Evaluation - Test Loss: 0.1017, R2: 0.9368
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1223
Epoch [20/100], Loss: 0.1129
Epoch [30/100], Loss: 0.1081
Epoch [40/100], Loss: 0.1055
Epoch [50/100], Loss: 0.1036
Epoch [60/100], Loss: 0.1026
Epoch [70/100], Loss: 0.0996
Epoch [80/100], Loss: 0.0990
Epoch [90/100], Loss: 0.0981


Grid Search Progress:   7%|▋         | 4/54 [14:59<2:59:18, 215.16s/it]

Epoch [100/100], Loss: 0.0961
Evaluation - Test Loss: 0.1012, R2: 0.9374
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1201
Epoch [20/100], Loss: 0.1114
Epoch [30/100], Loss: 0.1085
Epoch [40/100], Loss: 0.1063
Epoch [50/100], Loss: 0.1027
Epoch [60/100], Loss: 0.1017
Epoch [70/100], Loss: 0.1003
Epoch [80/100], Loss: 0.0988
Epoch [90/100], Loss: 0.0970


Grid Search Progress:   9%|▉         | 5/54 [18:16<2:50:28, 208.75s/it]

Epoch [100/100], Loss: 0.0965
Evaluation - Test Loss: 0.1030, R2: 0.9366
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1237
Epoch [20/100], Loss: 0.1127
Epoch [30/100], Loss: 0.1089
Epoch [40/100], Loss: 0.1061
Epoch [50/100], Loss: 0.1033
Epoch [60/100], Loss: 0.1012
Epoch [70/100], Loss: 0.1009
Epoch [80/100], Loss: 0.0987
Epoch [90/100], Loss: 0.0987


Grid Search Progress:  11%|█         | 6/54 [21:32<2:43:30, 204.40s/it]

Epoch [100/100], Loss: 0.0972
Evaluation - Test Loss: 0.1033, R2: 0.9362
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1234
Epoch [20/100], Loss: 0.1147
Epoch [30/100], Loss: 0.1085
Epoch [40/100], Loss: 0.1049
Epoch [50/100], Loss: 0.1018
Epoch [60/100], Loss: 0.0997
Epoch [70/100], Loss: 0.0988
Epoch [80/100], Loss: 0.0977
Epoch [90/100], Loss: 0.0966


Grid Search Progress:  13%|█▎        | 7/54 [24:56<2:39:57, 204.20s/it]

Epoch [100/100], Loss: 0.0961
Evaluation - Test Loss: 0.1042, R2: 0.9332
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1224
Epoch [20/100], Loss: 0.1120
Epoch [30/100], Loss: 0.1078
Epoch [40/100], Loss: 0.1052
Epoch [50/100], Loss: 0.1031
Epoch [60/100], Loss: 0.1003
Epoch [70/100], Loss: 0.0985
Epoch [80/100], Loss: 0.0978
Epoch [90/100], Loss: 0.0970


Grid Search Progress:  15%|█▍        | 8/54 [28:22<2:37:03, 204.86s/it]

Epoch [100/100], Loss: 0.0945
Evaluation - Test Loss: 0.1023, R2: 0.9359
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1231
Epoch [20/100], Loss: 0.1122
Epoch [30/100], Loss: 0.1077
Epoch [40/100], Loss: 0.1043
Epoch [50/100], Loss: 0.1020
Epoch [60/100], Loss: 0.1003
Epoch [70/100], Loss: 0.0989
Epoch [80/100], Loss: 0.0972
Epoch [90/100], Loss: 0.0964


Grid Search Progress:  17%|█▋        | 9/54 [32:04<2:37:38, 210.19s/it]

Epoch [100/100], Loss: 0.0945
Evaluation - Test Loss: 0.1040, R2: 0.9333
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1628
Epoch [20/100], Loss: 0.1495
Epoch [30/100], Loss: 0.1413
Epoch [40/100], Loss: 0.1396
Epoch [50/100], Loss: 0.1380
Epoch [60/100], Loss: 0.1351
Epoch [70/100], Loss: 0.1330
Epoch [80/100], Loss: 0.1322
Epoch [90/100], Loss: 0.1337


Grid Search Progress:  19%|█▊        | 10/54 [35:28<2:32:38, 208.14s/it]

Epoch [100/100], Loss: 0.1321
Evaluation - Test Loss: 0.1193, R2: 0.9218
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1625
Epoch [20/100], Loss: 0.1459
Epoch [30/100], Loss: 0.1422
Epoch [40/100], Loss: 0.1393
Epoch [50/100], Loss: 0.1390
Epoch [60/100], Loss: 0.1352
Epoch [70/100], Loss: 0.1350
Epoch [80/100], Loss: 0.1345
Epoch [90/100], Loss: 0.1342


Grid Search Progress:  20%|██        | 11/54 [38:52<2:28:17, 206.93s/it]

Epoch [100/100], Loss: 0.1320
Evaluation - Test Loss: 0.1077, R2: 0.9324
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1586
Epoch [20/100], Loss: 0.1466
Epoch [30/100], Loss: 0.1445
Epoch [40/100], Loss: 0.1388
Epoch [50/100], Loss: 0.1386
Epoch [60/100], Loss: 0.1356
Epoch [70/100], Loss: 0.1347
Epoch [80/100], Loss: 0.1365
Epoch [90/100], Loss: 0.1357


Grid Search Progress:  22%|██▏       | 12/54 [42:15<2:23:59, 205.70s/it]

Epoch [100/100], Loss: 0.1316
Evaluation - Test Loss: 0.1172, R2: 0.9227
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1567
Epoch [20/100], Loss: 0.1454
Epoch [30/100], Loss: 0.1418
Epoch [40/100], Loss: 0.1401
Epoch [50/100], Loss: 0.1382
Epoch [60/100], Loss: 0.1356
Epoch [70/100], Loss: 0.1321
Epoch [80/100], Loss: 0.1337
Epoch [90/100], Loss: 0.1346


Grid Search Progress:  24%|██▍       | 13/54 [45:43<2:21:00, 206.36s/it]

Epoch [100/100], Loss: 0.1317
Evaluation - Test Loss: 0.1182, R2: 0.9202
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1581
Epoch [20/100], Loss: 0.1470
Epoch [30/100], Loss: 0.1407
Epoch [40/100], Loss: 0.1386
Epoch [50/100], Loss: 0.1369
Epoch [60/100], Loss: 0.1331
Epoch [70/100], Loss: 0.1338
Epoch [80/100], Loss: 0.1335
Epoch [90/100], Loss: 0.1326


Grid Search Progress:  26%|██▌       | 14/54 [49:11<2:17:55, 206.89s/it]

Epoch [100/100], Loss: 0.1310
Evaluation - Test Loss: 0.1146, R2: 0.9240
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1603
Epoch [20/100], Loss: 0.1449
Epoch [30/100], Loss: 0.1379
Epoch [40/100], Loss: 0.1355
Epoch [50/100], Loss: 0.1364
Epoch [60/100], Loss: 0.1343
Epoch [70/100], Loss: 0.1358
Epoch [80/100], Loss: 0.1333
Epoch [90/100], Loss: 0.1307


Grid Search Progress:  28%|██▊       | 15/54 [52:39<2:14:47, 207.38s/it]

Epoch [100/100], Loss: 0.1312
Evaluation - Test Loss: 0.1162, R2: 0.9245
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1566
Epoch [20/100], Loss: 0.1420
Epoch [30/100], Loss: 0.1374
Epoch [40/100], Loss: 0.1377
Epoch [50/100], Loss: 0.1343
Epoch [60/100], Loss: 0.1322
Epoch [70/100], Loss: 0.1336
Epoch [80/100], Loss: 0.1334
Epoch [90/100], Loss: 0.1316


Grid Search Progress:  30%|██▉       | 16/54 [56:14<2:12:48, 209.71s/it]

Epoch [100/100], Loss: 0.1315
Evaluation - Test Loss: 0.1057, R2: 0.9356
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1578
Epoch [20/100], Loss: 0.1440
Epoch [30/100], Loss: 0.1367
Epoch [40/100], Loss: 0.1353
Epoch [50/100], Loss: 0.1324
Epoch [60/100], Loss: 0.1299
Epoch [70/100], Loss: 0.1295
Epoch [80/100], Loss: 0.1302
Epoch [90/100], Loss: 0.1276


Grid Search Progress:  31%|███▏      | 17/54 [59:51<2:10:33, 211.72s/it]

Epoch [100/100], Loss: 0.1273
Evaluation - Test Loss: 0.1141, R2: 0.9256
Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1550
Epoch [20/100], Loss: 0.1441
Epoch [30/100], Loss: 0.1377
Epoch [40/100], Loss: 0.1369
Epoch [50/100], Loss: 0.1322
Epoch [60/100], Loss: 0.1323
Epoch [70/100], Loss: 0.1293
Epoch [80/100], Loss: 0.1279
Epoch [90/100], Loss: 0.1281


Grid Search Progress:  33%|███▎      | 18/54 [1:03:26<2:07:43, 212.87s/it]

Epoch [100/100], Loss: 0.1286
Evaluation - Test Loss: 0.1164, R2: 0.9243
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1186
Epoch [20/100], Loss: 0.1118
Epoch [30/100], Loss: 0.1059
Epoch [40/100], Loss: 0.1031
Epoch [50/100], Loss: 0.0995
Epoch [60/100], Loss: 0.0988
Epoch [70/100], Loss: 0.0971
Epoch [80/100], Loss: 0.0962
Epoch [90/100], Loss: 0.0932


Grid Search Progress:  35%|███▌      | 19/54 [1:05:13<1:45:35, 181.03s/it]

Epoch [100/100], Loss: 0.0932
Evaluation - Test Loss: 0.1045, R2: 0.9347
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1190
Epoch [20/100], Loss: 0.1107
Epoch [30/100], Loss: 0.1058
Epoch [40/100], Loss: 0.1018
Epoch [50/100], Loss: 0.1003
Epoch [60/100], Loss: 0.0983
Epoch [70/100], Loss: 0.0966
Epoch [80/100], Loss: 0.0946
Epoch [90/100], Loss: 0.0932


Grid Search Progress:  37%|███▋      | 20/54 [1:06:58<1:29:37, 158.18s/it]

Epoch [100/100], Loss: 0.0912
Evaluation - Test Loss: 0.1029, R2: 0.9330
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1195
Epoch [20/100], Loss: 0.1094
Epoch [30/100], Loss: 0.1055
Epoch [40/100], Loss: 0.1028
Epoch [50/100], Loss: 0.1007
Epoch [60/100], Loss: 0.0970
Epoch [70/100], Loss: 0.0972
Epoch [80/100], Loss: 0.0951
Epoch [90/100], Loss: 0.0928


Grid Search Progress:  39%|███▉      | 21/54 [1:08:45<1:18:31, 142.76s/it]

Epoch [100/100], Loss: 0.0929
Evaluation - Test Loss: 0.1068, R2: 0.9335
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1152
Epoch [20/100], Loss: 0.1083
Epoch [30/100], Loss: 0.1039
Epoch [40/100], Loss: 0.1004
Epoch [50/100], Loss: 0.0983
Epoch [60/100], Loss: 0.0957
Epoch [70/100], Loss: 0.0925
Epoch [80/100], Loss: 0.0911
Epoch [90/100], Loss: 0.0909


Grid Search Progress:  41%|████      | 22/54 [1:10:32<1:10:26, 132.08s/it]

Epoch [100/100], Loss: 0.0884
Evaluation - Test Loss: 0.1034, R2: 0.9338
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1165
Epoch [20/100], Loss: 0.1068
Epoch [30/100], Loss: 0.1020
Epoch [40/100], Loss: 0.0992
Epoch [50/100], Loss: 0.0976
Epoch [60/100], Loss: 0.0949
Epoch [70/100], Loss: 0.0930
Epoch [80/100], Loss: 0.0904
Epoch [90/100], Loss: 0.0895


Grid Search Progress:  43%|████▎     | 23/54 [1:12:20<1:04:26, 124.72s/it]

Epoch [100/100], Loss: 0.0866
Evaluation - Test Loss: 0.1081, R2: 0.9297
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1176
Epoch [20/100], Loss: 0.1103
Epoch [30/100], Loss: 0.1048
Epoch [40/100], Loss: 0.0995
Epoch [50/100], Loss: 0.0982
Epoch [60/100], Loss: 0.0954
Epoch [70/100], Loss: 0.0937
Epoch [80/100], Loss: 0.0917
Epoch [90/100], Loss: 0.0900


Grid Search Progress:  44%|████▍     | 24/54 [1:14:07<59:47, 119.59s/it]  

Epoch [100/100], Loss: 0.0886
Evaluation - Test Loss: 0.1051, R2: 0.9325
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1171
Epoch [20/100], Loss: 0.1081
Epoch [30/100], Loss: 0.1032
Epoch [40/100], Loss: 0.0999
Epoch [50/100], Loss: 0.0968
Epoch [60/100], Loss: 0.0932
Epoch [70/100], Loss: 0.0906
Epoch [80/100], Loss: 0.0889
Epoch [90/100], Loss: 0.0872


Grid Search Progress:  46%|████▋     | 25/54 [1:16:01<56:54, 117.74s/it]

Epoch [100/100], Loss: 0.0852
Evaluation - Test Loss: 0.1043, R2: 0.9342
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1152
Epoch [20/100], Loss: 0.1084
Epoch [30/100], Loss: 0.1033
Epoch [40/100], Loss: 0.0988
Epoch [50/100], Loss: 0.0961
Epoch [60/100], Loss: 0.0938
Epoch [70/100], Loss: 0.0916
Epoch [80/100], Loss: 0.0888
Epoch [90/100], Loss: 0.0887


Grid Search Progress:  48%|████▊     | 26/54 [1:17:53<54:12, 116.16s/it]

Epoch [100/100], Loss: 0.0861
Evaluation - Test Loss: 0.1070, R2: 0.9310
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1156
Epoch [20/100], Loss: 0.1078
Epoch [30/100], Loss: 0.1025
Epoch [40/100], Loss: 0.0999
Epoch [50/100], Loss: 0.0966
Epoch [60/100], Loss: 0.0944
Epoch [70/100], Loss: 0.0909
Epoch [80/100], Loss: 0.0900
Epoch [90/100], Loss: 0.0893


Grid Search Progress:  50%|█████     | 27/54 [1:19:45<51:42, 114.90s/it]

Epoch [100/100], Loss: 0.0873
Evaluation - Test Loss: 0.1047, R2: 0.9342
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1638
Epoch [20/100], Loss: 0.1457
Epoch [30/100], Loss: 0.1366
Epoch [40/100], Loss: 0.1356
Epoch [50/100], Loss: 0.1333
Epoch [60/100], Loss: 0.1305
Epoch [70/100], Loss: 0.1297
Epoch [80/100], Loss: 0.1298
Epoch [90/100], Loss: 0.1275


Grid Search Progress:  52%|█████▏    | 28/54 [1:21:37<49:23, 113.98s/it]

Epoch [100/100], Loss: 0.1268
Evaluation - Test Loss: 0.1087, R2: 0.9300
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1632
Epoch [20/100], Loss: 0.1434
Epoch [30/100], Loss: 0.1354
Epoch [40/100], Loss: 0.1361
Epoch [50/100], Loss: 0.1316
Epoch [60/100], Loss: 0.1304
Epoch [70/100], Loss: 0.1284
Epoch [80/100], Loss: 0.1273
Epoch [90/100], Loss: 0.1252


Grid Search Progress:  54%|█████▎    | 29/54 [1:23:28<47:10, 113.23s/it]

Epoch [100/100], Loss: 0.1260
Evaluation - Test Loss: 0.1049, R2: 0.9351
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1619
Epoch [20/100], Loss: 0.1421
Epoch [30/100], Loss: 0.1354
Epoch [40/100], Loss: 0.1329
Epoch [50/100], Loss: 0.1304
Epoch [60/100], Loss: 0.1284
Epoch [70/100], Loss: 0.1285
Epoch [80/100], Loss: 0.1260
Epoch [90/100], Loss: 0.1245


Grid Search Progress:  56%|█████▌    | 30/54 [1:25:19<45:01, 112.57s/it]

Epoch [100/100], Loss: 0.1239
Evaluation - Test Loss: 0.1209, R2: 0.9154
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1546
Epoch [20/100], Loss: 0.1375
Epoch [30/100], Loss: 0.1341
Epoch [40/100], Loss: 0.1292
Epoch [50/100], Loss: 0.1281
Epoch [60/100], Loss: 0.1277
Epoch [70/100], Loss: 0.1243
Epoch [80/100], Loss: 0.1238
Epoch [90/100], Loss: 0.1231


Grid Search Progress:  57%|█████▋    | 31/54 [1:27:14<43:22, 113.16s/it]

Epoch [100/100], Loss: 0.1199
Evaluation - Test Loss: 0.1093, R2: 0.9302
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1584
Epoch [20/100], Loss: 0.1403
Epoch [30/100], Loss: 0.1355
Epoch [40/100], Loss: 0.1292
Epoch [50/100], Loss: 0.1266
Epoch [60/100], Loss: 0.1259
Epoch [70/100], Loss: 0.1241
Epoch [80/100], Loss: 0.1220
Epoch [90/100], Loss: 0.1234


Grid Search Progress:  59%|█████▉    | 32/54 [1:29:09<41:38, 113.58s/it]

Epoch [100/100], Loss: 0.1208
Evaluation - Test Loss: 0.1058, R2: 0.9344
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1551
Epoch [20/100], Loss: 0.1373
Epoch [30/100], Loss: 0.1330
Epoch [40/100], Loss: 0.1319
Epoch [50/100], Loss: 0.1280
Epoch [60/100], Loss: 0.1262
Epoch [70/100], Loss: 0.1252
Epoch [80/100], Loss: 0.1232
Epoch [90/100], Loss: 0.1216


Grid Search Progress:  61%|██████    | 33/54 [1:31:03<39:48, 113.73s/it]

Epoch [100/100], Loss: 0.1229
Evaluation - Test Loss: 0.1064, R2: 0.9326
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1573
Epoch [20/100], Loss: 0.1396
Epoch [30/100], Loss: 0.1317
Epoch [40/100], Loss: 0.1262
Epoch [50/100], Loss: 0.1273
Epoch [60/100], Loss: 0.1260
Epoch [70/100], Loss: 0.1227
Epoch [80/100], Loss: 0.1235
Epoch [90/100], Loss: 0.1200


Grid Search Progress:  63%|██████▎   | 34/54 [1:33:02<38:30, 115.52s/it]

Epoch [100/100], Loss: 0.1197
Evaluation - Test Loss: 0.1033, R2: 0.9353
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1554
Epoch [20/100], Loss: 0.1386
Epoch [30/100], Loss: 0.1318
Epoch [40/100], Loss: 0.1286
Epoch [50/100], Loss: 0.1243
Epoch [60/100], Loss: 0.1253
Epoch [70/100], Loss: 0.1228
Epoch [80/100], Loss: 0.1215
Epoch [90/100], Loss: 0.1215


Grid Search Progress:  65%|██████▍   | 35/54 [1:35:02<36:57, 116.71s/it]

Epoch [100/100], Loss: 0.1198
Evaluation - Test Loss: 0.1078, R2: 0.9316
Evaluating hyperparameters: {'batch_size': 32, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1503
Epoch [20/100], Loss: 0.1371
Epoch [30/100], Loss: 0.1317
Epoch [40/100], Loss: 0.1290
Epoch [50/100], Loss: 0.1256
Epoch [60/100], Loss: 0.1225
Epoch [70/100], Loss: 0.1219
Epoch [80/100], Loss: 0.1190
Epoch [90/100], Loss: 0.1177


Grid Search Progress:  67%|██████▋   | 36/54 [1:37:02<35:18, 117.69s/it]

Epoch [100/100], Loss: 0.1184
Evaluation - Test Loss: 0.1013, R2: 0.9381
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1122
Epoch [20/100], Loss: 0.1061
Epoch [30/100], Loss: 0.1020
Epoch [40/100], Loss: 0.0998
Epoch [50/100], Loss: 0.0955
Epoch [60/100], Loss: 0.0945
Epoch [70/100], Loss: 0.0918
Epoch [80/100], Loss: 0.0893
Epoch [90/100], Loss: 0.0881


Grid Search Progress:  69%|██████▊   | 37/54 [1:38:04<28:36, 100.99s/it]

Epoch [100/100], Loss: 0.0859
Evaluation - Test Loss: 0.1053, R2: 0.9326
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1130
Epoch [20/100], Loss: 0.1047
Epoch [30/100], Loss: 0.1023
Epoch [40/100], Loss: 0.0987
Epoch [50/100], Loss: 0.0961
Epoch [60/100], Loss: 0.0949
Epoch [70/100], Loss: 0.0920
Epoch [80/100], Loss: 0.0908
Epoch [90/100], Loss: 0.0898


Grid Search Progress:  70%|███████   | 38/54 [1:39:07<23:52, 89.51s/it] 

Epoch [100/100], Loss: 0.0871
Evaluation - Test Loss: 0.1168, R2: 0.9192
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1136
Epoch [20/100], Loss: 0.1064
Epoch [30/100], Loss: 0.1010
Epoch [40/100], Loss: 0.0973
Epoch [50/100], Loss: 0.0946
Epoch [60/100], Loss: 0.0918
Epoch [70/100], Loss: 0.0908
Epoch [80/100], Loss: 0.0891
Epoch [90/100], Loss: 0.0874


Grid Search Progress:  72%|███████▏  | 39/54 [1:40:08<20:17, 81.15s/it]

Epoch [100/100], Loss: 0.0862
Evaluation - Test Loss: 0.1074, R2: 0.9299
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1116
Epoch [20/100], Loss: 0.1037
Epoch [30/100], Loss: 0.1006
Epoch [40/100], Loss: 0.0957
Epoch [50/100], Loss: 0.0911
Epoch [60/100], Loss: 0.0899
Epoch [70/100], Loss: 0.0880
Epoch [80/100], Loss: 0.0857
Epoch [90/100], Loss: 0.0824


Grid Search Progress:  74%|███████▍  | 40/54 [1:41:12<17:41, 75.83s/it]

Epoch [100/100], Loss: 0.0820
Evaluation - Test Loss: 0.1058, R2: 0.9321
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1106
Epoch [20/100], Loss: 0.1035
Epoch [30/100], Loss: 0.0988
Epoch [40/100], Loss: 0.0963
Epoch [50/100], Loss: 0.0929
Epoch [60/100], Loss: 0.0897
Epoch [70/100], Loss: 0.0888
Epoch [80/100], Loss: 0.0856
Epoch [90/100], Loss: 0.0833


Grid Search Progress:  76%|███████▌  | 41/54 [1:42:16<15:39, 72.27s/it]

Epoch [100/100], Loss: 0.0823
Evaluation - Test Loss: 0.1059, R2: 0.9326
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1125
Epoch [20/100], Loss: 0.1045
Epoch [30/100], Loss: 0.1014
Epoch [40/100], Loss: 0.0967
Epoch [50/100], Loss: 0.0923
Epoch [60/100], Loss: 0.0905
Epoch [70/100], Loss: 0.0889
Epoch [80/100], Loss: 0.0864
Epoch [90/100], Loss: 0.0835


Grid Search Progress:  78%|███████▊  | 42/54 [1:43:19<13:56, 69.74s/it]

Epoch [100/100], Loss: 0.0825
Evaluation - Test Loss: 0.1063, R2: 0.9302
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1129
Epoch [20/100], Loss: 0.1030
Epoch [30/100], Loss: 0.0998
Epoch [40/100], Loss: 0.0961
Epoch [50/100], Loss: 0.0908
Epoch [60/100], Loss: 0.0885
Epoch [70/100], Loss: 0.0858
Epoch [80/100], Loss: 0.0853
Epoch [90/100], Loss: 0.0810


Grid Search Progress:  80%|███████▉  | 43/54 [1:44:27<12:38, 68.99s/it]

Epoch [100/100], Loss: 0.0805
Evaluation - Test Loss: 0.1067, R2: 0.9306
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1127
Epoch [20/100], Loss: 0.1061
Epoch [30/100], Loss: 0.0989
Epoch [40/100], Loss: 0.0962
Epoch [50/100], Loss: 0.0917
Epoch [60/100], Loss: 0.0887
Epoch [70/100], Loss: 0.0857
Epoch [80/100], Loss: 0.0836
Epoch [90/100], Loss: 0.0797


Grid Search Progress:  81%|████████▏ | 44/54 [1:45:34<11:24, 68.45s/it]

Epoch [100/100], Loss: 0.0777
Evaluation - Test Loss: 0.1080, R2: 0.9300
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1097
Epoch [20/100], Loss: 0.1023
Epoch [30/100], Loss: 0.1000
Epoch [40/100], Loss: 0.0958
Epoch [50/100], Loss: 0.0922
Epoch [60/100], Loss: 0.0898
Epoch [70/100], Loss: 0.0870
Epoch [80/100], Loss: 0.0858
Epoch [90/100], Loss: 0.0821


Grid Search Progress:  83%|████████▎ | 45/54 [1:46:41<10:11, 67.94s/it]

Epoch [100/100], Loss: 0.0795
Evaluation - Test Loss: 0.1072, R2: 0.9318
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1611
Epoch [20/100], Loss: 0.1467
Epoch [30/100], Loss: 0.1391
Epoch [40/100], Loss: 0.1311
Epoch [50/100], Loss: 0.1291
Epoch [60/100], Loss: 0.1261
Epoch [70/100], Loss: 0.1240
Epoch [80/100], Loss: 0.1231
Epoch [90/100], Loss: 0.1244


Grid Search Progress:  85%|████████▌ | 46/54 [1:47:46<08:58, 67.30s/it]

Epoch [100/100], Loss: 0.1226
Evaluation - Test Loss: 0.1040, R2: 0.9350
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1611
Epoch [20/100], Loss: 0.1458
Epoch [30/100], Loss: 0.1390
Epoch [40/100], Loss: 0.1309
Epoch [50/100], Loss: 0.1258
Epoch [60/100], Loss: 0.1264
Epoch [70/100], Loss: 0.1259
Epoch [80/100], Loss: 0.1239
Epoch [90/100], Loss: 0.1225


Grid Search Progress:  87%|████████▋ | 47/54 [1:48:52<07:47, 66.75s/it]

Epoch [100/100], Loss: 0.1235
Evaluation - Test Loss: 0.1067, R2: 0.9324
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 64, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1651
Epoch [20/100], Loss: 0.1447
Epoch [30/100], Loss: 0.1366
Epoch [40/100], Loss: 0.1315
Epoch [50/100], Loss: 0.1282
Epoch [60/100], Loss: 0.1275
Epoch [70/100], Loss: 0.1244
Epoch [80/100], Loss: 0.1232
Epoch [90/100], Loss: 0.1242


Grid Search Progress:  89%|████████▉ | 48/54 [1:49:57<06:38, 66.34s/it]

Epoch [100/100], Loss: 0.1215
Evaluation - Test Loss: 0.1045, R2: 0.9352
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1611
Epoch [20/100], Loss: 0.1438
Epoch [30/100], Loss: 0.1351
Epoch [40/100], Loss: 0.1302
Epoch [50/100], Loss: 0.1257
Epoch [60/100], Loss: 0.1252
Epoch [70/100], Loss: 0.1217
Epoch [80/100], Loss: 0.1206
Epoch [90/100], Loss: 0.1208


Grid Search Progress:  91%|█████████ | 49/54 [1:51:05<05:33, 66.77s/it]

Epoch [100/100], Loss: 0.1197
Evaluation - Test Loss: 0.1040, R2: 0.9358
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1642
Epoch [20/100], Loss: 0.1450
Epoch [30/100], Loss: 0.1365
Epoch [40/100], Loss: 0.1287
Epoch [50/100], Loss: 0.1264
Epoch [60/100], Loss: 0.1221
Epoch [70/100], Loss: 0.1259
Epoch [80/100], Loss: 0.1220
Epoch [90/100], Loss: 0.1215


Grid Search Progress:  93%|█████████▎| 50/54 [1:52:12<04:27, 66.99s/it]

Epoch [100/100], Loss: 0.1204
Evaluation - Test Loss: 0.1026, R2: 0.9375
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 128, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1634
Epoch [20/100], Loss: 0.1467
Epoch [30/100], Loss: 0.1361
Epoch [40/100], Loss: 0.1314
Epoch [50/100], Loss: 0.1237
Epoch [60/100], Loss: 0.1233
Epoch [70/100], Loss: 0.1215
Epoch [80/100], Loss: 0.1205
Epoch [90/100], Loss: 0.1195


Grid Search Progress:  94%|█████████▍| 51/54 [1:53:21<03:22, 67.37s/it]

Epoch [100/100], Loss: 0.1183
Evaluation - Test Loss: 0.1047, R2: 0.9349
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.1}
Epoch [10/100], Loss: 0.1599
Epoch [20/100], Loss: 0.1404
Epoch [30/100], Loss: 0.1330
Epoch [40/100], Loss: 0.1282
Epoch [50/100], Loss: 0.1253
Epoch [60/100], Loss: 0.1240
Epoch [70/100], Loss: 0.1222
Epoch [80/100], Loss: 0.1202
Epoch [90/100], Loss: 0.1188


Grid Search Progress:  96%|█████████▋| 52/54 [1:54:33<02:17, 68.70s/it]

Epoch [100/100], Loss: 0.1182
Evaluation - Test Loss: 0.1032, R2: 0.9361
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.01}
Epoch [10/100], Loss: 0.1623
Epoch [20/100], Loss: 0.1399
Epoch [30/100], Loss: 0.1297
Epoch [40/100], Loss: 0.1229
Epoch [50/100], Loss: 0.1219
Epoch [60/100], Loss: 0.1185
Epoch [70/100], Loss: 0.1179
Epoch [80/100], Loss: 0.1168
Epoch [90/100], Loss: 0.1173


Grid Search Progress:  98%|█████████▊| 53/54 [1:55:44<01:09, 69.61s/it]

Epoch [100/100], Loss: 0.1134
Evaluation - Test Loss: 0.1032, R2: 0.9357
Evaluating hyperparameters: {'batch_size': 64, 'dropout_rate': 0.2, 'hidden_units': 256, 'learning_rate': 0.001}
Epoch [10/100], Loss: 0.1546
Epoch [20/100], Loss: 0.1345
Epoch [30/100], Loss: 0.1274
Epoch [40/100], Loss: 0.1219
Epoch [50/100], Loss: 0.1225
Epoch [60/100], Loss: 0.1176
Epoch [70/100], Loss: 0.1153
Epoch [80/100], Loss: 0.1146
Epoch [90/100], Loss: 0.1142


Grid Search Progress: 100%|██████████| 54/54 [1:56:56<00:00, 129.94s/it]

Epoch [100/100], Loss: 0.1136
Evaluation - Test Loss: 0.1046, R2: 0.9351
Best parameters found: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.1}
Best MAE: 0.1012





In [55]:
best_params_bmlp = {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 128, 'learning_rate': 0.1}

best_mlp = train_model_regular(train_scaled_df, features_dict, 100, best_params_bmlp)

Epoch [10/50], Loss: 0.1228
Epoch [20/50], Loss: 0.1116
Epoch [30/50], Loss: 0.1070
Epoch [40/50], Loss: 0.1042
Epoch [50/50], Loss: 0.1029
Evaluation - Test Loss: 0.1054, R2: 0.9339


In [38]:
torch.save(best_mlp[0], 'bmlp_baseline.pth')

In [56]:
# Perform cross validation
metrics = load_and_evaluate_model(
    data_df=train_scaled_df,
    features_dict=features_dict,
    model_path='bmlp_baseline.pth',
    n_splits=5,
    batch_size=32
)

Fold 1 - MAE: 0.1043, MSE: 0.0213, RMSE: 0.1459, R2: 0.9358, TOL90: 0.2363
Fold 2 - MAE: 0.0877, MSE: 0.0166, RMSE: 0.1287, R2: 0.9509, TOL90: 0.2100
Fold 3 - MAE: 0.0892, MSE: 0.0164, RMSE: 0.1281, R2: 0.9514, TOL90: 0.2175
Fold 4 - MAE: 0.0950, MSE: 0.0198, RMSE: 0.1407, R2: 0.9415, TOL90: 0.2226
Fold 5 - MAE: 0.0886, MSE: 0.0166, RMSE: 0.1287, R2: 0.9511, TOL90: 0.2133

Average Metrics across folds:
MAE: 0.0930 ± 0.0062
MSE: 0.0181 ± 0.0020
RMSE: 0.1345 ± 0.0074
R2: 0.9461 ± 0.0064
TOL90: 0.2199 ± 0.0092


In [20]:
def tol90(y_true, y_pred):
    errors = np.abs(y_true - y_pred)
    return np.percentile(errors, 90)

def density_scatter(x, y, bins=30, **kwargs):
    """Create a density scatter plot"""
    # Ensure inputs are 1D arrays
    x = np.asarray(x).flatten()
    y = np.asarray(y).flatten()
    data, x_e, y_e = np.histogram2d(x, y, bins=bins)
    z = interpn((0.5*(x_e[1:] + x_e[:-1]), 0.5*(y_e[1:] + y_e[:-1])),
                data.T/data.max(),
                np.vstack([x,y]).T,
                method="splinef2d",
                bounds_error=False)
    z[np.where(np.isnan(z))] = 0.0
    idx = z.argsort()
    x, y, z = x[idx], y[idx], z[idx]
    plt.scatter(x, y, c=z, **kwargs)

def plot_predicted_vs_actual(model, df, features_dict=None, model_type=None, target_column='r_value', title=None, figsize=(10, 8)):
    plt.figure(figsize=figsize)
    
    # Get actual values
    actual = df[target_column].values
    
    # Get predictions based on model type
    if model_type == 'pytorch_mlp':
        # Prepare feature arrays for PyTorch MLP
        feature_arrays = {}
        for category in features_dict.keys():
            available_features = [col for col in features_dict[category] 
                                if col in df.columns]
            if available_features:
                scaler = StandardScaler()
                feature_arrays[category] = scaler.fit_transform(df[available_features].values)
            else:
                feature_arrays[category] = np.zeros((len(df), 0))
        
        # Convert features to tensors
        input_tensors = {
            category: torch.FloatTensor(arr) for category, arr in feature_arrays.items()
        }
        
        # Make predictions
        model.eval()
        with torch.no_grad():
            predicted = model(**input_tensors).numpy().flatten()
            
    elif model_type == 'keras':
        X = df.drop([target_column], axis=1).values
        X = X.astype('float32')
        predicted = model.predict(X).flatten()
        
    else:  # sklearn
        X = df.drop([target_column], axis=1)
        predicted = model['model'].predict(X)
    
    # Calculate metrics
    r2 = r2_score(actual, predicted)
    mse = mean_squared_error(actual, predicted)
    mae = mean_absolute_error(actual, predicted)
    tol90_value = tol90(actual, predicted)
    
    # Create density scatter plot
    density_scatter(actual, predicted, bins=30, alpha=0.6)
    
    # Plot perfect prediction line
    min_val = min(actual.min(), predicted.min())
    max_val = max(actual.max(), predicted.max())
    plt.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2, label='Perfect Prediction')
    
    # Plot tolerance lines
    plt.plot([min_val, max_val], [min_val + tol90_value, max_val + tol90_value],
             'g--', lw=1.5, label=f'+tol90 ({tol90_value:.4f})')
    plt.plot([min_val, max_val], [min_val - tol90_value, max_val - tol90_value],
             'b--', lw=1.5, label=f'-tol90 ({tol90_value:.4f})')
    
    # Set plot limits with buffer
    buffer = 0.2
    x_min = actual.min() - (actual.max() - actual.min()) * buffer
    x_max = actual.max() + (actual.max() - actual.min()) * buffer
    y_min = predicted.min() - (predicted.max() - predicted.min()) * buffer
    y_max = predicted.max() + (predicted.max() - predicted.min()) * buffer
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    
    # Annotate with metrics
    plt.text(0.05, 0.95,
             f"R²: {r2:.4f}\n" +
             f"MSE: {mse:.4f}\n" +
             f"MAE: {mae:.4f}\n" +
             f"tol90: {tol90_value:.4f}",
             transform=plt.gca().transAxes,
             verticalalignment='top',
             bbox=dict(facecolor='white', alpha=0.8))
    
    # Labeling
    plt.xlabel('Actual Value')
    plt.ylabel('Predicted Value')
    if title is None:
        title = f'Predicted vs Actual {target_column}'
    plt.title(title)
    plt.legend(loc='lower right')
    plt.grid(True, alpha=0.3)
    plt.axis('equal')
    plt.tight_layout()
    plt.show()
    
    return {
        'R2': r2,
        'MSE': mse,
        'MAE': mae,
        'tol90': tol90_value
    }

In [None]:
plot_predicted_vs_actual(rfr_results['model'])