In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from torch.utils.data import DataLoader, TensorDataset
from xgboost import XGBRegressor
from tensorflow import keras
from scipy.interpolate import interpn
from tqdm import tqdm
from joblib import dump, load
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import tensorflow as tf

In [2]:
from load_data import process_steel_data

# Inputs handed to process_steel_data: the data directory and the
# data-description workbook.
full_path = 'data/'
path = 'data/MDC_Data_Descriptions_MeCoMeP-r-value.xlsx'

# NOTE(review): both settings are forwarded positionally to process_steel_data;
# their exact meaning lives in load_data.py -- confirm there before tuning.
correlation_rate = 0.2
dvl_line = 1

df = process_steel_data(
    full_path,
    path,
    correlation_rate,
    dvl_line,
    model_output=False,
)

# One-hot encode the steel family (columns get the 'steel_' prefix), then
# discard the steel_grade column.
df = pd.get_dummies(df, columns=['steel_family'], prefix='steel')
df = df.drop(columns=['steel_grade'])

Dropped 25 columns


  for idx, row in parser.parse():


In [3]:
def scale_data(df, binary_prefix='steel_'):
    """
    Standardise the continuous feature columns of a dataframe.

    Columns whose name starts with ``binary_prefix`` and the ``r_value``
    column are copied through unchanged; every other column is transformed by
    a ``StandardScaler`` fitted on ``df`` itself.

    Parameters:
    -----------
    df : pandas.DataFrame
        Data to scale. The scaler is fitted on this frame.
    binary_prefix : str, optional
        Name prefix identifying columns that must not be scaled
        (default: 'steel_')

    Returns:
    --------
    (scaled_df, scaler)
        ``scaled_df`` has a fresh default index and its columns are ordered
        scaled features, then prefixed columns, then ``r_value`` (if present
        in ``df``). ``scaler`` is the fitted ``StandardScaler``.
    """
    indicator_cols = [name for name in df.columns if name.startswith(binary_prefix)]
    untouched = set(indicator_cols) | {'r_value'}
    numeric_cols = [name for name in df.columns if name not in untouched]

    scaler = StandardScaler()
    scaled_df = pd.DataFrame(
        scaler.fit_transform(df[numeric_cols]),
        columns=numeric_cols,
    )

    # Copy raw values (not Series) so the original index cannot misalign rows.
    for name in indicator_cols:
        scaled_df[name] = df[name].values

    if 'r_value' in df.columns:
        scaled_df['r_value'] = df['r_value'].values

    return scaled_df, scaler

def train_model_with_cv_gridsearch(df, model, param_grid=None, n_splits=5, random_state=42, use_grid_search=True, model_params=None):
    """
    Train a model with optional grid search and cross-validation

    The cross-validation loop fits a fresh clone of the chosen estimator on
    every fold, so the estimator passed in by the caller is never mutated and
    the fold fits cannot overwrite the final model. The model returned under
    ``'model'`` is always fitted on the full dataframe: with grid search it is
    ``GridSearchCV.best_estimator_`` (refit on all data by GridSearchCV's
    default ``refit=True``); without grid search it is a clone fitted on all
    rows after the cross-validation loop.

    Note that the cross-validation scores are computed on the same rows the
    grid search used for selection, so they are not an independent estimate.

    Parameters:
    -----------
    df : pandas.DataFrame
        Input dataframe; must contain the target column 'r_value'
    model : estimator object
        Machine learning model to train
    param_grid : dict, optional
        Parameter grid for grid search (used if use_grid_search=True)
    n_splits : int, optional
        Number of cross-validation splits (default: 5)
    random_state : int, optional
        Random state for the shuffled K-fold split (default: 42)
    use_grid_search : bool, optional
        Whether to perform grid search (default: True)
    model_params : dict, optional
        Direct model parameters to use if use_grid_search=False

    Returns:
    --------
    dict containing model results and performance metrics

    Raises:
    -------
    ValueError
        If use_grid_search is True and param_grid is None
    """
    # Imported here so this function does not depend on the notebook's import cell.
    from sklearn.base import clone

    # Prepare X and y
    X = df.drop(['r_value'], axis=1)
    y = df['r_value']

    # Initialize cross-validation
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Initialize metrics storage
    cv_scores = {
        'mae': [],
        'mse': [],
        'r2': []
    }

    # Determine model parameters
    if use_grid_search:
        if param_grid is None:
            raise ValueError("param_grid must be provided when use_grid_search is True")

        # Initialize GridSearchCV
        grid_search = GridSearchCV(
            estimator=model,
            param_grid=param_grid,
            cv=n_splits,
            scoring='neg_mean_absolute_error',
            n_jobs=-1,
            verbose=0
        )

        # Fit GridSearchCV
        print("Performing GridSearch...")
        grid_search.fit(X, y)
        print(f"\nBest parameters: {grid_search.best_params_}")

        # Already refit on all of (X, y); keep it as the returned model and
        # use it only as a template for the per-fold clones below.
        final_model = grid_search.best_estimator_
        fold_template = final_model
    else:
        grid_search = None
        # Use directly specified parameters or default model
        fold_template = type(model)(**model_params) if model_params else model
        final_model = None  # fitted on the full data after cross-validation

    # Perform cross-validation
    print("\nPerforming cross-validation...")
    pbar = tqdm(enumerate(kf.split(X), 1),
                total=n_splits,
                desc="Cross-validation",
                leave=True)

    for fold, (train_idx, val_idx) in pbar:
        # Split data
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        # Fresh, unfitted copy per fold; safe=False falls back to a deep copy
        # for objects that are not scikit-learn estimators.
        fold_model = clone(fold_template, safe=False)
        fold_model.fit(X_train, y_train)

        # Make predictions
        y_pred = fold_model.predict(X_val)

        # Calculate metrics
        mae = mean_absolute_error(y_val, y_pred)
        mse = mean_squared_error(y_val, y_pred)
        r2 = r2_score(y_val, y_pred)

        cv_scores['mae'].append(mae)
        cv_scores['mse'].append(mse)
        cv_scores['r2'].append(r2)

        # Update progress bar description
        pbar.set_description(
            f"Fold {fold} - MAE: {mae:.4f}, MSE: {mse:.4f}, R2: {r2:.4f}"
        )

    if final_model is None:
        # Without grid search nothing has been trained on all rows yet.
        print("\nFitting final model on the full dataset...")
        final_model = clone(fold_template, safe=False)
        final_model.fit(X, y)

    # Prepare results
    results = {
        'model': final_model,
        'best_params': grid_search.best_params_ if use_grid_search else (model_params or {}),
        'avg_mae': np.mean(cv_scores['mae']),
        'std_mae': np.std(cv_scores['mae']),
        'avg_mse': np.mean(cv_scores['mse']),
        'std_mse': np.std(cv_scores['mse']),
        'avg_r2': np.mean(cv_scores['r2']),
        'std_r2': np.std(cv_scores['r2']),
        'cv_scores': cv_scores,
        'grid_search_results': grid_search.cv_results_ if use_grid_search else None
    }

    return results

def report_cv_results(results):
    """
    Print a short summary of a cross-validation results dict.

    Parameters:
    -----------
    results : dict
        Must provide 'best_params' plus 'avg_<m>' and 'std_<m>' for each
        metric m in mae, mse and r2.
    """
    print("\nCross-Validation Results:")
    print("-" * 50)
    print(f"Best Parameters: {results['best_params']}")

    # One "mean ± std" line per metric, in a fixed order.
    for label, key in (("MAE", "mae"), ("MSE", "mse"), ("R2", "r2")):
        mean_value = results['avg_' + key]
        std_value = results['std_' + key]
        print(f"Average {label}: {mean_value:.4f} ± {std_value:.4f}")

In [4]:
# Hold out 20% of the rows as a test set.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# The scaler is fitted on the training rows only and then re-used for the
# test rows below, so no test statistics enter the scaling.
train_scaled_df, scaler = scale_data(train_df)

# Mirror scale_data's column handling for the test split: 'steel_' columns and
# the target are passed through, everything else goes through the scaler.
binary_columns = [col for col in test_df.columns if col.startswith('steel_')]
columns_to_scale = [
    col for col in test_df.columns
    if col != 'r_value' and col not in binary_columns
]

scaled_test_data = scaler.transform(test_df[columns_to_scale])
test_scaled_df = pd.DataFrame(scaled_test_data, columns=columns_to_scale)

for col in binary_columns:
    test_scaled_df[col] = test_df[col].values

if 'r_value' in test_df.columns:
    test_scaled_df['r_value'] = test_df['r_value'].values

In [27]:
# Random forest: the grid holds a single candidate, so the "search" only
# evaluates n_estimators=350.
rfr = RandomForestRegressor(random_state=42)
rfr_param_grid = {'n_estimators': [350]}

rfr_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=rfr,
    param_grid=rfr_param_grid,
    n_splits=5,
)

Performing GridSearch...

Best parameters: {'n_estimators': 350}

Performing cross-validation with best model...


Fold 5 - MAE: 0.0941, MSE: 0.0180, R2: 0.9468: 100%|██████████| 5/5 [04:20<00:00, 52.16s/it]


In [28]:
# Gradient-boosted trees: 6 x 6 x 6 grid over the native XGBoost parameter
# names 'eta', 'lambda' and 'max_depth'.
xgb_model = XGBRegressor(random_state=42)

xgb_param_grid = {
    'eta': [0.01, 0.05, 0.1, 0.2, 0.3, 0.4],
    'lambda': [0, 0.01, 0.1, 1, 10, 50],
    'max_depth': [3, 4, 5, 6, 7, 8],
}

xgb_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=xgb_model,
    param_grid=xgb_param_grid,
    n_splits=5,
)

Performing GridSearch...

Best parameters: {'eta': 0.1, 'lambda': 1, 'max_depth': 8}

Performing cross-validation with best model...


Fold 5 - MAE: 0.0969, MSE: 0.0189, R2: 0.9443: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


In [32]:
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
from sklearn.gaussian_process import GaussianProcessRegressor

# Gaussian process with a fixed kernel (scaled Matern nu=1.5 plus white
# noise); no grid search, only the cross-validation loop is run.
results_without_grid = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=GaussianProcessRegressor(),
    use_grid_search=False,
    model_params={
        'kernel': 1**2 * Matern(length_scale=1, nu=1.5) + WhiteKernel(noise_level=1),
    },
)


Performing cross-validation...


Fold 5 - MAE: 0.1037, MSE: 0.0204, R2: 0.9399: 100%|██████████| 5/5 [27:16<00:00, 327.35s/it]


In [6]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest neighbours: search over neighbourhood size (2..14), leaf size and
# the weighting scheme.
knn_model = KNeighborsRegressor()

knn_param_grid = {
    'n_neighbors': list(range(2, 15)),
    'leaf_size': [20, 30, 40, 50],
    'weights': ['uniform', 'distance'],
}

knn_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=knn_model,
    param_grid=knn_param_grid,
)

Performing GridSearch...

Best parameters: {'leaf_size': 20, 'n_neighbors': 9, 'weights': 'distance'}

Performing cross-validation...


Fold 5 - MAE: 0.1075, MSE: 0.0224, R2: 0.9338: 100%|██████████| 5/5 [00:00<00:00, 20.07it/s]


In [13]:
def cv_saved_model_architecture(saved_model_path, df, target_column='r_value', n_splits=5, 
                               epochs=100, batch_size=32, random_state=42):
    """
    Perform cross-validation using architecture and parameters from a saved model

    For every fold the saved model's architecture is cloned (the clone is a
    new, untrained network), compiled with Adam at the saved optimizer's
    learning rate and an MAE loss, trained with early stopping, and scored on
    the fold's validation rows.

    Training rows are reshuffled every epoch. Without this the same batches
    would be replayed in the same order each epoch and ``drop_remainder``
    would always discard the same trailing rows.

    NOTE(review): early stopping monitors ``val_loss`` on the same rows that
    are used to score the fold, so the reported metrics are somewhat
    optimistic. Carve a separate early-stopping split if an unbiased estimate
    is required.

    Parameters:
    -----------
    saved_model_path : str
        Path to saved .h5 model file
    df : pandas DataFrame
        Input data
    target_column : str
        Name of target column
    n_splits : int
        Number of CV folds
    epochs : int
        Maximum number of training epochs per fold
    batch_size : int
        Batch size for training; capped at the fold's training-set size
    random_state : int
        Seed for the K-fold split and for the per-epoch training shuffle

    Returns:
    --------
    dict containing CV metrics
    """
    # Load saved model to get architecture and parameters
    base_model = tf.keras.models.load_model(saved_model_path)

    # Get learning rate from saved model and convert to Python float
    learning_rate = float(base_model.optimizer.learning_rate.numpy())

    # Prepare data
    X = df.drop([target_column], axis=1)
    y = df[target_column]

    # Initialize KFold
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Storage for CV metrics
    cv_scores = {
        'mae': [],
        'mse': [],
        'r2': []
    }

    # Perform cross-validation
    print("\nPerforming cross-validation...")
    for fold, (train_idx, val_idx) in enumerate(kf.split(X), 1):
        print(f"\nFold {fold}/{n_splits}")

        # Clear previous model from memory
        tf.keras.backend.clear_session()

        # Split data and convert to float32
        X_train = np.array(X.iloc[train_idx], dtype=np.float32)
        y_train = np.array(y.iloc[train_idx], dtype=np.float32)
        X_val = np.array(X.iloc[val_idx], dtype=np.float32)
        y_val = np.array(y.iloc[val_idx], dtype=np.float32)

        # A fold smaller than batch_size would otherwise give zero steps per epoch.
        fold_batch_size = min(batch_size, len(X_train))

        # Create new model with same architecture
        model = tf.keras.models.clone_model(base_model)

        # Compile with same optimizer type and learning rate
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        model.compile(
            optimizer=optimizer,
            loss='mean_absolute_error',
            metrics=['mae', 'mse']
        )

        # Early stopping
        early_stopping = keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            min_delta=1e-4
        )

        # Create TF datasets; shuffle before batching so each epoch sees new batches
        train_dataset = (
            tf.data.Dataset.from_tensor_slices((X_train, y_train))
            .shuffle(len(X_train), seed=random_state, reshuffle_each_iteration=True)
            .batch(fold_batch_size, drop_remainder=True)
            .repeat()
        )

        val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
        val_dataset = val_dataset.batch(fold_batch_size)

        steps_per_epoch = len(X_train) // fold_batch_size

        # Train model
        model.fit(
            train_dataset,
            validation_data=val_dataset,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            callbacks=[early_stopping],
            verbose=1
        )

        # Make predictions
        y_pred = model.predict(X_val, verbose=0)

        # Calculate metrics
        mae = mean_absolute_error(y_val, y_pred)
        mse = mean_squared_error(y_val, y_pred)
        r2 = r2_score(y_val, y_pred)

        cv_scores['mae'].append(mae)
        cv_scores['mse'].append(mse)
        cv_scores['r2'].append(r2)

        print(f"Fold {fold} - MAE: {mae:.4f}, MSE: {mse:.4f}, R2: {r2:.4f}")

    # Calculate average metrics
    results = {
        'avg_mae': np.mean(cv_scores['mae']),
        'std_mae': np.std(cv_scores['mae']),
        'avg_mse': np.mean(cv_scores['mse']),
        'std_mse': np.std(cv_scores['mse']),
        'avg_r2': np.mean(cv_scores['r2']),
        'std_r2': np.std(cv_scores['r2']),
        'cv_scores': cv_scores
    }

    return results

In [14]:
class SteelPropertiesANN:
    """
    Grid-search helper for fully connected Keras regressors.

    Attributes set by the constructor:
        input_dim      number of input features expected by the network
        target_column  name of the target column in the training dataframe
        best_model     best model found so far by grid_search (None initially)
        best_params    configuration dict of best_model (None initially)
        best_score     validation loss of best_model (inf initially)
        results        per-configuration records of the latest grid_search call
    """

    def __init__(self, input_dim, target_column):
        self.input_dim = input_dim
        self.target_column = target_column
        self.best_model = None
        self.best_params = None
        self.best_score = float('inf')
        # Filled by grid_search: one dict per trained config (config + 'val_loss').
        self.results = []

    def build_model(self, config):
        """
        Build and compile a network from a configuration dict.

        ``config`` must provide 'layers' (iterable of (units, activation)
        pairs), 'learning_rate' and 'l2_regularization'. Every hidden layer is
        L2-regularised; the output is a single linear unit. The model is
        compiled with Adam (exponentially decaying learning rate, gradient
        norm clipped at 1.0) and an MAE loss.
        """
        hidden_layers = config['layers']
        learning_rate = config['learning_rate']
        l2_strength = config['l2_regularization']

        model = keras.Sequential()
        model.add(keras.layers.Input(shape=(self.input_dim,)))

        for units, activation in hidden_layers:
            model.add(keras.layers.Dense(
                units=units,
                activation=activation,
                kernel_regularizer=keras.regularizers.l2(l2_strength)
            ))

        model.add(keras.layers.Dense(1))

        # Multiply the learning rate by 0.9 every 100 optimizer steps.
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=learning_rate,
            decay_steps=100,
            decay_rate=0.9,
            staircase=True
        )

        model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=1.0),
            loss='mean_absolute_error',
            metrics=['mae', 'mse']
        )
        return model

    def generate_grid_configs(self, 
        layer_options=((64, 'relu'), (128, 'relu'), (256, 'relu')),
        layer_depths=(2, 3, 4),
        learning_rates=(1e-2, 1e-3, 1e-4),
        l2_regularization=(1e-3, 1e-4, 1e-5),
        batch_sizes=(16, 32, 64)
    ):
        """
        Enumerate every combination of the given hyperparameter options.

        For each depth, all ordered selections (with repetition) of
        ``layer_options`` of that length are generated, so the grid grows as
        len(layer_options) ** depth. Defaults are tuples rather than lists so
        they cannot be mutated between calls; any iterable is accepted.

        Returns a list of dicts with keys 'layers', 'learning_rate',
        'l2_regularization' and 'batch_size'.
        """
        from itertools import product

        grid_configs = []
        # product() varies its last argument fastest, which reproduces the
        # nesting depth -> learning rate -> L2 -> batch size -> layers.
        for depth, lr, l2_reg, batch_size in product(
            layer_depths, learning_rates, l2_regularization, batch_sizes
        ):
            for layers in product(layer_options, repeat=depth):
                grid_configs.append({
                    'layers': layers,
                    'learning_rate': lr,
                    'l2_regularization': l2_reg,
                    'batch_size': batch_size
                })

        return grid_configs

    def grid_search(self, train_scaled_df, grid_configs=None, epochs=100, max_configs=None):
        """
        Train one model per configuration and keep the best one.

        ``train_scaled_df`` is split 80/20 (fixed seed) into training and
        validation rows. Each configuration is trained with early stopping on
        the validation loss; the configuration with the lowest validation loss
        seen so far is stored in ``best_model`` / ``best_params`` /
        ``best_score``. A record per successfully trained configuration is
        kept in ``self.results``.

        ``max_configs``, when truthy, limits the search to the first
        ``max_configs`` configurations.

        Returns (best_model, best_params); both are None if no configuration
        trained successfully and none had been stored before.

        NOTE(review): the selection metric is the compiled loss, which
        presumably includes the L2 penalty and therefore favours small
        ``l2_regularization`` values -- consider ranking on MAE instead.
        """
        # Split training data into training and validation sets
        train_data, val_data = train_test_split(train_scaled_df, test_size=0.2, random_state=42)

        if grid_configs is None:
            grid_configs = self.generate_grid_configs()

        if max_configs:
            grid_configs = grid_configs[:max_configs]

        # Convert once, outside the loop: the arrays are identical for every
        # configuration. A conversion failure now raises immediately instead
        # of being reported once per configuration.
        X_train = np.array(train_data.drop([self.target_column], axis=1), dtype=np.float32)
        y_train = np.array(train_data[self.target_column], dtype=np.float32)
        X_val = np.array(val_data.drop([self.target_column], axis=1), dtype=np.float32)
        y_val = np.array(val_data[self.target_column], dtype=np.float32)

        results = []
        self.results = results

        for config in tqdm(grid_configs, desc="Training models"):
            tf.keras.backend.clear_session()
            model = self.build_model(config)
            batch_size = min(config['batch_size'], len(X_train))

            early_stopping = keras.callbacks.EarlyStopping(
                monitor='val_loss', 
                patience=10, 
                restore_best_weights=True,
                min_delta=1e-4
            )

            try:
                # Reshuffle every epoch; otherwise each epoch replays identical
                # batches and drop_remainder always discards the same rows.
                train_dataset = (
                    tf.data.Dataset.from_tensor_slices((X_train, y_train))
                    .shuffle(len(X_train), seed=42, reshuffle_each_iteration=True)
                    .batch(batch_size, drop_remainder=True)
                    .repeat()
                )

                val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
                val_dataset = val_dataset.batch(batch_size)

                steps_per_epoch = len(X_train) // batch_size

                model.fit(
                    train_dataset,
                    validation_data=val_dataset,
                    epochs=epochs,
                    steps_per_epoch=steps_per_epoch,
                    callbacks=[early_stopping],
                    verbose=0
                )

                val_loss = model.evaluate(X_val, y_val, verbose=0)[0]

                result_entry = config.copy()
                result_entry.update({'val_loss': val_loss})
                results.append(result_entry)

                if val_loss < self.best_score:
                    self.best_score = val_loss
                    self.best_model = model
                    self.best_params = config

            except Exception as e:
                # Best effort: a configuration that fails to train is reported
                # and skipped so the rest of the grid still runs.
                print(f"Error with config {config}: {str(e)}")
                continue

        return self.best_model, self.best_params

In [15]:
# Cross-validate the architecture stored in model.h5 on the scaled training data.
cv_results = cv_saved_model_architecture(
    saved_model_path='model.h5',
    df=train_scaled_df,
    target_column='r_value',
    n_splits=5,
    epochs=100,
    batch_size=32,
)

# Print CV results (this dict has no 'best_params', so report_cv_results
# cannot be used here).
print("\nCross-Validation Results:")
print("-" * 50)
for metric_label, metric_key in (("MAE", "mae"), ("MSE", "mse"), ("R2", "r2")):
    print(
        f"Average {metric_label}: "
        f"{cv_results['avg_' + metric_key]:.4f} ± {cv_results['std_' + metric_key]:.4f}"
    )




Performing cross-validation...

Fold 1/5
Epoch 1/100
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 972us/step - loss: 0.3032 - mae: 0.2864 - mse: 0.2022 - val_loss: 0.1535 - val_mae: 0.1375 - val_mse: 0.0332
Epoch 2/100
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 709us/step - loss: 0.1515 - mae: 0.1358 - mse: 0.0322 - val_loss: 0.1416 - val_mae: 0.1265 - val_mse: 0.0284
Epoch 3/100
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 700us/step - loss: 0.1428 - mae: 0.1279 - mse: 0.0292 - val_loss: 0.1391 - val_mae: 0.1247 - val_mse: 0.0276
Epoch 4/100
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 699us/step - loss: 0.1354 - mae: 0.1213 - mse: 0.0267 - val_loss: 0.1378 - val_mae: 0.1241 - val_mse: 0.0269
Epoch 5/100
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 688us/step - loss: 0.1321 - mae: 0.1186 - mse: 0.0260 - val_loss: 0.1313 - val_mae: 0.1183 - val_mse: 0.0252
Epoch 6/100
[1m308/308

In [16]:
from sklearn.svm import SVR

# Support vector regression: 6 x 6 grid over the penalty C and the
# epsilon-tube width.
svr_param_grid = {
    'C': [0.01, 0.1, 1, 10, 100, 1000],
    'epsilon': [0.001, 0.01, 0.1, 0.5, 1, 2],
}

svr_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=SVR(),
    param_grid=svr_param_grid,
)

Performing GridSearch...

Best parameters: {'C': 1, 'epsilon': 0.01}

Performing cross-validation...


Fold 5 - MAE: 0.1044, MSE: 0.0216, R2: 0.9362: 100%|██████████| 5/5 [00:28<00:00,  5.68s/it]


In [17]:
from sklearn.linear_model import Ridge

# Ridge regression: search the regularisation strength over seven decades.
ridge_param_grid = {'alpha': [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]}

ridge_results = train_model_with_cv_gridsearch(
    df=train_scaled_df,
    model=Ridge(random_state=42),
    param_grid=ridge_param_grid,
)

Performing GridSearch...

Best parameters: {'alpha': 0.001}

Performing cross-validation...


Fold 5 - MAE: 0.1217, MSE: 0.0266, R2: 0.9216: 100%|██████████| 5/5 [00:00<00:00, 17.77it/s]


In [18]:
def load_and_evaluate_model(data_df, features_dict, model_path, n_splits=5, batch_size=32):
    """
    Load a saved model and score it on K folds of the data

    No training happens here: the same pre-trained model is evaluated on each
    fold's validation rows, so the folds only show how the score varies across
    subsets of ``data_df``. If the model was trained on these rows the scores
    are not an out-of-sample estimate.

    Args:
        data_df: Pandas DataFrame containing the data (must have 'r_value')
        features_dict: Dictionary of features by category; must have the keys
            'chemical', 'time', 'process' and 'model'
        model_path: Path to saved model file (a fully pickled nn.Module)
        n_splits: Number of CV folds
        batch_size: Batch size for evaluation

    Returns:
        Dictionary of metrics averaged across folds
    """
    categories = ['chemical', 'time', 'process', 'model']

    # SECURITY: loading a full pickled module executes arbitrary code from the
    # file -- only load checkpoints you trust. weights_only=False is required
    # because recent PyTorch releases default to weights-only loading, which
    # rejects pickled modules. map_location='cpu' makes checkpoints saved on a
    # GPU usable with the CPU tensors built below.
    model = torch.load(model_path, map_location='cpu', weights_only=False)
    model.eval()

    # One float32 array per category; categories with no matching columns get
    # an array with zero feature columns.
    feature_arrays = {}
    for category in categories:
        available_features = [col for col in features_dict[category]
                              if col in data_df.columns]

        if available_features:
            feature_arrays[category] = data_df[available_features].values.astype(np.float32)
        else:
            feature_arrays[category] = np.zeros((len(data_df), 0), dtype=np.float32)

    # Prepare targets
    targets = data_df['r_value'].values

    # Initialize KFold
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    # Lists to store metrics for each fold
    fold_metrics = {
        'mae': [],
        'mse': [],
        'rmse': [],
        'r2': []
    }

    # Cross validation loop (training indices are unused: nothing is fitted)
    for fold, (_, val_idx) in enumerate(kfold.split(targets)):
        # Validation tensors in the positional order the model's forward expects
        val_dataset = TensorDataset(
            *(torch.FloatTensor(feature_arrays[category][val_idx]) for category in categories),
            torch.FloatTensor(targets[val_idx])
        )
        val_loader = DataLoader(val_dataset, batch_size=batch_size)

        # Evaluation for this fold
        predictions = []
        actuals = []

        with torch.no_grad():
            for batch_chem, batch_time, batch_proc, batch_model, batch_targets in val_loader:
                outputs = model(batch_chem, batch_time, batch_proc, batch_model)
                predictions.extend(outputs.cpu().numpy().flatten())
                actuals.extend(batch_targets.numpy().flatten())

        # Calculate metrics for this fold
        predictions = np.array(predictions)
        actuals = np.array(actuals)

        mae = mean_absolute_error(actuals, predictions)
        mse = mean_squared_error(actuals, predictions)
        rmse = np.sqrt(mse)
        r2 = r2_score(actuals, predictions)

        fold_metrics['mae'].append(mae)
        fold_metrics['mse'].append(mse)
        fold_metrics['rmse'].append(rmse)
        fold_metrics['r2'].append(r2)

        print(f"Fold {fold+1} - MAE: {mae:.4f}, MSE: {mse:.4f}, RMSE: {rmse:.4f}, R2: {r2:.4f}")

    # Calculate and return average metrics
    avg_metrics = {
        'mae': np.mean(fold_metrics['mae']),
        'mae_std': np.std(fold_metrics['mae']),
        'mse': np.mean(fold_metrics['mse']),
        'mse_std': np.std(fold_metrics['mse']), 
        'rmse': np.mean(fold_metrics['rmse']),
        'rmse_std': np.std(fold_metrics['rmse']),
        'r2': np.mean(fold_metrics['r2']),
        'r2_std': np.std(fold_metrics['r2'])
    }

    print("\nAverage Metrics across folds:")
    print(f"MAE: {avg_metrics['mae']:.4f} ± {avg_metrics['mae_std']:.4f}")
    print(f"MSE: {avg_metrics['mse']:.4f} ± {avg_metrics['mse_std']:.4f}")
    print(f"RMSE: {avg_metrics['rmse']:.4f} ± {avg_metrics['rmse_std']:.4f}")
    print(f"R2: {avg_metrics['r2']:.4f} ± {avg_metrics['r2_std']:.4f}")

    return avg_metrics

In [19]:
# Assign every feature column to one input branch of the multi-branch network.
features = [
    col for col in df.columns
    if col not in ('r_value', 'steel_family', 'steel_grade')
]

features_dict = {
    # Any column whose name contains "time" (case-insensitive).
    'time': [col for col in features if 'time' in col.lower()],
    'chemical': [
        'pct_al', 'pct_b', 'pct_c', 'pct_cr', 'pct_mn', 'pct_n', 'pct_nb',
        'pct_si', 'pct_ti', 'pct_v',
        'mfia_coil_frac_fer', 'mfia_et1_frac_fer', 'mfia_et2_frac_fer',
    ],
    'model': ["rm", "ag", "a80", "n_value"],
}

# Everything that is neither a time nor a chemical feature.
# NOTE(review): the 'model' columns are not excluded here, so any of them
# present in df would be fed to both the 'process' and 'model' branches --
# confirm whether that is intended.
features_dict['process'] = [
    col for col in features
    if col not in features_dict['time'] + features_dict['chemical']
]

In [28]:
class MultiBranchSteelRegressor(nn.Module):
    """
    Regressor with one small input branch per feature group.

    Each feature group (chemical, time, process, model) with a positive
    dimension gets its own Linear -> BatchNorm -> ReLU -> Dropout branch. The
    branch outputs are concatenated and passed through a shared
    128 -> 64 -> 1 head. Groups with dimension 0 get no branch and their
    ``forward`` argument is ignored.

    Args:
        chemical_dim, time_dim, process_dim, model_dim: number of input
            features per group; 0 disables the group
        hidden_units: controls the per-branch width, which is
            min(hidden_units, max(16, hidden_units // 2))
        dropout_rate: dropout probability used in every branch and in the head

    Raises:
        ValueError: if every group has dimension 0 (there would be nothing to
            feed the head, and forward would fail on an empty feature list)
    """

    def __init__(self, chemical_dim, time_dim, process_dim, model_dim, hidden_units=64, dropout_rate=0.2):
        super().__init__()
        # Track which branches are active
        self.has_chemical = chemical_dim > 0
        self.has_time = time_dim > 0
        self.has_process = process_dim > 0
        self.has_model = model_dim > 0

        # Count active branches
        self.active_branches = sum([self.has_chemical, self.has_time, self.has_process, self.has_model])
        if self.active_branches == 0:
            raise ValueError("At least one feature group must have a positive dimension")

        # Adjust hidden units for each branch
        self.branch_hidden = min(hidden_units, max(16, hidden_units // 2))

        # Creating branch
        def create_branch(input_dim):
            return nn.Sequential(
                nn.Linear(input_dim, self.branch_hidden),
                nn.BatchNorm1d(self.branch_hidden),
                nn.ReLU(),
                nn.Dropout(dropout_rate)
            )

        # Only create branches that have features. Attribute names and
        # creation order are kept stable so saved checkpoints stay loadable.
        if self.has_chemical:
            self.chemical_branch = create_branch(chemical_dim)
        if self.has_time:
            self.time_branch = create_branch(time_dim)
        if self.has_process:
            self.process_branch = create_branch(process_dim)
        if self.has_model:
            self.model_branch = create_branch(model_dim)

        # Combined input dimension based on active branches only
        combined_dim = self.branch_hidden * self.active_branches

        # Final layers after concatenation
        self.final_layers = nn.Sequential(
            nn.Linear(combined_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 1)
        )

    @staticmethod
    def _as_batch(tensor):
        """Promote a single 1-D sample to a batch of one; leave other tensors as is."""
        return tensor.unsqueeze(0) if tensor.dim() == 1 else tensor

    def forward(self, chemical, time, process, model):
        """
        Run the active branches and the shared head.

        Each argument is the feature tensor of its group; 1-D tensors are
        treated as a single sample. Returns the output of the final
        Linear(64, 1) layer, one row per sample.
        """
        features = []
        # Only process branches that have features
        if self.has_chemical:
            features.append(self.chemical_branch(self._as_batch(chemical)))

        if self.has_time:
            features.append(self.time_branch(self._as_batch(time)))

        if self.has_process:
            features.append(self.process_branch(self._as_batch(process)))

        if self.has_model:
            features.append(self.model_branch(self._as_batch(model)))

        # Concatenate only active features
        combined = torch.cat(features, dim=1) if len(features) > 1 else features[0]
        return self.final_layers(combined)

In [29]:
def train_model_regular(df, features_dict, num_epochs, hyperparameters, use_l2=False):
    """
    Train a MultiBranchSteelRegressor on an 80/20 split of ``df`` and score it.

    Args:
        df: DataFrame with the feature columns and the target 'r_value'
        features_dict: Dictionary of feature names by category; must have the
            keys 'chemical', 'time', 'process' and 'model'. Names missing from
            ``df`` are skipped.
        num_epochs: Number of training epochs
        hyperparameters: dict with 'batch_size', 'hidden_units' and
            'dropout_rate'; 'learning_rate' is optional and defaults to 1e-3
            (the AdamW default)
        use_l2: If True, AdamW uses weight_decay=0.001, otherwise 0.0

    Returns:
        (model, metrics) where metrics holds 'r2_score', 'mae', 'mse' and
        'test_loss' computed on the held-out 20%. The model is returned in
        eval mode.
    """
    batch_size = hyperparameters['batch_size']

    # Initialize feature arrays and dimensions
    feature_arrays = {}
    feature_dims = {}

    # Process each feature category
    for category in ['chemical', 'time', 'process', 'model']:
        available_features = [col for col in features_dict[category]
                              if col in df.columns]

        if available_features:
            feature_arrays[category] = df[available_features].values.astype(np.float32)
            feature_dims[category] = len(available_features)
        else:
            # Zero-width placeholder keeps the positional split below uniform.
            feature_arrays[category] = np.zeros((len(df), 0), dtype=np.float32)
            feature_dims[category] = 0

    # Prepare targets
    targets = df['r_value'].values

    # Split data (all arrays are split with the same row permutation)
    split_data = train_test_split(
        feature_arrays['chemical'],
        feature_arrays['time'],
        feature_arrays['process'],
        feature_arrays['model'],
        targets,
        test_size=0.2,
        random_state=42
    )

    (X_train_chem, X_test_chem, X_train_time, X_test_time,
     X_train_proc, X_test_proc, X_train_model, X_test_model,
     y_train, y_test) = split_data

    # Convert to tensors
    train_tensors = {
        'chemical': torch.FloatTensor(X_train_chem),
        'time': torch.FloatTensor(X_train_time),
        'process': torch.FloatTensor(X_train_proc),
        'model': torch.FloatTensor(X_train_model)
    }

    test_tensors = {
        'chemical': torch.FloatTensor(X_test_chem),
        'time': torch.FloatTensor(X_test_time),
        'process': torch.FloatTensor(X_test_proc),
        'model': torch.FloatTensor(X_test_model)
    }

    y_train_tensor = torch.FloatTensor(y_train)
    y_test_tensor = torch.FloatTensor(y_test)

    # Create DataLoader
    train_dataset = TensorDataset(
        train_tensors['chemical'],
        train_tensors['time'],
        train_tensors['process'],
        train_tensors['model'],
        y_train_tensor
    )
    # drop_last avoids a trailing batch of size 1, which BatchNorm1d rejects in training mode.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

    # Initialize model
    model = MultiBranchSteelRegressor(
        chemical_dim=feature_dims['chemical'],
        time_dim=feature_dims['time'],
        process_dim=feature_dims['process'],
        model_dim=feature_dims['model'],
        hidden_units=hyperparameters['hidden_units'],
        dropout_rate=hyperparameters['dropout_rate']
    )

    weight_decay = 0.001 if use_l2 else 0.0

    # The learning rate must be passed explicitly; otherwise AdamW silently
    # uses its default and a 'learning_rate' grid axis has no effect.
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=hyperparameters.get('learning_rate', 1e-3),
        weight_decay=weight_decay
    )
    criterion = nn.L1Loss()

    # Training loop
    model.train()
    num_batches = max(len(train_loader), 1)  # avoid dividing by zero when no full batch exists
    for epoch in range(num_epochs):
        running_loss = 0.0
        for batch_chem, batch_time, batch_proc, batch_model, batch_targets in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_chem, batch_time, batch_proc, batch_model)
            loss = criterion(outputs, batch_targets.unsqueeze(1))
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / num_batches:.4f}")

    # Evaluation
    model.eval()
    with torch.no_grad():
        y_pred = model(
            test_tensors['chemical'],
            test_tensors['time'],
            test_tensors['process'],
            test_tensors['model']
        )
        test_loss = criterion(y_pred, y_test_tensor.unsqueeze(1)).item()
        y_pred_np = y_pred.numpy().flatten()
        r2 = r2_score(y_test, y_pred_np)
        mae = mean_absolute_error(y_test, y_pred_np)
        mse = mean_squared_error(y_test, y_pred_np)

        metrics = {
            'r2_score': r2,
            'mae': mae,
            'mse': mse,
            'test_loss': test_loss
        }
        print(f"Evaluation - Test Loss: {test_loss:.4f}, R2: {r2:.4f}")

    return model, metrics

In [30]:
from sklearn.model_selection import ParameterGrid

# Hyperparameter grid for the multi-branch network: 3 x 3 x 3 x 2 = 54 combinations.
param_grid = {
    'learning_rate': [0.1, 0.01, 1e-3],
    'batch_size': [16, 32, 64],
    'hidden_units': [64, 128, 256],
    'dropout_rate': [0, 0.2],
}

grid = ParameterGrid(param_grid)

In [31]:
# Exhaustive search over `grid`; only the best parameters and metrics are
# kept (the trained models themselves are overwritten each iteration).
num_epochs = 100
best_params = None
best_results = {'mae': float('inf')}

for params in tqdm(grid, desc="Grid Search Progress", leave=True):
    print(f"Evaluating hyperparameters: {params}")

    model, metrics = train_model_regular(train_scaled_df, features_dict, num_epochs, params)
    mae = metrics['mae']

    # Skip unless this run is the first one or strictly improves the best MAE.
    if best_params is not None and not (mae < best_results['mae']):
        continue

    best_params = params
    best_results = {'mae': mae, 'metrics': metrics}

print(f"Best parameters found: {best_params}")
print(f"Best MAE: {best_results['mae']:.4f}")


Grid Search Progress:   0%|          | 0/54 [00:00<?, ?it/s]

Evaluating hyperparameters: {'batch_size': 16, 'dropout_rate': 0, 'hidden_units': 64, 'learning_rate': 0.1}


Grid Search Progress:   0%|          | 0/54 [00:03<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# # Perform cross validation
# metrics = load_and_evaluate_model(
#     data_df=train_scaled_df,
#     features_dict=features_dict,
#     model_path='branchmlp.pth',
#     n_splits=5,
#     batch_size=32
# )

Fold 1 - MAE: 1.5456, MSE: 2.5139, RMSE: 1.5855, R2: -6.5826
Fold 2 - MAE: 1.5541, MSE: 2.5343, RMSE: 1.5919, R2: -6.5082
Fold 3 - MAE: 1.5625, MSE: 2.5602, RMSE: 1.6001, R2: -6.5794
Fold 4 - MAE: 1.5420, MSE: 2.5037, RMSE: 1.5823, R2: -6.3996
Fold 5 - MAE: 1.5491, MSE: 2.5186, RMSE: 1.5870, R2: -6.4250

Average Metrics across folds:
MAE: 1.5506 ± 0.0071
MSE: 2.5261 ± 0.0197
RMSE: 1.5894 ± 0.0062
R2: -6.4990 ± 0.0760
