In [1]:
import os
import numpy as np
import cv2
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import joblib
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold, train_test_split
from sklearn.base import ClassifierMixin
from sklearn.metrics import f1_score, accuracy_score, classification_report, precision_score, recall_score, confusion_matrix
from tqdm import tqdm
import warnings
import time
from skimage.feature import local_binary_pattern, hog 
from sklearn.feature_selection import SelectKBest, f_classif, RFE
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.decomposition import PCA
import lightgbm as lgb
warnings.filterwarnings('ignore')
from keras.models import Sequential, Model
from keras.layers import (
    Input, Conv2D, MaxPooling2D, BatchNormalization,
    Dropout, Flatten, Dense, LeakyReLU, ReLU
)

class SklearnKerasClassifier(KerasClassifier, ClassifierMixin):
    def __init__(self, model=None, **kwargs):
        super().__init__(model=model, **kwargs)

    @property
    def _tags(self): return self.model._tags if hasattr(self.model, "_tags") else {"binary_only": True}


def create_custom_cnn(
    input_shape=None,
    conv_blocks=((32, (3,3)), (64, (3,3))),
    dense_layers=(128,),
    dropout_rate=0.4,
    activation='relu',
    meta=None 
):
    """Conv -> BN -> Activation -> Pool -> Dropout"""
    if input_shape is None:
        if meta is None or "X_shape_" not in meta: raise ValueError("meta or input_shape parameter is missing")
        input_shape = meta["X_shape_"][1:] 

    model = Sequential(name="Custom_CNN")
    model.add(Input(shape=input_shape))

    for filters, kernel_size in conv_blocks:
        model.add(Conv2D(filters, kernel_size, padding='same'))
        model.add(BatchNormalization())
        if activation == 'leaky_relu': model.add(LeakyReLU(alpha=0.1))
        else: model.add(ReLU())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(dropout_rate))
    model.add(Flatten()) 

    for units in dense_layers:
        model.add(Dense(units))
        model.add(BatchNormalization())
        if activation == 'leaky_relu': model.add(LeakyReLU(alpha=0.1))
        else: model.add(ReLU())
        model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))

    return model

def load_prep_4_cnn(data_dir, target_size=(128, 128)):
    all_images = []
    all_labels = []
    class_dirs = {'fire_images': 1, 'non_fire_images': 0}

    for class_name, label in class_dirs.items():
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path): continue
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            if not img_name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
                continue
            try:
                img = cv2.imread(img_path)
                if img is None: continue
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img_resized = cv2.resize(img, target_size)
                img_normalized = img_resized.astype(np.float32) / 255.0
                all_images.append(img_normalized)
                all_labels.append(label)

            except Exception as e:
                continue

    return np.array(all_images), np.array(all_labels)

def create_custom_mlp(hidden_layer_1_neurons=128, hidden_layer_2_neurons=64,
                        dropout_rate=0.3, activation='leaky_relu', learning_rate=0.001,
                        meta=None):
    n_features_in = meta["n_features_in_"]

    model = Sequential()
    model.add(Input(shape=(n_features_in,)))
    model.add(Dense(hidden_layer_1_neurons))
    model.add(BatchNormalization())
    if activation == 'leaky_relu': model.add(LeakyReLU(alpha=0.1))
    else: model.add(tf.keras.layers.ReLU())
    model.add(Dropout(dropout_rate))
    if hidden_layer_2_neurons is not None and hidden_layer_2_neurons > 0:
        model.add(Dense(hidden_layer_2_neurons))
        model.add(BatchNormalization())
        if activation == 'leaky_relu': model.add(LeakyReLU(alpha=0.1))
        else: model.add(tf.keras.layers.ReLU())
        model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

def split_data(features_array, labels_array, test_size=0.2, random_state=42):
    if features_array.shape[0] == 0: return None, None, None, None
    X_train, X_test, y_train, y_test = train_test_split(
        features_array,
        labels_array,
        test_size=test_size,
        random_state=random_state,
        stratify=labels_array
    )

    print(f"training features shape: {X_train.shape}")
    print(f"testing features shape: {X_test.shape}")
    print(f"training labels shape: {y_train.shape}")
    print(f"testing labels shape: {y_test.shape}")
    return X_train, X_test, y_train, y_test

def scale_features(X_train, X_test):
    if X_train is None or X_test is None or X_train.shape[0] == 0: return None, None, None
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, scaler

def perform_correlation_selection(X_train, y_train, X_test, k_features):
    if X_train is None or X_test is None or X_train.shape[0] == 0: return X_train, X_test, None
    
    n_total_features = X_train.shape[1]
    k_features_int = k_features
    percentage_str = None

    if isinstance(k_features, str) and k_features.endswith('%'):
        try:
            percentage_str = k_features
            percentage = float(k_features[:-1]) / 100.0
            k_features_int = max(1, int(n_total_features * percentage))
        except ValueError: return X_train, X_test, None
    elif k_features == 'all': return X_train, X_test, None
    elif isinstance(k_features, int) and k_features > 0: k_features_int = min(k_features, n_total_features)
    else: return X_train, X_test, None

    if k_features_int <= 0 or k_features_int >= n_total_features: return X_train, X_test, None
    selector = SelectKBest(score_func=f_classif, k=k_features_int)
    selector.fit(X_train, y_train)
    X_train_selected = selector.transform(X_train)
    X_test_selected = selector.transform(X_test)

    print(f"original feature shape: {X_train.shape}")
    print(f"selected feature shape: {X_train_selected.shape}")
    return X_train_selected, X_test_selected, selector

def perform_rfe_selection(X_train, y_train, X_test, n_features_to_select, step=0.1, estimator=None):
    if X_train is None or X_test is None or X_train.shape[0] == 0: return X_train, X_test, None
    n_total_features = X_train.shape[1]
    n_features_int = n_features_to_select
    if estimator is None: estimator = LogisticRegression(solver='liblinear', random_state=42, max_iter=2000)

    if isinstance(n_features_to_select, str) and n_features_to_select.endswith('%'):
        try:
            percentage = float(n_features_to_select[:-1]) / 100.0
            n_features_int = max(1, int(n_total_features * percentage))
        except ValueError: return X_train, X_test, None
    elif isinstance(n_features_to_select, int) and n_features_to_select > 0:
        n_features_int = min(n_features_to_select, n_total_features)
    elif n_features_to_select == 'auto': return X_train, X_test, None
    else: return X_train, X_test, None
    
    if n_features_int <= 0 or n_features_int >= n_total_features: return X_train, X_test, None

    try:
        rfe = RFE(estimator=estimator, n_features_to_select=n_features_int, step=step)
        rfe.fit(X_train, y_train)
        X_train_selected = rfe.transform(X_train)
        X_test_selected = rfe.transform(X_test)
        print(f"original feature shape: {X_train.shape}")
        print(f"selected feature shape: {X_train_selected.shape}")
        return X_train_selected, X_test_selected, rfe
    except Exception as e: return X_train, X_test, None

def tune_model_hyperparameters(model_estimator, X_train, y_train, param_grid, cv_strategy, scoring='f1', search_method='RandomSearch', n_iter=10, validation_split_keras=0.2):
    if X_train is None or y_train is None or X_train.shape[0] == 0: return None
    print(f"\{search_method} tuning (scoring='{scoring}')... with {n_iter} iterations")
    start_time = time.time()
    fit_params = {}
    if isinstance(model_estimator, KerasClassifier):
        fit_params['validation_split'] = validation_split_keras
    if search_method == 'RandomSearch':
         search_cv = RandomizedSearchCV(
            estimator=model_estimator,
            param_distributions=param_grid,
            n_iter=n_iter,
            cv=cv_strategy,
            scoring=scoring,
            n_jobs=1,
            verbose=1,
            random_state=42
         )
    else: return None
    search_cv.fit(X_train, y_train, **fit_params)
    end_time = time.time()
    print(f"{search_method} duration: {end_time - start_time:.2f} seconds")
    print("\nbest params:")
    print(search_cv.best_params_)
    print("\nbest CV score:")
    print(search_cv.best_score_)
    return search_cv

def evaluate_model(model, X_test, y_test, model_name="Model", feature_set_name="Unknown Feature Set"):
    if model is None or X_test is None or y_test is None or X_test.shape[0] == 0: return {}
    print(f"\{model_name} on the test set using {feature_set_name}.")
    start_time = time.time()
    y_pred = model.predict(X_test)
    if isinstance(model, KerasClassifier): y_pred = (y_pred > 0.5).astype(int)
    end_time = time.time()
    print(f"duration: {end_time - start_time:.4f} seconds")
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    print(f"accuracy: {accuracy:.4f}")
    print(f"precision: {precision:.4f}")
    print(f"recall: {recall:.4f}")
    print(f"f1 score: {f1:.4f}")
    print(f"\nconfusion matrix ({model_name} on {feature_set_name}):")
    print(conf_matrix)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'confusion_matrix': conf_matrix.tolist()
    }

def perform_pca_dimension_reduction(X_train, X_test, n_components):
    if X_train is None or X_test is None or X_train.shape[0] == 0: return None, None, None
    try:        
        pca = PCA(n_components=n_components, random_state=42)
        X_train_pca = pca.fit_transform(X_train)
        X_test_pca = pca.transform(X_test)

        print(f"original feature shape: {X_train.shape}")
        print(f"PCA transformed feature shape: {X_train_pca.shape}")
        print(f"variance ratio with {pca.n_components_} components: {np.sum(pca.explained_variance_ratio_):.4f}")

        return X_train_pca, X_test_pca, pca
    except Exception as e: return X_train, X_test, None


def create_cnn_feature_extractor(cnn_model_architecture):
    flatten_layer = None
    for layer in cnn_model_architecture.layers:
        if isinstance(layer, tf.keras.layers.Flatten):
            flatten_layer = layer
            break
            
    if flatten_layer is None: raise ValueError()
    feature_extractor_model = Model(inputs=cnn_model_architecture.inputs, outputs=flatten_layer.output)
    return feature_extractor_model


In [2]:
data_directory = os.path.join('..', 'data_subsets', 'fire_dataset')
target_image_width = 128
target_image_height = 128 #? i think i dont have time for image w/h opt.

X_images, y_labels = load_prep_4_cnn(data_directory, target_size=(target_image_width, target_image_height))
if X_images.shape[0] == 0: exit()

print("\n--- extracting cnn features: ---")
cnn_architecture = create_custom_cnn(input_shape=X_images.shape[1:])
cnn_feature_extractor = create_cnn_feature_extractor(cnn_architecture)
features_array_orig = cnn_feature_extractor.predict(X_images, verbose=0)
X_train_orig, X_test_orig, y_train, y_test = split_data(features_array_orig, y_labels, test_size=0.25, random_state=42)

if X_train_orig is None or X_train_orig.shape[0] == 0: exit()

print("\n--- scaling cnn features: ---")
X_train_scaled, X_test_scaled, scaler = scale_features(X_train_orig, X_test_orig)
feature_sets = {}
feature_transformers = {}
if X_train_scaled is not None:
    feature_sets['Scaled_All_CNN'] = (X_train_scaled, X_test_scaled)
    feature_transformers['Scaled_All_CNN'] = scaler
else: exit()

print("\n--- selection & pca: ---")
original_feature_count = X_train_scaled.shape[1]
corr_feature_percentages = ['75%', '50%']
for percentage_str in corr_feature_percentages:
    print(f"\ncorr selection: {percentage_str}...")
    try:
        X_train_corr, X_test_corr, corr_selector = perform_correlation_selection(
            X_train_scaled, y_train, X_test_scaled, k_features=percentage_str
        )
        if X_train_corr is not None and X_train_corr.shape[1] < original_feature_count:
            feature_sets[f'Scaled_Corr{percentage_str}_CNN'] = (X_train_corr, X_test_corr)
            feature_transformers[f'Scaled_Corr{percentage_str}_CNN'] = corr_selector
        else: continue
    except Exception as e: pass

rfe_feature_percentages = ['75%', '50%']
rfe_step_val = 0.1
rfe_estimator = LogisticRegression(solver='liblinear', random_state=42, max_iter=2000)

for percentage_str in rfe_feature_percentages:
    print(f"\nrfe selection with {percentage_str} (step={rfe_step_val})...")
    try:
        X_train_rfe, X_test_rfe, rfe_selector = perform_rfe_selection(
            X_train_scaled, y_train, X_test_scaled, n_features_to_select=percentage_str, step=rfe_step_val, estimator=rfe_estimator
        )
        if X_train_rfe is not None and X_train_rfe.shape[1] < original_feature_count:
            feature_sets[f'Scaled_RFE{percentage_str}_CNN'] = (X_train_rfe, X_test_rfe)
            feature_transformers[f'Scaled_RFE{percentage_str}_CNN'] = rfe_selector
        else: continue
    except Exception as e: pass

pca_components = [0.95, 500]
for n_comp in pca_components:
    print(f"\npca with n_components={n_comp}...")
    try:
        X_train_pca, X_test_pca, pca_transformer = perform_pca_dimension_reduction(X_train_scaled, X_test_scaled, n_components=n_comp)
        if X_train_pca is not None and (isinstance(n_comp, int) and X_train_pca.shape[1] < original_feature_count or isinstance(n_comp, float)):
            fs_name_suffix = f"{int(n_comp*100)}%" if isinstance(n_comp, float) else str(n_comp)
            fs_name = f'Scaled_PCA_{fs_name_suffix}_CNN'
            feature_sets[fs_name] = (X_train_pca, X_test_pca)
            feature_transformers[fs_name] = pca_transformer
        else: continue
    except Exception as e: pass

print("\n--- feat sets for tuning: ---")
for name, (X_train_fs, _) in feature_sets.items():
    print(f"- {name}: {X_train_fs.shape[1]} features")


--- extracting cnn features: ---
training features shape: (748, 65536)
testing features shape: (250, 65536)
training labels shape: (748,)
testing labels shape: (250,)

--- scaling cnn features: ---

--- selection & pca: ---

corr selection: 75%...
original feature shape: (748, 65536)
selected feature shape: (748, 49152)

corr selection: 50%...
original feature shape: (748, 65536)
selected feature shape: (748, 32768)

rfe selection with 75% (step=0.1)...
original feature shape: (748, 65536)
selected feature shape: (748, 49152)

rfe selection with 50% (step=0.1)...
original feature shape: (748, 65536)
selected feature shape: (748, 32768)

pca with n_components=0.95...
original feature shape: (748, 65536)
PCA transformed feature shape: (748, 489)
variance ratio with 489 components: 0.9501

pca with n_components=500...
original feature shape: (748, 65536)
PCA transformed feature shape: (748, 500)
variance ratio with 500 components: 0.9527

--- feat sets for tuning: ---
- Scaled_All_CNN: 6

In [3]:
print("\n--- model training and randomsearchcv: ---")
import lightgbm as lgb
models_to_tune = {
     'LightGBM': {
        'estimator': lgb.LGBMClassifier(random_state=42, objective='binary', metric='binary_logloss', verbosity=-1, n_jobs=4),
        'param_grid': {
            'n_estimators': [50, 80, 120], 
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [8, 15],
            'num_leaves': [20, 40, 60],
            'subsample': [0.8, 0.9],
            'colsample_bytree': [0.8, 0.9, 1.0],
            'min_split_gain': [0.1],
            'min_child_samples': [5]
    }
    },
    'SVM': {
        'estimator': SVC(random_state=42),
        'param_grid': {
            'C': [0.1, 1, 10, 50],
            'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
            'kernel': ['rbf', 'linear']
        }
    },
    'Custom_MLP': {
        'estimator': SklearnKerasClassifier(
            model=create_custom_mlp,
            loss=tf.keras.losses.BinaryCrossentropy(),
            epochs=100,
            batch_size=32,
            verbose=0,
            callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, verbose=0, restore_best_weights=True)]
        ),
        'param_grid': {
            'model__hidden_layer_1_neurons': [64, 128, 256],
            'model__hidden_layer_2_neurons': [0, 64, 128],
            'model__dropout_rate': [0.2, 0.4, 0.6],
            'model__activation': ['relu', 'leaky_relu'],
            'optimizer__learning_rate': [0.001, 0.005, 0.01]
        }
    }
}

cv_strategy = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
scoring_metric = 'f1'
all_results = {}
best_overall_test_score = -np.inf
best_overall_combination = None
best_overall_trained_model = None
best_overall_X_test = None
best_overall_transformer = None

for model_name, model_config in models_to_tune.items():
    all_results[model_name] = {}
    estimator = model_config['estimator']
    param_distributions = model_config['param_grid']
    n_iter_search = model_config.get('n_iter', 8)
    print(f"\n\n=== train&tune {model_name} (Hybrid) ===")
    for fs_name in sorted(feature_sets.keys()):
        X_train_fs, X_test_fs = feature_sets[fs_name]
        print(f"\n--- tune {model_name} on fs: {fs_name} ({X_train_fs.shape[1]} features) ---")

        if X_train_fs is None or X_train_fs.shape[0] == 0: continue

        tuned_search = tune_model_hyperparameters(
            estimator,
            X_train_fs,
            y_train,
            param_grid=param_distributions,
            cv_strategy=cv_strategy,
            scoring=scoring_metric,
            search_method='RandomSearch',
            n_iter=n_iter_search,
            validation_split_keras=0.2
        )

        if tuned_search:
            best_model_for_combination = tuned_search.best_estimator_
            best_cv_score = tuned_search.best_score_
            best_params = tuned_search.best_params_
            print(f"best CV {scoring_metric} for {model_name} on {fs_name}: {best_cv_score:.4f}")
            test_metrics = evaluate_model(best_model_for_combination, X_test_fs, y_test, model_name, fs_name)
            all_results[model_name][fs_name] = {
                'best_cv_score': best_cv_score,
                'best_params': best_params,
                'test_metrics': test_metrics,
                'trained_model': best_model_for_combination,
                'transformer': feature_transformers.get(fs_name)
            }
            if test_metrics and test_metrics.get('f1_score', -np.inf) > best_overall_test_score:
                best_overall_test_score = test_metrics['f1_score']
                best_overall_combination = (model_name, fs_name)
                best_overall_trained_model = best_model_for_combination
                best_overall_X_test = X_test_fs
                best_overall_transformer = feature_transformers.get(fs_name)

print("\n\n=== results summary for all models ===")
if not all_results: pass
else:
    print("\nbest cv f1 scores:")
    print("-------------------------------------------------")
    for model_name, fs_results in all_results.items():
        print(f"\n{model_name}:")
        if fs_results:
            for fs_name in sorted(fs_results.keys()):
                result = fs_results[fs_name]
                cv_score = result.get('best_cv_score', float('nan'))
                print(f"  - {fs_name}: {cv_score:.4f}")
        else: pass

    print("\ntest results - f1:")
    print("----------------------------")
    best_f1_per_model = {}
    for model_name, fs_results in all_results.items():
        print(f"\n{model_name}:")
        if fs_results:
            best_test_f1_for_model = -np.inf
            best_fs_name_for_model = None

            for fs_name in sorted(fs_results.keys()):
                result = fs_results[fs_name]
                test_f1 = result.get('test_metrics', {}).get('f1_score', float('nan'))
                print(f"  - {fs_name}: {test_f1:.4f}")
                if not np.isnan(test_f1) and test_f1 > best_test_f1_for_model:
                    best_test_f1_for_model = test_f1
                    best_fs_name_for_model = fs_name
            if best_fs_name_for_model:
                best_f1_per_model[model_name] = (best_fs_name_for_model, best_test_f1_for_model)
        else: continue

    print("\n=== best combo based on f1's ===")
    if best_overall_combination:
        model_name, fs_name = best_overall_combination
        best_result = all_results[model_name][fs_name]
        test_metrics = best_result['test_metrics']

        print(f"best model: {model_name}")
        actual_feature_count = feature_sets[fs_name][0].shape[1] if fs_name in feature_sets and feature_sets[fs_name][0] is not None else 'N/A'
        print(f"best fs: {fs_name} ({actual_feature_count} features)")
        print(f"best cvf1 sc: {best_result['best_cv_score']:.4f}")
        print(f"test f1: {test_metrics['f1_score']:.4f}")
        print(f"test acc: {test_metrics['accuracy']:.4f}")
        print(f"test prec: {test_metrics['precision']:.4f}")
        print(f"test rec: {test_metrics['recall']:.4f}")
        print(f"params: {best_result['best_params']}\n")
        print(f"conf.m.:\n{np.array(test_metrics['confusion_matrix'])}")
    else: pass

MODEL_SAVE_DIR = os.path.join('..', 'models')
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
print("\n--- Saving Best Model Per Algorithm (Based on Test F1) ---")
if 'scaler' in locals() and scaler is not None:
    try:
        joblib.dump(scaler, os.path.join(MODEL_SAVE_DIR, 'hybrid_kaggle_m1_global_scaler.pkl'))
        print(f"   Saved global StandardScaler: {os.path.join(MODEL_SAVE_DIR, 'hybrid_kaggle_m1_global_scaler.pkl')}")
    except Exception as e:
        print(f"   Error saving global StandardScaler: {e}")
else:
    print("   Global StandardScaler not found or is None, skipping save.")
if 'best_f1_per_model' not in locals() or not best_result:
     print("Could not determine best feature set per model. ??")
else:
    for model_name, (best_fs_name_for_model, best_test_f1_for_model) in best_f1_per_model.items():
        print(f"\nProcessing {model_name}...")
        if model_name in all_results and best_fs_name_for_model in all_results[model_name]:
            best_combination_results = all_results[model_name][best_fs_name_for_model]
            model_to_save = best_combination_results.get('trained_model')
            transformer_to_save = best_combination_results.get('transformer')

            if model_to_save:
                is_keras_model = isinstance(model_to_save, KerasClassifier)
                file_extension = '.keras' if is_keras_model else '.pkl'
                model_filename = f'hybrid_kaggle_m1_{model_name.lower()}_best_model_{best_fs_name_for_model}{file_extension}'
                MODEL_SAVE_PATH_ALG = os.path.join(MODEL_SAVE_DIR, model_filename)
                try:
                    if is_keras_model:
                        model_to_save.model_.save(MODEL_SAVE_PATH_ALG)
                    else:
                        joblib.dump(model_to_save, MODEL_SAVE_PATH_ALG)
                    print(f"   Saved model: {MODEL_SAVE_PATH_ALG}")
                except Exception as e:
                    print(f"   Error saving {model_name} model to {MODEL_SAVE_PATH_ALG}: {e}")
            else:
                print(f"   No trained model found for {model_name} on {best_fs_name_for_model}.")
            if transformer_to_save and best_fs_name_for_model != 'Scaled_All':
                 transformer_filename = f'hybrid_kaggle_m1_selector_{best_fs_name_for_model}.pkl'
                 TRANSFORMER_SAVE_PATH = os.path.join(MODEL_SAVE_DIR, transformer_filename)
                 try:
                     joblib.dump(transformer_to_save, TRANSFORMER_SAVE_PATH)
                     print(f"   Saved feature selection transformer: {TRANSFORMER_SAVE_PATH}")
                 except Exception as e:
                    print(f"   Error saving transformer for {best_fs_name_for_model} to {TRANSFORMER_SAVE_PATH}: {e}")
            elif best_fs_name_for_model != 'Scaled_All':
                print(f"   Warning: Feature selection transformer not found for {best_fs_name_for_model}.")
        else:
            print(f"No valid results found for the best feature set '{best_fs_name_for_model}' for model {model_name}.")

print("\n--- all done!!! ---")


--- model training and randomsearchcv: ---


=== train&tune LightGBM (Hybrid) ===

--- tune LightGBM on fs: Scaled_All_CNN (65536 features) ---
\RandomSearch tuning (scoring='f1')... with 8 iterations
Fitting 3 folds for each of 8 candidates, totalling 24 fits
RandomSearch duration: 1256.39 seconds

best params:
{'subsample': 0.8, 'num_leaves': 60, 'n_estimators': 120, 'min_split_gain': 0.1, 'min_child_samples': 5, 'max_depth': 8, 'learning_rate': 0.1, 'colsample_bytree': 0.9}

best CV score:
0.9627694334532643
best CV f1 for LightGBM on Scaled_All_CNN: 0.9628
\LightGBM on the test set using Scaled_All_CNN.
duration: 0.1135 seconds
accuracy: 0.9360
precision: 0.9482
recall: 0.9683
f1 score: 0.9581

confusion matrix (LightGBM on Scaled_All_CNN):
[[ 51  10]
 [  6 183]]

--- tune LightGBM on fs: Scaled_Corr50%_CNN (32768 features) ---
\RandomSearch tuning (scoring='f1')... with 8 iterations
Fitting 3 folds for each of 8 candidates, totalling 24 fits
RandomSearch duration: 591.59 seconds
