In [1]:
import os
import numpy as np
import cv2
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import joblib
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold, train_test_split
from sklearn.base import ClassifierMixin
from sklearn.metrics import f1_score, accuracy_score, classification_report, precision_score, recall_score, confusion_matrix
from tqdm import tqdm
import warnings
import time
from skimage.feature import local_binary_pattern, hog 
from sklearn.feature_selection import SelectKBest, f_classif, RFE
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.decomposition import PCA
import lightgbm as lgb
warnings.filterwarnings('ignore')
from keras.models import Sequential, Model
from keras.layers import (
    Input, Conv2D, MaxPooling2D, BatchNormalization,
    Dropout, Flatten, Dense, LeakyReLU, ReLU
)

class SklearnKerasClassifier(KerasClassifier, ClassifierMixin):
    def __init__(self, model=None, **kwargs):
        super().__init__(model=model, **kwargs)

    @property
    def _tags(self): return self.model._tags if hasattr(self.model, "_tags") else {"binary_only": True}


def create_custom_cnn(
    input_shape=None,
    conv_blocks=((16, (3, 3)),),
    dense_layers=(128,), # CNN'in sonundaki dense katmanları (sınıflandırma öncesi)
    dropout_rate=0.3,
    activation='leaky_relu',
    num_classes=1, # İkili sınıflandırma için 1
    output_activation='sigmoid', # İkili sınıflandırma için sigmoid
    learning_rate=0.00002,
    meta=None 
):
    """Conv -> BN -> Activation -> Pool -> Dropout -> Flatten -> Dense -> Output"""
    if input_shape is None:
        if meta is None or "X_shape_" not in meta: raise ValueError("meta or input_shape parameter is missing")
        input_shape = meta["X_shape_"][1:] 

    model = Sequential(name="Custom_CNN")
    model.add(Input(shape=input_shape))

    for filters, kernel_size in conv_blocks:
        model.add(Conv2D(filters, kernel_size, padding='same'))
        model.add(BatchNormalization())
        if activation == 'leaky_relu': model.add(LeakyReLU(alpha=0.1))
        else: model.add(ReLU())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(dropout_rate))
    model.add(Flatten()) 

    for units in dense_layers:
        model.add(Dense(units))
        model.add(BatchNormalization())
        if activation == 'leaky_relu': model.add(LeakyReLU(alpha=0.1))
        else: model.add(ReLU())
        model.add(Dropout(dropout_rate))
    
    # Sınıflandırma katmanı
    model.add(Dense(num_classes, activation=output_activation))

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

def load_prep_4_cnn(data_dir, target_size=(128, 128)):
    all_images = []
    all_labels = []
    class_dirs = {'fire_images': 1, 'non_fire_images': 0}

    print(f"Loading and preprocessing images from {data_dir}...")
    for class_name, label in class_dirs.items():
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            print(f"Warning: Class directory not found: {class_path}. Skipping.")
            continue
        for img_name in tqdm(os.listdir(class_path), desc=f"Processing {class_name}"):
            img_path = os.path.join(class_path, img_name)
            if not img_name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
                continue
            try:
                img = cv2.imread(img_path)
                if img is None: continue
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img_resized = cv2.resize(img, target_size)
                img_normalized = img_resized.astype(np.float32) / 255.0
                all_images.append(img_normalized)
                all_labels.append(label)
            except Exception as e:
                # print(f"Error processing {img_path}: {e}") # Çok fazla çıktı verebilir
                continue
    print(f"Loaded {len(all_images)} images.")
    return np.array(all_images), np.array(all_labels)

def create_custom_mlp(hidden_layer_1_neurons=128, hidden_layer_2_neurons=64,
                        dropout_rate=0.3, activation='leaky_relu', learning_rate=0.001,
                        meta=None):
    n_features_in = meta["n_features_in_"]

    model = Sequential()
    model.add(Input(shape=(n_features_in,)))
    model.add(Dense(hidden_layer_1_neurons))
    model.add(BatchNormalization())
    if activation == 'leaky_relu': model.add(LeakyReLU(alpha=0.1))
    else: model.add(tf.keras.layers.ReLU())
    model.add(Dropout(dropout_rate))
    if hidden_layer_2_neurons is not None and hidden_layer_2_neurons > 0:
        model.add(Dense(hidden_layer_2_neurons))
        model.add(BatchNormalization())
        if activation == 'leaky_relu': model.add(LeakyReLU(alpha=0.1))
        else: model.add(tf.keras.layers.ReLU())
        model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

def split_data(features_array, labels_array, test_size=0.2, random_state=42):
    if features_array.shape[0] == 0:
        print("No features to split.")
        return None, None, None, None
    X_train, X_test, y_train, y_test = train_test_split(
        features_array,
        labels_array,
        test_size=test_size,
        random_state=random_state,
        stratify=labels_array
    )

    print(f"Training features shape: {X_train.shape}")
    print(f"Testing features shape: {X_test.shape}")
    print(f"Training labels shape: {y_train.shape}")
    print(f"Testing labels shape: {y_test.shape}")
    return X_train, X_test, y_train, y_test

def scale_features(X_train, X_test):
    if X_train is None or X_test is None or X_train.shape[0] == 0:
        print("No data for scaling.")
        return None, None, None
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    print("Features scaled.")
    return X_train_scaled, X_test_scaled, scaler

def perform_correlation_selection(X_train, y_train, X_test, k_features):
    if X_train is None or X_test is None or X_train.shape[0] == 0:
        print("No data for correlation selection.")
        return X_train, X_test, None
    
    n_total_features = X_train.shape[1]
    k_features_int = k_features
    
    if isinstance(k_features, str) and k_features.endswith('%'):
        try:
            percentage = float(k_features[:-1]) / 100.0
            k_features_int = max(1, int(n_total_features * percentage))
        except ValueError:
            print(f"Invalid percentage string for k_features: {k_features}")
            return X_train, X_test, None
    elif k_features == 'all':
        return X_train, X_test, None # No selection
    elif isinstance(k_features, int) and k_features > 0:
        k_features_int = min(k_features, n_total_features)
    else:
        print(f"Invalid k_features value: {k_features}")
        return X_train, X_test, None

    if k_features_int <= 0 or k_features_int >= n_total_features:
        print(f"KBest: Number of features to select ({k_features_int}) is out of valid range. Skipping selection.")
        return X_train, X_test, None
        
    selector = SelectKBest(score_func=f_classif, k=k_features_int)
    selector.fit(X_train, y_train)
    X_train_selected = selector.transform(X_train)
    X_test_selected = selector.transform(X_test)

    print(f"Original feature shape: {X_train.shape}")
    print(f"Selected feature shape (Correlation {k_features}): {X_train_selected.shape}")
    return X_train_selected, X_test_selected, selector

def perform_rfe_selection(X_train, y_train, X_test, n_features_to_select, step=0.1, estimator=None):
    if X_train is None or X_test is None or X_train.shape[0] == 0:
        print("No data for RFE selection.")
        return X_train, X_test, None
    
    n_total_features = X_train.shape[1]
    n_features_int = n_features_to_select
    if estimator is None: estimator = LogisticRegression(solver='liblinear', random_state=42, max_iter=2000)

    if isinstance(n_features_to_select, str) and n_features_to_select.endswith('%'):
        try:
            percentage = float(n_features_to_select[:-1]) / 100.0
            n_features_int = max(1, int(n_total_features * percentage))
        except ValueError:
            print(f"Invalid percentage string for n_features_to_select: {n_features_to_select}")
            return X_train, X_test, None
    elif isinstance(n_features_to_select, int) and n_features_to_select > 0:
        n_features_int = min(n_features_to_select, n_total_features)
    elif n_features_to_select == 'auto': # RFE'de 'auto' özel bir durum, burada desteklenmiyor
        print("RFE: 'auto' n_features_to_select not supported for explicit percentage/count logic.")
        return X_train, X_test, None
    else:
        print(f"Invalid n_features_to_select value: {n_features_to_select}")
        return X_train, X_test, None
    
    if n_features_int <= 0 or n_features_int >= n_total_features:
        print(f"RFE: Number of features to select ({n_features_int}) is out of valid range. Skipping selection.")
        return X_train, X_test, None

    try:
        rfe = RFE(estimator=estimator, n_features_to_select=n_features_int, step=step)
        rfe.fit(X_train, y_train)
        X_train_selected = rfe.transform(X_train)
        X_test_selected = rfe.transform(X_test)
        print(f"Original feature shape: {X_train.shape}")
        print(f"Selected feature shape (RFE {n_features_to_select}): {X_train_selected.shape}")
        return X_train_selected, X_test_selected, rfe
    except Exception as e:
        print(f"Error during RFE selection: {e}")
        return X_train, X_test, None

def tune_model_hyperparameters(model_estimator, X_train, y_train, param_grid, cv_strategy, scoring='f1', search_method='RandomSearch', n_iter=10, validation_split_keras=0.2):
    if X_train is None or y_train is None or X_train.shape[0] == 0:
        print("No data for hyperparameter tuning.")
        return None
    print(f"Starting {search_method} tuning (scoring='{scoring}')... with {n_iter} iterations")
    start_time = time.time()
    fit_params = {}
    if isinstance(model_estimator, KerasClassifier):
        fit_params['validation_split'] = validation_split_keras
        # KerasClassifier'da verbose RandomSearchCV tarafından kontrol edildiği için burada ayarlamaya gerek yok.
        # callbacks de zaten estimator tanımında verilmiş.
        
    if search_method == 'RandomSearch':
         search_cv = RandomizedSearchCV(
            estimator=model_estimator,
            param_distributions=param_grid,
            n_iter=n_iter,
            cv=cv_strategy,
            scoring=scoring,
            n_jobs=1, # Keras modeli için n_jobs > 1 sorun çıkarabilir
            verbose=1,
            random_state=42
         )
    else:
        print(f"Search method '{search_method}' not supported.")
        return None
    
    try:
        search_cv.fit(X_train, y_train, **fit_params)
    except Exception as e:
        print(f"Error during RandomizedSearchCV fit: {e}")
        return None

    end_time = time.time()
    print(f"{search_method} duration: {end_time - start_time:.2f} seconds")
    print("\nBest parameters:")
    print(search_cv.best_params_)
    print("\nBest CV score:")
    print(search_cv.best_score_)
    return search_cv

def evaluate_model(model, X_test, y_test, model_name="Model", feature_set_name="Unknown Feature Set"):
    if model is None or X_test is None or y_test is None or X_test.shape[0] == 0:
        print(f"{model_name} evaluation skipped on {feature_set_name}: model not trained or test data is empty.")
        return {}
    print(f"Evaluating {model_name} on the test set using {feature_set_name}...")
    start_time = time.time()
    
    # KerasClassifier ve TF.keras.Model için özel predict handling
    if isinstance(model, KerasClassifier):
        # KerasClassifier'ın predict metodu doğrudan tahminleri döndürür
        y_pred = model.predict(X_test)
        # binary_only=True etiketi nedeniyle çıktı 0 veya 1 olacaktır, ancak yine de emin olmak için kontrol edelim
        if len(y_pred.shape) > 1 and y_pred.shape[1] > 1: # multi-class
            y_pred = np.argmax(y_pred, axis=1)
        elif len(y_pred.shape) > 1 and y_pred.shape[1] == 1: # binary, often floats
             y_pred = (y_pred > 0.5).astype(int)
        else: # 1D array of floats for binary classification
            y_pred = (y_pred > 0.5).astype(int)
        
    elif isinstance(model, tf.keras.Model):
        # Doğrudan Keras modeli ise predict_proba gibi davranır
        y_pred_proba = model.predict(X_test, verbose=0)
        y_pred = (y_pred_proba > 0.5).astype(int)
    else: # Scikit-learn modelleri
        y_pred = model.predict(X_test)

    end_time = time.time()
    print(f"Prediction duration: {end_time - start_time:.4f} seconds")
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    conf_matrix = confusion_matrix(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"\nConfusion Matrix ({model_name} on {feature_set_name}):")
    print(conf_matrix)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'confusion_matrix': conf_matrix.tolist()
    }

def perform_pca_dimension_reduction(X_train, X_test, n_components):
    if X_train is None or X_test is None or X_train.shape[0] == 0:
        print("No data for PCA reduction.")
        return None, None, None
    try:        
        pca = PCA(n_components=n_components, random_state=42)
        X_train_pca = pca.fit_transform(X_train)
        X_test_pca = pca.transform(X_test)

        print(f"Original feature shape: {X_train.shape}")
        print(f"PCA transformed feature shape: {X_train_pca.shape}")
        print(f"Variance ratio with {pca.n_components_} components: {np.sum(pca.explained_variance_ratio_):.4f}")

        return X_train_pca, X_test_pca, pca
    except Exception as e:
        print(f"Error during PCA: {e}")
        return X_train, X_test, None # PCA başarısız olursa orijinal veriyi döndür

def create_cnn_feature_extractor(cnn_model_architecture):
    flatten_layer = None
    for layer in cnn_model_architecture.layers:
        if isinstance(layer, tf.keras.layers.Flatten):
            flatten_layer = layer
            break
            
    if flatten_layer is None:
        raise ValueError("Flatten layer not found in the CNN model architecture. Cannot create feature extractor.")
    
    # Flatten katmanının çıktısını veren bir model oluştur
    feature_extractor_model = Model(inputs=cnn_model_architecture.inputs, outputs=flatten_layer.output)
    return feature_extractor_model


# --- Ana Çalıştırma Kısmı ---
data_directory = os.path.join('..', 'data_subsets', 'fire_dataset')
target_image_width = 128
target_image_height = 128

# 1. Görüntüleri Yükle ve Ön İşle
X_images, y_labels = load_prep_4_cnn(data_directory, target_size=(target_image_width, target_image_height))
if X_images.shape[0] == 0:
    print("No images loaded. Exiting.")
    exit()

# 2. CNN Modelini Oluştur ve Eğit (Sınıflandırma Modeli Olarak)
print("\n--- Initializing and training the CNN classification model ---")
full_cnn_model = create_custom_cnn(
    input_shape=X_images.shape[1:],
    dense_layers=(128,), # CNN'in sonundaki Dense katmanları (sınıflandırma öncesi)
    num_classes=1, # İkili sınıflandırma
    output_activation='sigmoid', # İkili sınıflandırma
)

# CNN Eğitim verisi ve Doğrulama verisini ayır
X_train_cnn, X_val_cnn, y_train_cnn, y_val_cnn = train_test_split(
    X_images, y_labels, test_size=0.2, random_state=42, stratify=y_labels
)

print("Starting CNN training...")
cnn_history = full_cnn_model.fit(
    X_train_cnn, y_train_cnn,
    epochs=50, # Yeterli bir epoch sayısı seçin, EarlyStopping ile duracaktır
    batch_size=32,
    validation_data=(X_val_cnn, y_val_cnn),
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)],
    verbose=1
)
print("CNN training complete.")

# 3. Eğitilmiş CNN Modelinden Özellik Çıkarıcıyı Türet
print("\n--- Creating CNN Feature Extractor from the trained CNN model ---")
cnn_feature_extractor = create_cnn_feature_extractor(full_cnn_model)

# 4. Eğitilmiş CNN Özellik Çıkarıcı ile Tüm Görüntülerden Özellikleri Elde Et
print("\n--- Extracting CNN features for hybrid model training ---")
features_array_orig = cnn_feature_extractor.predict(X_images, verbose=0)

# 5. Hibrit Modeller için Veriyi Böl
# Bu X_train_orig ve X_test_orig, CNN'den gelen özelliklerdir.
X_train_orig, X_test_orig, y_train, y_test = split_data(features_array_orig, y_labels, test_size=0.25, random_state=42)

if X_train_orig is None or X_train_orig.shape[0] == 0:
    print("No features for hybrid model training. Exiting.")
    exit()

# 6. CNN Özelliklerini Ölçekle
print("\n--- Scaling CNN features for hybrid models ---")
X_train_scaled, X_test_scaled, scaler = scale_features(X_train_orig, X_test_orig)
feature_sets = {}
feature_transformers = {}

if X_train_scaled is not None:
    feature_sets['Scaled_All_CNN'] = (X_train_scaled, X_test_scaled)
    feature_transformers['Scaled_All_CNN'] = scaler
else:
    print("Scaled features are None. Exiting.")
    exit()

# 7. Özellik Seçimi ve Boyut Azaltma (Correlation, RFE, PCA)
print("\n--- Performing feature selection & PCA ---")
original_feature_count = X_train_scaled.shape[1]

# Korelasyon tabanlı özellik seçimi
corr_feature_percentages = ['75%', '50%']
for percentage_str in corr_feature_percentages:
    print(f"\nPerforming correlation selection: {percentage_str}...")
    try:
        X_train_corr, X_test_corr, corr_selector = perform_correlation_selection(
            X_train_scaled, y_train, X_test_scaled, k_features=percentage_str
        )
        if X_train_corr is not None and X_train_corr.shape[1] < original_feature_count:
            feature_sets[f'Scaled_Corr{percentage_str}_CNN'] = (X_train_corr, X_test_corr)
            feature_transformers[f'Scaled_Corr{percentage_str}_CNN'] = corr_selector
    except Exception as e:
        print(f"Error during correlation selection for {percentage_str}: {e}")

# RFE özellik seçimi
rfe_feature_percentages = ['75%', '50%']
rfe_step_val = 0.1
rfe_estimator = LogisticRegression(solver='liblinear', random_state=42, max_iter=2000)

for percentage_str in rfe_feature_percentages:
    print(f"\nPerforming RFE selection with {percentage_str} (step={rfe_step_val})...")
    try:
        X_train_rfe, X_test_rfe, rfe_selector = perform_rfe_selection(
            X_train_scaled, y_train, X_test_scaled, n_features_to_select=percentage_str, step=rfe_step_val, estimator=rfe_estimator
        )
        if X_train_rfe is not None and X_train_rfe.shape[1] < original_feature_count:
            feature_sets[f'Scaled_RFE{percentage_str}_CNN'] = (X_train_rfe, X_test_rfe)
            feature_transformers[f'Scaled_RFE{percentage_str}_CNN'] = rfe_selector
    except Exception as e:
        print(f"Error during RFE selection for {percentage_str}: {e}")

# PCA boyut azaltma
pca_components = [0.95, 500] # 0.95 varyansın korunması, 500 bileşen
for n_comp in pca_components:
    print(f"\nPerforming PCA with n_components={n_comp}...")
    try:
        X_train_pca, X_test_pca, pca_transformer = perform_pca_dimension_reduction(X_train_scaled, X_test_scaled, n_components=n_comp)
        if X_train_pca is not None and (isinstance(n_comp, int) and X_train_pca.shape[1] < original_feature_count or isinstance(n_comp, float)):
            fs_name_suffix = f"{int(n_comp*100)}%" if isinstance(n_comp, float) else str(n_comp)
            fs_name = f'Scaled_PCA_{fs_name_suffix}_CNN'
            feature_sets[fs_name] = (X_train_pca, X_test_pca)
            feature_transformers[fs_name] = pca_transformer
    except Exception as e:
        print(f"Error during PCA for n_components={n_comp}: {e}")

print("\n--- Available Feature Sets for Tuning ---")
for name, (X_train_fs, _) in feature_sets.items():
    print(f"- {name}: {X_train_fs.shape[1]} features")

# 8. Hibrit Modelleri Eğit ve Hiperparametre Ayarı Yap
print("\n--- Model Training and RandomizedSearchCV for Hybrid Models ---")
models_to_tune = {
     'LightGBM': {
        'estimator': lgb.LGBMClassifier(random_state=42, objective='binary', metric='binary_logloss', verbosity=-1, n_jobs=4),
        'param_grid': {
            'n_estimators': [50, 80, 120], 
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [8, 15],
            'num_leaves': [20, 40, 60],
            'subsample': [0.8, 0.9],
            'colsample_bytree': [0.8, 0.9, 1.0],
            'min_split_gain': [0.1],
            'min_child_samples': [5]
    }
    },
    'SVM': {
        'estimator': SVC(random_state=42, probability=True), # proba True for predict_proba in test
        'param_grid': {
            'C': [0.1, 1, 10, 50],
            'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
            'kernel': ['rbf', 'linear']
        }
    },
    'Custom_MLP': {
        'estimator': SklearnKerasClassifier(
            model=create_custom_mlp,
            loss=tf.keras.losses.BinaryCrossentropy(),
            epochs=100,
            batch_size=32,
            verbose=0, # KerasClassifier verbose'u RandomSearchCV tarafından kontrol edilir
            callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, verbose=0, restore_best_weights=True)]
        ),
        'param_grid': {
            'model__hidden_layer_1_neurons': [64, 128, 256],
            'model__hidden_layer_2_neurons': [0, 64, 128],
            'model__dropout_rate': [0.2, 0.4, 0.6],
            'model__activation': ['relu', 'leaky_relu'],
            'optimizer__learning_rate': [0.001, 0.005, 0.01]
        }
    }
}

cv_strategy = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
scoring_metric = 'f1'
all_results = {}
best_overall_test_score = -np.inf
best_overall_combination = None
best_overall_trained_model = None
best_overall_X_test = None
best_overall_transformer = None

for model_name, model_config in models_to_tune.items():
    all_results[model_name] = {}
    estimator = model_config['estimator']
    param_distributions = model_config['param_grid']
    n_iter_search = model_config.get('n_iter', 8) # Varsayılan iterasyon sayısı
    print(f"\n\n=== Training & Tuning {model_name} (Hybrid Model) ===")
    for fs_name in sorted(feature_sets.keys()):
        X_train_fs, X_test_fs = feature_sets[fs_name]
        print(f"\n--- Tuning {model_name} on Feature Set: {fs_name} ({X_train_fs.shape[1]} features) ---")

        if X_train_fs is None or X_train_fs.shape[0] == 0:
            print(f"Skipping {fs_name} due to empty training data.")
            continue

        tuned_search = tune_model_hyperparameters(
            estimator,
            X_train_fs,
            y_train,
            param_grid=param_distributions,
            cv_strategy=cv_strategy,
            scoring=scoring_metric,
            search_method='RandomSearch',
            n_iter=n_iter_search,
            validation_split_keras=0.2
        )

        if tuned_search:
            best_model_for_combination = tuned_search.best_estimator_
            best_cv_score = tuned_search.best_score_
            best_params = tuned_search.best_params_
            print(f"Best CV {scoring_metric} for {model_name} on {fs_name}: {best_cv_score:.4f}")
            test_metrics = evaluate_model(best_model_for_combination, X_test_fs, y_test, model_name, fs_name)
            all_results[model_name][fs_name] = {
                'best_cv_score': best_cv_score,
                'best_params': best_params,
                'test_metrics': test_metrics,
                'trained_model': best_model_for_combination,
                'transformer': feature_transformers.get(fs_name)
            }
            if test_metrics and test_metrics.get('f1_score', -np.inf) > best_overall_test_score:
                best_overall_test_score = test_metrics['f1_score']
                best_overall_combination = (model_name, fs_name)
                best_overall_trained_model = best_model_for_combination
                best_overall_X_test = X_test_fs # Bu bilgi test notebook'unda kullanılmaz
                best_overall_transformer = feature_transformers.get(fs_name)

print("\n\n=== Results Summary for All Hybrid Models ===")
if not all_results:
    print("No results available for hybrid models.")
else:
    print("\nBest CV F1 Scores:")
    print("-------------------------------------------------")
    for model_name, fs_results in all_results.items():
        print(f"\n{model_name}:")
        if fs_results:
            for fs_name in sorted(fs_results.keys()):
                result = fs_results[fs_name]
                cv_score = result.get('best_cv_score', float('nan'))
                print(f"  - {fs_name}: {cv_score:.4f}")
        else:
            print("  No results for this model.")

    print("\nTest Results - F1 Score:")
    print("----------------------------")
    best_f1_per_model = {}
    for model_name, fs_results in all_results.items():
        print(f"\n{model_name}:")
        if fs_results:
            best_test_f1_for_model = -np.inf
            best_fs_name_for_model = None

            for fs_name in sorted(fs_results.keys()):
                result = fs_results[fs_name]
                test_f1 = result.get('test_metrics', {}).get('f1_score', float('nan'))
                print(f"  - {fs_name}: {test_f1:.4f}")
                if not np.isnan(test_f1) and test_f1 > best_test_f1_for_model:
                    best_test_f1_for_model = test_f1
                    best_fs_name_for_model = fs_name
            if best_fs_name_for_model:
                best_f1_per_model[model_name] = (best_fs_name_for_model, best_test_f1_for_model)
        else:
            print("  No results for this model.")

    print("\n=== Best Overall Combination Based on Test F1 ===")
    if best_overall_combination:
        model_name, fs_name = best_overall_combination
        best_result = all_results[model_name][fs_name]
        test_metrics = best_result['test_metrics']

        print(f"Best Model: {model_name}")
        actual_feature_count = feature_sets[fs_name][0].shape[1] if fs_name in feature_sets and feature_sets[fs_name][0] is not None else 'N/A'
        print(f"Best Feature Set: {fs_name} ({actual_feature_count} features)")
        print(f"Best CV F1 Score: {best_result['best_cv_score']:.4f}")
        print(f"Test F1 Score: {test_metrics['f1_score']:.4f}")
        print(f"Test Accuracy: {test_metrics['accuracy']:.4f}")
        print(f"Test Precision: {test_metrics['precision']:.4f}")
        print(f"Test Recall: {test_metrics['recall']:.4f}")
        print(f"Best Parameters: {best_result['best_params']}\n")
        print(f"Confusion Matrix:\n{np.array(test_metrics['confusion_matrix'])}")
    else:
        print("No best overall combination found.")

# 9. Modelleri ve Dönüştürücüleri Kaydet
MODEL_SAVE_DIR = os.path.join('..', 'models')
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
print("\n--- Saving Best Model Per Algorithm (Based on Test F1) ---")

# Global StandardScaler'ı kaydet
if 'scaler' in locals() and scaler is not None:
    try:
        joblib.dump(scaler, os.path.join(MODEL_SAVE_DIR, 'hybrid_kaggle_m1_global_scaler.pkl'))
        print(f"   Saved global StandardScaler: {os.path.join(MODEL_SAVE_DIR, 'hybrid_kaggle_m1_global_scaler.pkl')}")
    except Exception as e:
        print(f"   Error saving global StandardScaler: {e}")
else:
    print("   Global StandardScaler not found or is None, skipping save.")

# Eğitilmiş CNN Özellik Çıkarıcıyı Kaydetme
if 'cnn_feature_extractor' in locals() and cnn_feature_extractor is not None:
    try:
        # Önemli: Bu, eğitilmiş CNN'den türetilen özellik çıkarıcıdır.
        cnn_feature_extractor_filename = 'hybrid_kaggle_m1_cnn_feature_extractor.keras' 
        CNN_FEATURE_EXTRACTOR_SAVE_PATH = os.path.join(MODEL_SAVE_DIR, cnn_feature_extractor_filename)
        
        cnn_feature_extractor.save(CNN_FEATURE_EXTRACTOR_SAVE_PATH)
        print(f"   Saved trained CNN Feature Extractor: {CNN_FEATURE_EXTRACTOR_SAVE_PATH}")
    except Exception as e:
        print(f"   Error saving trained CNN Feature Extractor: {e}")
else:
    print("   Trained CNN Feature Extractor object not found or is None. Skipping save.")

if 'best_f1_per_model' not in locals() or not best_f1_per_model: # Corrected variable name
     print("Could not determine best feature set per model. Skipping model/transformer saves.")
else:
    for model_name, (best_fs_name_for_model, best_test_f1_for_model) in best_f1_per_model.items():
        print(f"\nProcessing {model_name}...")
        if model_name in all_results and best_fs_name_for_model in all_results[model_name]:
            best_combination_results = all_results[model_name][best_fs_name_for_model]
            model_to_save = best_combination_results.get('trained_model')
            transformer_to_save = best_combination_results.get('transformer')

            if model_to_save:
                is_keras_model = isinstance(model_to_save, KerasClassifier)
                file_extension = '.keras' if is_keras_model else '.pkl'
                model_filename = f'hybrid_kaggle_m1_{model_name.lower()}_best_model_{best_fs_name_for_model}{file_extension}'
                MODEL_SAVE_PATH_ALG = os.path.join(MODEL_SAVE_DIR, model_filename)
                try:
                    if is_keras_model:
                        # Scikeras KerasClassifier objesi içindeki asıl Keras modelini kaydet
                        model_to_save.model_.save(MODEL_SAVE_PATH_ALG) 
                    else:
                        joblib.dump(model_to_save, MODEL_SAVE_PATH_ALG)
                    print(f"   Saved model: {MODEL_SAVE_PATH_ALG}")
                except Exception as e:
                    print(f"   Error saving {model_name} model to {MODEL_SAVE_PATH_ALG}: {e}")
            else:
                print(f"   No trained model found for {model_name} on {best_fs_name_for_model}.")
            
            # Transformer'ı kaydet (Scaled_All_CNN için zaten scaler kaydedildi, bu yüzden kontrol ekliyoruz)
            if transformer_to_save and best_fs_name_for_model != 'Scaled_All_CNN': 
                 transformer_filename = f'hybrid_kaggle_m1_selector_{best_fs_name_for_model}.pkl'
                 TRANSFORMER_SAVE_PATH = os.path.join(MODEL_SAVE_DIR, transformer_filename)
                 try:
                     joblib.dump(transformer_to_save, TRANSFORMER_SAVE_PATH)
                     print(f"   Saved feature selection transformer: {TRANSFORMER_SAVE_PATH}")
                 except Exception as e:
                    print(f"   Error saving transformer for {best_fs_name_for_model} to {TRANSFORMER_SAVE_PATH}: {e}")
            elif best_fs_name_for_model != 'Scaled_All_CNN': # Bu durumda bir transformer beklendiği halde bulunamadı uyarısı
                print(f"   Warning: Feature selection transformer not found for {best_fs_name_for_model}.")
        else:
            print(f"No valid results found for the best feature set '{best_fs_name_for_model}' for model {model_name}.")

print("\n--- All Hybrid Model Training and Saving Complete ---")

Loading and preprocessing images from ..\data_subsets\fire_dataset...


Processing fire_images: 100%|██████████| 755/755 [00:07<00:00, 98.94it/s] 
Processing non_fire_images: 100%|██████████| 244/244 [00:03<00:00, 61.54it/s] 


Loaded 998 images.

--- Initializing and training the CNN classification model ---
Starting CNN training...
Epoch 1/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 171ms/step - accuracy: 0.6386 - loss: 0.7505 - val_accuracy: 0.7950 - val_loss: 0.4714
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 186ms/step - accuracy: 0.8686 - loss: 0.3426 - val_accuracy: 0.7750 - val_loss: 0.4494
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 184ms/step - accuracy: 0.9239 - loss: 0.2313 - val_accuracy: 0.8000 - val_loss: 0.4132
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 186ms/step - accuracy: 0.9211 - loss: 0.2422 - val_accuracy: 0.8150 - val_loss: 0.3801
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 188ms/step - accuracy: 0.9371 - loss: 0.1905 - val_accuracy: 0.8150 - val_loss: 0.3678
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 202ms/s