In [21]:
# Import necessary libraries
import os
import cv2
import numpy as np
import joblib
import time
from tqdm import tqdm
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from IPython.display import display
from ipyfilechooser import FileChooser
import ipywidgets as widgets

In [23]:
# ---------- GUI INPUTS ----------
def _make_folder_picker(title):
    """Return a directory-only FileChooser that starts in the current working directory."""
    picker = FileChooser(os.getcwd())
    picker.title = title
    picker.show_only_dirs = True
    return picker


# One picker per folder the training callback reads from / writes to.
image_folder_picker = _make_folder_picker('Select Image Folder')
label_folder_picker = _make_folder_picker('Select Label Folder')
output_folder_picker = _make_folder_picker('Select Output Folder')

# File name (inside the chosen output folder) for the saved model.
output_name_text = widgets.Text(
    description='Output Name:',
    placeholder='Enter output file name',
    value='processed_output.pkl',
    layout=widgets.Layout(width='50%'),
)

run_button = widgets.Button(
    button_style='success',
    description="Run Preprocessing + Train",
)
# Output area that captures everything printed by the button callback.
output = widgets.Output()

# ---------- DISPLAY WIDGETS ----------
display(
    image_folder_picker,
    label_folder_picker,
    output_folder_picker,
    output_name_text,
    run_button,
    output,
)

# ---------- BUTTON CALLBACK ----------
def on_run_clicked(b):
    """Button callback: build a per-pixel dataset, train XGBoost, save and evaluate it.

    Reads the image, label and output folders plus the output file name from the
    module-level widgets, turns every image/label pair into per-pixel samples
    (3 colour values -> 1 class id), trains an ``XGBClassifier`` on an 80/20
    stratified split, dumps the fitted model with ``joblib`` and prints accuracy,
    a classification report and a confusion matrix. All printing happens inside
    the ``output`` widget.

    Args:
        b: The object passed by the button's click handler; not used.
    """
    with output:
        output.clear_output()

        IMAGE_DIR = image_folder_picker.selected_path
        LABEL_DIR = label_folder_picker.selected_path
        OUTPUT_DIR = output_folder_picker.selected_path
        OUTPUT_NAME = output_name_text.value.strip()

        if not IMAGE_DIR or not LABEL_DIR or not OUTPUT_DIR or not OUTPUT_NAME:
            print("❌ Please complete all inputs: folders and output name.")
            return

        print(f"📁 Image Folder: {IMAGE_DIR}")
        print(f"📁 Label Folder: {LABEL_DIR}")
        print(f"📁 Output Folder: {OUTPUT_DIR}")
        print(f"💾 Output File Name: {OUTPUT_NAME}")

        start_time = time.time()

        all_features, all_labels = _load_pixel_samples(IMAGE_DIR, LABEL_DIR)

        # np.concatenate raises an opaque ValueError on an empty list; report it instead.
        if not all_features:
            print("❌ No image/label pairs could be loaded - nothing to train on.")
            return

        # Combine and split
        features = np.concatenate(all_features)
        labels = np.concatenate(all_labels)

        # XGBClassifier expects class ids 0..k-1, but masks are often stored with
        # other values (e.g. 0/255). Re-index to consecutive ids when necessary.
        classes = np.unique(labels)
        if not np.array_equal(classes, np.arange(len(classes))):
            mapping = {c.item(): i for i, c in enumerate(classes)}
            print(f"🔧 Re-mapping label values to class ids: {mapping}")
            # `classes` is sorted and contains every label value, so the
            # insertion index is exactly the class id.
            labels = np.searchsorted(classes, labels).astype(np.int32)

        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.2, stratify=labels, random_state=42
        )

        # ---------- XGBoost Training ----------
        print("🛠️ Training XGBoost model...")
        model = XGBClassifier(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=6,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            n_jobs=-1,
            # 'logloss' is the binary metric; use its multi-class counterpart otherwise.
            eval_metric='logloss' if len(classes) <= 2 else 'mlogloss',
            early_stopping_rounds=10
        )

        # NOTE(review): the test split doubles as the early-stopping set, so the
        # metrics printed below are optimistic. Consider a separate validation split.
        # The bar only marks start/end of fit(); it does not track boosting rounds.
        with tqdm(total=100, desc="Training Progress") as pbar:
            model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
            pbar.update(100)

        print("✅ XGBoost training complete.")

        # Save using joblib as .pkl
        output_path = os.path.join(OUTPUT_DIR, OUTPUT_NAME)
        joblib.dump(model, output_path)

        print(f"✅ Processing complete in {time.time() - start_time:.2f} seconds.")
        print(f"📊 Train samples: {len(X_train)}, Test samples: {len(X_test)}")
        print(f"💾 Output saved to: {output_path}")
        # Evaluate
        y_pred = model.predict(X_test)
        print(f"\n✅ Accuracy: {accuracy_score(y_test, y_pred):.4f}")
        print("\n📋 Classification Report:")
        print(classification_report(y_test, y_pred))
        print("\n🧩 Confusion Matrix:")
        print(confusion_matrix(y_test, y_pred))

        print(f"🕒 Total runtime: {time.time() - start_time:.2f} seconds")


def _list_files(folder):
    """Return the sorted names of the regular files directly inside ``folder``."""
    # Sub-directories are dropped: they can never be read as images and would
    # shift the positional image/label pairing.
    return sorted(
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    )


def _load_pixel_samples(image_dir, label_dir):
    """Read image/label pairs and flatten each into per-pixel samples.

    Files are paired by sorted position. Pairs where either file fails to load
    are skipped; a label whose size differs from its image is resized with
    nearest-neighbour interpolation.

    Args:
        image_dir: Folder holding the input images.
        label_dir: Folder holding the label masks.

    Returns:
        ``(features, labels)`` - two lists with one entry per loaded pair: an
        ``(n_pixels, 3)`` array of colour values and an ``(n_pixels,)`` array of
        label values.
    """
    image_files = _list_files(image_dir)
    label_files = _list_files(label_dir)

    if len(image_files) != len(label_files):
        print(
            f"⚠️ Found {len(image_files)} image files but {len(label_files)} label files - "
            "pairing by sorted order, extra files are ignored."
        )

    all_features = []
    all_labels = []

    for img_file, lbl_file in zip(image_files, label_files):
        img_path = os.path.join(image_dir, img_file)
        lbl_path = os.path.join(label_dir, lbl_file)

        img = cv2.imread(img_path)
        label = cv2.imread(lbl_path, cv2.IMREAD_UNCHANGED)

        if img is None or label is None:
            print(f"⚠️ Skipping {img_file}/{lbl_file} - Failed to load")
            continue

        if label.ndim == 3:
            # A multi-channel mask would flatten to several values per pixel and
            # no longer line up with the feature rows; keep a single channel.
            colour = label[..., :3]
            if not (colour == colour[..., :1]).all():
                print(f"⚠️ {lbl_file} has differing channels - using the first channel only")
            label = label[..., 0]

        if img.shape[:2] != label.shape[:2]:
            print(f"🔧 Resizing {lbl_file} to match {img_file}")
            label = cv2.resize(label, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_NEAREST)

        all_features.append(img.reshape(-1, 3))
        all_labels.append(label.flatten())

    return all_features, all_labels

        
# Bind button: run on_run_clicked whenever the "Run Preprocessing + Train" button is clicked.
run_button.on_click(on_run_clicked)

FileChooser(path='E:\Faruq\Business\DEEPRESENSE\Experimental\binary classification\water body\Model\XGBoost Al…

FileChooser(path='E:\Faruq\Business\DEEPRESENSE\Experimental\binary classification\water body\Model\XGBoost Al…

FileChooser(path='E:\Faruq\Business\DEEPRESENSE\Experimental\binary classification\water body\Model\XGBoost Al…

Text(value='processed_output.pkl', description='Output Name:', layout=Layout(width='50%'), placeholder='Enter …

Button(button_style='success', description='Run Preprocessing + Train', style=ButtonStyle())

Output()