In [None]:
import os
import numpy as np
import pickle
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from xgboost import XGBClassifier

import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.models import Model

# Root directory that holds one sub-folder per class.
DATASET_DIR = "data"

# Class sub-folder names. A name's position in this list is the integer label
# assigned to every image found under that folder.
CLASSES = [
    "FloodSatellight",
    "FloodStreet",
    "RoadData",
]

# (height, width) every image is resized to when it is loaded.
IMAGE_SIZE = (224, 224)

# ImageNet-pretrained ResNet50 without its classification head. With
# pooling="avg" the network ends in a global average pool, so it emits one
# flat feature vector per image.
base_model = ResNet50(
    include_top=False,
    weights="imagenet",
    pooling="avg",
)
feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)

def get_all_images(folder):
    """Return the paths of all image files under *folder*, searched recursively.

    A file counts as an image when its name ends in ``.jpg``, ``.jpeg`` or
    ``.png`` (case-insensitive).

    Args:
        folder: Directory to walk. A path that does not exist yields an
            empty list, because ``os.walk`` produces nothing for it.

    Returns:
        A sorted list of paths, each built by joining the walked directory
        with the file name.
    """
    image_exts = ('.jpg', '.jpeg', '.png')
    img_files = [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(folder)
        for name in files
        if name.lower().endswith(image_exts)
    ]
    # os.walk yields entries in arbitrary, filesystem-dependent order. Sorting
    # makes the result (and anything seeded downstream, such as a train/test
    # split with a fixed random_state) reproducible across machines.
    img_files.sort()
    return img_files

# Number of images pushed through the network per predict() call. Calling
# predict() once per image pays the per-call overhead thousands of times;
# batching amortises it.
BATCH_SIZE = 32

# One feature vector per successfully loaded image, with its integer class
# label (the index of the class in CLASSES) at the same position in `labels`.
features = []
labels = []

for label, cls in enumerate(CLASSES):
    folder = os.path.join(DATASET_DIR, cls)
    if not os.path.exists(folder):
        print(f"Folder not found: {folder}")
        continue

    img_files = get_all_images(folder)
    if not img_files:
        print(f"No images found in {folder} or its subfolders")
        continue
    print(f"{cls}: {len(img_files)} images found in {folder} and subfolders")

    with tqdm(total=len(img_files), desc=f"Processing {cls}") as progress:
        for start in range(0, len(img_files), BATCH_SIZE):
            chunk = img_files[start:start + BATCH_SIZE]

            # Only loading/decoding is best-effort: an unreadable file is
            # reported and skipped. Inference errors are not caught here, so
            # a real model failure is not misreported as a bad image.
            batch = []
            for path in chunk:
                try:
                    img = load_img(path, target_size=IMAGE_SIZE)
                    batch.append(img_to_array(img))
                except Exception as e:
                    print(f"Error loading {path}: {e}")

            if batch:
                batch_array = preprocess_input(np.stack(batch))
                batch_features = feature_extractor.predict(batch_array, verbose=0)
                # One flat row per image, as the per-image flatten() produced.
                features.extend(batch_features.reshape(len(batch), -1))
                labels.extend([label] * len(batch))

            progress.update(len(chunk))

X = np.array(features)
y = np.array(labels)

if X.shape[0] == 0:
    raise ValueError("No features extracted! Check your dataset paths and image files.")

print(f"\nTotal images processed: {len(X)}")
print(f"Feature vector shape: {X.shape}")

# Split BEFORE fitting any preprocessing. Fitting the scaler/PCA on the whole
# dataset lets test-set statistics leak into the features the classifier is
# trained on, which makes the reported accuracy optimistic.
# Stratify so both splits keep the class proportions; this needs at least two
# samples in every class, so fall back to a plain split otherwise.
_, class_counts = np.unique(y, return_counts=True)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y if class_counts.min() >= 2 else None,
)

# Standardise, then keep enough principal components to explain 95% of the
# variance. Both transforms are fitted on the training split only.
scaler = StandardScaler()
pca = PCA(n_components=0.95)
X_train = pca.fit_transform(scaler.fit_transform(X_train_raw))
X_test = pca.transform(scaler.transform(X_test_raw))

# Full-dataset projections, kept for the shape report below and for any later
# code that reads these names. They are NOT used for fitting.
X_scaled = scaler.transform(X)
X_pca = pca.transform(X_scaled)

print(f"Shape after PCA: {X_pca.shape}")

# `use_label_encoder` is intentionally not passed: XGBoost reports it as an
# unused parameter, and the labels are already integers.
ml_model = XGBClassifier(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric='mlogloss'
)
ml_model.fit(X_train, y_train)

y_pred = ml_model.predict(X_test)
print("\nML + CNN Model Accuracy:", accuracy_score(y_test, y_pred))
# Pin `labels` so each name in CLASSES is matched to its own integer label
# even when some class happens to be absent from the test split.
print(
    "\nClassification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=list(range(len(CLASSES))),
        target_names=CLASSES,
    ),
)

os.makedirs("models", exist_ok=True)

# Use context managers so each file is flushed and closed deterministically
# instead of relying on garbage collection of an anonymous file object.
for artifact, filename in (
    (ml_model, "models/ml_classifier.pkl"),
    (pca, "models/pca.pkl"),
    (scaler, "models/scaler.pkl"),
):
    with open(filename, "wb") as fh:
        pickle.dump(artifact, fh)
feature_extractor.save("models/resnet50_feature_extractor.h5")

print("\nSaved all models in /models/")


FloodSatellight: 580 images found in data/FloodSatellight and subfolders


Processing FloodSatellight: 100%|██████████| 580/580 [03:38<00:00,  2.65it/s]


FloodStreet: 882 images found in data/FloodStreet and subfolders


Processing FloodStreet: 100%|██████████| 882/882 [05:13<00:00,  2.81it/s]


RoadData: 2009 images found in data/RoadData and subfolders


Processing RoadData: 100%|██████████| 2009/2009 [11:52<00:00,  2.82it/s]



Total images processed: 3471
Feature vector shape: (3471, 2048)
Shape after PCA: (3471, 680)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



ML + CNN Model Accuracy: 0.9870503597122302

Classification Report:
                  precision    recall  f1-score   support

FloodSatellight       1.00      0.99      0.99       142
    FloodStreet       0.99      0.96      0.97       161
       RoadData       0.98      1.00      0.99       392

       accuracy                           0.99       695
      macro avg       0.99      0.98      0.99       695
   weighted avg       0.99      0.99      0.99       695


Saved all models in /models/
