In [1]:
pip install opencv-contrib-python scikit-learn matplotlib tqdm

Looking in indexes: http://mirrors.aliyun.com/pypi/simple
Note: you may need to restart the kernel to use updated packages.


In [8]:
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.cluster import MiniBatchKMeans
from tqdm import tqdm
import matplotlib.pyplot as plt

# Root directory of the dataset; passed to load_image_paths, which treats its
# entries as per-class folders.
DATA_DIR = '/root/Aerial_Landscapes'
# NOTE(review): not referenced anywhere in the visible code -- presumably the
# expected number of classes; confirm it is still needed.
NUM_CLASSES = 15
# Number of k-means clusters requested from build_vocabulary, i.e. the length
# of each bag-of-words histogram.
NUM_CLUSTERS = 100
# Per-class cap on the number of images loaded (forwarded to load_image_paths);
# None disables the cap.
LIMIT = None

def load_image_paths(root_dir, limit=None):
    """Collect image paths and integer labels from a class-per-folder directory tree.

    Every non-hidden sub-directory of ``root_dir`` is treated as one class. Classes
    are numbered by the alphabetical order of their directory names. Plain files
    and hidden directories (names starting with ``.``, e.g. ``.ipynb_checkpoints``)
    found directly in ``root_dir`` are ignored.

    Args:
        root_dir: Directory whose sub-directories each contain the images of one class.
        limit: Optional maximum number of images to take per class. ``None``, ``0``
            or a negative value means "no limit".

    Returns:
        A tuple ``(image_paths, labels, class_names)`` where ``image_paths`` is a list
        of file paths, ``labels`` is the parallel list of class indices and
        ``class_names`` is the sorted list of class directory names.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    # Only real, visible directories are classes; a stray file here would otherwise
    # make os.listdir(cls_path) fail below.
    class_names = sorted(
        entry for entry in os.listdir(root_dir)
        if not entry.startswith('.') and os.path.isdir(os.path.join(root_dir, entry))
    )

    image_paths = []
    labels = []
    for idx, cls in enumerate(class_names):
        cls_path = os.path.join(root_dir, cls)
        # os.listdir order is arbitrary; sorting makes the result (and the subset
        # kept by `limit`) deterministic across runs and machines.
        img_names = sorted(
            name for name in os.listdir(cls_path)
            if name.lower().endswith(image_extensions)
        )
        if limit and limit > 0:
            img_names = img_names[:limit]
        image_paths.extend(os.path.join(cls_path, name) for name in img_names)
        labels.extend([idx] * len(img_names))
    return image_paths, labels, class_names

def extract_sift_descriptors(image_paths, labels):
    """Compute SIFT descriptors for each image and keep the labels of usable images.

    Images that cannot be read, or for which SIFT yields no descriptors, are
    dropped together with their label, so the two returned lists stay aligned.

    Args:
        image_paths: Iterable of image file paths.
        labels: Sequence of labels parallel to ``image_paths``.

    Returns:
        A tuple ``(descriptors_list, valid_labels)``: one descriptor array per
        retained image and the matching list of labels.
    """
    sift = cv2.SIFT_create()
    descriptors_list = []
    valid_labels = []
    unreadable = []

    for path, label in zip(tqdm(image_paths, desc="Extracting SIFT"), labels):
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports a missing/corrupt/unsupported file by returning
            # None instead of raising; detectAndCompute would fail on it.
            unreadable.append(path)
            continue
        _, descriptors = sift.detectAndCompute(img, None)
        if descriptors is None or len(descriptors) == 0:
            # No keypoints detected: nothing to contribute to the vocabulary.
            continue
        descriptors_list.append(descriptors)
        valid_labels.append(label)

    if unreadable:
        print(f"Skipped {len(unreadable)} unreadable image(s), e.g. {unreadable[0]}")

    return descriptors_list, valid_labels

def build_vocabulary(descriptor_list, num_clusters, max_descriptors=100000, random_state=42):
    """Fit a MiniBatchKMeans visual vocabulary on the stacked descriptors.

    Args:
        descriptor_list: Non-empty list of 2-D descriptor arrays (one per image)
            that can be stacked row-wise.
        num_clusters: Number of clusters (visual words) to fit.
        max_descriptors: Upper bound on the number of descriptor rows used for
            fitting; a random subset of this size is drawn when more are
            available. ``None`` disables subsampling.
        random_state: Seed used both for the subsampling and for MiniBatchKMeans,
            so repeated calls on the same input produce the same vocabulary.

    Returns:
        The fitted ``MiniBatchKMeans`` instance.

    Raises:
        ValueError: If ``descriptor_list`` is empty.
    """
    if len(descriptor_list) == 0:
        raise ValueError("descriptor_list is empty: no descriptors to build a vocabulary from")

    all_descriptors = np.vstack(descriptor_list)

    if max_descriptors is not None and len(all_descriptors) > max_descriptors:
        # Draw row indices with a seeded generator instead of shuffling the whole
        # array in place with the unseeded global RNG.
        rng = np.random.default_rng(random_state)
        keep = rng.choice(len(all_descriptors), size=max_descriptors, replace=False)
        all_descriptors = all_descriptors[keep]

    kmeans = MiniBatchKMeans(n_clusters=num_clusters, batch_size=1000, random_state=random_state)
    kmeans.fit(all_descriptors)
    return kmeans


def extract_bow_histograms(descriptor_list, kmeans):
    """Turn per-image descriptors into normalized bag-of-words histograms.

    Each descriptor is assigned to a cluster with ``kmeans.predict``; the cluster
    ids are counted into ``kmeans.n_clusters`` bins and the counts are divided by
    their sum (plus a small epsilon).

    Args:
        descriptor_list: List with one descriptor array per image. An entry that
            is ``None`` or has zero rows yields an all-zero histogram.
        kmeans: Fitted clustering object exposing ``n_clusters`` and ``predict``.

    Returns:
        A float32 array of shape ``(len(descriptor_list), kmeans.n_clusters)``.
    """
    n_words = kmeans.n_clusters
    # Pre-allocating fixes both the dtype and the 2-D shape, even when the input
    # list is empty or contains only descriptor-less images.
    histograms = np.zeros((len(descriptor_list), n_words), dtype='float32')
    bin_edges = np.arange(n_words + 1)

    for row, descriptors in zip(histograms, descriptor_list):
        if descriptors is None or len(descriptors) == 0:
            # Leave the zero row; predict() must not be called with no samples.
            continue
        words = kmeans.predict(descriptors)
        hist, _ = np.histogram(words, bins=bin_edges)
        hist = hist.astype('float32')
        hist /= (hist.sum() + 1e-7)
        row[:] = hist
    return histograms

def train_and_evaluate(X, y, class_names):
    """Train an RBF SVM on the histograms and print evaluation metrics.

    The data is split 80/20 (stratified, fixed seed), an ``SVC`` is fitted on the
    training part and a classification report and confusion matrix are printed
    for the test part. The features and labels passed in are also saved to
    ``bow_histograms_<k>.npy`` and ``bow_labels_<k>.npy`` in the current working
    directory, where ``<k>`` is the number of columns of ``X``.

    Args:
        X: 2-D array-like of feature vectors, one row per sample.
        y: Sequence of integer class indices parallel to ``X``; index ``i`` is
            expected to refer to ``class_names[i]``.
        class_names: List of class names used to label the report rows.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    print("Histogram shape:", X.shape)
    print("Label count:", len(y))
    print("Unique labels:", np.unique(y))

    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)
    clf = SVC(kernel='rbf', C=10, gamma=0.1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Save the arguments themselves (not same-named module-level variables), and
    # derive the file suffix from the feature width so the function does not
    # depend on any global state.
    num_features = X.shape[1]
    np.save(f"bow_histograms_{num_features}.npy", X)
    np.save(f"bow_labels_{num_features}.npy", y)
    print("✅ Training complete.\n")

    # Passing the full label set keeps target_names aligned even if a class has
    # no samples among the evaluated labels.
    label_ids = np.arange(len(class_names))
    print("Classification Report:")
    print(classification_report(y_test, y_pred, labels=label_ids, target_names=class_names))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred, labels=label_ids))

# Pipeline driver: load paths -> SIFT descriptors -> k-means vocabulary ->
# bag-of-words histograms -> SVM training and evaluation.
if __name__ == "__main__":
    # Gather image paths and class indices from the dataset directory.
    image_paths, labels, class_names = load_image_paths(DATA_DIR, limit=LIMIT)
    # `labels` is rebound here to the labels of the images that produced
    # descriptors, so it stays aligned with `descriptors_list`.
    descriptors_list, labels = extract_sift_descriptors(image_paths, labels)
    # Cluster the descriptors into NUM_CLUSTERS visual words.
    kmeans = build_vocabulary(descriptors_list, NUM_CLUSTERS)
    # One normalized histogram of visual words per retained image.
    histograms = extract_bow_histograms(descriptors_list, kmeans)
    train_and_evaluate(histograms, labels, class_names)


Extracting SIFT: 100%|██████████| 12000/12000 [02:25<00:00, 82.32it/s]


Histogram shape: (11921, 100)
Label count: 11921
Unique labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
✅ Training complete.

Classification Report:
              precision    recall  f1-score   support

 Agriculture       0.51      0.56      0.53       160
     Airport       0.39      0.41      0.40       160
       Beach       0.62      0.35      0.45       160
        City       0.39      0.69      0.50       160
      Desert       0.49      0.28      0.36       151
      Forest       0.63      0.84      0.72       160
   Grassland       0.55      0.34      0.42       155
     Highway       0.51      0.44      0.47       160
        Lake       0.31      0.10      0.15       159
    Mountain       0.50      0.73      0.59       160
     Parking       0.89      0.78      0.83       160
        Port       0.55      0.46      0.50       160
     Railway       0.49      0.65      0.56       160
 Residential       0.45      0.79      0.58       160
       River       0.29      0.14