In [1]:
import os

import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.svm import SVC
from skimage.feature import hog, local_binary_pattern
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier


In [2]:
# Define the dataset paths
dataset_paths = os.path.join(os.getcwd(), os.path.join(os.path.join("dataset_1"), "dataset_full"))
dataset_paths = {
    "Building": os.path.join(dataset_paths, "Building"),
    "Forest": os.path.join(dataset_paths, "Forest"),
    "Glacier": os.path.join(dataset_paths, "Glacier"),
    "Mountain": os.path.join(dataset_paths, "Mountains"),
    "Sea": os.path.join(dataset_paths, "Sea"),
    "Street": os.path.join(dataset_paths, "Streets")
}

def process_image(img, target_size=(128, 128)):
    try:
        img = cv2.resize(img, target_size)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        eq_img = cv2.equalizeHist(gray_img)
        return eq_img
    except Exception as err:
        print(f"Error processing image: {err}")
        return None

def load_and_process_images(dataset_paths):
    images = []
    targets = []
    labels_list = list(dataset_paths.keys())

    for label in labels_list:
        label_path = dataset_paths[label]
        label_index = labels_list.index(label)

        if not os.path.isdir(label_path):
            continue

        for filename in os.listdir(label_path):
            file_path = os.path.join(label_path, filename)
            img = cv2.imread(file_path)

            if img is None:
                continue

            processed_img = process_image(img)
            if processed_img is not None:
                images.append(processed_img)
                targets.append(label_index)

    return np.array(images), np.array(targets)


def extract_features(images):
    feature_list = []
    for img in images:
        histogram = cv2.calcHist([img], [0], None, [256], [0, 256]).flatten()

        sobel_x = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=5).flatten()
        sobel_y = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=5).flatten()

        hog_features, _ = hog(img, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True)

        lbp = local_binary_pattern(img, P=8, R=1, method='uniform')
        lbp_histogram = np.histogram(lbp, bins=np.arange(0, 27), range=(0, 26))[0]

        combined_features = np.hstack((histogram, sobel_x, sobel_y, hog_features, lbp_histogram))
        feature_list.append(combined_features)

    return np.array(feature_list)

In [3]:
# Usage example
images, targets = load_and_process_images(dataset_paths)

In [4]:
# Splitting data
X_train, X_test, y_train, y_test = train_test_split(images, targets, test_size=0.2, random_state=100, stratify=targets)

In [5]:
# Feature extraction
X_train_features = extract_features(X_train)
X_test_features = extract_features(X_test)

In [6]:
# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_features)
X_test = scaler.transform(X_test_features)

# Dimensionality reduction with PCA
pca = PCA(n_components=200)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Balancing the data with SMOTE
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

In [7]:
# Predictions and evaluation
svm_model = SVC()
svm_model.fit(X_train_balanced, y_train_balanced)
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(classification_report(y_test, y_pred))

Accuracy: 0.8226882745471878
              precision    recall  f1-score   support

           0       0.78      0.71      0.74       100
           1       0.95      0.98      0.96       549
           2       0.53      0.60      0.56       100
           3       0.62      0.60      0.61       100
           4       0.65      0.59      0.62       100
           5       0.83      0.76      0.79       100

    accuracy                           0.82      1049
   macro avg       0.73      0.71      0.71      1049
weighted avg       0.82      0.82      0.82      1049



In [8]:

xgb = XGBClassifier()
xgb.fit(X_train_balanced, y_train_balanced)
y_pred = xgb.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(classification_report(y_test, y_pred))

Accuracy: 0.8007626310772163
              precision    recall  f1-score   support

           0       0.70      0.62      0.66       100
           1       0.95      0.97      0.96       549
           2       0.51      0.54      0.52       100
           3       0.60      0.53      0.56       100
           4       0.62      0.71      0.66       100
           5       0.74      0.68      0.71       100

    accuracy                           0.80      1049
   macro avg       0.69      0.67      0.68      1049
weighted avg       0.80      0.80      0.80      1049



In [9]:
rf_model = RandomForestClassifier()
xgb = XGBClassifier()

ensemble_model = StackingClassifier(estimators=[('rf', rf_model), ('xgb', xgb)])
ensemble_model.fit(X_train_balanced, y_train_balanced)

# Predictions and evaluation
y_pred = ensemble_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')

Accuracy: 0.8045757864632984
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.63      0.67       100
           1       0.94      0.97      0.96       549
           2       0.54      0.53      0.54       100
           3       0.60      0.53      0.56       100
           4       0.63      0.71      0.67       100
           5       0.72      0.69      0.70       100

    accuracy                           0.80      1049
   macro avg       0.69      0.68      0.68      1049
weighted avg       0.80      0.80      0.80      1049



In [12]:
import pickle

# Save models and components
model_path = os.path.join(os.getcwd(), "models")
os.makedirs(model_path, exist_ok=True)

model_data = {
    'classifier': svm_model,
    'pca': pca,
    'scaler': scaler,
    'label_encoder': list(dataset_paths.keys())
}

with open(os.path.join(model_path, "models.pkl"), 'wb') as file:
    pickle.dump(model_data, file)