## Third Approach
- Extract Features from PCA, Extract Features from histogram_equalization, grayscale_transformation, edge_detection, hough_transform
- Extract Colour Intensity
- Merge them and Again pass it to pca
- Train the model from the PCA Features then do the prediction

In [2]:
import os
import cv2
import numpy as np

In [3]:
# Define paths to datasets
dataset_paths = os.getcwd() + "/dataset_1/dataset_full"
dataset_paths = {
    "Building": os.path.join(dataset_paths, "Building"),
    "Forest": os.path.join(dataset_paths, "Forest"),
    "Glacier": os.path.join(dataset_paths, "Glacier"),
    "Mountain": os.path.join(dataset_paths, "Mountains"),
    "Sea": os.path.join(dataset_paths, "Sea"),
    "Street": os.path.join(dataset_paths, "Streets")
}

def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:
            img = cv2.resize(img, (128, 128))  # Resize image to a fixed size
            images.append(img)
    return images

def extract_color_intensity(image):
    # Calculate the mean color intensity for each channel (R, G, B)
    mean_intensity = image.mean(axis=(0, 1))
    return mean_intensity

In [None]:
# Load images from all categories
data = []
labels = []
color_intensity_features = []

for label, folder in dataset_paths.items():
    images = load_images_from_folder(folder)
    data.extend(images)
    labels.extend([label] * len(images))
    color_intensity_features.extend([extract_color_intensity(img) for img in images])

In [None]:
data = np.array(data)
labels = np.array(labels)
color_intensity_features = np.array(color_intensity_features)

In [None]:
# Flatten the images to create feature vectors
data_flattened = data.reshape((data.shape[0], -1))

In [None]:
from sklearn.decomposition import PCA

# Apply PCA to reduce dimensionality
pca = PCA(n_components=7)  # Adjust the number of components based on your needs
data_reduced = pca.fit_transform(data_flattened)


In [None]:
import os
import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

In [None]:

# Feature extraction functions
def histogram_equalization(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    equalized = cv2.equalizeHist(gray)
    return equalized

def grayscale_transformation(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

def image_smoothing(image):
    return cv2.GaussianBlur(image, (5, 5), 0)

def edge_detection(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=5)
    canny = cv2.Canny(gray, 100, 200)
    return sobelx, canny

def hough_transform(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi/180, 200)
    if lines is not None:
        lines = lines.flatten()
    else:
        lines = np.zeros((1,))
    return lines

def sift_features(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    if descriptors is not None:
        descriptors = descriptors.flatten()
    else:
        descriptors = np.zeros((128,))
    return descriptors

def extract_features(image):
    features = []
    equalized = histogram_equalization(image)
    features.append(equalized.flatten())
    
    gray = grayscale_transformation(image)
    features.append(gray.flatten())
    
    smooth = image_smoothing(image)
    features.append(smooth.flatten())
    
    sobelx, canny = edge_detection(image)
    features.append(sobelx.flatten())
    features.append(canny.flatten())
    
    lines = hough_transform(image)
    features.append(lines.flatten())
    
    descriptors = sift_features(image)
    features.append(descriptors.flatten())
    
    # Pad or truncate each feature vector to a fixed length (e.g., 2000)
    max_length = 2000
    padded_features = []
    for feature in features:
        if feature.shape[0] > max_length:
            feature = feature[:max_length]
        else:
            feature = np.pad(feature, (0, max_length - feature.shape[0]), 'constant')
        padded_features.append(feature)
    
    return np.concatenate(padded_features)

# apply pca function
def apply_pca(features, n_components):
    pca = PCA(n_components=n_components)
    return pca.fit_transform(features)


In [None]:
# Load dataset
images = []
labels = []
label_map = {0: "Building", 1: "Forest", 2: "Glacier", 3: "Mountain", 4: "Sea", 5: "Street"}

for label, class_name in enumerate(label_map.values()):
    class_dir = dataset_paths[class_name]
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        image = cv2.imread(file_path)
        if image is not None:
            images.append(image)
            labels.append(label)

In [None]:
# Extract features for all images
features = np.array([extract_features(image) for image in images])

In [None]:
data_reduced.shape

(5245, 7)

In [None]:
features.shape

(5245, 14000)

In [None]:
combined_features = np.hstack((data_reduced, features))

In [None]:
combined_pca = apply_pca(combined_features, n_components=15)

In [None]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [None]:
# Train a classification model
model = SVC()
model.fit(X_train, y_train)

In [None]:
# Make predictions
y_pred = model.predict(X_test)

In [None]:
# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_map.values()))

Accuracy: 0.6034318398474738
Classification Report:
              precision    recall  f1-score   support

    Building       0.50      0.06      0.11       112
      Forest       0.77      0.95      0.85       539
     Glacier       0.20      0.04      0.07        94
    Mountain       0.34      0.61      0.43       107
         Sea       0.24      0.40      0.30        91
      Street       1.00      0.07      0.12       106

    accuracy                           0.60      1049
   macro avg       0.51      0.35      0.32      1049
weighted avg       0.62      0.60      0.54      1049

