<a href="https://colab.research.google.com/github/islemaiouni/Advanced-Feature-Engineering/blob/main/HR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# === Step 1: Create the dataset folder structure ===
import os
from google.colab import files  # Import files module from google.colab to allow file uploads

# Root of the dataset tree inside the Colab runtime
base_dir = "/content/dataset/dataset-20250309T110448Z-001/dataset"

# One directory per (split, class) pair, generated in the order
# train/green, train/city, test/green, test/city
train_green_dir, train_city_dir, test_green_dir, test_city_dir = (
    os.path.join(base_dir, split, label)
    for split in ("train", "test")
    for label in ("green", "city")
)

# Make sure all four directories exist; existing ones are left untouched
for directory in (train_green_dir, train_city_dir, test_green_dir, test_city_dir):
    os.makedirs(directory, exist_ok=True)

# Show the (initially empty or pre-populated) content of each directory
print("Folder structure created:")
for heading, directory in (
    ("Train/green:", train_green_dir),
    ("Train/city :", train_city_dir),
    ("Test/green :", test_green_dir),
    ("Test/city  :", test_city_dir),
):
    print(heading, os.listdir(directory))

# === Step 2: Upload files to each subfolder ===
def _save_uploads(uploaded, target_dir):
    """Write every entry of `uploaded` (file name -> file content) into target_dir."""
    for name, payload in uploaded.items():
        # Content is written in binary mode, exactly as it was received
        with open(os.path.join(target_dir, name), 'wb') as out_file:
            out_file.write(payload)


# train/green: prompt, upload, store
print("Please upload files for the train/green folder (e.g., green-001, green-002, …, green-133)")
uploaded_train_green = files.upload()
_save_uploads(uploaded_train_green, train_green_dir)
print("Files for train/green uploaded.")

# train/city: prompt, upload, store
print("Please upload files for the train/city folder (e.g., city-001, …, city-140)")
uploaded_train_city = files.upload()
_save_uploads(uploaded_train_city, train_city_dir)
print("Files for train/city uploaded.")

# test/green: prompt, upload, store
print("Please upload files for the test/green folder (e.g., green-test-001, …, green-test-030)")
uploaded_test_green = files.upload()
_save_uploads(uploaded_test_green, test_green_dir)
print("Files for test/green uploaded.")

# test/city: prompt, upload, store
print("Please upload files for the test/city folder (e.g., city-test-001, …, city-test-030)")
uploaded_test_city = files.upload()
_save_uploads(uploaded_test_city, test_city_dir)
print("Files for test/city uploaded.")

# Final check: list what each directory now contains
print("\nContents of train/green folder:", os.listdir(train_green_dir))
print("Contents of train/city folder :", os.listdir(train_city_dir))
print("Contents of test/green folder  :", os.listdir(test_green_dir))
print("Contents of test/city folder   :", os.listdir(test_city_dir))

# === Step 3: Load the dataset, extract LBP features, and train the models ===
import numpy as np
import cv2  # Import OpenCV for image processing
import matplotlib.pyplot as plt
from skimage.feature import local_binary_pattern  # Import LBP function from skimage
from sklearn.model_selection import cross_val_score, StratifiedKFold  # For cross-validation
from sklearn.svm import SVC  # Support Vector Machine classifier
from sklearn.metrics import classification_report, confusion_matrix  # Evaluation metrics
from tensorflow.keras.models import Sequential  # For building the ANN model
from tensorflow.keras.layers import Dense, Input  # Layers for the ANN model
from tensorflow.keras.optimizers import Adam  # Optimizer for training the ANN

# Root folders of the two dataset splits
TRAIN_PATH, TEST_PATH = (os.path.join(base_dir, split) for split in ("train", "test"))

print("\nTraining path:", TRAIN_PATH)
print("Test path    :", TEST_PATH)

# Class names; each entry must be spelled exactly like its sub-folder
CATEGORIES = ["green", "city"]

# LBP feature-extraction settings
LBP_RADIUS = 1                   # Neighbourhood radius
LBP_POINTS = LBP_RADIUS * 8      # Sampling points on that radius
N_BINS = 256                     # Histogram bins per channel -> 3 * 256 = 768 features per image

def extract_lbp_histogram(image, method='uniform', n_bins=None):
    """
    Extracts the concatenated, per-channel LBP histogram of an image.

    Each of the first three channels is converted to an LBP code image
    (P=LBP_POINTS, R=LBP_RADIUS), histogrammed over [0, n_bins) and
    normalized; the three histograms are then concatenated.

    Parameters:
        image: Input RGB image, indexed as image[:, :, c] for c in 0..2.
        method: LBP variant forwarded to skimage's local_binary_pattern.
            Defaults to 'uniform', the value that used to be hard-coded.
        n_bins: Number of histogram bins per channel. None (the default)
            means the module-level N_BINS.
    Returns:
        numpy array: Normalized LBP histogram vector of length 3 * n_bins
        (768 with the default settings).

    Note:
        With method='uniform' and P sampling points only P + 2 distinct
        codes occur, so with the defaults (P=8, 256 bins) at most the first
        10 bins of every channel can be non-zero. Pass method='default'
        (codes 0 .. 2**P - 1) or n_bins=LBP_POINTS + 2 to obtain a dense
        descriptor. The defaults are left untouched so that existing
        feature vectors keep their shape and content.
    """
    if n_bins is None:
        n_bins = N_BINS  # Resolved at call time so the module constant stays the single source

    channel_hists = []
    for channel in range(3):  # R, G, B
        # LBP code image of the current channel
        lbp = local_binary_pattern(image[:, :, channel], P=LBP_POINTS, R=LBP_RADIUS, method=method)
        # Count the codes in n_bins equal-width bins over [0, n_bins)
        hist, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins))
        # Normalize to (approximately) unit sum; the epsilon guards against division by zero
        channel_hists.append(hist.astype("float") / (hist.sum() + 1e-6))
    return np.concatenate(channel_hists)

def load_dataset(folder):
    """
    Loads images from a given folder (train or test) and extracts their LBP features.

    The folder must contain one sub-folder per entry of CATEGORIES; the index
    of a category in that list is used as its numeric label. Files are
    processed in sorted name order so that the sample order (and therefore
    any seeded shuffling or splitting done later) is reproducible across
    runs and file systems; os.listdir alone returns entries in arbitrary order.

    Parameters:
        folder: The path to the dataset folder (either train or test).
    Returns:
        X: numpy array containing the feature vectors.
        y: numpy array containing the corresponding labels.
    Raises:
        FileNotFoundError: If a category sub-folder does not exist.
    """
    X = []  # Feature vectors
    y = []  # Labels
    skipped = 0  # Entries that OpenCV could not decode
    for label, category in enumerate(CATEGORIES):
        category_folder = os.path.join(folder, category)
        if not os.path.isdir(category_folder):
            raise FileNotFoundError(f"Folder not found: {category_folder}")
        # Sorted for a deterministic sample order
        for file_name in sorted(os.listdir(category_folder)):
            image = cv2.imread(os.path.join(category_folder, file_name))
            if image is None:
                # Not a readable image: skip it, but keep count so it is not lost silently
                skipped += 1
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; features expect RGB
            X.append(extract_lbp_histogram(image))
            y.append(label)
    if skipped:
        print(f"Skipped {skipped} unreadable file(s) under {folder}")
    return np.array(X), np.array(y)

# Build the training feature matrix / label vector and report its size
print("\nLoading training data from:", TRAIN_PATH)
X_train, y_train = load_dataset(TRAIN_PATH)
print("Number of training samples:", len(y_train))

# Same for the test split
print("\nLoading test data from:", TEST_PATH)
X_test, y_test = load_dataset(TEST_PATH)
print("Number of test samples:", len(y_test))

# === Train and evaluate the SVM model ===
# RBF-kernel SVM (switch the kernel to 'sigmoid' if desired)
svm = SVC(kernel='rbf', probability=True)
# Shuffled, stratified 5-fold splitter with a fixed seed
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# Cross-validated accuracy on the training split only
scores_svm = cross_val_score(svm, X_train, y_train, cv=kfold, scoring='accuracy')
mean_accuracy_pct = scores_svm.mean()*100
std_accuracy_pct = scores_svm.std()*100
print("\nAverage SVM accuracy (5-fold): {:.2f}% ± {:.2f}%".format(mean_accuracy_pct, std_accuracy_pct))

# Refit on the whole training split, then score the held-out test split
svm.fit(X_train, y_train)
svm_predictions = svm.predict(X_test)
for heading, metric_fn in (
    ("\nSVM Classification Report:", classification_report),
    ("SVM Confusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric_fn(y_test, svm_predictions))

# === Build, train, and evaluate the ANN model ===
def build_ann(input_dim):
    """
    Creates and compiles a small fully connected binary classifier.

    Architecture:
      - Input of size 'input_dim'
      - Dense(64, ReLU)
      - Dense(32, ReLU)
      - Dense(1, sigmoid) producing the probability of the positive class

    Parameters:
        input_dim: Number of features in the input.
    Returns:
        model: A compiled Keras model (Adam with learning rate 0.001,
        binary cross-entropy loss, accuracy metric).
    """
    # Assemble the layer stack: input, two ReLU hidden layers, sigmoid output
    stack = [Input(shape=(input_dim,))]
    stack += [Dense(units, activation='relu') for units in (64, 32)]
    stack.append(Dense(1, activation='sigmoid'))

    model = Sequential(stack)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )
    return model

# Size the network from the feature dimension of the training matrix
n_features = X_train.shape[1]
ann = build_ann(n_features)

# Fit for 50 epochs with batches of 32.
# NOTE: the held-out test split is also passed as validation data, so the
# validation curves and the final test report describe the same samples.
fit_options = dict(
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    verbose=1,
)
history = ann.fit(X_train, y_train, **fit_options)

# Turn the sigmoid outputs into hard 0/1 labels with a 0.5 threshold
test_probabilities = ann.predict(X_test)
ann_predictions = (test_probabilities > 0.5).astype(int)

# Report the test-split performance of the ANN
for heading, metric_fn in (
    ("\nANN Classification Report:", classification_report),
    ("ANN Confusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric_fn(y_test, ann_predictions))

# === Plot the training curves for the ANN model ===
plt.figure(figsize=(12, 5))

# One panel per metric: (train key, val key, train label, val label, y-axis label, title)
panels = (
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     "Accuracy", "Training/Validation Accuracy"),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
     "Loss", "Training/Validation Loss"),
)
for position, (train_key, val_key, train_label, val_label, y_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)  # Left panel: accuracy, right panel: loss
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.xlabel("Epochs")
    plt.ylabel(y_label)
    plt.legend()
    plt.title(title)

plt.tight_layout()  # Keep the two panels from overlapping
plt.show()  # Render the figure
