<a href="https://colab.research.google.com/github/eglantinc/image-classification/blob/main/feature_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import csv
import os

import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input as preprocess_vgg
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input as preprocess_inception
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input as preprocess_resnet
from pathlib import Path
import kagglehub



In [5]:
def choose_model(chosen_model):
    """
    Chooses the pre-trained Keras model based on the provided name.

    Every model is built with ImageNet weights, without its classification
    head (``include_top=False``) and with global average pooling
    (``pooling='avg'``).

    Note:
        The name 'alexnet' is kept for backward compatibility, but that
        branch builds ResNet50 (with the ResNet50 preprocessing function),
        not AlexNet.

    Args:
        chosen_model (str): Name of the model to use ('vgg16', 'alexnet',
            'inceptionv3'). Matching is case-insensitive.

    Returns:
        tuple: Keras model, target image size, and preprocessing function.
    Raises:
        ValueError: If the model name is not supported.
    """
    # Normalise the name once instead of once per branch.
    name = chosen_model.lower()

    if name == "alexnet":
        # Legacy key: callers still pass 'alexnet', but the network that is
        # actually built is ResNet50, so the message says so.
        print("Using ResNet50 model (selected with the 'alexnet' name)...")
        model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
        target_size = (224, 224)
        preprocess_fn = preprocess_resnet
    elif name == "vgg16":
        print("Using VGG16 model...")
        model = VGG16(weights='imagenet', include_top=False, pooling='avg')
        target_size = (224, 224)
        preprocess_fn = preprocess_vgg
    elif name == "inceptionv3":
        print("Using InceptionV3 model...")
        model = InceptionV3(weights='imagenet', include_top=False, pooling='avg')
        target_size = (299, 299)
        preprocess_fn = preprocess_inception
    else:
        raise ValueError("Unsupported model. Choose from 'vgg16', 'alexnet', or 'inceptionv3'.")
    return model, target_size, preprocess_fn

In [6]:
def extract_features(image_path, target_size, model, preprocess_fn):
    """
    Computes the feature vector of a single image.

    The image at 'image_path' is loaded and resized to 'target_size',
    converted to a NumPy array, given a leading batch axis, run through
    'preprocess_fn', and finally passed to 'model.predict'.

    Args:
        image_path (str): Path to the image.
        target_size (tuple): Target size (height, width) to resize the image.
        model (keras.Model): Pre-trained model to extract features.
        preprocess_fn (function): Preprocessing function specific to the model.
    Returns:
        numpy.ndarray: Extracted feature vector (flattened to 1D).
    """
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    # The model works on batches, so wrap the single image in a batch of one.
    batch = preprocess_fn(np.expand_dims(pixels, axis=0))
    prediction = model.predict(batch)
    return prediction.flatten()

In [7]:
def create_vector(image_path, target_size, model, preprocess_fn):
    """
    Builds one labelled row for an image: its features followed by its label.

    The label is the name of the folder that directly contains the image
    (the second-to-last component of 'image_path').

    Args:
        image_path (str): Path to the image.
        target_size (tuple): Target size (height, width) to resize the image.
        model (keras.Model): Pre-trained model to extract features.
        preprocess_fn (function): Preprocessing function specific to the model.

    Returns:
        list: feature vector with the label (folder name) appended at the end.
    """
    row = extract_features(image_path, target_size, model, preprocess_fn).tolist()
    label = Path(image_path).parts[-2]
    row.append(label)
    return row

In [16]:
def _write_features_csv(csv_file_path, rows):
    """Writes 'rows' to 'csv_file_path', preceded by a generated header row."""
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        # Every row ends with its label, so the feature count is len(row) - 1.
        header = [f"feature_{i + 1}" for i in range(len(rows[0]) - 1)] + ["label"]
        writer.writerow(header)
        writer.writerows(rows)


def main(all_models=("alexnet", "inceptionv3", "vgg16"), dataset_dir="Dataset",
         max_images_per_folder=200):
    """
    Extracts image features with each model and writes one CSV file per model.

    The Kaggle dataset is downloaded once, its 'train' folder is located, and
    for every model the images of the 'fire', 'non fire' and 'Smoke' folders
    are turned into labelled feature rows. The rows of a model are written to
    '<dataset_dir>/<model_name>.csv' once all folders have been processed.

    Args:
        all_models (iterable of str): Model names accepted by 'choose_model'.
        dataset_dir (str): Output directory for the CSV files; created if it
            doesn't exist.
        max_images_per_folder (int or None): Maximum number of images taken
            from each class folder (non-negative), or None for no limit.
            Files are taken in sorted order so the selection is reproducible.

    Raises:
        FileNotFoundError: If the downloaded dataset has no 'train' folder.
    """
    # Create the Dataset directory if it doesn't exist
    os.makedirs(dataset_dir, exist_ok=True)

    # Download latest version. The dataset doesn't depend on the model, so it
    # is fetched and located once rather than once per model.
    parent_folder = kagglehub.dataset_download("amerzishminha/forest-fire-smoke-and-non-fire-image-dataset")

    subfolder_name = "train"
    subfolder_path = next(Path(parent_folder).rglob(subfolder_name), None)
    if subfolder_path is None:
        # Fail with a clear error instead of leaking a bare StopIteration.
        raise FileNotFoundError(f"No '{subfolder_name}' folder found under {parent_folder}")
    print(subfolder_path)

    wildfire_path = [subfolder_path / "fire", subfolder_path / "non fire", subfolder_path / "Smoke"]
    separator = "-" * 1000

    for model_name in all_models:
        print(f"Processing with model: {model_name}")
        model, target_size, preprocess_fn = choose_model(model_name)
        print(separator)

        features = []
        for folder in wildfire_path:
            print("Processing folder:", folder)
            # iterdir() yields entries in arbitrary order; sorting makes the
            # "first N images" selection deterministic. Sub-directories are
            # skipped because they can't be loaded as images.
            image_files = sorted(entry for entry in folder.iterdir() if entry.is_file())

            # Slicing with None keeps every file (no limit).
            for image_file in image_files[:max_images_per_folder]:
                print(f"Processing of image: {image_file.name}")
                features_vector = create_vector(str(image_file), target_size, model, preprocess_fn)
                print(f"Feature vector for {image_file.name}: {features_vector[:5]} ... {features_vector[-5:]}")
                features.append(features_vector)
                print(separator)

            if max_images_per_folder is not None and len(image_files) >= max_images_per_folder:
                print(f"Reached the limit of {max_images_per_folder} images. Exiting the loop.")

        if not features:
            # Nothing to derive a header from; don't create an empty CSV.
            print(f"No images found for {model_name}; skipping CSV export.")
            continue

        # Write the CSV once per model instead of rewriting the whole file
        # after every single image.
        csv_file_path = f"{dataset_dir}/{model_name}.csv"
        print(f"Writing features to {csv_file_path}...")
        _write_features_csv(csv_file_path, features)
        print(f"Features written to {csv_file_path}")


In [None]:
# Run the pipeline only when this file is executed directly (script or
# notebook cell), so importing its functions doesn't trigger the dataset
# download and feature extraction.
if __name__ == "__main__":
    main()