# Image classifier - complete pipeline

## Approach:

`find relevant image characteristics` $\rightarrow$ `build feature-vector extraction pipeline` $\rightarrow$ `iterate through dataset`

In [5]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity
import os

# Loading and displaying images



In [None]:
class ImageProcessor:
    """Load the images of one directory and build downscaled copies of them.

    Attributes:
        directory: Path of the directory scanned by ``load_images``.
        images: List of ``(filename, image)`` tuples filled by
            ``load_images``; each image is the array returned by
            ``cv2.imread`` for that file.
        downscaled_images: List of ``(filename, image)`` tuples filled by
            ``downscale_images``, in the same order as ``images``.
    """

    def __init__(self, directory):
        self.directory = directory
        self.images = []
        self.downscaled_images = []

    def load_images(self):
        """(Re)load every decodable image found directly in ``directory``.

        Sub-directories are skipped, and so are files for which
        ``cv2.imread`` returns ``None``. Entries are visited in sorted
        filename order so that list indices are reproducible between runs
        (``os.listdir`` itself returns entries in arbitrary order).
        """
        self.images = []
        for filename in sorted(os.listdir(self.directory)):
            filepath = os.path.join(self.directory, filename)
            if not os.path.isfile(filepath):
                continue
            img = cv2.imread(filepath)
            if img is not None:
                self.images.append((filename, img))

    def downscale_images(self, factor=0.01):
        """Rebuild ``downscaled_images`` by resizing every loaded image.

        Args:
            factor: Multiplier applied to both width and height. Must be
                greater than zero.

        Raises:
            ValueError: If ``factor`` is not positive.
        """
        if factor <= 0:
            raise ValueError(f"factor must be positive, got {factor!r}.")

        self.downscaled_images = []
        for filename, img in self.images:
            height, width = img.shape[:2]
            # int() truncates towards zero, so a small factor (the default is
            # 0.01) would give a 0-pixel side for images under 100 px and make
            # cv2.resize fail; never go below one pixel per dimension.
            new_size = (
                max(1, int(width * factor)),
                max(1, int(height * factor)),
            )
            downscaled_img = cv2.resize(
                img, new_size, interpolation=cv2.INTER_AREA
            )
            self.downscaled_images.append((filename, downscaled_img))

class ImageVisualizer:
    """Static matplotlib helpers for plotting ``(filename, image)`` tuples.

    Every image is passed through ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)``
    before plotting, i.e. the arrays are treated as BGR images.
    """

    @staticmethod
    def display_image(images, img_id=None):
        """Show one image from ``images`` with its filename as the title.

        Args:
            images: Sequence of ``(filename, image)`` tuples.
            img_id: Index into ``images``. When ``None`` a random index is
                drawn.

        Raises:
            ValueError: If ``img_id`` is ``None`` and ``images`` is empty.
        """
        if img_id is None:
            if not images:
                raise ValueError("Cannot pick a random image from an empty list.")
            img_id = random.randrange(len(images))
        filename, img = images[img_id]
        plt.figure()
        plt.title(filename)
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

    @staticmethod
    def show_similar_images(original_images, similar_images_data, title="",
                            show_plot=False, save_plot=False, max_images=5):
        """Plot the first ``max_images`` entries of a similarity ranking in one row.

        Args:
            original_images: Sequence of ``(filename, image)`` tuples used to
                look up the pixels for each ranked filename.
            similar_images_data: Sequence of 4-tuples. The first element is a
                filename present in ``original_images`` and the last is a
                numeric similarity; the two middle elements are ignored here.
            title: Figure title; also used (spaces replaced by underscores)
                to build the output filename when ``save_plot`` is true.
            show_plot: Call ``plt.show()`` when true.
            save_plot: Save the figure as ``<title>_comparison.png`` when true.
            max_images: Number of columns in the figure and upper bound on
                the number of entries drawn.

        Raises:
            ValueError: If ``max_images`` is smaller than 1.
            KeyError: If a ranked filename is missing from ``original_images``.

        The figure is not closed here; callers that create many plots should
        close them with ``plt.close()`` themselves.
        """
        if max_images < 1:
            raise ValueError("max_images must be at least 1.")

        image_dict = {filename: img for filename, img in original_images}
        shown = similar_images_data[:max_images]

        # squeeze=False keeps `axes` a 2-D array even for a single column, so
        # the row can always be indexed and sliced the same way.
        fig, axes = plt.subplots(
            1, max_images, figsize=(4 * max_images, 5), squeeze=False
        )
        row = axes[0]

        for ax, (filename, _, _, similarity) in zip(row, shown):
            img = image_dict[filename]
            ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            ax.set_title(f"{filename}\nSimilarity: {similarity:.2f}")
            ax.axis('off')

        # Hide the columns that received no image so they do not show up as
        # empty frames when fewer than `max_images` results are available.
        for ax in row[len(shown):]:
            ax.axis('off')

        fig.suptitle(title)
        plt.tight_layout()
        if save_plot:
            plt.savefig(f"{title.replace(' ', '_')}_comparison.png")
        if show_plot:
            plt.show()


# Image Characteristics for feature vector

In [None]:
class FeatureExtractor:
    """
    A modular, object-oriented feature extraction class for image analysis.

    The extractor wraps one image and exposes a registry of named feature
    functions. Callers request features by name and receive a single flat
    NumPy vector in which the features appear in the order requested.

    Attributes:
    -----------
    filename : str
        The name or path of the image file (used for identification/logging purposes).
    image : np.ndarray
        The image data as a NumPy array (BGR format, typically from cv2.imread).
    feature_functions : dict
        A mapping of feature names to their corresponding extraction methods.

    Methods:
    --------
    generate_feature_vector(features: list[str]) -> np.ndarray
        Extracts the selected features from the image and returns a flattened
        1-D NumPy array. Each feature can contribute a scalar or a vector, and
        all are concatenated in the order requested.

    Registered feature names:
    -------------------------
    - "mean_intensity": mean over all values of the image array (1 value).
    - "edge_density": fraction of non-zero pixels in the Canny edge map (1 value).
    - "color_histogram": per-channel histogram, 8 bins per channel by default.
    - "extract_dominant_colors": not implemented yet (raises NotImplementedError).
    - "extract_dominant_colors_fingerprint": not implemented yet (raises
      NotImplementedError). The misspelled key
      "extract_dominant_colors_fingerpring" is kept as an alias.

    Example Usage:
    --------------
    >>> import cv2
    >>> img = cv2.imread("example.jpg")
    >>> extractor = FeatureExtractor(("example.jpg", img))
    >>> features = extractor.generate_feature_vector(
    ...     ["mean_intensity", "edge_density", "color_histogram"])
    >>> print(features.shape)  # (26,) for a 3-channel image: 1 + 1 + 3 * 8

    Adding a New Feature:
    ---------------------
    1. Define a new method following the `_feature_name(self)` naming pattern.
       The method should return a scalar or 1D/2D array-like output.

       Example:
       >>> def _texture_entropy(self):
       >>>     from skimage.measure import shannon_entropy
       >>>     return shannon_entropy(self.image)

    2. Register the new method in `self.feature_functions` inside `__init__`:
       >>> self.feature_functions["texture_entropy"] = self._texture_entropy

    3. Now, you can request "texture_entropy" as part of your feature list.
    """

    def __init__(self, image_data):
        """Store the image and build the feature registry.

        Args:
            image_data: A ``(filename, image)`` tuple.
        """
        self.filename, self.image = image_data
        self.feature_functions = {
            "extract_dominant_colors": self._extract_dominant_colors_kmeans,
            "extract_dominant_colors_fingerprint": self._extract_dominant_colors_fingerprint,
            # Misspelled key kept so that existing callers keep working.
            "extract_dominant_colors_fingerpring": self._extract_dominant_colors_fingerprint,
            "mean_intensity": self._mean_intensity,
            "edge_density": self._edge_density,
            "color_histogram": self._color_histogram,
        }

    def generate_feature_vector(self, features: list[str]) -> np.ndarray:
        """Compute the requested features and concatenate them into one vector.

        Args:
            features: Names of registered features, in the desired output order.

        Returns:
            A 1-D array holding the flattened features back to back. An empty
            request yields an empty array.

        Raises:
            ValueError: If a requested name is not in ``feature_functions``.
            NotImplementedError: If a requested feature is registered but its
                extractor is still a stub.
        """
        names = list(features)

        # Validate every name up front so nothing is computed for a request
        # that is going to fail anyway.
        for feature_name in names:
            if feature_name not in self.feature_functions:
                raise ValueError(f"Feature '{feature_name}' not implemented.")

        if not names:
            # np.concatenate refuses an empty sequence.
            return np.empty(0)

        # np.ravel turns scalars and N-D outputs alike into flat 1-D pieces.
        extracted = [np.ravel(self.feature_functions[name]()) for name in names]
        return np.concatenate(extracted)

    # ===== feature extraction methods =====

    def _mean_intensity(self):
        """Return the mean over all values of the image array."""
        return np.mean(self.image)

    def _edge_density(self):
        """Return the fraction of pixels marked as edges by Canny (thresholds 100/200)."""
        edges = cv2.Canny(self.image, 100, 200)
        return np.sum(edges > 0) / edges.size

    def _color_histogram(self, bins=8):
        """Return the normalized per-channel histograms, concatenated.

        Args:
            bins: Number of histogram bins per channel over the range [0, 256).
        """
        chans = cv2.split(self.image)
        features = []
        for chan in chans:
            hist = cv2.calcHist([chan], [0], None, [bins], [0, 256])
            # Normalize each channel on its own before flattening.
            hist = cv2.normalize(hist, hist).flatten()
            features.extend(hist)
        return np.array(features)

    # The extractors below are placeholders. They raise instead of returning
    # None, because np.ravel(None) would silently put an object-dtype entry
    # into the feature vector.

    def _extract_dominant_colors_kmeans(self):
        """Placeholder for a k-means based dominant-color feature."""
        raise NotImplementedError(
            "Feature 'extract_dominant_colors' is registered but not implemented yet."
        )

    def _extract_dominant_colors_fingerprint(self):
        """Placeholder for a dominant-color fingerprint feature."""
        raise NotImplementedError(
            "Feature 'extract_dominant_colors_fingerprint' is registered but not implemented yet."
        )

    def _fft_fingerprinting(self):
        """Placeholder for an FFT based fingerprint; not registered in ``feature_functions``."""
        raise NotImplementedError("FFT fingerprinting is not implemented yet.")
   
