In [7]:
import os
import cv2
import numpy as np
import joblib
import time
from skimage.feature import hog, graycomatrix, graycoprops
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

In [9]:
# --- Configuration ---
INPUT_FOLDER = "images"  # Folder to read training images from
MODEL_PATH = "wildlife_cluster_model.pkl" # File to save the new model to

# --- Constants (MUST be identical to prediction script) ---
GRID_W = 100  # 800 / 8
GRID_H = 75   # 600 / 8
TARGET_ASPECT_RATIO = 4.0 / 3.0
LBP_RADIUS = 1
LBP_POINTS = 8 * LBP_RADIUS
# Ensure LBP config matches our manual CLBP function
assert LBP_RADIUS == 1 and LBP_POINTS == 8, "CLBP implementation is specific to R=1, P=8"


# --- Image Preprocessing ---
def process_image_for_training(path: str) -> np.ndarray | None:
    """
    Loads an image and processes it according to the strict 4:3 / 800x600 rules.
    - Enforces 4:3 aspect ratio by center-cropping.
    - Scales down large images.
    - Discards small images.
    - Returns a 3-channel HSV image.
    """
    img = cv2.imread(path)
    if img is None:
        print(f"Warning: Could not read image at {path}")
        return None
    
    h, w, _ = img.shape
    current_aspect_ratio = w / h
    
    # a. Crop to 4:3 aspect ratio if it's not already correct
    if not np.isclose(current_aspect_ratio, TARGET_ASPECT_RATIO):
        if current_aspect_ratio > TARGET_ASPECT_RATIO: # Image is wider than 4:3
            new_w = int(TARGET_ASPECT_RATIO * h)
            x_start = (w - new_w) // 2
            img = img[:, x_start:x_start + new_w]
        else: # Image is taller than 4:3
            new_h = int(w / TARGET_ASPECT_RATIO)
            y_start = (h - new_h) // 2
            img = img[y_start:y_start + new_h, :]
    
    # b. Scale down if larger than 800x600
    h, w, _ = img.shape
    if w > 800 or h > 600:
        img = cv2.resize(img, (800, 600), interpolation=cv2.INTER_AREA)

    # c. Do not scale up if smaller than 800x600 (discard)
    h, w, _ = img.shape
    if w < 800 or h < 600:
        # print(f"Info: Discarding small image: {path} ({w}x{h})")
        return None
        
    # Convert to HSV color space for feature extraction
    return cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# --- CLBP Helper Function (with dtype fix) ---
def compute_clbp_components(image: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Calculates the Completed LBP (CLBP) components: S, M, and C.
    This implementation is for R=1, P=8.
    """
    img_float = image.astype(np.float64)
    padded = np.pad(img_float, (1, 1), 'edge')
    
    center = padded[1:-1, 1:-1]
    neighbors = [
        padded[ :-2,  :-2], padded[ :-2, 1:-1], padded[ :-2, 2:  ],
        padded[1:-1, 2:  ], padded[2:  , 2:  ], padded[2:  , 1:-1],
        padded[2:  ,  :-2], padded[1:-1,  :-2]
    ]
    
    diffs = [n - center for n in neighbors]
    mags = [np.abs(d) for d in diffs]
    
    # --- FIX: Use a larger intermediate dtype (e.g., int64) to prevent overflow ---
    clbp_s = np.zeros_like(center, dtype=np.int64) 
    for i in range(LBP_POINTS):
        clbp_s += ((diffs[i] >= 0) * (2**i))
        
    mean_mag = np.mean([m.ravel() for m in mags])
    
    clbp_m = np.zeros_like(center, dtype=np.int64)
    for i in range(LBP_POINTS):
        clbp_m += ((mags[i] >= mean_mag) * (2**i))

    mean_center = np.mean(center)
    clbp_c = (center >= mean_center).astype(np.uint8)
    
    # --- FIX: Cast back to uint8 on return ---
    return clbp_s.astype(np.uint8), clbp_m.astype(np.uint8), clbp_c

# --- Feature Extraction (using CLBP + HOG) ---
def extract_features_from_cell(cell: np.ndarray) -> np.ndarray:
    """
    Extracts features from a single 100x75 3-channel HSV grid cell.
    - Extracts Color Features (mean/std) from H and S channels.
    - Extracts Texture Features (mean/std, CLBP, GLCM, HOG, Canny) from V channel.
    """
    features = []
    h, s, v = cv2.split(cell)
    
    # 1. Color Features
    features.append(np.mean(h))
    features.append(np.std(h))
    features.append(np.mean(s))
    features.append(np.std(s))

    # 2. Statistical Features (from V)
    features.append(np.mean(v))
    features.append(np.std(v))
    
    v_uint8 = v.astype(np.uint8)

    # 3. Completed LBP (CLBP) Histograms (from V channel)
    clbp_s, clbp_m, clbp_c = compute_clbp_components(v_uint8)
    
    # a. CLBP-S Histogram (Sign) - 256 bins
    hist_s, _ = np.histogram(clbp_s.ravel(), bins=np.arange(0, 257), density=True)
    features.extend(hist_s)
    
    # b. CLBP-M Histogram (Magnitude) - 256 bins
    hist_m, _ = np.histogram(clbp_m.ravel(), bins=np.arange(0, 257), density=True)
    features.extend(hist_m)
    
    # c. CLBP-C Histogram (Center) - 2 bins
    hist_c, _ = np.histogram(clbp_c.ravel(), bins=np.arange(0, 3), density=True)
    features.extend(hist_c)

    # 4. GLCM Texture Features (from V)
    glcm = graycomatrix(v_uint8, 
                        distances=[1], 
                        angles=[0, np.pi/4, np.pi/2, 3*np.pi/4],
                        levels=256, symmetric=True, normed=True)
    
    features.extend([
        np.mean(graycoprops(glcm, 'contrast')),
        np.mean(graycoprops(glcm, 'dissimilarity')),
        np.mean(graycoprops(glcm, 'homogeneity')),
        np.mean(graycoprops(glcm, 'energy')),
        np.mean(graycoprops(glcm, 'correlation'))
    ])
    
    # --- 5. NEW: HOG Features (from V channel) ---
    # We use larger cells (16x16) to keep the feature vector size manageable.
    # On a 100x75 cell, this produces a 540-feature vector.
    hog_features = hog(v_uint8, 
                       orientations=9, 
                       pixels_per_cell=(16, 16),
                       cells_per_block=(2, 2), 
                       block_norm='L2-Hys', 
                       feature_vector=True)
    features.extend(hog_features)

    # --- 6. Edge Density Feature (from V channel) ---
    edges = cv2.Canny(v_uint8, 50, 150)
    features.append(np.sum(edges > 0) / v.size)
    
    return np.array(features)

def extract_features_from_image(img: np.ndarray) -> np.ndarray:
    """
    Extracts features from all 64 cells in a single HSV image.
    """
    features_list = []
    for y in range(0, img.shape[0], GRID_H):
        for x in range(0, img.shape[1], GRID_W):
            cell = img[y:y+GRID_H, x:x+GRID_W]
            features_list.append(extract_features_from_cell(cell))
    return np.vstack(features_list)

# --- Main Training Logic ---
if __name__ == "__main__":
    
    if not os.path.exists(INPUT_FOLDER):
        print(f"Error: Input folder not found at '{INPUT_FOLDER}'")
        exit()

    print(f"Starting feature extraction from '{INPUT_FOLDER}'...")
    start_time = time.time()
    
    # 1. Find all image paths
    image_paths = [
        os.path.join(INPUT_FOLDER, f) for f in os.listdir(INPUT_FOLDER)
        if f.lower().endswith(('png', 'jpg', 'jpeg'))
    ]
    
    if not image_paths:
        print(f"Error: No images found in '{INPUT_FOLDER}'.")
        exit()
        
    print(f"Found {len(image_paths)} images.")
    
    # 2. Extract features from all images
    all_features_list = []
    for i, path in enumerate(image_paths):
        print(f"  Processing image {i+1}/{len(image_paths)}: {path}")
        processing_img = process_image_for_training(path)
        
        if processing_img is not None:
            # features shape will be (64, 1066)
            features = extract_features_from_image(processing_img)
            all_features_list.append(features)
        else:
            print(f"    -> Skipping {path} (preprocessing failed).")

    if not all_features_list:
        print("Error: No valid images were processed. Exiting.")
        exit()
        
    # Create the master data array X
    X = np.vstack(all_features_list)
    
    extraction_time = time.time() - start_time
    print(f"\nFeature extraction complete in {extraction_time:.2f}s")
    print(f"Extracted {X.shape[1]} features from {X.shape[0]} total cells.")
    
    # 3. Scale the features
    print("Scaling features...")
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # 4. Find the optimal k for KMeans
    print("Finding optimal k using Silhouette Score...")
    silhouette_scores = {}
    k_range = range(2, 8) 
    
    for k in k_range:
        print(f"  Testing k={k}...")
        kmeans_temp = KMeans(n_clusters=k, random_state=42, n_init=10)
        labels = kmeans_temp.fit_predict(X_scaled)
        
        # Sample for silhouette score if data is large
        if X_scaled.shape[0] > 10000:
            print("    (Calculating score on a sample of 10k points)")
            np.random.seed(42)
            sample_indices = np.random.choice(X_scaled.shape[0], 10000, replace=False)
            score = silhouette_score(X_scaled[sample_indices], labels[sample_indices])
        else:
            score = silhouette_score(X_scaled, labels)
            
        silhouette_scores[k] = score
        print(f"  -> Silhouette Score: {score:.4f}")

    # Get the k with the highest score
    best_k = max(silhouette_scores, key=silhouette_scores.get)
    print(f"\nBest k found: {best_k} (Score: {silhouette_scores[best_k]:.4f})")
    
    # 5. Train the final model
    print(f"Training final model with k={best_k}...")
    final_kmeans = KMeans(n_clusters=best_k, random_state=42, n_init=10)
    final_kmeans.fit(X_scaled)
    
    # 6. Save the scaler and the k-means model together
    print(f"Saving model to {MODEL_PATH}...")
    models_to_save = {
        'scaler': scaler,
        'kmeans': final_kmeans
    }
    joblib.dump(models_to_save, MODEL_PATH)
    
    print("\n--- Training Complete ---")
    print(f"Model (scaler + kmeans) saved successfully to '{MODEL_PATH}'.")
    print("You can now run the prediction script.")

Starting feature extraction from 'images'...
Found 465 images.
  Processing image 1/465: images/RIMG0051.JPG
  Processing image 2/465: images/CIMG0496(1).JPG
  Processing image 3/465: images/CIMG0569.JPG
  Processing image 4/465: images/CIMG0233.JPG
    -> Skipping images/CIMG0233.JPG (preprocessing failed).
  Processing image 5/465: images/CIMG0555.JPG
  Processing image 6/465: images/RIMG0079.JPG
  Processing image 7/465: images/CIMG0757.JPG
  Processing image 8/465: images/CIMG0067(2).JPG
  Processing image 9/465: images/IMG_20180726_154742802.jpg
  Processing image 10/465: images/CIMG0958.JPG
  Processing image 11/465: images/CIMG0179.JPG
  Processing image 12/465: images/IMG_20250709_164054571_HDR~2.jpg
  Processing image 13/465: images/CIMG0162(1).JPG
  Processing image 14/465: images/IMG_3858.JPG
    -> Skipping images/IMG_3858.JPG (preprocessing failed).
  Processing image 15/465: images/DSC02847.JPG
  Processing image 16/465: images/CIMG0186.JPG
  Processing image 17/465: imag

  ret = a @ b
  ret = a @ b
  ret = a @ b
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight


    (Calculating score on a sample of 10k points)


  ret = a @ b
  ret = a @ b
  ret = a @ b


  -> Silhouette Score: 0.1044
  Testing k=3...


  ret = a @ b
  ret = a @ b
  ret = a @ b
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight


    (Calculating score on a sample of 10k points)


  ret = a @ b
  ret = a @ b
  ret = a @ b


  -> Silhouette Score: 0.0913
  Testing k=4...


  ret = a @ b
  ret = a @ b
  ret = a @ b
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight


    (Calculating score on a sample of 10k points)


  ret = a @ b
  ret = a @ b
  ret = a @ b


  -> Silhouette Score: 0.0671
  Testing k=5...


  ret = a @ b
  ret = a @ b
  ret = a @ b
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight


    (Calculating score on a sample of 10k points)


  ret = a @ b
  ret = a @ b
  ret = a @ b


  -> Silhouette Score: 0.0631
  Testing k=6...


  ret = a @ b
  ret = a @ b
  ret = a @ b
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight


    (Calculating score on a sample of 10k points)


  ret = a @ b
  ret = a @ b
  ret = a @ b


  -> Silhouette Score: 0.0642
  Testing k=7...


  ret = a @ b
  ret = a @ b
  ret = a @ b
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight


    (Calculating score on a sample of 10k points)


  ret = a @ b
  ret = a @ b
  ret = a @ b


  -> Silhouette Score: 0.0638

Best k found: 2 (Score: 0.1044)
Training final model with k=2...


  ret = a @ b
  ret = a @ b
  ret = a @ b
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight
  current_pot = closest_dist_sq @ sample_weight


Saving model to wildlife_cluster_model.pkl...

--- Training Complete ---
Model (scaler + kmeans) saved successfully to 'wildlife_cluster_model.pkl'.
You can now run the prediction script.


In [10]:
# --- Configuration ---
MODEL_PATH = "wildlife_cluster_model.pkl"
INPUT_FOLDER = "images" # Folder to read images from
OUTPUT_FOLDER = "cluster_visualizations_3" # Folder to save visualizations to

# --- Constants (MUST be identical to training script) ---
GRID_W = 100  # 800 / 8
GRID_H = 75   # 600 / 8
TARGET_ASPECT_RATIO = 4.0 / 3.0
LBP_RADIUS = 1
LBP_POINTS = 8 * LBP_RADIUS
# Ensure LBP config matches our manual CLBP function
assert LBP_RADIUS == 1 and LBP_POINTS == 8, "CLBP implementation is specific to R=1, P=8"


# --- Processing Functions (Must be identical to training script) ---
def process_image_for_prediction(path: str):
    """
    Loads an image for prediction.
    Returns:
        - display_img (np.ndarray): 800x600 BGR image for drawing on.
        - processing_img (np.ndarray): 800x600 HSV image for feature extraction.
        - or (None, None) if processing fails.
    """
    img = cv2.imread(path)
    if img is None:
        print(f"Warning: Could not read image at {path}")
        return None, None
    
    h, w, _ = img.shape
    current_aspect_ratio = w / h
    
    # a. Crop to 4:3 aspect ratio
    if not np.isclose(current_aspect_ratio, TARGET_ASPECT_RATIO):
        if current_aspect_ratio > TARGET_ASPECT_RATIO:
            new_w = int(TARGET_ASPECT_RATIO * h)
            x_start = (w - new_w) // 2
            img = img[:, x_start:x_start + new_w]
        else:
            new_h = int(w / TARGET_ASPECT_RATIO)
            y_start = (h - new_h) // 2
            img = img[y_start:y_start + new_h, :]
    
    # --- START OF MODIFIED SECTION ---
    # This logic is now identical to the training script
    
    # b. Scale down if larger than 800x600
    h, w, _ = img.shape
    if w > 800 or h > 600:
        img = cv2.resize(img, (800, 600), interpolation=cv2.INTER_AREA)

    # c. Do not scale up if smaller than 800x600 (discard)
    h, w, _ = img.shape
    if w < 800 or h < 600:
        print(f" -> Skipping {path} (Image is < 800x600, same as training)")
        return None, None
    
    # d. Create both BGR and HSV versions
    display_img = img.copy()
    processing_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # Fixed typo
    
    # --- END OF MODIFIED SECTION ---
    
    return display_img, processing_img

# --- CLBP Helper Function (Identical to training) ---
def compute_clbp_components(image: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Calculates the Completed LBP (CLBP) components: S, M, and C.
    This implementation is for R=1, P=8.
    """
    img_float = image.astype(np.float64)
    padded = np.pad(img_float, (1, 1), 'edge')
    
    center = padded[1:-1, 1:-1]
    neighbors = [
        padded[ :-2,  :-2], padded[ :-2, 1:-1], padded[ :-2, 2:  ],
        padded[1:-1, 2:  ], padded[2:  , 2:  ], padded[2:  , 1:-1],
        padded[2:  ,  :-2], padded[1:-1,  :-2]
    ]
    
    diffs = [n - center for n in neighbors]
    mags = [np.abs(d) for d in diffs]
    
    clbp_s = np.zeros_like(center, dtype=np.int64) 
    for i in range(LBP_POINTS):
        clbp_s += ((diffs[i] >= 0) * (2**i))
        
    mean_mag = np.mean([m.ravel() for m in mags])
    
    clbp_m = np.zeros_like(center, dtype=np.int64)
    for i in range(LBP_POINTS):
        clbp_m += ((mags[i] >= mean_mag) * (2**i))

    mean_center = np.mean(center)
    clbp_c = (center >= mean_center).astype(np.uint8)
    
    return clbp_s.astype(np.uint8), clbp_m.astype(np.uint8), clbp_c


# --- Feature Extraction (MODIFIED to include HOG) ---
def extract_features_from_cell(cell: np.ndarray) -> np.ndarray:
    """
    Extracts features from a single 100x75 3-channel HSV grid cell.
    (Identical to training script)
    """
    features = []
    h, s, v = cv2.split(cell)
    
    # 1. Color Features
    features.append(np.mean(h))
    features.append(np.std(h))
    features.append(np.mean(s))
    features.append(np.std(s))
    
    # 2. Statistical Features (from V)
    features.append(np.mean(v))
    features.append(np.std(v))
    
    v_uint8 = v.astype(np.uint8)

    # 3. Completed LBP (CLBP) Histograms (from V channel)
    clbp_s, clbp_m, clbp_c = compute_clbp_components(v_uint8)
    
    hist_s, _ = np.histogram(clbp_s.ravel(), bins=np.arange(0, 257), density=True)
    features.extend(hist_s)
    
    hist_m, _ = np.histogram(clbp_m.ravel(), bins=np.arange(0, 257), density=True)
    features.extend(hist_m)
    
    hist_c, _ = np.histogram(clbp_c.ravel(), bins=np.arange(0, 3), density=True)
    features.extend(hist_c)
    
    # 4. GLCM Texture Features (from V)
    glcm = graycomatrix(v_uint8, distances=[1], 
                        angles=[0, np.pi/4, np.pi/2, 3*np.pi/4],
                        levels=256, symmetric=True, normed=True)
    
    features.extend([
        np.mean(graycoprops(glcm, 'contrast')),
        np.mean(graycoprops(glcm, 'dissimilarity')),
        np.mean(graycoprops(glcm, 'homogeneity')),
        np.mean(graycoprops(glcm, 'energy')),
        np.mean(graycoprops(glcm, 'correlation'))
    ])
    
    # --- 5. HOG Features (from V channel) ---
    # This section is added to match the training script
    hog_features = hog(v_uint8, 
                       orientations=9, 
                       pixels_per_cell=(16, 16),
                       cells_per_block=(2, 2), 
                       block_norm='L2-Hys', 
                       feature_vector=True)
    features.extend(hog_features)

    # --- 6. Canny Edge (from V) ---
    edges = cv2.Canny(v_uint8, 50, 150)
    features.append(np.sum(edges > 0) / v.size)
    
    return np.array(features)

def extract_features_from_image(img: np.ndarray) -> np.ndarray:
    """
    Extracts features from all 64 cells in a single HSV image.
    """
    features_list = []
    for y in range(0, img.shape[0], GRID_H):
        for x in range(0, img.shape[1], GRID_W):
            cell = img[y:y+GRID_H, x:x+GRID_W]
            features_list.append(extract_features_from_cell(cell))
    return np.vstack(features_list)

# --- Main Visualization Logic (Unchanged) ---
if __name__ == "__main__":
    
    # 1. Check if model and image folder exist
    if not os.path.exists(MODEL_PATH):
        print(f"Error: Model file not found at '{MODEL_PATH}'")
        print("Please run the training script first.")
        exit()
        
    if not os.path.exists(INPUT_FOLDER):
        print(f"Error: Input folder not found at '{INPUT_FOLDER}'")
        exit()

    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    print(f"Saving visualizations to '{OUTPUT_FOLDER}'")

    print("Loading models...")
    # 2. Load the trained scaler and kmeans model
    models = joblib.load(MODEL_PATH)
    scaler = models['scaler']
    kmeans = models['kmeans']
    k = kmeans.n_clusters
    print(f"Successfully loaded model with k={k} clusters.")

    # 3. Generate stable, random colors for each cluster
    np.random.seed(42) 
    cluster_colors = np.random.randint(0, 256, (k, 3), dtype=np.uint8)
    cluster_colors_list = [tuple(map(int, color)) for color in cluster_colors]
    
    # 4. Find all images in the input folder
    image_paths = [
        os.path.join(INPUT_FOLDER, f) for f in os.listdir(INPUT_FOLDER) 
        if f.lower().endswith(('png', 'jpg', 'jpeg'))
    ]
    
    if not image_paths:
        print(f"Error: No images found in '{INPUT_FOLDER}'.")
        exit()
        
    print(f"Found {len(image_paths)} images to process...")

    # 5. Loop over each image path and process it
    for image_path in image_paths:
        print(f"Processing: {image_path}")
        
        # a. Process the test image
        # This will now return (None, None) for small images
        display_image, processing_image = process_image_for_prediction(image_path)
        
        # This 'if' statement now correctly skips the bad images
        if display_image is None:
            continue

        # b. Extract features and predict clusters
        features = extract_features_from_image(processing_image)
        scaled_features = scaler.transform(features)
        labels = kmeans.predict(scaled_features) 

        # c. Create the visualization
        overlay = display_image.copy()
        output_image = display_image.copy()
        
        i = 0 
        for y in range(0, display_image.shape[0], GRID_H):
            for x in range(0, display_image.shape[1], GRID_W):
                cluster_id = labels[i]
                color = cluster_colors_list[cluster_id]
                
                cv2.rectangle(overlay, (x, y), (x + GRID_W, y + GRID_H), color, -1)
                cv2.rectangle(output_image, (x, y), (x + GRID_W, y + GRID_H), (0, 0, 0), 1)
                
                text = str(cluster_id)
                text_pos = (x + 10, y + 40)
                cv2.putText(output_image, text, text_pos, 
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3, cv2.LINE_AA)
                cv2.putText(output_image, text, text_pos, 
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                
                i += 1

        # d. Blend the overlay with the output image
        alpha = 0.6  
        cv2.addWeighted(overlay, alpha, output_image, 1 - alpha, 0, output_image)

        # e. Save the final image
        base_filename = os.path.basename(image_path)
        name, ext = os.path.splitext(base_filename)
        output_path = os.path.join(OUTPUT_FOLDER, f"{name}_clustered{ext}")
        
        cv2.imwrite(output_path, output_image)
    
    print("\n--- Done ---")
    print(f"All visualizations saved to: {OUTPUT_FOLDER}")

Saving visualizations to 'cluster_visualizations_3'
Loading models...
Successfully loaded model with k=2 clusters.
Found 465 images to process...
Processing: images/RIMG0051.JPG
Processing: images/CIMG0496(1).JPG
Processing: images/CIMG0569.JPG
Processing: images/CIMG0233.JPG
 -> Skipping images/CIMG0233.JPG (Image is < 800x600, same as training)
Processing: images/CIMG0555.JPG
Processing: images/RIMG0079.JPG
Processing: images/CIMG0757.JPG
Processing: images/CIMG0067(2).JPG
Processing: images/IMG_20180726_154742802.jpg
Processing: images/CIMG0958.JPG
Processing: images/CIMG0179.JPG
Processing: images/IMG_20250709_164054571_HDR~2.jpg
Processing: images/CIMG0162(1).JPG
Processing: images/IMG_3858.JPG
 -> Skipping images/IMG_3858.JPG (Image is < 800x600, same as training)
Processing: images/DSC02847.JPG
Processing: images/CIMG0186.JPG
Processing: images/CIMG0192.JPG
Processing: images/p12.jpg
Processing: images/IMG_3332.JPG
 -> Skipping images/IMG_3332.JPG (Image is < 800x600, same as tr