# In [2]:  (Jupyter notebook cell marker left over from the export)
import cv2
import cvlib as cv
import pandas as pd
from sklearn.cluster import KMeans
import numpy as np
import os

# Detect apples in an image, cluster them into two colour groups (red / yellow)
# by the mean BGR colour of each bounding box, and export every apple crop as
# '<colour><n>.jpg' into the current working directory.

# Replace with the actual path to your image
image_path = 'C:/Users/purna/Downloads/classify.jpeg'

# Load the image using cv2. cv2.imread does not raise on a missing/unreadable
# file, it returns None, so fail early with a clear message instead of crashing
# later inside the detector.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image at '{image_path}'")

# Use cvlib to detect objects in the image with confidence level set to 0.1
bbox, label, conf = cv.detect_common_objects(image, confidence=0.1)

# Find the indices of bounding boxes corresponding to apples
apple_indices = [index for index, obj in enumerate(label) if obj == 'apple']

# Mean BGR colour and (clipped) bounding box coordinates of each apple
apple_data = []

img_height, img_width = image.shape[:2]

# The first detected apple is deliberately skipped, hence the [1:] slice.
for apple_index in apple_indices[1:]:
    # Clip the bounding box to the image so the crop never indexes outside it
    x1, y1, x2, y2 = bbox[apple_index]
    x1 = max(x1, 0)
    y1 = max(y1, 0)
    x2 = min(x2, img_width)
    y2 = min(y2, img_height)

    # A box that is empty after clipping has no pixels to average or export
    if x2 <= x1 or y2 <= y1:
        continue

    apple_crop = image[y1:y2, x1:x2]

    # Calculate the average colour of the apple (drop the 4th, unused channel)
    average_color = cv2.mean(apple_crop)[:3]

    apple_data.append({
        'B': average_color[0],
        'G': average_color[1],
        'R': average_color[2],
        'x1': x1,
        'y1': y1,
        'x2': x2,
        'y2': y2
    })

# KMeans with two clusters needs at least two samples, so check the number of
# usable apples (after skipping the first one), not the number of detections.
if len(apple_data) >= 2:
    # Create a DataFrame from the list of apple data
    apple_colors_df = pd.DataFrame(apple_data)

    # Perform KMeans clustering on the BGR values of the apples with fixed random state.
    # After several tests it proved hard to distinguish green apples based on
    # RGB values alone, hence only 2 clusters are used here (red and yellow).
    # n_init is set explicitly: it keeps the historical behaviour (10 restarts)
    # and silences the scikit-learn FutureWarning about the changing default.
    kmeans = KMeans(n_clusters=2, n_init=10, random_state=10)
    apple_colors_df['Cluster'] = kmeans.fit_predict(apple_colors_df[['B', 'G', 'R']])

    # Find the centroid of each cluster
    cluster_centroids = kmeans.cluster_centers_

    # Order the clusters by the overall (summed) channel intensity of their centroid
    cluster_order = cluster_centroids.sum(axis=1).argsort()
    cluster_centroids_sorted = cluster_centroids[cluster_order]

    # KMeans label indices are arbitrary, so the colour names are derived from
    # the centroids instead of being tied to label 0 / 1. Heuristic: the cluster
    # with the brighter centroid is treated as yellow, the darker one as red.
    red_cluster, yellow_cluster = cluster_order

    # Define the conditions and corresponding values for the new column
    conditions = [
        (apple_colors_df['Cluster'] == yellow_cluster),
        (apple_colors_df['Cluster'] == red_cluster)
    ]

    values = ['YELLOW', 'RED']

    # Use numpy.select to create the new column 'Cluster_Label'. A string
    # default keeps the result dtype consistent with the string choices; it is
    # never actually used because every row belongs to one of the two clusters.
    apple_colors_df['Cluster_Label'] = np.select(conditions, values, default='')

    # Sort by label; a stable sort keeps detection order inside each colour so
    # the numbering below is deterministic.
    apple_colors_df = apple_colors_df.sort_values(by='Cluster_Label', kind='stable')

    # Add a new column 'Apple_Number' with ascending numbers for each apple in each cluster
    apple_colors_df['Apple_Number'] = apple_colors_df.groupby('Cluster_Label').cumcount() + 1

    # Export each apple to a JPG file named after its cluster label and number
    output_dir = os.getcwd()

    for row in apple_colors_df.itertuples(index=False):
        # Cast to plain ints so the values are always valid slice indices
        x1, y1, x2, y2 = int(row.x1), int(row.y1), int(row.x2), int(row.y2)
        apple_crop = image[y1:y2, x1:x2]

        # Write the cropped apple image to a file
        output_file = os.path.join(output_dir, f'{row.Cluster_Label.lower()}{row.Apple_Number}.jpg')
        # cv2.imwrite reports failure through its return value, not an exception
        if not cv2.imwrite(output_file, apple_crop):
            print(f"Failed to write {output_file}")

else:
    print("Not enough apples detected in the image to cluster "
          "(at least two are needed after skipping the first one).")


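# Notebook output residue (not code): presumably the tail of a scikit-learn
# KMeans warning about the default value of n_init:
#   super()._check_params_vs_input(X, default_n_init=10)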
