# Image Color Analysis with KMeans Clustering
This notebook extracts dominant colors from images using KMeans clustering, making it useful for various projects involving color analysis.
## Instructions
- Update the `folder_path` with the directory containing your images.
- Adjust the `output_file_path` if you want to save the results to a different location.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import cv2
import os
from sklearn.cluster import KMeans


## Define Folder Path
Specify the folder where your images are located. The output file path for saving the results is set in the final cell, just before the CSV is written.

In [None]:
# Directory that is scanned for input images. Replace this placeholder with a
# real path before running the notebook: the processing cell lists this folder
# with os.listdir (top level only) and picks up .png, .jpg and .jpeg files.
folder_path = 'path/to/your/image/folder'

## Function to Extract Top Colors
This function extracts the top dominant colors from an image using KMeans clustering.

In [None]:
# Define a function to extract the top dominant colors from an image
def get_top_colors(image_path, num_colors=5):
    """
    Extract the top dominant colors from an image using KMeans clustering.

    Every pixel of the image is treated as one 3-channel sample, the samples
    are clustered, and each non-empty cluster is reported as one color (the
    cluster center) together with the share of pixels assigned to it.

    Parameters:
    - image_path (str): The path to the image file.
    - num_colors (int): The maximum number of dominant colors to extract.
      Fewer entries are returned when the image has fewer pixels than
      `num_colors` or when a cluster ends up with no pixels.

    Returns:
    - List of (hex_color, percentage) tuples, sorted from the largest to the
      smallest percentage. `hex_color` is a '#rrggbb' string and `percentage`
      is the share (0-100) of pixels assigned to that color. An empty list is
      returned when the image cannot be read.
    """
    # cv2.imread signals an unreadable/missing file by returning None
    image = cv2.imread(image_path)
    if image is None:
        print(f"Could not read image {image_path}")
        return []

    # Flatten the image into one row per pixel, three channel values each
    pixels = image.reshape(-1, 3)
    total_pixels = pixels.shape[0]

    # KMeans refuses to fit more clusters than there are samples, so a tiny
    # image must not be asked for more colors than it has pixels.
    n_clusters = min(num_colors, total_pixels)

    # Fixed random_state keeps the clustering reproducible between runs
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(pixels)
    centers = kmeans.cluster_centers_
    labels = kmeans.labels_

    # bincount yields one count per cluster index (zero for unused clusters),
    # so counts[i] always belongs to centers[i]. np.unique would silently drop
    # unused labels and shift the remaining counts onto the wrong centers.
    counts = np.bincount(labels, minlength=n_clusters)

    top_colors = []
    for center, count in zip(centers, counts):
        if count == 0:
            # A cluster without pixels is not a color present in the image
            continue
        # OpenCV stores channels as B, G, R; round (rather than truncate) the
        # fractional cluster center to the nearest 8-bit channel value.
        blue, green, red = (int(round(channel)) for channel in center)
        percentage = (count / total_pixels) * 100
        top_colors.append((f'#{red:02x}{green:02x}{blue:02x}', percentage))

    # Most dominant color first, so the result really is a "top colors" list
    top_colors.sort(key=lambda entry: entry[1], reverse=True)

    return top_colors


## Process Images in Folder
Loop through each image in the folder, apply the color extraction function, and store the results.

In [None]:
# One dictionary per (image, dominant color) pair is accumulated here
expanded_rows = []

# Walk over the entries of the image folder
for filename in os.listdir(folder_path):
    # Skip anything that does not carry a supported image extension
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    image_path = os.path.join(folder_path, filename)
    colors = get_top_colors(image_path, num_colors=5)  # top 5 colors
    if not colors:
        # Nothing was extracted for this file, so it contributes no rows
        continue

    # The file name without its extension identifies the image
    image_id = os.path.splitext(filename)[0]
    expanded_rows.extend(
        {'ID': image_id, 'Color': color, 'Percentage': pct}
        for color, pct in colors
    )


## Create DataFrame and Save to CSV
Store the color data in a DataFrame and save it as a CSV file.

In [None]:
# Where the CSV is written; change this to save the results elsewhere
output_file_path = 'path/to/output/Color_Analysis.csv'

# Convert the list of dictionaries to a DataFrame. The columns are listed
# explicitly so the CSV still gets its ID/Color/Percentage header row when no
# image produced any colors (an empty list alone yields a DataFrame with no
# columns at all, and therefore a CSV without a header).
expanded_df = pd.DataFrame(expanded_rows, columns=['ID', 'Color', 'Percentage'])

# Save the color data to a CSV file, leaving out the DataFrame index
expanded_df.to_csv(output_file_path, index=False)

print(f"File saved to {output_file_path}")
