In [None]:
import cv2
import numpy as np
import os
from sklearn.cluster import MiniBatchKMeans  # Faster version of KMeans
from collections import Counter
import matplotlib.pyplot as plt
import json
from PIL import Image

def get_dominant_colors(pil_img, palette_size=16, num_colors=10):
    """Return the most frequently used colors of a PIL image.

    The image is copied, shrunk to fit within 100x100 pixels and reduced to an
    adaptive palette of ``palette_size`` colors. Palette entries are then
    ranked by how many pixels use them.

    Args:
        pil_img: Source PIL image. It is not modified.
        palette_size: Number of colors in the adaptive palette.
        num_colors: Maximum number of colors to return.

    Returns:
        A list of at most ``num_colors`` ``[r, g, b]`` lists, most used first.
    """
    # thumbnail() resizes in place, so work on a copy of the caller's image.
    img = pil_img.copy()
    img.thumbnail((100, 100))

    # NOTE(review): Pillow's adaptive quantizer is understood to accept only
    # L, P, RGB and RGBA input; other modes (e.g. CMYK JPEGs) would raise, so
    # they are normalised to RGB first. Supported modes are left untouched.
    if img.mode not in ("L", "P", "RGB", "RGBA"):
        img = img.convert("RGB")

    paletted = img.convert('P', palette=Image.ADAPTIVE, colors=palette_size)

    # The palette is a flat sequence holding three values per palette index.
    palette = paletted.getpalette()
    # Entries are (count, palette_index); descending order puts the most used
    # palette entries first.
    color_counts = sorted(paletted.getcolors(), reverse=True)

    return [
        palette[palette_index * 3:palette_index * 3 + 3]
        for _, palette_index in color_counts[:max(num_colors, 0)]
    ]

def extract_dominant_colors(image, num_colors=7):
    """Cluster the pixels of a BGR image and return the cluster centres.

    Args:
        image: Image array in OpenCV's BGR channel order.
        num_colors: Number of clusters (and therefore colors) to compute.

    Returns:
        An integer array with one RGB row per cluster centre.
    """
    # Convert to RGB and flatten to one row per pixel for clustering.
    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).reshape(-1, 3)
    clusterer = MiniBatchKMeans(
        n_clusters=num_colors,
        random_state=42,
        max_iter=100,
        batch_size=1000,
    )
    clusterer.fit(rgb_pixels)
    centres = clusterer.cluster_centers_
    return centres.astype(int)

def merge_similar_colors_in_counter(color_counter, threshold=30):
    """Merge colors that lie close together in RGB space.

    Colors are visited in the counter's iteration order. Each color is folded
    into the first previously kept color whose Euclidean distance to it is
    below ``threshold``; when there is none, the color itself is kept as a new
    merge target. Counts are only ever accumulated on kept colors, so a color
    that has been merged away never shows up as a key carrying the count of
    some other color.

    Args:
        color_counter: Mapping of color tuples to counts.
        threshold: Exclusive distance bound below which colors are merged.

    Returns:
        A ``Counter`` keyed by the kept colors (the original key objects)
        holding the summed counts.
    """
    merged_counter = Counter()
    kept_colors = []  # merge targets, in first-seen order

    for color, count in color_counter.items():
        target = None
        if kept_colors:
            dist = np.linalg.norm(
                np.array(kept_colors, dtype=float) - np.array(color, dtype=float),
                axis=1,
            )
            close_colors = np.flatnonzero(dist < threshold)
            if close_colors.size > 0:
                target = kept_colors[int(close_colors[0])]

        if target is None:
            # Nothing similar has been kept yet: this color becomes a target.
            kept_colors.append(color)
            target = color

        merged_counter[target] += count

    return merged_counter

def process_image(image_path, num_colors=7, use_pil=False):
    """Extract the dominant colors of a single image file.

    Args:
        image_path: Path of the image to analyse.
        num_colors: Number of dominant colors to request.
        use_pil: When true, use the PIL palette approach
            (``get_dominant_colors``); otherwise read the image with OpenCV
            and cluster it with ``extract_dominant_colors``.

    Returns:
        A dict mapping each distinct dominant color (as a tuple) to ``1``.
        An empty dict is returned when the image cannot be read or any step
        fails; the problem is reported on stdout.
    """
    try:
        if use_pil:
            # Only PIL is needed on this path; the context manager closes the
            # underlying file handle as soon as the colors are extracted.
            with Image.open(image_path) as pil_image:
                dominant_colors = get_dominant_colors(pil_image, num_colors=num_colors)
        else:
            image = cv2.imread(image_path)
            if image is None:
                print(f"Failed to read image: {image_path}")
                return {}
            # Resize to a standard size to speed up clustering.
            image = cv2.resize(image, (150, 150))
            dominant_colors = extract_dominant_colors(image, num_colors)
        return {tuple(color): 1 for color in dominant_colors}
    except Exception as e:
        # Best effort: one broken file must not abort a whole folder run.
        print(f"Error processing {image_path}: {e}")
        return {}

def process_images_in_folder(folder_path, num_colors=7, use_pil=False):
    """Tally the dominant colors of every image file in a folder.

    Files are selected by extension (case-insensitive). The per-image colors
    are accumulated in a ``Counter`` and similar colors are merged before the
    result is returned.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')
    tally = Counter()

    for entry in os.listdir(folder_path):
        if not entry.lower().endswith(image_suffixes):
            continue
        full_path = os.path.join(folder_path, entry)
        tally.update(process_image(full_path, num_colors, use_pil))

    return merge_similar_colors_in_counter(tally, threshold=30)

def display_color_summary(colors_with_counts):
    """Show the given colors as a horizontal bar of equally wide swatches.

    Args:
        colors_with_counts: Sequence of ``(color, count)`` pairs, where
            ``color`` is an RGB triple. The counts are not used for sizing;
            swatches are drawn in the order given.

    Returns:
        None. Nothing is drawn when the sequence is empty.
    """
    num_colors = len(colors_with_counts)
    if num_colors == 0:
        # An empty summary (e.g. a folder without readable images) would
        # otherwise divide by zero when computing the swatch width.
        return

    bar_height, bar_width = 50, 300
    color_bar = np.zeros((bar_height, bar_width, 3), dtype='uint8')

    # Evenly spaced integer boundaries that end exactly at bar_width, so the
    # swatches cover the whole bar with no unpainted strip on the right.
    edges = np.linspace(0, bar_width, num_colors + 1).astype(int)
    for (color, _), start_x, end_x in zip(colors_with_counts, edges[:-1], edges[1:]):
        color_bar[:, start_x:end_x, :] = np.array(color, dtype='uint8')

    # Display the color bar
    plt.figure(figsize=(6, 2))
    plt.axis("off")
    plt.imshow(color_bar)
    plt.show()


def process_year_and_month(base_path, num_colors=7, use_pil=False):
    """Summarise dominant colors for every year/month folder under a base path.

    Only directories whose name starts with ``Photos_20`` are treated as
    years, and only their sub-directories whose name starts with a
    three-letter month abbreviation are processed. For each month the ten
    most common colors are stored and displayed.

    Returns:
        A dict of the form ``{year_folder: {month_folder: [(color, count), ...]}}``.
    """
    month_prefixes = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    results = {}

    for year_folder in os.listdir(base_path):
        year_path = os.path.join(base_path, year_folder)
        if not (os.path.isdir(year_path) and year_folder.startswith("Photos_20")):
            continue

        monthly = {}
        results[year_folder] = monthly

        for month_folder in os.listdir(year_path):
            month_path = os.path.join(year_path, month_folder)
            if not os.path.isdir(month_path):
                continue
            if not month_folder.startswith(month_prefixes):
                continue

            print(f"Processing Year: {year_folder}, Month: {month_folder}")
            top_colors = process_images_in_folder(month_path, num_colors, use_pil).most_common(10)
            monthly[month_folder] = [(tuple(color), count) for color, count in top_colors]
            # Show the month's top colors
            display_color_summary(top_colors)

    return results

def convert_numpy_ints(data):
    """Recursively replace NumPy values with native Python equivalents.

    Dicts (keys and values), lists and tuples are walked recursively. Tuples
    must be included because the color summaries are stored as
    ``(color_tuple, count)`` pairs whose components may be NumPy integers,
    which ``json`` cannot serialise.

    Args:
        data: Arbitrarily nested structure of dicts, lists, tuples, NumPy
            arrays/scalars and plain Python values.

    Returns:
        A structure of the same shape in which NumPy integers, floats and
        booleans are native ``int``/``float``/``bool`` and NumPy arrays are
        nested lists. Any other value is returned unchanged.
    """
    if isinstance(data, dict):
        return {convert_numpy_ints(k): convert_numpy_ints(v) for k, v in data.items()}
    if isinstance(data, list):
        return [convert_numpy_ints(v) for v in data]
    if isinstance(data, tuple):
        return tuple(convert_numpy_ints(v) for v in data)
    if isinstance(data, np.ndarray):
        return data.tolist()  # tolist() already yields native scalars
    if isinstance(data, np.bool_):
        return bool(data)
    if isinstance(data, np.integer):  # Handle all numpy integer types
        return int(data)
    if isinstance(data, np.floating):  # Handle all numpy float types
        return float(data)
    return data


# Root directory that is scanned for the year folders
base_folder = r"C:\Users\Lenovo\Desktop"

# Collect the per-month color summaries and turn any NumPy values into
# native Python values so the result can be written as JSON
color_data = convert_numpy_ints(
    process_year_and_month(base_folder, num_colors=7, use_pil=True)
)

# Write the summary to disk
output_file = "color_summary.json"
with open(output_file, "w") as f:
    json.dump(color_data, f, indent=4)

print(f"Color summary saved to {output_file}.")

In [None]:
import cv2
import numpy as np
import os
from sklearn.cluster import MiniBatchKMeans  # Faster version of KMeans
from collections import Counter
import matplotlib.pyplot as plt
import json
from PIL import Image
from concurrent.futures import ThreadPoolExecutor

def get_dominant_colors(pil_img, palette_size=16, num_colors=10):
    """Return the most frequently used colors of a PIL image.

    A copy of the image is shrunk to fit within 100x100 pixels, reduced to an
    adaptive palette of ``palette_size`` colors, and the palette entries are
    ranked by pixel count.

    Args:
        pil_img: Source PIL image. It is not modified.
        palette_size: Number of colors in the adaptive palette.
        num_colors: Maximum number of colors to return.

    Returns:
        A list of at most ``num_colors`` ``(r, g, b)`` tuples, most used first.
    """
    img = pil_img.copy()  # thumbnail() works in place
    img.thumbnail((100, 100))  # Resize for faster processing

    # NOTE(review): Pillow's adaptive quantizer is understood to accept only
    # L, P, RGB and RGBA input; other modes (e.g. CMYK JPEGs) would raise, so
    # they are normalised to RGB first. Supported modes are left untouched.
    if img.mode not in ("L", "P", "RGB", "RGBA"):
        img = img.convert("RGB")

    paletted = img.convert('P', palette=Image.ADAPTIVE, colors=palette_size)
    palette = paletted.getpalette()  # flat sequence, three values per index
    # (count, palette_index) entries, most used first.
    ranked = sorted(paletted.getcolors(), reverse=True)

    return [
        tuple(palette[palette_index * 3:palette_index * 3 + 3])
        for _, palette_index in ranked[:max(num_colors, 0)]
    ]

def extract_dominant_colors(image, num_colors=7):
    """Return ``num_colors`` cluster-centre colors of a BGR image.

    The image is converted to RGB, flattened to one row per pixel and
    clustered with ``MiniBatchKMeans``; the centres are returned as integers.
    """
    pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).reshape(-1, 3)
    model = MiniBatchKMeans(
        n_clusters=num_colors,
        random_state=42,
        max_iter=100,
        batch_size=1000,
    )
    model.fit(pixels)
    return model.cluster_centers_.astype(int)

def merge_similar_colors_in_counter(color_counter, threshold=30):
    """Merge colors whose RGB distance is below ``threshold``.

    Colors are processed in the counter's iteration order. A color is added
    to the first already-kept color that lies closer than ``threshold``
    (Euclidean distance); otherwise it is kept as a new merge target. Because
    counts are only accumulated on kept colors, a color that was merged away
    can never reappear as a key holding another color's count.

    Args:
        color_counter: Mapping of color tuples to counts.
        threshold: Exclusive distance bound below which colors are merged.

    Returns:
        A ``Counter`` keyed by the kept colors (the original key objects)
        with the summed counts.
    """
    merged_counter = Counter()
    representatives = []  # kept colors, in first-seen order

    for color, count in color_counter.items():
        match_index = -1
        if representatives:
            deltas = np.array(representatives, dtype=float) - np.array(color, dtype=float)
            near = np.flatnonzero(np.linalg.norm(deltas, axis=1) < threshold)
            if near.size > 0:
                match_index = int(near[0])

        if match_index < 0:
            # No kept color is similar enough: keep this one as a new target.
            representatives.append(color)
            merged_counter[color] += count
        else:
            merged_counter[representatives[match_index]] += count

    return merged_counter

def process_image(image_path, num_colors=7, use_pil=False):
    """Extract the dominant colors of a single image file.

    Args:
        image_path: Path of the image to analyse.
        num_colors: Number of dominant colors to request.
        use_pil: When true, use the PIL palette approach
            (``get_dominant_colors``); otherwise read the image with OpenCV
            and cluster it with ``extract_dominant_colors``.

    Returns:
        A dict mapping each distinct dominant color (as a tuple) to ``1``, or
        an empty dict when the image cannot be read or processing fails. The
        failure is reported on stdout.
    """
    try:
        if use_pil:
            # The PIL path does not need the OpenCV decode; the context
            # manager releases the file handle once the colors are extracted.
            with Image.open(image_path) as pil_image:
                dominant_colors = get_dominant_colors(pil_image, num_colors=num_colors)
        else:
            image = cv2.imread(image_path)
            if image is None:
                print(f"Failed to read image: {image_path}")
                return {}
            image = cv2.resize(image, (150, 150))  # Resize once per image
            dominant_colors = extract_dominant_colors(image, num_colors)
        return {tuple(color): 1 for color in dominant_colors}
    except Exception as e:
        # Best effort: a single bad file should not stop the batch.
        print(f"Error processing {image_path}: {e}")
        return {}

def process_images_in_folder(folder_path, num_colors=7, use_pil=False):
    """Tally dominant colors of all images in a folder using a thread pool.

    Image files are selected by extension (case-insensitive), processed by up
    to four worker threads, and their colors are accumulated in submission
    order before similar colors are merged.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')
    tally = Counter()

    with ThreadPoolExecutor(max_workers=4) as pool:
        pending = [
            pool.submit(process_image, os.path.join(folder_path, entry), num_colors, use_pil)
            for entry in os.listdir(folder_path)
            if entry.lower().endswith(image_suffixes)
        ]
        for task in pending:
            tally.update(task.result())

    return merge_similar_colors_in_counter(tally, threshold=30)

def display_color_summary(colors_with_counts):
    """Show the given colors as a horizontal bar of equally wide swatches.

    Args:
        colors_with_counts: Sequence of ``(color, count)`` pairs, where
            ``color`` is an RGB triple. Counts do not affect the swatch
            width; swatches are drawn in the order given.

    Returns:
        None. Nothing is drawn when the sequence is empty.
    """
    num_colors = len(colors_with_counts)
    if num_colors == 0:
        # Avoid dividing the bar width by zero for an empty summary.
        return

    bar_height, bar_width = 50, 300
    color_bar = np.zeros((bar_height, bar_width, 3), dtype='uint8')

    # Integer swatch boundaries spanning the full width, so no columns are
    # left unpainted when the width is not divisible by the color count.
    bounds = np.linspace(0, bar_width, num_colors + 1).astype(int)
    for i, (color, _) in enumerate(colors_with_counts):
        color_bar[:, bounds[i]:bounds[i + 1], :] = np.array(color, dtype='uint8')

    plt.figure(figsize=(6, 2))
    plt.axis("off")
    plt.imshow(color_bar)
    plt.show()

def process_year_and_month(base_path, num_colors=7, use_pil=False):
    """Summarise dominant colors for each month folder of each year folder.

    Directories under ``base_path`` whose name starts with ``Photos_20`` are
    treated as years; every sub-directory of a year is processed as a month.
    The ten most common colors per month are stored and displayed.

    Returns:
        A dict of the form ``{year_folder: {month_folder: [(color, count), ...]}}``.
    """
    results = {}

    for year_folder in os.listdir(base_path):
        year_path = os.path.join(base_path, year_folder)
        if not (os.path.isdir(year_path) and year_folder.startswith("Photos_20")):
            continue

        monthly = {}
        results[year_folder] = monthly

        for month_folder in os.listdir(year_path):
            month_path = os.path.join(year_path, month_folder)
            if not os.path.isdir(month_path):
                continue

            print(f"Processing Year: {year_folder}, Month: {month_folder}")
            top_colors = process_images_in_folder(month_path, num_colors, use_pil).most_common(10)
            monthly[month_folder] = [(tuple(color), count) for color, count in top_colors]
            display_color_summary(top_colors)

    return results

def convert_numpy_ints(data):
    """Convert NumPy values inside a nested structure to native Python types.

    Dicts (keys and values), lists and tuples are walked recursively. Tuples
    are handled explicitly because the color summaries are stored as
    ``(color_tuple, count)`` pairs whose components may be NumPy integers,
    which ``json`` cannot serialise.

    Args:
        data: Arbitrarily nested structure of dicts, lists, tuples, NumPy
            arrays/scalars and plain Python values.

    Returns:
        A structure of the same shape with NumPy integers, floats and
        booleans replaced by ``int``/``float``/``bool`` and NumPy arrays
        replaced by nested lists. Other values are returned unchanged.
    """
    if isinstance(data, dict):
        return {convert_numpy_ints(k): convert_numpy_ints(v) for k, v in data.items()}
    if isinstance(data, (list, tuple)):
        converted = [convert_numpy_ints(v) for v in data]
        # Keep the container kind: tuples stay tuples, lists stay lists.
        return tuple(converted) if isinstance(data, tuple) else converted
    if isinstance(data, np.ndarray):
        return data.tolist()  # tolist() already yields native scalars
    if isinstance(data, np.bool_):
        return bool(data)
    if isinstance(data, np.integer):
        return int(data)
    if isinstance(data, np.floating):
        return float(data)
    return data

# Root directory that is scanned for the year folders
base_folder = r"C:\Users\Lenovo\Desktop"

# Build the per-month color summaries and convert NumPy values to native
# Python values so they can be serialised as JSON
color_data = convert_numpy_ints(
    process_year_and_month(base_folder, num_colors=7, use_pil=True)
)

# Persist the summary
output_file = "color_summary.json"
with open(output_file, "w") as f:
    json.dump(color_data, f, indent=4)

print(f"Color summary saved to {output_file}.")
