Dominant Color Extraction and Visualization Code

I explained each code block's functionality in the comments.

The script that extracts each year's most dominant colors.

In [None]:
import cv2
import numpy as np
import os
from sklearn.cluster import KMeans
from collections import Counter
import matplotlib.pyplot as plt
import json
from concurrent.futures import ThreadPoolExecutor
import csv

os.environ["LOKY_MAX_CPU_COUNT"] = "4"

#Extract dominant colors of an image by using KMeans clustering
def extract_dominant_colors(image, num_colors=7):
    """Cluster an image's pixels with KMeans and return the cluster centres.

    Args:
        image: Image array in BGR channel order; it is converted to RGB
            before clustering.
        num_colors: Number of clusters to fit, i.e. number of colors returned.

    Returns:
        The fitted cluster centres cast to ``int`` (fractional parts are
        truncated), one row of three RGB components per cluster.
    """
    # Flatten to one row per pixel so every pixel is a 3-feature sample.
    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).reshape(-1, 3)
    model = KMeans(n_clusters=num_colors, random_state=30, max_iter=500).fit(rgb_pixels)
    return model.cluster_centers_.astype(int)


#Merge similar colors in a counter by setting a threshold distance between their RGB values
def merge_similar_colors_in_counter(color_counter, threshold=30):
    """Merge colors whose RGB values are closer than ``threshold``.

    Colors are visited in the counter's iteration order. Each color is
    compared only against the colors that already own a bucket
    (representatives): it is added to the first representative whose
    Euclidean RGB distance is below ``threshold``, otherwise it becomes a new
    representative. Comparing against representatives only guarantees that a
    color which was merged away never reappears as a bucket of its own.

    Args:
        color_counter: Mapping of 3-component color tuples to counts.
        threshold: Distance below which two colors are treated as the same.

    Returns:
        Counter keyed by representative colors (tuples of Python ints) with
        the summed integer counts of every color merged into them.
    """
    merged_counter = Counter()
    representatives = []  # bucket-owning colors, in first-seen order
    for color, count in color_counter.items():
        color = tuple(int(c) for c in color)
        match = None
        if representatives:
            dist = np.linalg.norm(np.array(representatives) - np.array(color), axis=1)
            close_colors = np.where(dist < threshold)[0]
            if close_colors.size > 0:
                match = representatives[close_colors[0]]
        if match is None:
            # Nothing close enough yet: this color starts its own bucket.
            representatives.append(color)
            match = color
        merged_counter[match] += int(count)
    return merged_counter

#Process each image after resizing
def process_image(image_path, num_colors=7):
    """Read one image and return its dominant colors as a ``{color: 1}`` dict.

    The image is resized to 150x150 before clustering to keep processing fast.

    Args:
        image_path: Path of the image file to read.
        num_colors: Number of dominant colors to extract.

    Returns:
        Dict mapping each dominant color tuple to 1. An empty dict is returned
        when the image cannot be read or processing raises, so the return
        type is the same on every path.
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            print(f"Failed to read image: {image_path}")
            return {}
        image = cv2.resize(image, (150, 150))  # Resize the image for faster processing
        dominant_colors = extract_dominant_colors(image, num_colors)
        return {tuple(color): 1 for color in dominant_colors}
    except Exception as e:
        # Best-effort per image: report the failure and let the batch go on.
        print(f"Error processing {image_path}: {e}")
        return {}


#Process all images in a folder by using threads for speed and merge similar dominant colors
def process_images_in_folder(folder_path, num_colors=7):
    """Collect the dominant colors of every image in a folder and merge similar ones.

    Files ending in .png, .jpg, .jpeg or .bmp (case-insensitive) are handed to
    ``process_image`` on a pool of four worker threads. The per-image results
    are summed into one counter, which is then merged with a threshold of 45.

    Args:
        folder_path: Folder whose image files are processed.
        num_colors: Number of dominant colors extracted per image.

    Returns:
        The merged color counter for the folder.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp')
    tally = Counter()
    with ThreadPoolExecutor(max_workers=4) as pool:
        pending = [
            pool.submit(process_image, os.path.join(folder_path, entry), num_colors)
            for entry in os.listdir(folder_path)
            if entry.lower().endswith(image_extensions)
        ]
        # Results are consumed in submission order.
        for job in pending:
            tally.update(job.result())

    return merge_similar_colors_in_counter(tally, threshold=45)

#Display color summary for checking results accuracy
def display_color_summary(colors_with_counts):
    """Show the given colors side by side as a 300x50 pixel color bar.

    Args:
        colors_with_counts: Sequence of ``(color, count)`` pairs; only the
            color of each pair is used. Nothing is drawn when it is empty.
    """
    num_colors = len(colors_with_counts)
    if num_colors == 0:
        # A folder without images yields no colors; avoid dividing by zero.
        return

    bar_width = 300
    color_bar = np.zeros((50, bar_width, 3), dtype='uint8')

    for i, (color, _) in enumerate(colors_with_counts):
        # Proportional edges cover the whole bar even when the width is not
        # divisible by the number of colors.
        start_x = i * bar_width // num_colors
        end_x = (i + 1) * bar_width // num_colors
        color_bar[:, start_x:end_x, :] = np.array(color, dtype='uint8')

    plt.figure(figsize=(6, 2))
    plt.axis("off")
    plt.imshow(color_bar)
    plt.show()

#Save flattened color data to a CSV file for each month/year
def save_color_data_to_csv(year_folder, month_folder, colors, counts):
    """Write one month's colors and counts to ``<year>_<month>_color_summary.csv``.

    The file is created in the current working directory with a header row
    followed by one row per ``(color, count)`` pair.

    Args:
        year_folder: Year folder name, used as the first part of the file name.
        month_folder: Month folder name, used as the second part of the file name.
        colors: Colors to write, paired positionally with ``counts``.
        counts: Count for each color.
    """
    output_file = f"{year_folder}_{month_folder}_color_summary.csv"
    with open(output_file, 'w', newline='') as handle:
        rows = csv.writer(handle)
        rows.writerow(['Color (R, G, B)', 'Count'])
        rows.writerows(zip(colors, counts))

    print(f"Color data for {year_folder} - {month_folder} saved to {output_file}")

#Process each year and month by finding the similar named year folders, save extracted month data to a csv file and display the top colors of that month
def process_year_and_month(base_path, num_colors=7):
    """Process every month folder of every ``Photos_20*`` year folder.

    For each month the ten most common merged colors are stored in the
    result, written to a CSV file and displayed as a color bar.

    Args:
        base_path: Folder containing the year folders.
        num_colors: Number of dominant colors extracted per image.

    Returns:
        Dict of ``{year_folder: {month_folder: [(color, count), ...]}}``.
    """
    results = {}
    for year_folder in os.listdir(base_path):
        year_path = os.path.join(base_path, year_folder)
        # Only directories named like a year folder are considered.
        if not (os.path.isdir(year_path) and year_folder.startswith("Photos_20")):
            continue

        year_results = results[year_folder] = {}
        for month_folder in os.listdir(year_path):
            month_path = os.path.join(year_path, month_folder)
            if not os.path.isdir(month_path):
                continue

            print(f"Processing Year: {year_folder}, Month: {month_folder}")
            top_colors = process_images_in_folder(month_path, num_colors).most_common(10)
            year_results[month_folder] = [(tuple(color), count) for color, count in top_colors]

            # Split the pairs into parallel lists for the CSV writer.
            colors_as_integers = []
            counts = []
            for (r, g, b), count in top_colors:
                colors_as_integers.append((int(r), int(g), int(b)))
                counts.append(count)

            save_color_data_to_csv(year_folder, month_folder, colors_as_integers, counts)
            display_color_summary(top_colors)

    return results

#Root folder that is scanned for "Photos_20*" year folders
base_folder = r"C:\Users\Desktop"

#Run the extraction for every year/month folder (5 colors per image)
color_data = process_year_and_month(base_folder, num_colors=5)

#Write the combined result to a JSON file
output_file = "color_summary.json"
with open(output_file, "w") as json_file:
    json_file.write(json.dumps(color_data, indent=4))

print(f"Color summary saved to {output_file}.")


Most dominant color of each year between 2016-2024 as a pie chart.

In [None]:
import pandas as pd
import numpy as np
from collections import Counter
import altair as alt
import os



#Process each months csv file regarding the specified year, find the dominant colors and merge them if they are similar
def process_csv_files_in_folder(folder_path, file_n, num_colors=7):
    """Sum the color counts of the matching summary CSV files in a folder.

    A file matches when its name starts with ``file_n`` and its lower-cased
    name ends with ``summary.csv``.

    Args:
        folder_path: Folder that is searched for summary CSV files.
        file_n: Required prefix of the CSV file names (e.g. a year folder name).
        num_colors: Despite its name, this value is forwarded to
            ``merge_similar_colors_in_counter`` as the merge distance threshold.

    Returns:
        Counter of merged colors with their summed counts.
    """
    import ast  # local import keeps this block self-contained

    color_counter = Counter()
    for file_name in os.listdir(folder_path):
        if not (file_name.lower().endswith('summary.csv') and file_name.startswith(file_n)):
            continue
        # os.listdir returns bare names, so the folder must be joined back on;
        # otherwise the file is looked up in the current working directory.
        df = pd.read_csv(os.path.join(folder_path, file_name))
        for color_text, count in zip(df['Color (R, G, B)'], df['Count']):
            # literal_eval parses the "(r, g, b)" text without executing
            # arbitrary expressions that a CSV file might contain.
            color_counter[ast.literal_eval(color_text)] += count
    return merge_similar_colors_in_counter(color_counter, num_colors)

#Merge colors if they have similar tones checked by their RGB distance
def merge_similar_colors_in_counter(color_counter, threshold=30):
    """Merge colors whose RGB values are closer than ``threshold``.

    Colors are visited in the counter's iteration order. Each color is
    compared only against the colors that already own a bucket
    (representatives): it is added to the first representative whose
    Euclidean RGB distance is below ``threshold``, otherwise it becomes a new
    representative. Comparing against representatives only guarantees that a
    color which was merged away never reappears as a bucket of its own.

    Args:
        color_counter: Mapping of 3-component color tuples to counts.
        threshold: Distance below which two colors are treated as the same.

    Returns:
        Counter keyed by representative colors (tuples of Python ints) with
        the summed integer counts of every color merged into them.
    """
    merged_counter = Counter()
    representatives = []  # bucket-owning colors, in first-seen order
    for color, count in color_counter.items():
        color = tuple(int(c) for c in color)
        match = None
        if representatives:
            dist = np.linalg.norm(np.array(representatives) - np.array(color), axis=1)
            close_colors = np.where(dist < threshold)[0]
            if close_colors.size > 0:
                match = representatives[close_colors[0]]
        if match is None:
            # Nothing close enough yet: this color starts its own bucket.
            representatives.append(color)
            match = color
        merged_counter[match] += int(count)
    return merged_counter
                     
        
# Load the color data from CSV and read it into a DataFrame
def load_and_process_color_data(csv_file="color_summary.csv"):
    """Load a color summary CSV and turn its color column back into tuples.

    Args:
        csv_file: Path of a CSV file with a ``Color (R, G, B)`` column holding
            tuple text such as ``(12, 34, 56)``.

    Returns:
        DataFrame read from the file, with the color column parsed into
        Python tuples.
    """
    import ast  # local import keeps this block self-contained

    df = pd.read_csv(csv_file)
    # literal_eval only accepts Python literals, so file contents can never
    # be executed as code (which eval would allow).
    df['Color (R, G, B)'] = df['Color (R, G, B)'].apply(ast.literal_eval)
    return df

# Visualize the color data as a pie chart and save it as a HTML file
def visualize_color_data(df, output_file="g23.html", title=""):
    """Draw the (up to) ten highest-count colors as a pie chart, show and save it.

    Args:
        df: DataFrame with a ``Color (R, G, B)`` column of color tuples and a
            ``Count`` column.
        output_file: Path of the HTML file the chart is saved to.
        title: Chart title.
    """
    top_colors = df.nlargest(10, 'Count')
    colors = top_colors['Color (R, G, B)'].tolist()
    counts = top_colors['Count'].tolist()
    # CSS-style color strings used as the range of the color scale.
    color = [f"rgb{rgb}" for rgb in colors]

    ordinal_labels = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
    # Trim the labels to the number of rows actually present; columns of
    # unequal length would make the DataFrame constructor raise.
    labels = ordinal_labels[:len(counts)]

    all_count = sum(counts)
    # Guard against a zero total so an all-zero table does not divide by zero.
    percentage = [num / all_count * 100 if all_count else 0.0 for num in counts]

    data = pd.DataFrame({
        'labels': labels,
        'counts': counts,
        'colors': colors,
        'percentage': percentage
    })

    pie_chart = alt.Chart(data).mark_arc().encode(
        theta=alt.Theta(field="counts", type="quantitative"),
        color=alt.Color(field="labels", type="nominal", scale=alt.Scale(domain=labels, range=color)),
        tooltip=['labels', 'counts', alt.Tooltip('percentage', format='.1f')]
    ).properties(
        title=title
    )

    pie_chart.show()

    pie_chart.save(output_file)
    
#Save found color data into a csv file
def save_color_data_to_csv(file_name, colors, counts):
    """Write colors and counts to ``<file_name>_color_summary.csv``.

    Args:
        file_name: Prefix of the output file name.
        colors: Values for the ``Color (R, G, B)`` column.
        counts: Values for the ``Count`` column.
    """
    csv_file_path = f"{file_name}_color_summary.csv"
    columns = {'Color (R, G, B)': colors, 'Count': counts}
    # index=False keeps the row index out of the file.
    pd.DataFrame(columns).to_csv(csv_file_path, index=False)
    print(f"Data saved to {csv_file_path}")

#Folder that is searched for the monthly color summary csv files
folder_name=r"C:\Users\Desktop\google_takeout"

#Aggregate the "Photos_2023" summaries (the last argument is forwarded as the merge threshold)
color_data = process_csv_files_in_folder(folder_name,"Photos_2023",10)

#Split the counter into parallel lists of colors and counts
colors = list(color_data.keys())
counts = list(color_data.values())

#Write the aggregated data to Photos_2023_color_summary.csv
save_color_data_to_csv("Photos_2023",colors,counts)

#Read it back with the colors parsed into tuples
color_data_df=load_and_process_color_data("Photos_2023_color_summary.csv")

#Draw the pie chart
visualize_color_data(color_data_df)


Most dominant colors between 2016-2024 as a pie chart.

In [None]:
import pandas as pd
import os
import numpy as np
from collections import Counter
import altair as alt



#Process each months csv file , find the dominant colors and merge them if they are similar
def process_csv_files_in_folder(folder_path, file_n, num_colors=7):
    """Sum the color counts of the matching summary CSV files in a folder.

    A file matches when its name starts with ``file_n`` and its lower-cased
    name ends with ``summary.csv``.

    Args:
        folder_path: Folder that is searched for summary CSV files.
        file_n: Required prefix of the CSV file names.
        num_colors: Despite its name, this value is forwarded to
            ``merge_similar_colors_in_counter`` as the merge distance threshold.

    Returns:
        Counter of merged colors with their summed counts.
    """
    import ast  # local import keeps this block self-contained

    color_counter = Counter()
    for file_name in os.listdir(folder_path):
        if not (file_name.lower().endswith('summary.csv') and file_name.startswith(file_n)):
            continue
        # os.listdir returns bare names, so the folder must be joined back on;
        # otherwise the file is looked up in the current working directory.
        df = pd.read_csv(os.path.join(folder_path, file_name))
        for color_text, count in zip(df['Color (R, G, B)'], df['Count']):
            # literal_eval parses the "(r, g, b)" text without executing
            # arbitrary expressions that a CSV file might contain.
            color_counter[ast.literal_eval(color_text)] += count
    return merge_similar_colors_in_counter(color_counter, num_colors)

#Merge colors if they have similar tones checked by their RGB's distance
def merge_similar_colors_in_counter(color_counter, threshold=30):
    """Merge colors whose RGB values are closer than ``threshold``.

    Colors are visited in the counter's iteration order. Each color is
    compared only against the colors that already own a bucket
    (representatives): it is added to the first representative whose
    Euclidean RGB distance is below ``threshold``, otherwise it becomes a new
    representative. Comparing against representatives only guarantees that a
    color which was merged away never reappears as a bucket of its own.

    Args:
        color_counter: Mapping of 3-component color tuples to counts.
        threshold: Distance below which two colors are treated as the same.

    Returns:
        Counter keyed by representative colors (tuples of Python ints) with
        the summed integer counts of every color merged into them.
    """
    merged_counter = Counter()
    representatives = []  # bucket-owning colors, in first-seen order
    for color, count in color_counter.items():
        color = tuple(int(c) for c in color)
        match = None
        if representatives:
            dist = np.linalg.norm(np.array(representatives) - np.array(color), axis=1)
            close_colors = np.where(dist < threshold)[0]
            if close_colors.size > 0:
                match = representatives[close_colors[0]]
        if match is None:
            # Nothing close enough yet: this color starts its own bucket.
            representatives.append(color)
            match = color
        merged_counter[match] += int(count)
    return merged_counter
                     
        
# Load the color data from CSV and read it into a DataFrame
def load_and_process_color_data(csv_file="color_summary.csv"):
    """Load a color summary CSV and turn its color column back into tuples.

    Args:
        csv_file: Path of a CSV file with a ``Color (R, G, B)`` column holding
            tuple text such as ``(12, 34, 56)``.

    Returns:
        DataFrame read from the file, with the color column parsed into
        Python tuples.
    """
    import ast  # local import keeps this block self-contained

    df = pd.read_csv(csv_file)
    # literal_eval only accepts Python literals, so file contents can never
    # be executed as code (which eval would allow).
    df['Color (R, G, B)'] = df['Color (R, G, B)'].apply(ast.literal_eval)

    return df

# Visualize the color data as a pie chart and save it as a HTML file
def visualize_color_data(df, output_file="gA.html", title="2016-2024"):
    """Draw the (up to) ten highest-count colors as a pie chart, show and save it.

    Args:
        df: DataFrame with a ``Color (R, G, B)`` column of color tuples and a
            ``Count`` column.
        output_file: Path of the HTML file the chart is saved to.
        title: Chart title.
    """
    top_colors = df.nlargest(10, 'Count')

    colors = top_colors['Color (R, G, B)'].tolist()
    counts = top_colors['Count'].tolist()

    # CSS-style color strings used as the range of the color scale.
    color = [f"rgb{rgb}" for rgb in colors]

    ordinal_labels = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
    # Trim the labels to the number of rows actually present; columns of
    # unequal length would make the DataFrame constructor raise.
    labels = ordinal_labels[:len(counts)]

    all_count = sum(counts)
    # Guard against a zero total so an all-zero table does not divide by zero.
    percentage = [num / all_count * 100 if all_count else 0.0 for num in counts]

    data = pd.DataFrame({
        'labels': labels,
        'counts': counts,
        'colors': colors,
        'percentage': percentage
    })

    pie_chart = alt.Chart(data).mark_arc().encode(
        theta=alt.Theta(field="counts", type="quantitative"),
        color=alt.Color(field="labels", type="nominal", scale=alt.Scale(domain=labels, range=color)),
        tooltip=['labels', 'counts', alt.Tooltip('percentage', format='.1f')]
    ).properties(
        title=title
    )

    pie_chart.show()

    pie_chart.save(output_file)
    

#Folder to search color summaries of months
folder_name=r"C:\Users\Desktop\google_takeout"

#Process csv files
color_data = process_csv_files_in_folder(folder_name,"Photos",10)

colors = list(color_data.keys())
counts = list(color_data.values())

#Save the aggregated data before loading it: the colors/counts computed above
#were previously never written, so the load below depended on a file that this
#cell did not produce
summary_file = "All_years_color_summary.csv"
pd.DataFrame({'Color (R, G, B)': colors, 'Count': counts}).to_csv(summary_file, index=False)
print(f"Data saved to {summary_file}")

#Process color data
color_data_df=load_and_process_color_data(summary_file)

#Create the pie chart of the data
visualize_color_data(color_data_df)


Each month's color change through the years as a stacked bar chart.

In [None]:
import json
import altair as alt
import pandas as pd

#Read the color summary JSON into a dict
with open("color_summary.json", "r") as json_file:
    data = json.loads(json_file.read())

#Extract colors and counts from the JSON data of a specified month
def extract_colors_and_counts(json_data, month_prefix="Dec"):
    """Flatten the color entries of the months whose name starts with a prefix.

    Args:
        json_data: Dict of ``{year: {month: [(color, count), ...]}}`` where
            each color has at least three components.
        month_prefix: Only months whose name starts with this text are
            included. Defaults to ``"Dec"``.

    Returns:
        Tuple ``(colors, counts, years, months)`` of equally long lists with
        one entry per selected color; colors are ``"rgb(r,g,b)"`` strings.
    """
    colors = []
    counts = []
    years = []
    months = []
    for year, months_data in json_data.items():
        for month, color_data in months_data.items():
            # Decide once per month instead of once per color entry.
            if not month.startswith(month_prefix):
                continue
            for color, count in color_data:
                colors.append(f"rgb({color[0]},{color[1]},{color[2]})")
                counts.append(count)
                years.append(year)
                months.append(month)
    return colors, counts, years, months

colors, counts, years, months = extract_colors_and_counts(data)

#Create a DataFrame for Altair
df = pd.DataFrame({
    'Color': colors,
    'Count': counts,
    'Year': years,
    'Month': months
})

#Create the bar chart with the data
#scale=None makes Altair use the "rgb(...)" strings in the Color column as the actual bar colors
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('Month:N', title='Months'),
    y=alt.Y('Count:Q', title='Counts'),
    color=alt.Color('Color:N',legend=None,scale=None),
    tooltip=['Month:N', 'Count:Q', 'Color:N']
).properties(
    width=600,
    height=400,
    title="Color Counts"
)

#configure_* returns a new chart, so keep the configured chart; otherwise the
#saved HTML would not have the axis/view settings that the displayed chart has
chart = chart.configure_axis(
    labelAngle=0
).configure_view(
    stroke=None
)

#Show the chart
chart.show()

#Save the chart to a HTML file
chart.save("dec.html")

Each year's color difference through the months as a stacked bar chart.

In [None]:
import json
import altair as alt
import pandas as pd

#Read the color summary JSON into a dict
with open("color_summary.json", "r") as json_file:
    data = json.loads(json_file.read())

#Extract colors and counts from the JSON data
def extract_colors_and_counts(json_data,y):
    """Flatten the color entries of every year whose name ends with ``y``.

    Args:
        json_data: Dict of ``{year: {month: [(color, count), ...]}}`` where
            each color has at least three components.
        y: Suffix a year key must end with to be included (e.g. ``"24"``).

    Returns:
        Tuple ``(colors, counts, years, months)`` of equally long lists with
        one entry per color; colors are ``"rgb(r,g,b)"`` strings.
    """
    rgb_strings, tallies, year_names, month_names = [], [], [], []
    for year, months_data in json_data.items():
        # The year test does not depend on the month, so check it once.
        if not year.endswith(y):
            continue
        for month, entries in months_data.items():
            for rgb, tally in entries:
                rgb_strings.append(f"rgb({rgb[0]},{rgb[1]},{rgb[2]})")
                tallies.append(tally)
                year_names.append(year)
                month_names.append(month)
    return rgb_strings, tallies, year_names, month_names
#Two-digit year suffix that selects the year and names the output file
y="24"
colors, counts, years, months = extract_colors_and_counts(data,y)

#Create a DataFrame for Altair
df = pd.DataFrame({
    'Color': colors,
    'Count': counts,
    'Year': years,
    'Month': months
})

#Month order list to show the months in chronological order since they were not saved as such
month_order = [f'Jan{y}', f'Feb{y}', f'March{y}', f'April{y}', f'May{y}', f'June{y}', f'July{y}', f'Aug{y}', f'Sep{y}', f'Oct{y}', f'Nov{y}', f'Dec{y}']

#Create the bar chart of the data
#scale=None makes Altair use the "rgb(...)" strings in the Color column as the actual bar colors
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('Month:N', title='Months',sort=month_order),
    y=alt.Y('Count:Q', title='Counts'),
    color=alt.Color('Color:N',legend=None,scale=None),
    tooltip=['Month:N', 'Count:Q', 'Color:N']
).properties(
    width=600,
    height=500,
    title="Color Counts"
)

#configure_* returns a new chart, so keep the configured chart; otherwise the
#saved HTML would not have the axis/view settings that the displayed chart has
chart = chart.configure_axis(
    labelAngle=0
).configure_view(
    stroke=None
)

#Show the bar chart
chart.show()

#Save the bar chart in a HTML file named after the selected year ("24.html" for y="24")
chart.save(f"{y}.html")