Each year's most dominant colors.

In [None]:
import cv2
import numpy as np
import os
from sklearn.cluster import KMeans
from collections import Counter
import matplotlib.pyplot as plt
import json
from concurrent.futures import ThreadPoolExecutor
import csv

os.environ["LOKY_MAX_CPU_COUNT"] = "4"

def extract_dominant_colors(image, num_colors=7):
    """Return the dominant colors of an image as integer RGB cluster centers.

    The image is converted from BGR to RGB, flattened to one row per pixel
    and clustered with scikit-learn's ``KMeans`` (``random_state=30``,
    ``max_iter=500``).

    Args:
        image: Image array in BGR channel order (it is passed through
            ``cv2.COLOR_BGR2RGB``).
        num_colors: Number of clusters to fit.

    Returns:
        The fitted cluster centers cast with ``astype(int)``, one row of
        three channel values per cluster.
    """
    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).reshape(-1, 3)
    model = KMeans(n_clusters=num_colors, random_state=30, max_iter=500)
    model.fit(rgb_pixels)
    centers = model.cluster_centers_
    return centers.astype(int)

def merge_similar_colors_in_counter(color_counter, threshold=30):
    """Merge colors that lie within ``threshold`` of an earlier representative.

    Colors are visited in the counter's iteration order. Each color is
    assigned to the first already-chosen representative whose Euclidean
    distance is strictly below ``threshold``; if none exists, the color
    becomes a new representative itself. Only representatives ever appear as
    keys of the result, so a color that was merged away can never collect
    counts from later colors.

    Args:
        color_counter: Mapping of color (sequence of numeric channels) to
            count.
        threshold: Maximum (exclusive) Euclidean distance for two colors to
            be merged.

    Returns:
        Counter mapping representative colors (tuples of Python ints) to the
        summed integer counts of every color merged into them.
    """
    merged_counter = Counter()
    representatives = []  # colors that own a bucket, in first-seen order
    for color, count in color_counter.items():
        color = tuple(int(c) for c in color)
        target = None
        if representatives:
            dist = np.linalg.norm(
                np.array(representatives) - np.array(color), axis=1
            )
            close = np.flatnonzero(dist < threshold)
            if close.size > 0:
                target = representatives[close[0]]
        if target is None:
            # Nothing close enough yet: this color starts its own bucket.
            representatives.append(color)
            target = color
        merged_counter[target] += int(count)
    return merged_counter

def process_image(image_path, num_colors=7):
    """Extract the dominant colors of a single image file.

    The image is read with OpenCV, resized to 150x150 and handed to
    ``extract_dominant_colors``.

    Args:
        image_path: Path of the image file to read.
        num_colors: Number of dominant colors to extract.

    Returns:
        Dict mapping each dominant color (tuple of Python ints) to ``1``.
        An empty dict is returned when the image cannot be read or any step
        fails, so the return type is always a mapping.
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            print(f"Failed to read image: {image_path}")
            return {}
        image = cv2.resize(image, (150, 150))  # Resize once per image
        dominant_colors = extract_dominant_colors(image, num_colors)
        # Plain ints keep the keys JSON/CSV friendly further down the line.
        return {tuple(int(c) for c in color): 1 for color in dominant_colors}
    except Exception as e:
        # Best effort: one broken image must not abort the whole folder.
        print(f"Error processing {image_path}: {e}")
        return {}

def process_images_in_folder(folder_path, num_colors=7, threshold=45, max_workers=4):
    """Collect and merge the dominant colors of every image in a folder.

    Files ending in ``.png``, ``.jpg``, ``.jpeg`` or ``.bmp`` (case
    insensitive) are processed by ``process_image`` on a thread pool, the
    per-image colors are tallied, and similar colors are merged with
    ``merge_similar_colors_in_counter``.

    Args:
        folder_path: Directory whose image files are processed.
        num_colors: Number of dominant colors extracted per image.
        threshold: Distance threshold forwarded to the merge step.
        max_workers: Size of the thread pool.

    Returns:
        Counter of merged colors to the number of images they appeared in.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp')
    # os.listdir returns entries in arbitrary order and the merge step is
    # order dependent, so sort to make the result reproducible.
    image_paths = [
        os.path.join(folder_path, file_name)
        for file_name in sorted(os.listdir(folder_path))
        if file_name.lower().endswith(image_extensions)
    ]

    color_counter = Counter()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_image, image_path, num_colors)
            for image_path in image_paths
        ]
        for future in futures:
            color_counter.update(future.result())

    return merge_similar_colors_in_counter(color_counter, threshold=threshold)

def _build_color_bar(colors_with_counts, width=300, height=50):
    """Return a ``height`` x ``width`` RGB strip with one block per color."""
    color_bar = np.zeros((height, width, 3), dtype='uint8')
    # Evenly spaced integer edges cover the full width, so no trailing
    # pixels are left black when ``width`` is not divisible by the count.
    edges = np.linspace(0, width, len(colors_with_counts) + 1).astype(int)
    for (color, _), start_x, end_x in zip(colors_with_counts, edges[:-1], edges[1:]):
        color_bar[:, start_x:end_x, :] = np.array(color, dtype='uint8')
    return color_bar


def display_color_summary(colors_with_counts):
    """Display a horizontal strip showing each color as an equal-width block.

    Args:
        colors_with_counts: Sequence of ``(color, count)`` pairs; only the
            color (three channel values) is used.

    Returns:
        None. Nothing is plotted when the sequence is empty.
    """
    if len(colors_with_counts) == 0:
        # An empty month would otherwise divide by zero.
        print("No colors to display.")
        return

    color_bar = _build_color_bar(colors_with_counts)

    plt.figure(figsize=(6, 2))
    plt.axis("off")
    plt.imshow(color_bar)
    plt.show()

def save_color_data_to_csv(year_folder, month_folder, colors, counts):
    """Write one month's colors and counts to ``<year>_<month>_color_summary.csv``.

    The file has a ``Color (R, G, B)`` column and a ``Count`` column, with
    one row per ``(color, count)`` pair taken in lockstep from the inputs.

    Args:
        year_folder: Year folder name, used as the file name prefix.
        month_folder: Month folder name, used in the file name.
        colors: Sequence of colors.
        counts: Sequence of counts, parallel to ``colors``.
    """
    header = ['Color (R, G, B)', 'Count']
    output_file = f"{year_folder}_{month_folder}_color_summary.csv"

    with open(output_file, 'w', newline='') as csvfile:
        rows_out = csv.writer(csvfile)
        rows_out.writerow(header)
        rows_out.writerows(zip(colors, counts))

    print(f"Color data for {year_folder} - {month_folder} saved to {output_file}")


def process_year_and_month(base_path, num_colors=7):
    """Walk ``Photos_20*`` year folders and summarize every month folder.

    For each month directory the ten most common merged colors are stored in
    the result, written to a per-month CSV and shown as a color strip.

    Args:
        base_path: Directory containing the year folders.
        num_colors: Number of dominant colors extracted per image.

    Returns:
        Dict of year folder name -> month folder name -> list of
        ``(color, count)`` pairs.
    """
    results = {}
    for year_folder in os.listdir(base_path):
        year_path = os.path.join(base_path, year_folder)
        is_year_dir = os.path.isdir(year_path) and year_folder.startswith("Photos_20")
        if not is_year_dir:
            continue

        year_results = results[year_folder] = {}
        for month_folder in os.listdir(year_path):
            month_path = os.path.join(year_path, month_folder)
            if not os.path.isdir(month_path):
                continue

            print(f"Processing Year: {year_folder}, Month: {month_folder}")
            top_colors = process_images_in_folder(month_path, num_colors).most_common(10)
            year_results[month_folder] = [(tuple(rgb), n) for rgb, n in top_colors]

            # Split the pairs into parallel lists for the monthly CSV.
            colors = [rgb for rgb, _ in top_colors]
            colors_as_integers = [(int(r), int(g), int(b)) for r, g, b in colors]
            counts = [n for _, n in top_colors]
            print(colors)
            save_color_data_to_csv(year_folder, month_folder, colors_as_integers, counts)

            display_color_summary(top_colors)

    return results

# Base folder containing the "Photos_20*" year folders, each holding month folders
base_folder = r"C:\Users\Lenovo\Desktop"

# Process every year/month folder; 5 dominant colors are extracted per image
color_data = process_year_and_month(base_folder, num_colors=5)

# Persist the nested year -> month -> [(color, count), ...] summary as JSON
# (written relative to the current working directory)
output_file = "color_summary.json"
with open(output_file, "w") as f:
    json.dump(color_data, f, indent=4)

print(f"Color summary saved to {output_file}.")


Most dominant color of each year between 2016-2024 as a pie chart.

In [None]:
import pandas as pd
import numpy as np
from collections import Counter
import altair as alt
import os




def process_csv_files_in_folder(folder_path, file_n, num_colors=7):
    """Aggregate the color counts of matching summary CSVs and merge similar colors.

    Every file in ``folder_path`` whose name starts with ``file_n`` and ends
    with ``summary.csv`` (case insensitive) is read from that folder. Its
    ``Color (R, G, B)`` column is parsed into tuples and its ``Count`` column
    is summed per color.

    Args:
        folder_path: Directory containing the summary CSV files.
        file_n: Required file name prefix, e.g. ``"Photos_2023"``.
        num_colors: Despite its name, this value is forwarded to
            ``merge_similar_colors_in_counter`` as the distance threshold.
            NOTE(review): confirm whether a color count or a threshold was
            intended before renaming.

    Returns:
        Counter of merged colors to summed counts.

    NOTE(review): any file matching both prefix and suffix is aggregated,
    including previously written aggregate summaries if they live in the
    same folder -- confirm that is intended.
    """
    import ast  # local import: used only to parse the stored color tuples

    color_counter = Counter()
    # Sorted so the order-dependent merge step gives reproducible results.
    for file_name in sorted(os.listdir(folder_path)):
        if not (file_name.lower().endswith('summary.csv') and file_name.startswith(file_n)):
            continue
        # Read from the listed folder, not from the current working directory.
        df = pd.read_csv(os.path.join(folder_path, file_name))
        # literal_eval only accepts Python literals, unlike eval.
        for color_text, count in zip(df['Color (R, G, B)'], df['Count']):
            color_counter[ast.literal_eval(color_text)] += count
    # Reduce to similar colors by merging them
    return merge_similar_colors_in_counter(color_counter, num_colors)

def merge_similar_colors_in_counter(color_counter, threshold=30):
    """Merge colors that lie within ``threshold`` of an earlier representative.

    Colors are visited in the counter's iteration order. Each color is
    assigned to the first already-chosen representative whose Euclidean
    distance is strictly below ``threshold``; if none exists, the color
    becomes a new representative itself. Only representatives ever appear as
    keys of the result, so a color that was merged away can never collect
    counts from later colors.

    Args:
        color_counter: Mapping of color (sequence of numeric channels) to
            count.
        threshold: Maximum (exclusive) Euclidean distance for two colors to
            be merged.

    Returns:
        Counter mapping representative colors (tuples of Python ints) to the
        summed integer counts of every color merged into them.
    """
    merged_counter = Counter()
    representatives = []  # colors that own a bucket, in first-seen order
    for color, count in color_counter.items():
        color = tuple(int(c) for c in color)
        target = None
        if representatives:
            dist = np.linalg.norm(
                np.array(representatives) - np.array(color), axis=1
            )
            close = np.flatnonzero(dist < threshold)
            if close.size > 0:
                target = representatives[close[0]]
        if target is None:
            # Nothing close enough yet: this color starts its own bucket.
            representatives.append(color)
            target = color
        merged_counter[target] += int(count)
    return merged_counter
                     
        
# Load the color data from CSV
def load_and_process_color_data(csv_file="color_summary.csv"):
    """Read a color summary CSV and parse its color column into tuples.

    Args:
        csv_file: Path of a CSV with a ``Color (R, G, B)`` column holding
            tuple literals such as ``"(12, 34, 56)"``.

    Returns:
        DataFrame with the ``Color (R, G, B)`` column converted to tuples.

    Raises:
        ValueError or SyntaxError: If a color cell is not a Python literal.
    """
    import ast  # local import: used only to parse the stored color tuples

    # Read the CSV file into a DataFrame
    df = pd.read_csv(csv_file)

    # Parse the color strings with literal_eval, which accepts only literals
    # and therefore cannot execute code embedded in the file (eval could).
    df['Color (R, G, B)'] = df['Color (R, G, B)'].apply(ast.literal_eval)
    return df

# Visualize the color data (Pie Chart)
def visualize_color_data(df, output_file="g23.html", title=""):
    """Show the (up to) ten largest color counts as a pie chart and save it.

    Args:
        df: DataFrame with a ``Color (R, G, B)`` column of RGB tuples and a
            ``Count`` column.
        output_file: Path the chart is saved to.
        title: Chart title.

    Returns:
        None. Nothing is drawn when ``df`` has no rows.
    """
    top_colors = df.nlargest(10, 'Count')

    # Extract RGB values and counts
    colors = top_colors['Color (R, G, B)'].tolist()
    counts = top_colors['Count'].tolist()
    if not counts:
        print("No color data to visualize.")
        return

    # "rgb" + tuple repr gives a CSS color string such as "rgb(1, 2, 3)".
    css_colors = [f"rgb{rgb}" for rgb in colors]

    # Trim the rank labels to the rows actually present; a fixed list of ten
    # makes the DataFrame constructor fail when fewer colors are available.
    rank_labels = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
    labels = rank_labels[:len(counts)]

    all_count = sum(counts)
    percentage = [num / all_count * 100 if all_count else 0.0 for num in counts]

    data = pd.DataFrame({
        'labels': labels,
        'counts': counts,
        'colors': colors,
        'percentage': percentage
    })

    # Create the pie chart using Altair; each rank label is mapped to its own
    # color through an explicit domain/range scale.
    pie_chart = alt.Chart(data).mark_arc().encode(
        theta=alt.Theta(field="counts", type="quantitative"),
        color=alt.Color(field="labels", type="nominal", scale=alt.Scale(domain=labels, range=css_colors)),
        tooltip=['labels', 'counts', alt.Tooltip('percentage', format='.1f')]
    ).properties(
        title=title
    )

    # Display the pie chart
    pie_chart.show()

    pie_chart.save(output_file)
    
def save_color_data_to_csv(file_name, colors, counts):
    """Write colors and counts to ``<file_name>_color_summary.csv``.

    Args:
        file_name: Prefix of the output file name.
        colors: Values for the ``Color (R, G, B)`` column.
        counts: Values for the ``Count`` column, parallel to ``colors``.
    """
    csv_file_path = f"{file_name}_color_summary.csv"
    columns = {'Color (R, G, B)': colors, 'Count': counts}
    pd.DataFrame(columns).to_csv(csv_file_path, index=False)
    print(f"Data saved to {csv_file_path}")

folder_name = r"C:\Users\Lenovo\Desktop\google_takeout"

# Aggregate every "Photos_2023*summary.csv" file; the third argument (10) is
# what process_csv_files_in_folder forwards to the merge step as its threshold.
color_data = process_csv_files_in_folder(folder_name, "Photos_2023", 10)

# Split the merged counter into parallel lists for the CSV writer.
colors = list(color_data.keys())
counts = list(color_data.values())

# Round-trip through the yearly CSV, then draw the pie chart from it.
save_color_data_to_csv("Photos_2023", colors, counts)

color_data_df = load_and_process_color_data("Photos_2023_color_summary.csv")

visualize_color_data(color_data_df)


Most dominant colors between 2016-2024 as a pie chart.

In [None]:
import pandas as pd
import os
import numpy as np
from collections import Counter
import altair as alt




def process_csv_files_in_folder(folder_path, file_n, num_colors=7):
    """Aggregate the color counts of matching summary CSVs and merge similar colors.

    Every file in ``folder_path`` whose name starts with ``file_n`` and ends
    with ``summary.csv`` (case insensitive) is read from that folder. Its
    ``Color (R, G, B)`` column is parsed into tuples and its ``Count`` column
    is summed per color. The raw aggregated counter is printed before merging.

    Args:
        folder_path: Directory containing the summary CSV files.
        file_n: Required file name prefix, e.g. ``"Photos"``.
        num_colors: Despite its name, this value is forwarded to
            ``merge_similar_colors_in_counter`` as the distance threshold.
            NOTE(review): confirm whether a color count or a threshold was
            intended before renaming.

    Returns:
        Counter of merged colors to summed counts.

    NOTE(review): any file matching both prefix and suffix is aggregated,
    including previously written aggregate summaries if they live in the
    same folder -- confirm that is intended.
    """
    import ast  # local import: used only to parse the stored color tuples

    color_counter = Counter()
    # Sorted so the order-dependent merge step gives reproducible results.
    for file_name in sorted(os.listdir(folder_path)):
        if not (file_name.lower().endswith('summary.csv') and file_name.startswith(file_n)):
            continue
        # Read from the listed folder, not from the current working directory.
        df = pd.read_csv(os.path.join(folder_path, file_name))
        # Aggregate colors and counts; literal_eval only accepts literals.
        for color_text, count in zip(df['Color (R, G, B)'], df['Count']):
            color_counter[ast.literal_eval(color_text)] += count
    print(color_counter)
    # Reduce to similar colors by merging them
    return merge_similar_colors_in_counter(color_counter, num_colors)

def merge_similar_colors_in_counter(color_counter, threshold=30):
    """Merge colors that lie within ``threshold`` of an earlier representative.

    Colors are visited in the counter's iteration order. Each color is
    assigned to the first already-chosen representative whose Euclidean
    distance is strictly below ``threshold``; if none exists, the color
    becomes a new representative itself. Only representatives ever appear as
    keys of the result, so a color that was merged away can never collect
    counts from later colors.

    Args:
        color_counter: Mapping of color (sequence of numeric channels) to
            count.
        threshold: Maximum (exclusive) Euclidean distance for two colors to
            be merged.

    Returns:
        Counter mapping representative colors (tuples of Python ints) to the
        summed integer counts of every color merged into them.
    """
    merged_counter = Counter()
    representatives = []  # colors that own a bucket, in first-seen order
    for color, count in color_counter.items():
        color = tuple(int(c) for c in color)
        target = None
        if representatives:
            dist = np.linalg.norm(
                np.array(representatives) - np.array(color), axis=1
            )
            close = np.flatnonzero(dist < threshold)
            if close.size > 0:
                target = representatives[close[0]]
        if target is None:
            # Nothing close enough yet: this color starts its own bucket.
            representatives.append(color)
            target = color
        merged_counter[target] += int(count)
    return merged_counter
                     
        
# Load the color data from CSV
def load_and_process_color_data(csv_file="color_summary.csv"):
    """Read a color summary CSV and parse its color column into tuples.

    Args:
        csv_file: Path of a CSV with a ``Color (R, G, B)`` column holding
            tuple literals such as ``"(12, 34, 56)"``.

    Returns:
        DataFrame with the ``Color (R, G, B)`` column converted to tuples.

    Raises:
        ValueError or SyntaxError: If a color cell is not a Python literal.
    """
    import ast  # local import: used only to parse the stored color tuples

    # Read the CSV file into a DataFrame
    df = pd.read_csv(csv_file)

    # Parse the color strings with literal_eval, which accepts only literals
    # and therefore cannot execute code embedded in the file (eval could).
    df['Color (R, G, B)'] = df['Color (R, G, B)'].apply(ast.literal_eval)

    return df

# Visualize the color data (Pie Chart)
def visualize_color_data(df, output_file="gA.html", title="2016-2024"):
    """Show the (up to) ten largest color counts as a pie chart and save it.

    Args:
        df: DataFrame with a ``Color (R, G, B)`` column of RGB tuples and a
            ``Count`` column.
        output_file: Path the chart is saved to.
        title: Chart title.

    Returns:
        None. Nothing is drawn when ``df`` has no rows.
    """
    # Get the top 10 colors
    top_colors = df.nlargest(10, 'Count')

    # Extract RGB values and counts
    colors = top_colors['Color (R, G, B)'].tolist()
    counts = top_colors['Count'].tolist()
    if not counts:
        print("No color data to visualize.")
        return

    # "rgb" + tuple repr gives a CSS color string such as "rgb(1, 2, 3)".
    css_colors = [f"rgb{rgb}" for rgb in colors]

    # Trim the rank labels to the rows actually present; a fixed list of ten
    # makes the DataFrame constructor fail when fewer colors are available.
    rank_labels = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
    labels = rank_labels[:len(counts)]

    all_count = sum(counts)
    percentage = [num / all_count * 100 if all_count else 0.0 for num in counts]

    data = pd.DataFrame({
        'labels': labels,
        'counts': counts,
        'colors': colors,
        'percentage': percentage
    })

    # Create the pie chart using Altair; each rank label is mapped to its own
    # color through an explicit domain/range scale.
    pie_chart = alt.Chart(data).mark_arc().encode(
        theta=alt.Theta(field="counts", type="quantitative"),
        color=alt.Color(field="labels", type="nominal", scale=alt.Scale(domain=labels, range=css_colors)),
        tooltip=['labels', 'counts', alt.Tooltip('percentage', format='.1f')]
    ).properties(
        title=title
    )

    # Display the pie chart
    pie_chart.show()

    pie_chart.save(output_file)
    
def save_color_data_to_csv(file_name, colors, counts):
    """Write colors and counts to ``<file_name>_color_summary.csv``.

    Args:
        file_name: Prefix of the output file name.
        colors: Values for the ``Color (R, G, B)`` column.
        counts: Values for the ``Count`` column, parallel to ``colors``.
    """
    csv_file_path = f"{file_name}_color_summary.csv"

    # Two parallel columns, written without the DataFrame index.
    columns = {'Color (R, G, B)': colors, 'Count': counts}
    pd.DataFrame(columns).to_csv(csv_file_path, index=False)

    print(f"Data saved to {csv_file_path}")


folder_name = r"C:\Users\Lenovo\Desktop\google_takeout"

# Aggregate every "Photos*summary.csv" file; the third argument (10) is what
# process_csv_files_in_folder forwards to the merge step as its threshold.
color_data = process_csv_files_in_folder(folder_name, "Photos", 10)
colors = list(color_data.keys())
counts = list(color_data.values())

# The save step is disabled; the chart below is drawn from an existing
# "All_years_color_summary.csv" in the current working directory.
#save_color_data_to_csv("All_years",colors,counts)
color_data_df = load_and_process_color_data("All_years_color_summary.csv")
visualize_color_data(color_data_df)


Each month's color change through the years as a stacked bar chart.

In [None]:
import json
import altair as alt
import pandas as pd

# Load the JSON data: a mapping of year -> month -> list of [color, count]
# pairs, which is how extract_colors_and_counts below walks it
with open("color_summary.json", "r") as file:
    data = json.load(file)

# Extract colors and counts from the JSON data
def extract_colors_and_counts(json_data, month_prefix="Dec"):
    """Flatten one month's colors across all years into parallel lists.

    Args:
        json_data: Mapping of year -> month -> iterable of ``(color, count)``
            pairs, where ``color`` is indexable with three channel values.
        month_prefix: Only months whose name starts with this prefix are
            kept.

    Returns:
        Tuple ``(colors, counts, years, months)`` of equal-length lists;
        each color is a CSS string of the form ``"rgb(r,g,b)"``.
    """
    colors = []
    counts = []
    years = []
    months = []
    for year, months_data in json_data.items():
        for month, color_data in months_data.items():
            # The month test does not depend on the color, so check it once.
            if not month.startswith(month_prefix):
                continue
            for color, count in color_data:
                # Store the color as a CSS rgb() string for Altair
                colors.append(f"rgb({color[0]},{color[1]},{color[2]})")
                counts.append(count)
                years.append(year)
                months.append(month)
    return colors, counts, years, months

colors, counts, years, months = extract_colors_and_counts(data)

# Create a DataFrame for Altair
df = pd.DataFrame({
    'Color': colors,
    'Count': counts,
    'Year': years,
    'Month': months
})

# Create the Altair bar chart
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('Month:N', title='Months'),
    y=alt.Y('Count:Q', title='Counts'),
    color=alt.Color('Color:N', legend=None, scale=None),  # scale=None: use the rgb() strings as literal colors
    tooltip=['Month:N', 'Count:Q', 'Color:N']  # Display RGB and counts on hover
).properties(
    width=600,
    height=400,
    title="Color Counts"
)

# configure_* returns a new chart instead of modifying the original, so keep
# the configured chart and use it for both display and export; otherwise the
# saved HTML would not match what is shown.
chart = chart.configure_axis(
    labelAngle=0  # Keep axis labels horizontal
).configure_view(
    stroke=None  # Remove border
)
chart.show()
chart.save("dec.html")

Each year's color differences through the months as a stacked bar chart.

In [None]:
import json
import altair as alt
import pandas as pd

# Load the JSON data: a mapping of year -> month -> list of [color, count]
# pairs, which is how extract_colors_and_counts below walks it
with open("color_summary.json", "r") as file:
    data = json.load(file)

# Flatten one year's monthly colors out of the JSON data
def extract_colors_and_counts(json_data,y):
    """Flatten the colors of every year whose name ends with ``y``.

    Args:
        json_data: Mapping of year -> month -> iterable of ``(color, count)``
            pairs, where ``color`` is indexable with three channel values.
        y: Suffix a year key must end with to be included (e.g. ``"24"``).

    Returns:
        Tuple ``(colors, counts, years, months)`` of equal-length lists;
        each color is a CSS string of the form ``"rgb(r,g,b)"``.
    """
    # One (css_color, count, year, month) record per matching color entry.
    records = [
        (f"rgb({rgb[0]},{rgb[1]},{rgb[2]})", n, year, month)
        for year, months_data in json_data.items()
        for month, color_data in months_data.items()
        if year.endswith(y)
        for rgb, n in color_data
    ]
    colors = [rec[0] for rec in records]
    counts = [rec[1] for rec in records]
    years = [rec[2] for rec in records]
    months = [rec[3] for rec in records]
    return colors, counts, years, months
# Two-digit year suffix selecting which year's months are charted
y = "24"
colors, counts, years, months = extract_colors_and_counts(data, y)

# Create a DataFrame for Altair
df = pd.DataFrame({
    'Color': colors,
    'Count': counts,
    'Year': years,
    'Month': months
})

# Calendar order for the x axis; month folder names carry the year suffix
month_order = [f'Jan{y}', f'Feb{y}', f'March{y}', f'April{y}', f'May{y}', f'June{y}', f'July{y}', f'Aug{y}', f'Sep{y}', f'Oct{y}', f'Nov{y}', f'Dec{y}']

# Create the Altair bar chart
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('Month:N', title='Months', sort=month_order),
    y=alt.Y('Count:Q', title='Counts'),
    color=alt.Color('Color:N', legend=None, scale=None),  # scale=None: use the rgb() strings as literal colors
    tooltip=['Month:N', 'Count:Q', 'Color:N']  # Display RGB and counts on hover
).properties(
    width=600,
    height=500,
    title="Color Counts"
)

# configure_* returns a new chart instead of modifying the original, so keep
# the configured chart and use it for both display and export; otherwise the
# saved HTML would not match what is shown.
chart = chart.configure_axis(
    labelAngle=0  # Keep axis labels horizontal
).configure_view(
    stroke=None  # Remove border
)
chart.show()
chart.save("24.html")