In [1]:
import json
import numpy as np

In [2]:
import colorsys

In [3]:
n_colors = '70b'

properties_dir = '/mnt/ialabnas/homes/fidelrio/clevr-dataset-gen/image_generation/data/multicolored'
properties_path = f'{properties_dir}/properties-{n_colors}.json'

with open(properties_path) as fp:
    properties = json.load(fp)
    named_colors = properties['colors']

In [4]:
color_names = list(named_colors.keys())
raw_color_values = [named_colors[cn] for cn in color_names]

In [5]:
rgb_color_values = np.array(raw_color_values) / 255

hls_color_values = np.array([colorsys.rgb_to_hls(*c) for c in raw_color_values])
hls_max_min = (hls_color_values.max(0) - hls_color_values.min(0))
hls_min = hls_color_values.min(0)
hls_color_values = (hls_color_values - hls_min) / hls_max_min

hsv_color_values = np.array([colorsys.rgb_to_hsv(*c) for c in raw_color_values])
hsv_max_min = (hsv_color_values.max(0) - hsv_color_values.min(0))
hsv_min = hsv_color_values.min(0)
hsv_color_values = (hsv_color_values - hsv_min) / hsv_max_min

color_values_by_repr = {
    'rgb': rgb_color_values,
    'hls': hls_color_values,
    'hsv': hsv_color_values,
}

raw_color_values = np.array(raw_color_values)

In [6]:
rgb_color_values.shape, hls_color_values.shape, hsv_color_values.shape

((70, 3), (70, 3), (70, 3))

In [7]:
num_clusters = 8
balanced = True
representation = 'hls'

color_values = color_values_by_repr[representation]

In [8]:
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min

def balanced_k_means(X, n_clusters, max_iter=100, random_state=None):
    np.random.seed(random_state)
    n_samples, n_features = X.shape
    size_per_cluster = n_samples // n_clusters

    # Initialize centroids by randomly selecting k unique points
    initial_indices = np.random.choice(n_samples, n_clusters, replace=False)
    centroids = X[initial_indices]

    for iteration in range(max_iter):
        # Assign each sample to the nearest centroid
        labels, _ = pairwise_distances_argmin_min(X, centroids)
        
        # Create clusters ensuring balance
        clusters = [[] for _ in range(n_clusters)]
        for idx, label in enumerate(labels):
            clusters[label].append(idx)
        
        # Balance clusters by redistributing samples if necessary
        for cluster in clusters:
            while len(cluster) > size_per_cluster+1:
                for target_cluster in clusters:
                    if len(target_cluster) < size_per_cluster:
                        target_cluster.append(cluster.pop())
                        if len(cluster) == size_per_cluster or len(cluster) == (size_per_cluster+1):
                            break
                if len(cluster) == size_per_cluster or len(cluster) == (size_per_cluster+1):
                    break
        
        # Calculate new centroids
        new_centroids = np.array([X[cluster].mean(axis=0) for cluster in clusters])
        
        # Check for convergence
        if np.all(centroids == new_centroids):
            break
        centroids = new_centroids
    
    assignation = np.zeros(len(X), dtype=int)
    for cluster_idx, cluster in enumerate(clusters):
        assignation[cluster] = cluster_idx
    return assignation, centroids

def k_means(X, num_clusters):
    kmeans = KMeans(n_clusters=num_clusters)
    y = kmeans.fit_predict(X)
    cluster_centers = kmeans.cluster_centers_
    return y, cluster_centers


if balanced:
    color_assign, cluster_centers = balanced_k_means(color_values, n_clusters=num_clusters, max_iter=100)
else:
    color_assign, cluster_centers = k_means(color_values, n_clusters=num_clusters)

KeyboardInterrupt: 

In [9]:
centroid_closest_colors, _ = pairwise_distances_argmin_min(cluster_centers, color_values)

centroid_color_names = [color_names[cidx] for cidx in centroid_closest_colors]

color_translation = {}
for cname, cluster in zip(color_names, color_assign.tolist()):
    color_translation[cname] = centroid_color_names[cluster]

NameError: name 'cluster_centers' is not defined

In [10]:
# cluster_centers, color_values[centroid_closest_colors]

In [11]:
centroid_color_names

NameError: name 'centroid_color_names' is not defined

In [12]:
color_translation

NameError: name 'color_translation' is not defined

In [13]:
# with open(f'data/color_translation_{representation}_169b_to_{num_clusters}.json', 'w') as fp:
#     json.dump(color_translation, fp)

In [14]:
import math
import matplotlib.pyplot as plt

def display_colors(color_array, patches_per_row=10, patch_size=1):
    """
    Display a list of colors using matplotlib in multiple rows and larger patches.
    
    Parameters:
    color_array (numpy.ndarray): A 2D NumPy array with shape (n, 3), where each row represents an RGB color.
    patches_per_row (int): The number of color patches per row.
    patch_size (int or float): The size of each color patch.
    """
    num_colors = color_array.shape[0]
    num_rows = math.ceil(num_colors / patches_per_row)
    
    # Create a figure and a set of subplots
    fig, ax = plt.subplots(figsize=(patches_per_row * patch_size, num_rows * patch_size))
    
    # Create a patch for each color
    for idx, color in enumerate(color_array):
        # Normalize the RGB values to the range [0, 1]
        normalized_color = color / 255.0
        row = idx // patches_per_row
        col = idx % patches_per_row
        ax.add_patch(
            plt.Rectangle(
                (col * patch_size, row * patch_size), patch_size, patch_size, color=normalized_color))
    
    # Set the limits and hide the axes
    ax.set_xlim(0, patches_per_row * patch_size)
    ax.set_ylim(0, num_rows * patch_size)
    ax.invert_yaxis()
    ax.axis('off')
    
    plt.show()

In [15]:
hls_cluster_centers = cluster_centers*hls_max_min + hls_min
hls_cluster_centers = [np.clip(colorsys.hls_to_rgb(*c),0,255) for c in hls_cluster_centers]

hsv_cluster_centers = cluster_centers*hsv_max_min + hsv_min
hsv_cluster_centers = [np.clip(colorsys.hsv_to_rgb(*c),0,255) for c in hsv_cluster_centers]

cluster_centers_rgb = {
    'rgb': cluster_centers*255,
    'hls': np.array(hls_cluster_centers),
    'hsv': np.array(hsv_cluster_centers),
}[representation] 
display_colors(cluster_centers_rgb)

NameError: name 'cluster_centers' is not defined

In [None]:
display_colors(np.array([named_colors[cname] for cname in centroid_color_names]))

In [None]:
display_colors(raw_color_values[color_assign == 0])

In [16]:
display_colors(raw_color_values[color_assign == 1])

NameError: name 'color_assign' is not defined

In [17]:
display_colors(raw_color_values[color_assign == 2])

NameError: name 'color_assign' is not defined

In [18]:
display_colors(raw_color_values[color_assign == 3])

NameError: name 'color_assign' is not defined

In [19]:
display_colors(raw_color_values[color_assign == 4])

NameError: name 'color_assign' is not defined

In [20]:
display_colors(raw_color_values[color_assign == 5])

NameError: name 'color_assign' is not defined

In [21]:
display_colors(raw_color_values[color_assign == 6])

NameError: name 'color_assign' is not defined

In [22]:
display_colors(raw_color_values[color_assign == 7])

NameError: name 'color_assign' is not defined

In [23]:
num_clusters = 8
for rep in ['rgb','hls','hsv']:
    with open(f'data/color_translation_{rep}_169b_to_{num_clusters}.json') as fp:
        color_hash = json.load(fp)

    print([color_hash[c] for c in ["red", "green", "purple", "cyan"]])
    print([color_hash[c] for c in ["dim-grey", "blue", "brown", "yellow"]])
    print()


['sienna', 'dark-slate-gray', 'dark-orchid', 'medium-turquoise']
['light-steel-blue', 'dark-slate-gray', 'sienna', 'golden-rod']

['golden-rod', 'lime', 'dark-violet', 'lime']
['rosy-brown', 'dark-violet', 'golden-rod', 'lime']

['golden-rod', 'teal', 'teal', 'turquoise']
['dark-khaki', 'turquoise', 'golden-rod', 'golden-rod']



In [47]:
num_clusters = 4
n_colors = '70b'

In [48]:
if n_colors == '8':
    properties_dir = '/mnt/ialabnas/homes/fidelrio/clevr-dataset-gen/image_generation/data/'
    properties_path = f'{properties_dir}/properties.json'
else:
    properties_dir = '/mnt/ialabnas/homes/fidelrio/clevr-dataset-gen/image_generation/data/multicolored'
    properties_path = f'{properties_dir}/properties-{n_colors}.json'

with open(properties_path) as fp:
    properties = json.load(fp)
    named_colors = properties['colors']

In [49]:
color_names = list(named_colors.keys())
raw_color_values = [named_colors[cn] for cn in color_names]

In [50]:
if n_colors == '8':
    base_dir = '/mnt/ialabnas/homes/fidelrio/clevr-dataset-gen/image_generation/data/'
    B_split_path = f'{base_dir}/CoGenT_B.json'
else:
    base_dir = '/mnt/ialabnas/homes/fidelrio/clevr-dataset-gen/image_generation/data/multicolored'
    B_split_path = f'{base_dir}/CoGenT_B-{n_colors}.json'

with open(B_split_path) as fp:
    split_prop = json.load(fp)
    systematic_color_groups = [
        split_prop["cube"],
        split_prop["cylinder"],
    ]

In [51]:
from itertools import chain

# systematic_color_groups = [["red", "green", "purple", "cyan"],
#                            ["dim-grey", "blue", "brown", "yellow"]]
systematic_colors = list(chain(*systematic_color_groups))
non_systematic_colors = [c for c in color_names if c not in systematic_colors]

In [52]:
len(systematic_colors), len(non_systematic_colors)

(8, 62)

In [53]:
systematic_color_groups

[['indian-red', 'forest-green', 'purple', 'cyan'],
 ['grey', 'blue', 'brown', 'light-yellow']]

In [54]:
clusters_per_group = num_clusters // len(systematic_color_groups)
assert num_clusters % len(systematic_color_groups) == 0 or num_clusters > len(systematic_colors)

In [56]:
if num_clusters > len(systematic_colors):
    random.shuffle(non_systematic_colors)
    diff = num_clusters - len(systematic_colors)
    target_colors = list(systematic_colors) + non_systematic_colors[:diff]
else:
    target_colors_by_group = [group[:clusters_per_group] for group in systematic_color_groups]
    target_colors = list(chain(*target_colors_by_group))

origin_colors = [c for c in color_names if c not in target_colors]

In [57]:
len(target_colors), len(origin_colors)

(4, 66)

In [58]:
import random
random.shuffle(origin_colors)

equal_pieces_size = len(origin_colors) // len(target_colors)
unequal_pieces_size = equal_pieces_size + 1
num_unequal_pieces = len(origin_colors) % len(target_colors)
num_equal_pieces = len(target_colors) - num_unequal_pieces
offset = equal_pieces_size * num_equal_pieces

start_end_idxs = ([(i*equal_pieces_size,(i+1)*equal_pieces_size) for i in range(num_equal_pieces)]
                  + [(offset+i*unequal_pieces_size,offset+(i+1)*unequal_pieces_size) for i in range(num_unequal_pieces)])

systematic_color_translation = {}
for pcolor, (start, end) in zip(target_colors, start_end_idxs):
    systematic_color_translation[pcolor] = pcolor
    for color in origin_colors[start:end]:
        systematic_color_translation[color] = pcolor

In [59]:
print(f'data/color_translation_random_{n_colors}_to_{num_clusters}.json')
with open(f'data/color_translation_random_{n_colors}_to_{num_clusters}.json', 'w') as fp:
    json.dump(systematic_color_translation, fp)

data/color_translation_random_70b_to_4.json


In [60]:
systematic_color_translation

{'indian-red': 'indian-red',
 'lavender': 'indian-red',
 'dark-magenta': 'indian-red',
 'light-salmon': 'indian-red',
 'medium-turquoise': 'indian-red',
 'light-slate-gray': 'indian-red',
 'maroon': 'indian-red',
 'plum': 'indian-red',
 'light-yellow': 'indian-red',
 'burly-wood': 'indian-red',
 'deep-pink': 'indian-red',
 'golden-rod': 'indian-red',
 'wheat': 'indian-red',
 'dark-khaki': 'indian-red',
 'bisque': 'indian-red',
 'brown': 'indian-red',
 'yellow-green': 'indian-red',
 'forest-green': 'forest-green',
 'honeydew': 'forest-green',
 'chartreuse': 'forest-green',
 'mint-cream': 'forest-green',
 'navajo-white': 'forest-green',
 'tomato': 'forest-green',
 'gainsboro': 'forest-green',
 'dark-turquoise': 'forest-green',
 'sea-green': 'forest-green',
 'corn-flower-blue': 'forest-green',
 'silver': 'forest-green',
 'dark-orchid': 'forest-green',
 'sky-blue': 'forest-green',
 'papaya-whip': 'forest-green',
 'olive-drab': 'forest-green',
 'blue-violet': 'forest-green',
 'dark-cyan': '

In [61]:
len(set(systematic_color_translation.values())), len(set(systematic_color_translation.keys()))

(4, 70)