In [17]:
import pandas as pd
df = pd.read_json('./data.json')
df.head()
df.columns
pd.set_option('display.max_colwidth', None)



In [18]:

import requests
from colorthief import ColorThief
from io import BytesIO

# Function to get the dominant color from an image URL
def get_main_color(image_url):
    try:
        # Fetch the image using the requests library
        response = requests.get(image_url)
        image = BytesIO(response.content)
        
        # Use ColorThief to get the dominant color
        color_thief = ColorThief(image)
        dominant_color = color_thief.get_color(quality=10)  # Get the dominant color (10 = lower quality for faster processing)
        
        # Convert dominant color to a hex string
        return f"#{dominant_color[0]:02x}{dominant_color[1]:02x}{dominant_color[2]:02x}"
    except Exception as e:
        return None  # In case of any error (e.g., invalid image link)



In [19]:
import pandas as pd
import requests
from PIL import Image
from sklearn.cluster import KMeans
import numpy as np
from io import BytesIO


# Function to get multiple main colors from an image URL using KMeans clustering
def get_main_colors2(image_url, n_colors=3):
    try:
        # Fetch the image using the requests library
        response = requests.get(image_url)
        image = Image.open(BytesIO(response.content))

        # Resize the image to speed up processing (optional)
        image = image.resize((image.width // 5, image.height // 5))

        # Convert the image to RGB (in case it's in another format like RGBA)
        image = image.convert('RGB')

        # Get the pixels as a numpy array
        pixels = np.array(image)

        # Reshape the pixel array to a 2D array (each row is a pixel)
        pixels = pixels.reshape(-1, 3)

        # Use KMeans clustering to find the most common colors
        kmeans = KMeans(n_clusters=n_colors, random_state=42)
        kmeans.fit(pixels)

        # Get the RGB values of the cluster centers (the main colors)
        colors = kmeans.cluster_centers_

        # Convert RGB values to hex
        hex_colors = [f"#{int(color[0]):02x}{int(color[1]):02x}{int(color[2]):02x}" for color in colors]
        return hex_colors
    except Exception as e:
        print(f"Error processing image {image_url}: {e}")
        return []



In [27]:
import numpy as np
from PIL import Image
from sklearn.cluster import KMeans
import requests
from io import BytesIO
def rgb_to_hex(rgb):
    return '#{:02x}{:02x}{:02x}'.format(rgb[0], rgb[1], rgb[2])

# Function to get the main colors and the bounding boxes for those colors
def get_main_colors_and_swatch_coords(image_url, n_colors=3, tolerance=30):
    try:
        # Fetch the image using the requests library
        response = requests.get(image_url)
        image = Image.open(BytesIO(response.content))

        # Resize the image to speed up processing (optional)
        image = image.resize((image.width // 5, image.height // 5))

        # Convert the image to RGB (in case it's in another format like RGBA)
        image = image.convert('RGB')

        # Get the pixels as a numpy array
        pixels = np.array(image)

        # Reshape the pixel array to a 2D array (each row is a pixel)
        pixels = pixels.reshape(-1, 3)

        # Use KMeans clustering to find the most common colors
        kmeans = KMeans(n_clusters=n_colors, random_state=42)
        kmeans.fit(pixels)

        # Get the RGB values of the cluster centers (the main colors)
        colors = kmeans.cluster_centers_.astype(int)
        print("NUM COLORS", n_colors, len(colors))

        # Assign each pixel to the nearest cluster
        labels = kmeans.labels_

        # Prepare a list to store bounding box details (x, y, width, height, color)
        swatches = []

        # For each color (cluster), find the bounding box (min, max x and y)
        for color in colors:
            # Find the pixels that are closest to the current color (within tolerance)
            # We'll find pixels where the color difference is within a given tolerance
            diffs = np.abs(pixels - color)
            distances = np.sqrt(np.sum(diffs**2, axis=1))
            matching_pixels = np.where(distances <= tolerance)[0]  # Pixel indices that match the color

            if len(matching_pixels) == 0:
                continue  # Skip if no matching pixels are found for this color

            # Get the x, y coordinates of these pixels
            y_coords, x_coords = np.unravel_index(matching_pixels, (image.height, image.width))

            # Find the bounding box for the current color
            min_x = np.min(x_coords)
            max_x = np.max(x_coords)
            min_y = np.min(y_coords)
            max_y = np.max(y_coords)

            # Add the swatch information (x, y, width, height, color)
            swatches.append({
                'color': rgb_to_hex(color),  # RGB color as a tuple
                'x': int(min_x),
                'y': int(min_y),
                'width': int(max_x - min_x),
                'height': int(max_y - min_y),
                'origional_image_width':image.size[0],
                'selected':0 #0 if unselected, 1 if selectd
            })

        # Return the list of swatch details (coordinates, size, color)
        return swatches

    except Exception as e:
        print(f"Error processing image {image_url}: {e}")
        return []

# Example usage
image_url = 'https://ids.si.edu/ids/deliveryService?id=NMAAHC-2007_3_568_001'  # Replace with your image URL
swatches = get_main_colors_and_swatch_coords(image_url, n_colors=5, tolerance = 1)

# Print swatch details: x, y, width, height, color
import pprint
pprint.pprint(swatches)



NUM COLORS 5 5
[{'color': '#aa5f30',
  'height': 0,
  'origional_image_width': 248,
  'selected': 0,
  'width': 0,
  'x': 83,
  'y': 64},
 {'color': '#dedfda',
  'height': 354,
  'origional_image_width': 248,
  'selected': 0,
  'width': 247,
  'x': 0,
  'y': 0},
 {'color': '#6e3112',
  'height': 45,
  'origional_image_width': 248,
  'selected': 0,
  'width': 165,
  'x': 53,
  'y': 95}]


In [28]:

# Apply the function to the DataFrame
df = df[:10]
df['main_color'] = df['image_link'].apply(get_main_colors2)
df['swatches'] =df['image_link'].apply(get_main_colors_and_swatch_coords)


NUM COLORS 3 3
NUM COLORS 3 3
NUM COLORS 3 3
NUM COLORS 3 3
NUM COLORS 3 3
NUM COLORS 3 3
NUM COLORS 3 3
NUM COLORS 3 3
NUM COLORS 3 3
NUM COLORS 3 3


In [None]:
def hex_to_rgb(hex_color):
    hex_color = hex_color.lstrip('#') 
    return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
def get_vibrant_color(colors):
    return max(colors, key=lambda color: sum(hex_to_rgb(color)))
df['vibrant_color'] = df['main_color'].apply(get_vibrant_color)

In [None]:
import json

#now adding cluster information
with open('data.json', 'r') as file:
    data = json.load(file)

def get_vibrant_color(colors):
    return max(colors, key=lambda color: sum(color))

for obj in data:
    obj['vibrant_color'] = get_vibrant_color(obj['main_colors'])

# Step 3: Cluster the data
# Convert vibrant colors to a NumPy array for clustering
vibrant_colors = np.array([obj['vibrant_color'] for obj in data])

# Use KMeans clustering
num_clusters = 5  # Choose the number of clusters
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(vibrant_colors)

# Assign cluster information to each object
for i, obj in enumerate(data):
    cluster_num = kmeans.labels_[i]
    cluster_center = kmeans.cluster_centers_[cluster_num]
    obj['cluster_num'] = int(cluster_num)
    obj['cluster_x'] = float(cluster_center[0])
    obj['cluster_y'] = float(cluster_center[1])

# Step 4: Calculate cluster range
cluster_x_vals = [obj['cluster_x'] for obj in data]
cluster_y_vals = [obj['cluster_y'] for obj in data]
cluster_x_range = (min(cluster_x_vals), max(cluster_x_vals))
cluster_y_range = (min(cluster_y_vals), max(cluster_y_vals))

for obj in data:
    obj['cluster_x_range'] = cluster_x_range
    obj['cluster_y_range'] = cluster_y_range

# Step 5: Save updated data to a new JSON file
with open('updated_data.json', 'w') as file:
    json.dump(data, file, indent=4)