# Scrape all artworks by type and decade

### Scraping
> paintings and digital works



In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

paintings_urls = {
    "1950s": "https://www.hockney.com/works/paintings/50s",
    "1960s": "https://www.hockney.com/works/paintings/60s",
    "1970s": "https://www.hockney.com/works/paintings/70s",
    "1980s": "https://www.hockney.com/works/paintings/80s",
    "1990s": "https://www.hockney.com/works/paintings/90s",
    "2000s": "https://www.hockney.com/works/paintings/00s",
    "2010s": "https://www.hockney.com/works/paintings/10s"
}

digital_urls = [
    "https://www.hockney.com/works/digital/computer-drawings",
    "https://www.hockney.com/works/digital/iphone",
    "https://www.hockney.com/works/digital/ipad",
    "https://www.hockney.com/works/digital/arrival-of-spring-woldgate",
    "https://www.hockney.com/works/digital/yosemite-suite"
]

data = []

for decade, url in paintings_urls.items():
    res = requests.get(url)
    doc = BeautifulSoup(res.text, 'html.parser')
    items = doc.select('a[data-type="image"]')

    for item in items:
        image_url = item['href']
        caption_html = item.get('data-caption', '')
        caption_soup = BeautifulSoup(caption_html, 'html.parser')

        title = caption_soup.find('b').text.strip() if caption_soup.find('b') else ''
        details = caption_soup.get_text().replace(title, '').strip()

        data.append({
            "title": title,
            "details": details,
            "image_url": image_url,
            "decade": decade
        })

for url in digital_urls:
    res = requests.get(url)
    doc = BeautifulSoup(res.text, 'html.parser')
    items = doc.select('a[data-type="image"]')

    for item in items:
        image_url = item['href']
        caption_html = item.get('data-caption', '')
        caption_soup = BeautifulSoup(caption_html, 'html.parser')

        title = caption_soup.find('b').text.strip() if caption_soup.find('b') else ''
        details = caption_soup.get_text().replace(title, '').strip()

        i_tag = caption_soup.find('i')
        if i_tag:
            year_match = re.search(r'\b(19|20)\d{2}\b', i_tag.text)
            year = int(year_match.group()) if year_match else None
        else:
            year = None

        if year:
            decade = f"{str(year)[:3]}0s"
        else:
            decade = "Unknown"

        data.append({
            "title": title,
            "details": details,
            "image_url": image_url,
            "decade": decade
        })

artworks = pd.DataFrame(data)



Scraping: https://www.hockney.com/works/digital/computer-drawings
Scraping: https://www.hockney.com/works/digital/iphone
Scraping: https://www.hockney.com/works/digital/ipad
Scraping: https://www.hockney.com/works/digital/arrival-of-spring-woldgate
Scraping: https://www.hockney.com/works/digital/yosemite-suite


In [2]:
artworks


Unnamed: 0,title,details,image_url,decade
0,Self Portrait,1951 - Oil on paper 12 3/8 x 8 1/2 in,https://www.hockney.com/img/gallery/paintings/...,1950s
1,Still Life with Bowl of Fruit,1951 - Oil on paper 9 3/4 x 11 1/8 in,https://www.hockney.com/img/gallery/paintings/...,1950s
2,Tunwell Lane,1957 - Oil on canvas 29 x 36 in,https://www.hockney.com/img/gallery/paintings/...,1950s
3,Self Portrait,1954 - Oil on board 18 x 14 in,https://www.hockney.com/img/gallery/paintings/...,1950s
4,Portrait of My Father,1955 - Oil on canvas 20 x 16 in,https://www.hockney.com/img/gallery/paintings/...,1950s
...,...,...,...,...
407,"Yosemite I, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown
408,"Yosemite II, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown
409,"Yosemite III, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown
410,"Yosemite I, October 16th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown


In [50]:
artworks['year'] = artworks['details'].str.extract(r'\b(19\d{2}|20\d{2})\b')
artworks['year'] = pd.to_numeric(artworks['year'], errors='coerce')

artworks

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,details,image_url,decade,year,decadef,type
0,0,0,0,Self Portrait,1951 - Oil on paper 12 3/8 x 8 1/2 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1951.0,1950s,paintings
1,1,1,1,Still Life with Bowl of Fruit,1951 - Oil on paper 9 3/4 x 11 1/8 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1951.0,1950s,paintings
2,2,2,2,Tunwell Lane,1957 - Oil on canvas 29 x 36 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1957.0,1950s,paintings
3,3,3,3,Self Portrait,1954 - Oil on board 18 x 14 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1954.0,1950s,paintings
4,4,4,4,Portrait of My Father,1955 - Oil on canvas 20 x 16 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1955.0,1950s,paintings
...,...,...,...,...,...,...,...,...,...,...
407,407,407,407,"Yosemite I, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital
408,408,408,408,"Yosemite II, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital
409,409,409,409,"Yosemite III, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital
410,410,410,410,"Yosemite I, October 16th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital


In [51]:
def year_to_decade(year):
    if pd.isna(year):
        return 'Na'
    else:
        return f"{int(year) // 10 * 10}s"

artworks['decadef'] = artworks['year'].apply(year_to_decade)

artworks

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,details,image_url,decade,year,decadef,type
0,0,0,0,Self Portrait,1951 - Oil on paper 12 3/8 x 8 1/2 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1951.0,1950s,paintings
1,1,1,1,Still Life with Bowl of Fruit,1951 - Oil on paper 9 3/4 x 11 1/8 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1951.0,1950s,paintings
2,2,2,2,Tunwell Lane,1957 - Oil on canvas 29 x 36 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1957.0,1950s,paintings
3,3,3,3,Self Portrait,1954 - Oil on board 18 x 14 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1954.0,1950s,paintings
4,4,4,4,Portrait of My Father,1955 - Oil on canvas 20 x 16 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1955.0,1950s,paintings
...,...,...,...,...,...,...,...,...,...,...
407,407,407,407,"Yosemite I, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital
408,408,408,408,"Yosemite II, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital
409,409,409,409,"Yosemite III, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital
410,410,410,410,"Yosemite I, October 16th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital


In [52]:
artworks['type'] = artworks['image_url'].str.extract(r'gallery/([^/]+)/')
artworks

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,details,image_url,decade,year,decadef,type
0,0,0,0,Self Portrait,1951 - Oil on paper 12 3/8 x 8 1/2 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1951.0,1950s,paintings
1,1,1,1,Still Life with Bowl of Fruit,1951 - Oil on paper 9 3/4 x 11 1/8 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1951.0,1950s,paintings
2,2,2,2,Tunwell Lane,1957 - Oil on canvas 29 x 36 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1957.0,1950s,paintings
3,3,3,3,Self Portrait,1954 - Oil on board 18 x 14 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1954.0,1950s,paintings
4,4,4,4,Portrait of My Father,1955 - Oil on canvas 20 x 16 in,https://www.hockney.com/img/gallery/paintings/...,1950s,1955.0,1950s,paintings
...,...,...,...,...,...,...,...,...,...,...
407,407,407,407,"Yosemite I, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital
408,408,408,408,"Yosemite II, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital
409,409,409,409,"Yosemite III, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital
410,410,410,410,"Yosemite I, October 16th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,Unknown,2011.0,2010s,digital


In [97]:
artworks['tool'] = artworks['image_url'].str.extract(r'digital/([^/]+)/', expand=False)
artworks

Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,details,image_url,decade,year,decadef,type,tool
0,0,0,0,0,Self Portrait,1951 - Oil on paper 12 3/8 x 8 1/2 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1951.0,1950s,paintings,
1,1,1,1,1,Still Life with Bowl of Fruit,1951 - Oil on paper 9 3/4 x 11 1/8 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1951.0,1950s,paintings,
2,2,2,2,2,Tunwell Lane,1957 - Oil on canvas 29 x 36 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1957.0,1950s,paintings,
3,3,3,3,3,Self Portrait,1954 - Oil on board 18 x 14 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1954.0,1950s,paintings,
4,4,4,4,4,Portrait of My Father,1955 - Oil on canvas 20 x 16 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1955.0,1950s,paintings,
...,...,...,...,...,...,...,...,...,...,...,...,...
407,407,407,407,407,"Yosemite I, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite
408,408,408,408,408,"Yosemite II, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite
409,409,409,409,409,"Yosemite III, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite
410,410,410,410,410,"Yosemite I, October 16th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite


In [54]:
#artworks['tool'] = artworks['tool'].replace('yosemite', 'ipad')


In [None]:
#artworks['tool'] = artworks['tool'].replace('woldgate', 'ipad')


In [98]:
artworks

Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,details,image_url,decade,year,decadef,type,tool
0,0,0,0,0,Self Portrait,1951 - Oil on paper 12 3/8 x 8 1/2 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1951.0,1950s,paintings,
1,1,1,1,1,Still Life with Bowl of Fruit,1951 - Oil on paper 9 3/4 x 11 1/8 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1951.0,1950s,paintings,
2,2,2,2,2,Tunwell Lane,1957 - Oil on canvas 29 x 36 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1957.0,1950s,paintings,
3,3,3,3,3,Self Portrait,1954 - Oil on board 18 x 14 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1954.0,1950s,paintings,
4,4,4,4,4,Portrait of My Father,1955 - Oil on canvas 20 x 16 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1955.0,1950s,paintings,
...,...,...,...,...,...,...,...,...,...,...,...,...
407,407,407,407,407,"Yosemite I, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite
408,408,408,408,408,"Yosemite II, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite
409,409,409,409,409,"Yosemite III, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite
410,410,410,410,410,"Yosemite I, October 16th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite


In [99]:

artworks[artworks['year'].isna()]


Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,details,image_url,decade,year,decadef,type,tool
34,34,34,34,34,The Cha Cha That Was Danced in The Early Hours...,oil on canvas 68x60 1/2 in.,https://www.hockney.com/img/gallery/paintings/...,,,Na,paintings,
156,156,156,156,156,The Atelier March 17th 2009,inkjet printed computer drawing on paper 29 1/...,https://www.hockney.com/img/gallery/digital/co...,,,Na,digital,computer_drawings


In [100]:
artworks.to_csv('dhartworks.csv')


# Adding pallete to each image + saving in a new folder

In [104]:
import os
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from sklearn.cluster import KMeans
from PIL import Image
import numpy as np
from io import BytesIO

def download_image(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return Image.open(BytesIO(response.content)).convert('RGB')
    except Exception as e:
        return None

def create_palette_image_from_pil(img, k=5, save_path=None):
    img_small = img.resize((150, 150))
    img_np = np.array(img_small).reshape(-1, 3)

    kmeans = KMeans(n_clusters=k, random_state=42).fit(img_np)
    centers = np.array(kmeans.cluster_centers_, dtype='uint8')

    _, counts = np.unique(kmeans.labels_, return_counts=True)
    sorted_idx = np.argsort(-counts)
    dominant_colors = centers[sorted_idx]

    # Visualização
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(img)
    ax.axis('off')

    for i, color in enumerate(dominant_colors):
        rect = patches.Rectangle((img.width + 10, i * (img.height // k)),
                                 width=50, height=(img.height // k),
                                 facecolor=np.array(color)/255)
        ax.add_patch(rect)

    ax.set_xlim(0, img.width + 70)
    ax.set_ylim(img.height, 0)

    if save_path:
        plt.savefig(save_path, bbox_inches='tight')
        plt.close()

    hex_colors = ['#{:02x}{:02x}{:02x}'.format(*tuple(c)) for c in dominant_colors]
    return dominant_colors.tolist(), hex_colors


df = pd.read_csv("dhartworks.csv")
output_dir = "palettes"
os.makedirs(output_dir, exist_ok=True)

df['rgb_colors'] = None
df['hex_colors'] = None

for idx, row in df.iterrows():
    url = row['image_url']
    image_name = os.path.basename(url).split('?')[0]
    save_path = os.path.join(output_dir, f"palette_{image_name.replace('.jpg', '.png')}")

    img = download_image(url)
    if img:
        rgb, hexes = create_palette_image_from_pil(img, k=5, save_path=save_path)
        df.at[idx, 'rgb_colors'] = str(rgb)
        df.at[idx, 'hex_colors'] = str(hexes)

df.to_csv("colorcode.csv", index=False)


In [107]:
df

Unnamed: 0.5,Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,details,image_url,decade,year,decadef,type,tool,rgb_colors,hex_colors
0,0,0,0,0,0,Self Portrait,1951 - Oil on paper 12 3/8 x 8 1/2 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1951.0,1950s,paintings,,"[[121, 127, 109], [8, 22, 9], [204, 132, 25], ...","['#797f6d', '#081609', '#cc8419', '#0a451b', '..."
1,1,1,1,1,1,Still Life with Bowl of Fruit,1951 - Oil on paper 9 3/4 x 11 1/8 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1951.0,1950s,paintings,,"[[14, 32, 16], [162, 45, 18], [185, 87, 61], [...","['#0e2010', '#a22d12', '#b9573d', '#5d5e32', '..."
2,2,2,2,2,2,Tunwell Lane,1957 - Oil on canvas 29 x 36 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1957.0,1950s,paintings,,"[[208, 206, 187], [29, 35, 29], [105, 107, 74]...","['#d0cebb', '#1d231d', '#696b4a', '#40452e', '..."
3,3,3,3,3,3,Self Portrait,1954 - Oil on board 18 x 14 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1954.0,1950s,paintings,,"[[201, 152, 113], [221, 177, 138], [73, 60, 46...","['#c99871', '#ddb18a', '#493c2e', '#af7e58', '..."
4,4,4,4,4,4,Portrait of My Father,1955 - Oil on canvas 20 x 16 in,https://www.hockney.com/img/gallery/paintings/...,1950.0,1955.0,1950s,paintings,,"[[140, 95, 59], [24, 20, 21], [136, 106, 75], ...","['#8c5f3b', '#181415', '#886a4b', '#5a4a2f', '..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
407,407,407,407,407,407,"Yosemite I, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite,"[[108, 128, 128], [220, 223, 219], [164, 170, ...","['#6c8080', '#dcdfdb', '#a4aaa3', '#95b250', '..."
408,408,408,408,408,408,"Yosemite II, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite,"[[87, 96, 104], [59, 79, 64], [184, 186, 171],...","['#576068', '#3b4f40', '#b8baab', '#8c908e', '..."
409,409,409,409,409,409,"Yosemite III, October 5th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite,"[[86, 101, 102], [24, 44, 22], [43, 63, 44], [...","['#566566', '#182c16', '#2b3f2c', '#42534d', '..."
410,410,410,410,410,410,"Yosemite I, October 16th",2011 - iPad drawing printed on four sheets of ...,https://www.hockney.com/img/gallery/digital/yo...,2010.0,2011.0,2010s,digital,yosemite,"[[94, 146, 175], [17, 109, 41], [53, 170, 81],...","['#5e92af', '#116d29', '#35aa51', '#cde2f1', '..."


# Categorizing and creating a palette for each decade of work

In [5]:
import numpy as np
from PIL import Image
import ast
import os
import pandas as pd
from sklearn.cluster import KMeans
from collections import Counter
import altair as alt
import matplotlib.pyplot as plt

csv_path = 'colorcode.csv'
image_folder = 'palettes'
num_colors = 9

df = pd.read_csv(csv_path)

all_pixels = []

for _, row in df.iterrows():
    image_name = os.path.basename(row['image_url']).split('?')[0]
    palette_path = os.path.join(image_folder, f"palette_{image_name.replace('.jpg', '.png')}")

    if os.path.exists(palette_path):
        img = Image.open(palette_path).convert('RGB')
        img = img.resize((100, 100))
        pixels = np.array(img).reshape(-1, 3)
        pixels = np.array([p for p in pixels if 40 < np.mean(p) < 220])
        all_pixels.append(pixels)

if not all_pixels:
    raise ValueError("No valid images loaded.")

all_pixels = np.vstack(all_pixels)

kmeans = KMeans(n_clusters=num_colors, random_state=42).fit(all_pixels)
colors = kmeans.cluster_centers_.astype(int)
labels = kmeans.labels_

counts = Counter(labels)
frequencies = [counts[i] for i in range(num_colors)]

sorted_idx = np.argsort(frequencies)[::-1]
colors = colors[sorted_idx]
frequencies = np.array(frequencies)[sorted_idx]

hex_colors = ['#{:02x}{:02x}{:02x}'.format(*c) for c in colors]

alt.Chart(pd.DataFrame({
    'color': hex_colors,
    'x': [0, 1, 2, 0, 1, 2, 0, 1, 2],
    'y': [2, 2, 2, 1, 1, 1, 0, 0, 0]
})).mark_rect().encode(
    x=alt.X('x:O', axis=None),
    y=alt.Y('y:O', axis=None),
    color=alt.Color('color:N', scale=None)
).properties(
    width=150,
    height=150,
    title="412 Artworks"
).display()

(
    alt.Chart(pd.DataFrame({
        'color': hex_colors,
        'frequency': frequencies
    })).mark_bar(size=30).encode(
        y=alt.Y('color:N', sort='-x', title=''),
        x=alt.X('frequency:Q', title='Frequency (pixels)'),
        color=alt.Color('color:N', scale=None, legend=None)
    ).properties(
        width=500,
        height=250,
        title='Colors — Frequency'
    ) + alt.Chart(pd.DataFrame({
        'color': hex_colors,
        'frequency': frequencies
    })).mark_text(
        align='left',
        baseline='middle',
        dx=5,
        dy=0,
        color='black'
    ).encode(
        y=alt.Y('color:N', sort='-x'),
        x=alt.X('frequency:Q'),
        text='frequency:Q'
    )
).display()



In [7]:


csv_path = 'colorcode.csv'
image_folder = 'palettes'
num_colors = 9
crop_center = True
crop_fraction = 0.6

charts = []

def perceptual_luminance(rgb):
    r, g, b = rgb
    return 0.2126 * r + 0.7152 * g + 0.0722 * b

def process_group(df_group, group_label, group_value):
    all_pixels = []

    for _, row in df_group.iterrows():
        image_name = os.path.basename(row['image_url']).split('?')[0]
        palette_path = os.path.join(image_folder, f"palette_{image_name.replace('.jpg', '.png')}")

        if os.path.exists(palette_path):
            img = Image.open(palette_path).convert('RGB')
            img = img.resize((200, 200))
            if crop_center:
                w, h = img.size
                cw = int(w * crop_fraction)
                ch = int(h * crop_fraction)
                left = (w - cw) // 2
                upper = (h - ch) // 2
                img = img.crop((left, upper, left + cw, upper + ch))
            pixels = np.array(img).reshape(-1, 3)
            pixels = np.array([p for p in pixels if 50 < perceptual_luminance(p) < 220])
            all_pixels.append(pixels)

    if not all_pixels:
        return

    all_pixels = np.vstack(all_pixels)

    kmeans = KMeans(n_clusters=num_colors, random_state=42, n_init=10, init='k-means++')
    kmeans.fit(all_pixels)
    colors = kmeans.cluster_centers_.astype(int)
    labels = kmeans.labels_

    counts = Counter(labels)
    total_pixels = sum(counts.values())
    frequencies = [counts[i] for i in range(num_colors)]
    relative_freqs = [c / total_pixels for c in frequencies]

    sorted_idx = np.argsort(relative_freqs)[::-1]
    colors = colors[sorted_idx]
    hex_colors = ['#{:02x}{:02x}{:02x}'.format(*tuple(c)) for c in colors]

    df_grid = pd.DataFrame({
        'Hex': hex_colors,
        'Row': [2 - i // 3 for i in range(9)],
        'Col': [i % 3 for i in range(9)],
    })

    background = alt.Chart(pd.DataFrame({'x': [0], 'y': [0]})).mark_rect(
        fill='white',
        size=16000
    ).encode(
        x=alt.X('x:O', axis=None),
        y=alt.Y('y:O', axis=None)
    )

    grid_colors = alt.Chart(df_grid).mark_rect(size=5000).encode(
        x=alt.X('Col:O', axis=None, scale=alt.Scale(paddingInner=0, paddingOuter=0.1)),
        y=alt.Y('Row:O', sort='descending', axis=None, scale=alt.Scale(paddingInner=0, paddingOuter=0.1)),
        color=alt.Color('Hex:N', scale=None)
    )

    chart = (background + grid_colors).properties(
        width=150,
        height=150,
        title=f"{group_label}: {group_value}"
    )
    charts.append(chart)

df = pd.read_csv(csv_path)

df_paintings = df[df['type'] == 'paintings']
df_digital = df[df['type'] == 'digital']

for decade, group_df in df_paintings.groupby('decadef'):
    process_group(group_df, 'Decade', decade)

for tool, group_df in df_digital.groupby('tool'):
    process_group(group_df, 'Tool', tool)

if charts:
    rows = []
    for i in range(0, len(charts), 3):
        rows.append(alt.hconcat(*charts[i:i+3]))
    final_chart = alt.vconcat(*rows).resolve_scale(color='independent')

    try:
        final_chart.display()
    except:
        pass

    try:
        final_chart.save('all_palettes.svg')
    except:
        pass
