In [2]:
import requests
import pandas as pd
import time
import os
from tqdm import tqdm

# 1. Set your Rijksmuseum API Key
# Prefer the RIJKSMUSEUM_API_KEY environment variable so the credential does not
# have to live in the notebook; the literal is kept only as a backward-compatible
# fallback for existing runs.
RIJKSMUSEUM_API_KEY = os.environ.get("RIJKSMUSEUM_API_KEY", "otcPObCt")

# 2. Define the API endpoint
RIJKSMUSEUM_ENDPOINT = "https://www.rijksmuseum.nl/api/en/collection"

# 3. Define a function to search artworks
def search_rijksmuseum(query, max_items=20, theme=None):
    """Search the Rijksmuseum collection and return basic artwork records.

    Args:
        query: Free-text search term, sent as the ``q`` request parameter.
        max_items: Maximum number of results kept from the returned page.
        theme: Label stored verbatim in each record's ``"theme"`` field.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``artist``,
        ``longTitle``, ``image_url``, ``theme`` and ``description``.
        ``description`` is always ``None`` here; it is a placeholder to be
        filled in by a later step.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.RequestException: On other request failures, including a
            timeout.
    """
    params = {
        "key": RIJKSMUSEUM_API_KEY,
        "format": "json",
        "q": query,
        "ps": 100,  # page size (max 100)
    }
    # Without an explicit timeout a stalled connection would block forever.
    response = requests.get(RIJKSMUSEUM_ENDPOINT, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # `or []` / `or {}` also cover keys that are present but hold JSON null,
    # which a plain `.get(key, default)` would pass through as None.
    art_objects = data.get("artObjects") or []
    artworks = []

    for art_obj in tqdm(art_objects[:max_items]):
        web_image = art_obj.get("webImage") or {}
        artworks.append({
            "id": art_obj.get("objectNumber"),
            "title": art_obj.get("title"),
            "artist": art_obj.get("principalOrFirstMaker"),
            "longTitle": art_obj.get("longTitle"),
            "image_url": web_image.get("url"),
            "theme": theme,
            "description": None,  # placeholder for curator description
        })

    return artworks

# 4. Function to fetch detailed description (if available)
def fetch_artwork_details(object_number):
    """Fetch the label description of a single artwork.

    Args:
        object_number: Object number of the artwork, appended to the
            collection URL to address its detail record.

    Returns:
        The value found at ``artObject -> label -> description`` in the
        response, or ``None`` when any level of that path is missing or null.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.RequestException: On other request failures, including a
            timeout.
    """
    details_url = f"https://www.rijksmuseum.nl/api/en/collection/{object_number}"
    params = {
        "key": RIJKSMUSEUM_API_KEY,
        "format": "json",
    }
    # Without an explicit timeout a stalled connection would block forever.
    response = requests.get(details_url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # `or {}` guards against keys that exist but hold JSON null; chaining
    # `.get(key, {})` would return None in that case and raise AttributeError.
    art_object = data.get("artObject") or {}
    label = art_object.get("label") or {}
    return label.get("description")

# 5. Collect artworks across different themes
themes = ["landscape", "animals", "history", "still life", "mythology"]
artworks = []

for theme in themes:
    print(f"Fetching artworks for theme: {theme}")
    themed_artworks = search_rijksmuseum(theme, max_items=6, theme=theme)
    artworks.extend(themed_artworks)
    time.sleep(1)  # pause between search requests

# 6. Fetch detailed descriptions
for art in tqdm(artworks):
    if not art['id']:
        # Nothing to look up; the description stays None and the record is
        # dropped by the filter in step 7.
        continue
    try:
        art['description'] = fetch_artwork_details(art['id'])
    except Exception as e:
        # Deliberate best-effort: one failed lookup must not abort the run.
        # The record keeps description=None and is filtered out below.
        print(f"Failed to fetch details for {art['id']}: {e}")
    # Throttle after every request, including failed ones, so an error does
    # not cause the next request to be sent immediately.
    time.sleep(0.5)

# 7. Filter artworks with missing descriptions
artworks = [art for art in artworks if art['description'] is not None]
print(f"✅ Final number of artworks with descriptions: {len(artworks)}")

# 8. Convert to DataFrame
df = pd.DataFrame(artworks)

# Create proper `data/` folder in the current working directory (for notebooks)
output_dir = os.path.join(os.getcwd(), "data")
os.makedirs(output_dir, exist_ok=True)

# Save to JSON (one object per artwork)
output_path = os.path.join(output_dir, "real_museum_metadata.json")
df.to_json(output_path, orient="records", indent=4)

print(f"✅ Successfully saved to '{output_path}'")
print(df.head())


Fetching artworks for theme: landscape


100%|██████████████████████████████████████████| 6/6 [00:00<00:00, 33734.35it/s]


Fetching artworks for theme: animals


100%|██████████████████████████████████████████| 6/6 [00:00<00:00, 51358.82it/s]


Fetching artworks for theme: history


100%|██████████████████████████████████████████| 6/6 [00:00<00:00, 58798.65it/s]


Fetching artworks for theme: still life


100%|██████████████████████████████████████████| 6/6 [00:00<00:00, 44462.59it/s]


Fetching artworks for theme: mythology


100%|██████████████████████████████████████████| 6/6 [00:00<00:00, 47934.90it/s]
100%|███████████████████████████████████████████| 30/30 [00:31<00:00,  1.06s/it]

✅ Final number of artworks with descriptions: 24
✅ Successfully saved to '/Users/Irem/heritage-AI/Notebooks/data/real_museum_metadata.json'
          id                                              title  \
0  SK-A-1718                  Winter Landscape with Ice Skaters   
1   SK-C-109               Italian Landscape with a Draughtsman   
2  SK-A-4118                        River Landscape with Riders   
3  SK-A-4688              Italian Landscape with Umbrella Pines   
4   SK-A-133  Portrait of a Married Couple, Likely Isaac Abr...   

              artist                                          longTitle  \
0  Hendrick Avercamp  Winter Landscape with Ice Skaters, Hendrick Av...   
1           Jan Both  Italian Landscape with a Draughtsman, Jan Both...   
2       Aelbert Cuyp  River Landscape with Riders, Aelbert Cuyp, c. ...   
3      Hendrik Voogd  Italian Landscape with Umbrella Pines, Hendrik...   
4         Frans Hals  Portrait of a Married Couple, Likely Isaac Abr...   

      


