In [35]:
import requests
import pandas as pd
import time
from tqdm import tqdm

# 1. Set your Rijksmuseum API Key
RIJKSMUSEUM_API_KEY = "otcPObCt"

# 2. Define the API endpoint
RIJKSMUSEUM_ENDPOINT = "https://www.rijksmuseum.nl/api/en/collection"

# 3. Define a function to search artworks
def search_rijksmuseum(query, max_items=20, theme=None):
    params = {
        "key": RIJKSMUSEUM_API_KEY,
        "format": "json",
        "q": query,
        "ps": 100  # page size (max 100)
    }
    response = requests.get(RIJKSMUSEUM_ENDPOINT, params=params)
    response.raise_for_status()
    data = response.json()
    artworks = []
    
    for art_obj in tqdm(data.get('artObjects', [])[:max_items]):
        artwork = {
            "id": art_obj.get("objectNumber"),
            "title": art_obj.get("title"),
            "artist": art_obj.get("principalOrFirstMaker"),
            "longTitle": art_obj.get("longTitle"),
            "image_url": art_obj.get("webImage", {}).get("url", None),
            "theme": theme,
            "description": None  # placeholder for curator description
        }
        artworks.append(artwork)
    
    return artworks

# 4. Function to fetch detailed description (if available)
def fetch_artwork_details(object_number):
    details_url = f"https://www.rijksmuseum.nl/api/en/collection/{object_number}"
    params = {
        "key": RIJKSMUSEUM_API_KEY,
        "format": "json",
    }
    response = requests.get(details_url, params=params)
    response.raise_for_status()
    data = response.json()
    
    description = data.get('artObject', {}).get('label', {}).get('description', None)
    return description

# 5. Collect artworks across different themes
themes = ["landscape", "animals", "history", "still life", "mythology"]
artworks = []

for theme in themes:
    print(f"Fetching artworks for theme: {theme}")
    themed_artworks = search_rijksmuseum(theme, max_items=6, theme=theme)  # fetch a few extra in case some are filtered out
    artworks.extend(themed_artworks)
    time.sleep(1)

# 6. Fetch detailed descriptions if available
for art in tqdm(artworks):
    if art['id']:
        try:
            desc = fetch_artwork_details(art['id'])
            art['description'] = desc
            time.sleep(0.5)
        except Exception as e:
            print(f"Failed to fetch details for {art['id']}: {e}")

# 7. Filter artworks with missing descriptions
artworks = [art for art in artworks if art['description'] is not None]
print(f"Final number of artworks with descriptions: {len(artworks)}")

# 8. Convert to DataFrame
df = pd.DataFrame(artworks)

# 9. Save to JSON
df.to_json("data/real_museum_metadata.json", orient="records", indent=4)

print("Successfully saved to 'data/real_museum_metadata.json'")
print(df)


Fetching artworks for theme: landscape


100%|██████████████████████████████████████████| 6/6 [00:00<00:00, 64527.75it/s]


Fetching artworks for theme: animals


100%|██████████████████████████████████████████| 6/6 [00:00<00:00, 57065.36it/s]


Fetching artworks for theme: history


100%|██████████████████████████████████████████| 6/6 [00:00<00:00, 39260.26it/s]


Fetching artworks for theme: still life


100%|██████████████████████████████████████████| 6/6 [00:00<00:00, 52103.16it/s]


Fetching artworks for theme: mythology


100%|██████████████████████████████████████████| 6/6 [00:00<00:00, 39199.10it/s]
100%|███████████████████████████████████████████| 30/30 [00:39<00:00,  1.32s/it]

Final number of artworks with descriptions: 23
Successfully saved to 'data/real_museum_metadata.json'
                id                                              title  \
0        SK-A-1718                  Winter Landscape with Ice Skaters   
1        SK-A-4688              Italian Landscape with Umbrella Pines   
2         SK-C-109               Italian Landscape with a Draughtsman   
3        SK-A-4118                        River Landscape with Riders   
4         SK-A-133  Portrait of a Married Couple, Likely Isaac Abr...   
5        SK-A-1505  A Windmill on a Polder Waterway, Known as ‘In ...   
6         BK-17496                                        Blue Parrot   
7         SK-A-175  A Pelican and other Birds near a Pool, Known a...   
8       BK-1963-64                           Wine fountain and cooler   
9        SK-A-1115                             The Battle of Waterloo   
10      BK-17040-A                                     Table ornament   
11      BK-1976-75    




In [32]:
print(df)

               id                                              title  \
0       SK-A-1718                  Winter Landscape with Ice Skaters   
1       SK-A-4688              Italian Landscape with Umbrella Pines   
2        SK-C-109               Italian Landscape with a Draughtsman   
3       SK-A-4118                        River Landscape with Riders   
4        BK-17496                                        Blue Parrot   
5        SK-A-175  A Pelican and other Birds near a Pool, Known a...   
6      BK-1963-64                           Wine fountain and cooler   
7       SK-A-1115                             The Battle of Waterloo   
8       SK-A-4688              Italian Landscape with Umbrella Pines   
9      BK-NM-1315                                Stadtholder’s chair   
10      SK-A-1451  The Well-stocked Kitchen, with Jesus in het Ho...   
11      SK-A-4100          The Art Gallery of Jan Gildemeester Jansz   
12       SK-A-799                                  Floral Still 