In [26]:
# 04_retrieve_metadata_from_apis.ipynb

import os
import time

import pandas as pd
import requests
from tqdm import tqdm

# 1. Set your Rijksmuseum API Key
# Prefer the RIJKSMUSEUM_API_KEY environment variable so the key does not have
# to live in the notebook. The literal is kept only as a fallback so existing
# runs keep working when the variable is unset or empty.
# NOTE(review): the fallback value is a credential stored in the notebook —
# consider rotating it and removing the fallback.
RIJKSMUSEUM_API_KEY = os.environ.get("RIJKSMUSEUM_API_KEY") or "otcPObCt"

# 2. Define the API endpoint
RIJKSMUSEUM_ENDPOINT = "https://www.rijksmuseum.nl/api/en/collection"

# 3. Define a function to search artworks
def search_rijksmuseum(query, max_items=20, theme=None):
    """Search the Rijksmuseum collection and return basic metadata records.

    Args:
        query: Free-text search term, sent as the ``q`` request parameter.
        max_items: Maximum number of artworks kept from the first result page.
        theme: Optional label copied verbatim into each record's ``"theme"``.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``artist``,
        ``longTitle``, ``image_url``, ``description`` and ``theme``.
        ``description`` is always ``None`` here; it is meant to be filled in
        later by a separate details request.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    params = {
        "key": RIJKSMUSEUM_API_KEY,
        "format": "json",
        "q": query,
        "ps": 100,  # page size (max 100)
    }
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(RIJKSMUSEUM_ENDPOINT, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # `or []` also covers an explicit null for the key, not just a missing key.
    art_objects = (data.get("artObjects") or [])[:max_items]

    artworks = []
    for art_obj in tqdm(art_objects):
        # "webImage" may be present but null; dict.get's default only applies
        # to a missing key, so normalise None to an empty dict.
        web_image = art_obj.get("webImage") or {}
        artworks.append({
            "id": art_obj.get("objectNumber"),
            "title": art_obj.get("title"),
            "artist": art_obj.get("principalOrFirstMaker"),
            "longTitle": art_obj.get("longTitle"),
            "image_url": web_image.get("url"),
            "description": None,  # will fill later if available
            "theme": theme,
        })

    return artworks

# 4. Optional: function to fetch detailed description (if available)
def fetch_artwork_details(object_number):
    """Fetch the label description of a single artwork.

    Args:
        object_number: The artwork's object number, appended to the collection
            endpoint to form the details URL.

    Returns:
        The value at ``artObject.label.description`` in the JSON response, or
        ``None`` when any level of that path is missing or null.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    details_url = f"{RIJKSMUSEUM_ENDPOINT}/{object_number}"
    params = {
        "key": RIJKSMUSEUM_API_KEY,
        "format": "json",
    }
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(details_url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Each level may be present but null; dict.get's default only covers a
    # missing key, so normalise None to an empty dict before descending.
    art_object = data.get("artObject") or {}
    label = art_object.get("label") or {}
    return label.get("description")


# 5. Collect artworks across different themes
themes = ["abstract", "romanticism", "portrait", "still life", "mythology"]  # Or other themes you choose
artworks = []

for theme in themes:
    print(f"Fetching artworks for theme: {theme}")
    themed_artworks = search_rijksmuseum(theme, max_items=4, theme=theme)  # 4 per theme = 20 total
    artworks.extend(themed_artworks)
    time.sleep(1)  # Pause to avoid hitting API rate limits

# 6. Fetch detailed descriptions if available
for art in tqdm(artworks):
    if not art['id']:
        continue
    try:
        art['description'] = fetch_artwork_details(art['id'])
    except Exception as e:
        # Best effort: a failed lookup leaves the description as None and the
        # loop moves on to the next artwork.
        print(f"Failed to fetch details for {art['id']}:", e)
    finally:
        # Pause after failures too, so an erroring API is not hit again
        # immediately.
        time.sleep(0.5)  # be gentle to the API


# 7. Convert to DataFrame
df = pd.DataFrame(artworks)

# 8. Save to JSON
# One variable holds the path so the file written and the message printed
# cannot disagree.
output_path = "data/real_museum_metadata.json"
# Create the target directory if needed; to_json does not create it.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_json(output_path, orient="records", indent=4)

print(f"Successfully saved to '{output_path}'")
print(df)


Fetching artworks for theme: abstract


TypeError: search_rijksmuseum() got an unexpected keyword argument 'theme'

In [24]:
# Print the collected artwork metadata table as plain text.
# NOTE(review): this cell's execution count (24) is lower than that of the
# cell that builds `df` (26), so the output below presumably comes from an
# earlier run — re-run this cell after the collection cell to refresh it.
print(df)

           id                                              title  \
0    SK-A-500                                   The Holy Kinship   
1    SK-A-372                                     Mary Magdalene   
2   SK-A-1115                             The Battle of Waterloo   
3      SK-C-6  The Sampling Officials of the Amsterdam Draper...   
4    SK-A-742                             View of Olinda, Brazil   
5   SK-A-4691                                      Self-portrait   
6   SK-A-4100          The Art Gallery of Jan Gildemeester Jansz   
7   SK-A-4981  Adolf and Catharina Croeser, Known as ‘The Bur...   
8   SK-A-5005                               Woman in a Large Hat   
9    SK-C-251                             Woman Reading a Letter   
10   SK-C-229                                   The Merry Family   
11  SK-A-4830                         Still Life with a Gilt Cup   

                    artist                                          longTitle  \
0   Geertgen tot Sint Jans  The Ho