### Rijksmuseum API

In [2]:
import requests

SEARCH_URL = "https://data.rijksmuseum.nl/search/collection"

def search_portraits(title=None, creator=None, t='painting', timeout=30):
    """
    Query the Rijksmuseum collection search endpoint and return the decoded JSON.

    Parameters
    ----------
    title : str, optional
        Value sent as the ``title`` query parameter.
    creator : str, optional
        Value sent as the ``creator`` query parameter.
    t : str, optional
        Value sent as the ``type`` query parameter (default ``'painting'``).
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without it a stalled connection would block the kernel forever.

    Returns
    -------
    The JSON-decoded body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    requests.Timeout
        If the server does not answer within ``timeout``.
    """
    # Filters left as None are omitted from the query string by requests,
    # so callers may pass only the criteria they care about.
    params = {
        "creator": creator,
        "title": title,
        # "imageAvailable": "true",   # disabled filter, kept for reference
        "type": t,
    }

    response = requests.get(SEARCH_URL, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()

In [3]:
_AAT_ENGLISH = "http://vocab.getty.edu/aat/300388277"  # language id used to tag English text


def _is_english(node: dict) -> bool:
    """Return True if any entry of node["language"] carries the English id."""
    return any(lang.get("id") == _AAT_ENGLISH for lang in node.get("language", []))


def _find_title(data: dict, english_only: bool):
    """Return the first non-empty "Name" content nested under subject_of, or None."""
    for entry in data.get("subject_of", []):
        for part in entry.get("part", []):
            for sub in part.get("part", []):
                if sub.get("type") != "Name":
                    continue
                if english_only and not _is_english(sub):
                    continue
                text = sub.get("content")
                if text:
                    return text
                # Only the first matching Name of a part is considered;
                # if it is empty, move on to the next part.
                break
    return None


def _find_artist(production: dict):
    """Return the maker statement from the production parts, preferring English."""
    artist = None
    for part in production.get("part", []):
        statements = [
            ref for ref in part.get("referred_to_by", [])
            if ref.get("type") == "LinguisticObject"
        ]
        english = next((ref for ref in statements if _is_english(ref)), None)
        if english is not None:
            # An English statement in a later part replaces an earlier value.
            artist = english.get("content")
        if artist is None and statements:
            # fallback: first statement regardless of language
            artist = statements[0].get("content")
    return artist


def _find_year(production: dict):
    """Return the production date as text, or None if no timespan is usable."""
    timespan = production.get("timespan")
    if not isinstance(timespan, dict):
        return None

    # Prefer the textual date: first identifier whose content has a digit.
    identifiers = timespan.get("identified_by")
    if isinstance(identifiers, list):
        for ident in identifiers:
            text = ident.get("content")
            if text and any(ch.isdigit() for ch in text):
                return text

    # fallback: first four characters of the machine timestamp
    begin = timespan.get("begin_of_the_begin")
    return begin[:4] if begin else None


def _find_english_descriptions(data: dict) -> list:
    """Collect de-duplicated content strings from English subject_of entries."""
    texts = []
    for entry in data.get("subject_of", []):
        if not _is_english(entry):
            continue
        for part in entry.get("part", []):
            if "content" in part:
                texts.append(part["content"])
            texts.extend(
                sub["content"] for sub in part.get("part", []) if "content" in sub
            )
    # dict.fromkeys drops duplicates while keeping first-seen order
    return list(dict.fromkeys(texts))


def _find_location(data: dict) -> tuple:
    """Return (location, room) read from current_location, each possibly None."""
    place = data.get("current_location")
    if not isinstance(place, dict):
        # Missing or unexpectedly shaped (e.g. a list): nothing to read.
        return None, None

    location = None
    room = None
    for item in place.get("identified_by", []):
        kind = item.get("type")
        if kind == "Identifier" and "content" in item:
            room = item["content"]
        elif kind == "Name" and _is_english(item):
            names = [p.get("content") for p in item.get("part", []) if p.get("content")]
            location = " ".join(names)
    return location, room


def parse_artwork_details(data: dict) -> dict:
    """
    Extracts useful structured fields from Rijksmuseum Linked.Art objects

    Parameters
    ----------
    data : dict
        One decoded artwork record. Every key is optional; sections that are
        missing, or whose value is not a dict where one is expected
        (``produced_by``, its ``timespan``, ``current_location``), yield None
        instead of raising.

    Returns
    -------
    dict
        ``title``        English title if tagged, else the first title found.
        ``artist``       maker statement, English preferred.
        ``year``         textual date, else first 4 chars of begin_of_the_begin.
        ``descriptions`` de-duplicated list of English texts (may be empty).
        ``location``     English location name parts joined with spaces.
        ``room``         content of the location's Identifier entry.
        ``source``       the record's own ``id``.
    """
    production = data.get("produced_by")
    if not isinstance(production, dict):
        # Artist and year both read from the production; treat any other
        # shape as "no production data" rather than calling .get on it.
        production = {}

    # First look for an English title, then fall back to any title.
    title = _find_title(data, english_only=True) or _find_title(data, english_only=False)
    location, room = _find_location(data)

    return {
        "title": title,
        "artist": _find_artist(production),
        "year": _find_year(production),
        "descriptions": _find_english_descriptions(data),
        "location": location,
        "room": room,
        "source": data.get("id"),
    }

In [23]:
# what to search
creator = 'Van Gogh'
title = 'Self-Portrait'

data = search_portraits(title=title, creator=creator)

# Fail with a readable message when the search comes back empty.
# LookupError is the common base of the KeyError/IndexError the bare
# indexing below would otherwise raise.
if not data.get("orderedItems"):
    raise LookupError(f"No artworks found for creator={creator!r}, title={title!r}")
artwork_id = data["orderedItems"][0]['id']

# The id of the first hit is itself a URL; request it as JSON-LD.
response = requests.get(
    artwork_id,
    headers={"Accept": "application/ld+json"},
    timeout=30,  # do not let a stalled connection hang the kernel
)
response.raise_for_status()  # same status check search_portraits performs
extracted_info = response.json()

print(extracted_info.keys())

dict_keys(['@context', 'id', 'type', 'produced_by', 'subject_of', 'attributed_by', 'assigned_by', 'identified_by', 'classified_as', 'dimension', 'made_of', 'current_location', 'shows', 'referred_to_by', 'equivalent', 'member_of'])


In [24]:
# Reduce the raw record to the handful of fields we care about,
# then show the result as the cell's output.
extracted_data = parse_artwork_details(data=extracted_info)
extracted_data

{'title': 'Self-portrait',
 'artist': 'painter: Vincent van Gogh',
 'year': '1887',
 'descriptions': ['Self-portrait',
  'Vincent van Gogh (1853–1890), oil on cardboard, 1887',
  'Vincent moved to Paris in 1886, after hearing from his brother Theo about the new, colourful style of French painting. Wasting no time, he tried it out in several self-portraits. He did this mostly to avoid having to pay for a model. Using rhythmic brushstrokes in striking colours, he portrayed himself here as a fashionably dressed Parisian.'],
 'location': 'Main building 19th Century The Hague School / Amsterdam impressionists / Van Gogh and contemporaries',
 'room': 'HG-1.18',
 'source': 'https://id.rijksmuseum.nl/200109794'}