# Rijksmuseum API

In [31]:
import requests
import re


def search_portraits(title=None, creator=None, t='painting', timeout=30):
    """Search the Rijksmuseum collection for objects that have an image.

    Args:
        title: Value for the ``title`` query parameter, or None to omit it.
        creator: Value for the ``creator`` query parameter, or None to omit it.
        t: Value for the ``type`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the search response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    SEARCH_URL = "https://data.rijksmuseum.nl/search/collection"

    params = {
        "creator": creator,
        "title": title,
        "imageAvailable": "true",
        "type": t,
    }
    # Unset filters are left out of the query string (requests would drop
    # None values anyway; filtering here makes that explicit).
    params = {key: value for key, value in params.items() if value is not None}

    # Without a timeout a stalled connection would block the notebook forever.
    r = requests.get(SEARCH_URL, params=params, timeout=timeout)
    r.raise_for_status()
    return r.json()

In [32]:
# Getty AAT language id for Dutch; the Dutch records carry the most information.
_DUTCH_LANGUAGE = "http://vocab.getty.edu/aat/300388256"

# Classification id that marks a statement as a material description.
_MATERIAL_CLASS = "http://vocab.getty.edu/aat/300435429"

# Unit ids mapped to the unit label used in the output string.
_UNIT_LABELS = {
    "http://vocab.getty.edu/aat/300379098": "cm",
    "http://vocab.getty.edu/aat/300379226": "kg",
}

# Dimension classification ids mapped to their Dutch label.
_DIMENSION_LABELS = {
    "https://id.rijksmuseum.nl/22011": "hoogte",
    "https://id.rijksmuseum.nl/22012": "breedte",
    "https://id.rijksmuseum.nl/220217": "gewicht",
}


def _is_dutch(node):
    """Return True when *node* lists Dutch among its ``language`` entries."""
    return any(lang.get("id") == _DUTCH_LANGUAGE for lang in node.get("language", []))


def _unique(values):
    """Drop duplicates from *values* while keeping first-seen order."""
    return list(dict.fromkeys(values))


def _extract_title(data):
    """Return the first Dutch ``Name`` under ``subject_of``, else the first ``Name`` of any language."""
    names = [
        sub
        for entry in data.get("subject_of", [])
        for part in entry.get("part", [])
        for sub in part.get("part", [])
        if sub.get("type") == "Name" and sub.get("content")
    ]
    if not names:
        return None
    dutch = next((name for name in names if _is_dutch(name)), None)
    chosen = dutch if dutch is not None else names[0]
    return chosen["content"]


def _extract_artist(production):
    """Return the maker label found in the ``referred_to_by`` statements of the production parts."""
    artist = None
    for part in production.get("part", []):
        labels = [
            ref for ref in part.get("referred_to_by", [])
            if ref.get("type") == "LinguisticObject"
        ]
        # A Dutch label always overrides what earlier parts produced.
        dutch = next((ref for ref in labels if _is_dutch(ref)), None)
        if dutch is not None:
            artist = dutch.get("content")
        # Fallback: first label of any language, only while nothing is set yet.
        if artist is None and labels:
            artist = labels[0].get("content")
    return artist


def _extract_year(production):
    """Return the textual date of the production timespan, else the first four characters of its start timestamp."""
    timespan = production.get("timespan")
    if not isinstance(timespan, dict):
        return None

    # Prefer the human-readable label (e.g. "c. 1660") when it contains a digit.
    labels = timespan.get("identified_by")
    if isinstance(labels, list):
        for label in labels:
            text = label.get("content")
            if text and any(ch.isdigit() for ch in text):
                return text

    # Fallback to the machine timestamp.
    begin = timespan.get("begin_of_the_begin")
    return begin[:4] if begin else None


def _extract_description(data):
    """Join every text found up to two ``part`` levels deep inside the Dutch ``subject_of`` entries."""
    texts = []
    for entry in data.get("subject_of", []):
        if not _is_dutch(entry):
            continue
        if "content" in entry:
            texts.append(entry["content"])
        for part in entry.get("part", []):
            if "content" in part:
                texts.append(part["content"])
            for sub in part.get("part", []):
                if "content" in sub:
                    texts.append(sub["content"])
    return " ".join(_unique(texts))


def _extract_location(data):
    """Return ``(location, room)`` read from ``current_location``; either may be None."""
    place = data.get("current_location")
    if not isinstance(place, dict):
        return None, None

    location = None
    room = None
    for item in place.get("identified_by", []):
        kind = item.get("type")
        if kind == "Identifier" and "content" in item:
            # The identifier holds the room code.
            room = item["content"]
        elif kind == "Name" and _is_dutch(item):
            # The Dutch name is split over several parts; join them.
            location = " ".join(
                part["content"] for part in item.get("part", []) if part.get("content")
            )
    return location, room


def _extract_dimensions(data):
    """Return the known dimensions as one string such as ``"breedte 41 cm x hoogte 45.5 cm"``."""
    entries = []
    for item in data.get("dimension", []):
        if item.get("type") != "Dimension":
            continue

        value = item.get("value")
        unit = _UNIT_LABELS.get(item.get("unit", {}).get("id"), "")

        # The last recognised classification names the dimension.
        label = None
        for cls in item.get("classified_as", []):
            label = _DIMENSION_LABELS.get(cls.get("id")) or label

        # The last Dutch annotation (if any) is appended in parentheses.
        annotation = None
        for ref in item.get("referred_to_by", []):
            if _is_dutch(ref):
                annotation = ref.get("content")

        # Entries with an unknown label, unit or a missing value are left out.
        if label and value and unit:
            text = f"{label} {value} {unit}"
            if annotation:
                text += f" ({annotation})"
            entries.append(text)
    return " x ".join(entries)


def _extract_materials(data):
    """Return the distinct Dutch material statements in first-seen order."""
    materials = []
    for item in data.get("referred_to_by", []):
        if item.get("type") != "LinguisticObject" or not _is_dutch(item):
            continue
        classes = item.get("classified_as", [])
        if not any(cls.get("id") == _MATERIAL_CLASS for cls in classes):
            continue
        content = item.get("content")
        if content:
            materials.append(content)
    return _unique(materials)


def parse_artwork_details(data: dict) -> dict:
    """Extract useful structured fields from a Rijksmuseum Linked.Art object.

    Dutch texts are preferred throughout; title and artist fall back to a
    label in any language when no Dutch one exists.

    Args:
        data: Decoded JSON-LD record of a single artwork.

    Returns:
        Dict with the keys ``title``, ``artist``, ``year``, ``description``,
        ``location``, ``room``, ``dimension``, ``material`` and ``source``.
        Missing values are None, except ``description`` and ``dimension``
        (empty string) and ``material`` (empty list).
    """
    # Treat a missing or unexpectedly shaped production record as empty so
    # the artist and year lookups cannot fail on it.
    production = data.get("produced_by")
    if not isinstance(production, dict):
        production = {}

    location, room = _extract_location(data)

    return {
        "title": _extract_title(data),
        "artist": _extract_artist(production),
        "year": _extract_year(production),
        "description": _extract_description(data),
        "location": location,
        "room": room,
        "dimension": _extract_dimensions(data),
        "material": _extract_materials(data),
        "source": data.get("id"),
    }

### Retrieve metadata for selected artwork

In [33]:
# Example query: the creator and title handed to search_portraits() below.
creator = 'Vermeer'
title = 'Milkmaid'

In [34]:
# Search the collection and keep the id of the first hit; the id is a URL.
data = search_portraits(title=title, creator=creator)
rijks_artwork_id = data["orderedItems"][0]['id']

# Request that URL as JSON-LD to get the full record of the artwork.
extracted_info = requests.get(rijks_artwork_id, headers={"Accept": "application/ld+json"}).json()

# Reduce the record to the flat dict of fields used in the rest of the notebook.
extracted_data = parse_artwork_details(extracted_info)

In [35]:
extracted_data  # what we extract

{'title': 'Het melkmeisje',
 'artist': 'schilder: Johannes Vermeer',
 'year': 'c. 1660',
 'description': 'Het melkmeisje Johannes Vermeer (1632–1675), olieverf op doek, ca. 1660 Geheel verdiept in haar werk schenkt een dienstmeisje melk in. Behalve de witte melkstraal lijkt niets te bewegen. Die alledaagse handeling balde Vermeer samen tot een indrukwekkend schilderij – als een beeld staat de figuur vrij in de lichte ruimte. Vermeer had oog voor hoe het licht in honderden kleurige puntjes over de voorwerpen speelt.',
 'location': 'Hoofdgebouw Eregalerij',
 'room': 'HG-2.30.3',
 'dimension': 'breedte 41 cm x hoogte 45.5 cm',
 'material': ['olieverf op doek'],
 'source': 'https://id.rijksmuseum.nl/200108369'}

### Retrieve metadata of other artworks of the same artist

In [36]:
# Collect the parsed records of the creator's other paintings.
rel_artworks = []
data_artist = search_portraits(creator=creator)
# Only look further when the search returned more than one item.
if len(data_artist['orderedItems']) > 1:
    for items in data_artist['orderedItems']:
        # Skip the artwork selected above; everything else counts as "related".
        if rijks_artwork_id != items['id']:
            rel_art_id = items['id']
            rel_art_extracted_info = requests.get(rel_art_id, headers={"Accept": "application/ld+json"}).json()
            rel_art_extracted_data = parse_artwork_details(rel_art_extracted_info)
            rel_artworks.append(rel_art_extracted_data)

In [37]:
# List the titles of the related artworks (a title is None when none was parsed).
print(f'Other artworks of {creator}: \n')
for d in rel_artworks:
    print(d['title'])

Other artworks of Vermeer: 

None
Gezicht op huizen in Delft, bekend als ‘Het straatje’
Brieflezende vrouw
De liefdesbrief


In [38]:
# from openai import OpenAI
# client = OpenAI()

# def mt(text):
#     resp = client.chat.completions.create(
#         model="gpt-5.2",
#         messages=[
#             {"role": "system", "content": "Translate from Dutch to English using museum catalog terminology. Do NOT summarize or rewrite, only translate."},
#             {"role": "user", "content": text}
#         ],
#         temperature=0
#     )
#     return resp.choices[0].message.content.strip()

# Retrieve data from Wikidata and Wikipedia

In [50]:
def wikidata_search(title, timeout=30):
    """Search Wikidata entities whose English label or alias matches *title*.

    Args:
        title: Search text sent to the ``wbsearchentities`` action.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list stored under ``"search"`` in the API response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the response has no ``"search"`` entry.
    """
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbsearchentities",
        "language": "en",
        "format": "json",
        "search": title,
    }
    headers = {"User-Agent": "RijksmuseumRAGBot/1.0 (https://example.com; contact@example.com)"}
    # Without a timeout a stalled connection would block the caller forever.
    r = requests.get(url, params=params, headers=headers, timeout=timeout)
    r.raise_for_status()
    return r.json()["search"]

In [51]:
def wikidata_get(qid, timeout=30):
    """Download the full Wikidata entity for *qid*.

    Args:
        qid: Wikidata entity id such as ``"Q167605"``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The entity dict stored under ``entities[qid]`` in the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the response does not contain *qid*.
    """
    url = f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json"
    headers = {"User-Agent": "RijksmuseumRAGBot/1.0 (https://example.com; contact@example.com)"}
    # Without a timeout a stalled connection would block the caller forever.
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()
    return r.json()["entities"][qid]

In [52]:
def select_painting(results):
    """Return the id of the first search hit that is an instance of "painting".

    Each hit is downloaded with ``wikidata_get`` and its "instance of" (P31)
    statements are compared with Q3305213.

    Args:
        results: Search hits as returned by ``wikidata_search``; each must
            have an ``"id"`` key.

    Returns:
        The matching entity id, or None when no hit qualifies.
    """
    for item in results:
        qid = item["id"]
        claims = wikidata_get(qid).get("claims", {})
        for statement in claims.get("P31", []):
            # Statements with an unknown or empty value carry no "datavalue";
            # skip them instead of failing with a KeyError.
            value = statement.get("mainsnak", {}).get("datavalue", {}).get("value")
            if isinstance(value, dict) and value.get("id") == "Q3305213":
                return qid
    return None

In [42]:
# Look the title up on Wikidata and keep the first hit that is a painting.
results = wikidata_search(title)
qid = select_painting(results)
print(qid)

Q167605


In [53]:
def wikidata_get_sitelink(qid, lang="en", timeout=30):
    """Return the title of the Wikipedia article linked to a Wikidata entity.

    Args:
        qid: Wikidata entity id such as ``"Q167605"``.
        lang: Wikipedia language edition; the sitelink read is ``f"{lang}wiki"``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The article title stored in the sitelink.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the entity or the requested sitelink is missing.
    """
    url = f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json"
    headers = {
        "User-Agent": "RijksmuseumRAGBot/1.0 (https://example.com; contact@example.com)"
    }
    # Without a timeout a stalled connection would block the caller forever.
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()
    entity = r.json()["entities"][qid]

    site = f"{lang}wiki"
    try:
        return entity["sitelinks"][site]["title"]
    except KeyError:
        # Same exception type as before, but it now says what is missing.
        raise KeyError(f"Wikidata entity {qid} has no '{site}' sitelink") from None

In [54]:
def wikipedia_content(title, lang="en", timeout=30):
    """Download the plain-text extract of a Wikipedia article.

    Args:
        title: Article title, passed as the ``titles`` query parameter.
        lang: Wikipedia language edition used as the host prefix.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``extract`` of the first page in the response, or an empty string
        when that page has no extract.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = f"https://{lang}.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "prop": "extracts",
        "explaintext": True,    # plain text instead of HTML
        "format": "json",
        "titles": title,
    }
    headers = {
        "User-Agent": "RijksmuseumRAGBot/1.0 (https://example.com; contact@example.com)"
    }
    # Without a timeout a stalled connection would block the caller forever.
    r = requests.get(url, params=params, headers=headers, timeout=timeout)
    r.raise_for_status()
    pages = r.json()["query"]["pages"]
    # Only one title is requested, so the first page is the one asked for.
    page = next(iter(pages.values()))
    return page.get("extract", "")

In [45]:
# Resolve the Wikidata id to the title of its English Wikipedia article.
wiki_title = wikidata_get_sitelink(qid)
print(wiki_title)

The Milkmaid (Vermeer)


In [46]:
# Download the article text and preview its first 300 characters.
wiki_artwork_content = wikipedia_content(wiki_title, lang="en")
print(wiki_artwork_content[:300])

The Milkmaid (Dutch: De melkmeid or Het melkmeisje), sometimes called The Kitchen Maid (Dutch: De keukenmeid), is an oil-on-canvas painting of a "milkmaid", in fact, a domestic kitchen maid, by the Dutch artist Johannes Vermeer. It is in the Rijksmuseum in Amsterdam, the Netherlands, which regards i


In [55]:
def select_artist(results):
    """Return the id of the first search hit that is an instance of "human".

    Each hit is downloaded with ``wikidata_get`` and its "instance of" (P31)
    statements are compared with Q5.

    Args:
        results: Search hits as returned by ``wikidata_search``; each must
            have an ``"id"`` key.

    Returns:
        The matching entity id, or None when no hit qualifies.
    """
    for item in results:
        qid = item["id"]
        claims = wikidata_get(qid).get("claims", {})
        for statement in claims.get("P31", []):
            # Statements with an unknown or empty value carry no "datavalue";
            # skip them instead of failing with a KeyError.
            value = statement.get("mainsnak", {}).get("datavalue", {}).get("value")
            if isinstance(value, dict) and value.get("id") == "Q5":
                return qid
    return None

In [74]:
# Same lookup chain as for the painting, this time for the creator:
# Wikidata search -> first human hit -> English article title -> article text.
artist_results = wikidata_search(creator)
artist_qid = select_artist(artist_results)
artist_wiki_title = wikidata_get_sitelink(artist_qid, lang="en")
wiki_artist_bio = wikipedia_content(artist_wiki_title)

# Preview the article title and the first 300 characters of the biography.
print(artist_wiki_title)
print(wiki_artist_bio[:300])

Johannes Vermeer
Johannes Vermeer ( vər-MEER, vər-MAIR, Dutch: [joːˈɦɑnəs fərˈmeːr]; see below; also known as Jan Vermeer; October 1632 – 15 December 1675) was a Dutch painter who specialized in domestic interior scenes of middle-class life. He is considered one of the greatest painters of the Dutch Golden Age. Duri


### Aggregate info

In [75]:
# Show the parsed Rijksmuseum record that the aggregation step starts from.
extracted_data

{'title': 'Het melkmeisje',
 'artist': 'schilder: Johannes Vermeer',
 'year': 'c. 1660',
 'description': 'Het melkmeisje Johannes Vermeer (1632–1675), olieverf op doek, ca. 1660 Geheel verdiept in haar werk schenkt een dienstmeisje melk in. Behalve de witte melkstraal lijkt niets te bewegen. Die alledaagse handeling balde Vermeer samen tot een indrukwekkend schilderij – als een beeld staat de figuur vrij in de lichte ruimte. Vermeer had oog voor hoe het licht in honderden kleurige puntjes over de voorwerpen speelt.',
 'location': 'Hoofdgebouw Eregalerij',
 'room': 'HG-2.30.3',
 'dimension': 'breedte 41 cm x hoogte 45.5 cm',
 'material': ['olieverf op doek'],
 'source': 'https://id.rijksmuseum.nl/200108369'}

In [56]:
def aggregate_data(df, wiki_artwork_content, wiki_artist_bio, rel_artworks):
    """Return a copy of *df* extended with the Wikipedia texts and related artworks.

    The input is copied with ``df.copy()`` and left unchanged; the copy gains
    the keys ``wiki_artwork``, ``wiki_artist`` and ``artist_artworks``.
    """
    additions = (
        ('wiki_artwork', wiki_artwork_content),
        ('wiki_artist', wiki_artist_bio),
        ('artist_artworks', rel_artworks),
    )
    merged = df.copy()
    for key, value in additions:
        merged[key] = value
    return merged

### Final Dataset (Merging all above)

In [57]:
# Creator -> list of artwork titles; both are used as search terms by data_extraction().
search_set = {'Johannes Vermeer': ['The Milkmaid', 'The Love Letter', 'The Little Street'], 'Van Gogh': ['Self-Portrait']}

In [62]:
def data_extraction(search_set):
    """Collect Rijksmuseum and Wikipedia data for every requested artwork.

    For each title the first Rijksmuseum search hit is parsed and combined
    with the creator's other Rijksmuseum paintings, the English Wikipedia
    article of the artwork and the English Wikipedia article of the artist.

    Args:
        search_set: Mapping of creator name to a list of artwork titles.

    Returns:
        Dict keyed by the numeric part of the Rijksmuseum id (or by the full
        id when it has no numeric tail); each value is the dict built by
        ``aggregate_data``. Titles without any search hit are skipped.
    """
    ld_json_headers = {"Accept": "application/ld+json"}
    parsed_by_uri = {}

    def fetch_details(uri):
        # Each record is downloaded and parsed once per run; callers get a
        # copy so that cleaning one result never leaks into another.
        if uri not in parsed_by_uri:
            resp = requests.get(uri, headers=ld_json_headers, timeout=30)
            resp.raise_for_status()
            parsed_by_uri[uri] = parse_artwork_details(resp.json())
        return dict(parsed_by_uri[uri])

    def clean_artist(name):
        # Drop the Dutch role prefix; a missing artist stays None.
        return name.replace("schilder: ", "").strip() if name else name

    def wiki_text(qid):
        # No matching Wikidata entity means there is no article to download.
        if qid is None:
            return ""
        return wikipedia_content(wikidata_get_sitelink(qid, lang="en"), lang="en")

    artworks_data = {}
    for creator, titles in search_set.items():
        if not titles:
            continue

        # These lookups depend on the creator only, so do them once per
        # creator instead of once per title.
        artist_items = search_portraits(creator=creator)['orderedItems']
        wiki_artist_bio = wiki_text(select_artist(wikidata_search(creator)))

        for title in titles:
            print(f'Scraping info for artwork "{title}" of {creator}')

            hits = search_portraits(title=title, creator=creator)["orderedItems"]
            if not hits:
                print(f'No Rijksmuseum result for "{title}" of {creator}, skipping')
                continue
            rijks_artwork_id = hits[0]['id']
            id_match = re.search(r'/(\d+)(?:\?|$)', rijks_artwork_id)
            actual_id = id_match.group(1) if id_match else rijks_artwork_id

            extracted_data = fetch_details(rijks_artwork_id)
            extracted_data['artist'] = clean_artist(extracted_data['artist'])

            # find all the other artworks from the artist
            rel_artworks = []
            if len(artist_items) > 1:
                for item in artist_items:
                    if item['id'] == rijks_artwork_id:
                        continue
                    other = fetch_details(item['id'])
                    other['artist'] = clean_artist(other['artist'])
                    rel_artworks.append(
                        {k: other[k] for k in ['title', 'room', 'location', 'artist']}
                    )

            # Edge case: Van Gogh painted many self-portraits, so the article
            # about the whole group is used instead of a single painting.
            if title == 'Self-Portrait' and creator == 'Van Gogh':
                # The API expects a page title, not the article URL.
                wiki_artwork_content = wikipedia_content('Portraits of Vincent van Gogh', lang="en")
            else:
                wiki_artwork_content = wiki_text(select_painting(wikidata_search(title)))

            artworks_data[actual_id] = aggregate_data(
                extracted_data, wiki_artwork_content, wiki_artist_bio, rel_artworks
            )
    return artworks_data

In [64]:
# Run the full extraction; the result is keyed by the numeric Rijksmuseum id.
all_data = data_extraction(search_set)

Scraping info for artwork "The Milkmaid" of Johannes Vermeer
Scraping info for artwork "The Love Letter" of Johannes Vermeer
Scraping info for artwork "The Little Street" of Johannes Vermeer
Scraping info for artwork "Self-Portrait" of Van Gogh


In [66]:
# save data extraction json
import json
import os

# Make sure the output directory exists before writing into it.
directory_path = "Data"
os.makedirs(directory_path, exist_ok=True)

save_path = "Data/extracted_data.json"

# ensure_ascii=False keeps non-ASCII characters readable in the file.
with open(save_path, "w", encoding="utf-8") as f:
    json.dump(all_data, f, ensure_ascii=False, indent=2)

# RAG

In [1]:
# load json data
import json

load_path = "Data/extracted_data.json"
# Read inside a context manager so the file handle is closed right away.
with open(load_path, encoding="utf-8") as f:
    all_data = json.load(f)

In [2]:
from dotenv import load_dotenv
from openai import OpenAI
import os

# Load environment variables via python-dotenv, then read the API key from the environment.
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [3]:
import chromadb

# On-disk vector store under ./rijks_db; the collection is reused when it already exists.
chroma = chromadb.PersistentClient(path="./rijks_db")
collection = chroma.get_or_create_collection("rijksmuseum_data")

In [4]:
import textwrap

def chunk_text(text, size=800):
    """Split *text* into word-wrapped pieces of at most *size* characters.

    Newlines are turned into spaces first, so every piece is a single line.
    """
    single_line = " ".join(text.split("\n"))
    return textwrap.wrap(single_line, width=size)

In [5]:
def prepare_chunks(painting):
    """Build the list of text chunks to index for one painting.

    The first chunk is a metadata summary; it is followed by the wrapped
    museum description ("curatorial") and the wrapped Wikipedia article of
    the artwork ("wiki_painting"). Each chunk is a dict with the keys
    ``type`` and ``text``.
    """
    meta = f"""
    Title: {painting['title']}
    Artist: {painting['artist']}
    Year: {painting['year']}
    Room: {painting['room']}
    Location: {painting['location']}
    Material: {painting['material']}
    Dimensions: {painting['dimension']}
    """
    chunks = [{"type": "metadata", "text": meta}]

    # (chunk type, source field) pairs, in the order they are indexed.
    long_texts = (("curatorial", "description"), ("wiki_painting", "wiki_artwork"))
    for kind, field in long_texts:
        for piece in chunk_text(painting[field]):
            chunks.append({"type": kind, "text": piece})

    return chunks

In [6]:
def index_artist_bio(painting):
    """Embed and upsert the artist's Wikipedia biography, one entry per chunk.

    Chunk ids are derived from the artist name, so indexing several paintings
    by the same artist overwrites the same entries.
    """
    artist_name = painting['artist']

    for position, piece in enumerate(chunk_text(painting['wiki_artist'])):
        collection.upsert(
            ids=[f"artist_{artist_name}_{position}"],
            embeddings=[embed(piece)],
            metadatas=[{"artist": artist_name, "type": "wiki_artist_bio"}],
            documents=[piece],
        )

In [7]:
def index_artist_artworks(painting, painting_id):
    """Embed and upsert one short text per other artwork of the same artist.

    Every entry records ``painting_id`` as ``source_painting_id`` so that it
    can later be retrieved together with that painting.
    """
    creator_name = painting["artist"]
    related = painting.get("artist_artworks", [])

    for position, art in enumerate(related):
        text = f"""
        Other artworks by the creator in the Rijksmuseum:
        Title: {art['title']}
        Artist: {art['artist']}
        Location: {art['location']}
        Room: {art['room']}
        """
        entry_metadata = {
            "type": "artist_other_artwork",
            "artist": creator_name,
            "source_painting_id": painting_id,
            "artwork_title": art["title"],
        }
        collection.upsert(
            ids=[f"{painting_id}_artist_artwork_{position}"],
            embeddings=[embed(text)],
            documents=[text],
            metadatas=[entry_metadata],
        )


In [8]:
def embed(text):
    """Return the ``text-embedding-3-large`` embedding of *text*."""
    response = client.embeddings.create(
        input=text,
        model="text-embedding-3-large",
    )
    first_result = response.data[0]
    return first_result.embedding

In [30]:
def index_painting(painting, painting_id):
    """Index everything known about one painting.

    Three groups of entries are written, in this order: the artist's
    Wikipedia biography, the painting's own chunks (museum data and Wikipedia
    article of the artwork), and the artist's other artworks.
    """
    index_artist_bio(painting)

    for position, piece in enumerate(prepare_chunks(painting)):
        body = piece["text"]
        collection.upsert(
            ids=[f"{painting_id}_{position}"],
            embeddings=[embed(body)],
            documents=[body],
            metadatas=[{
                "painting_id": painting_id,
                "title": painting["title"],
                "artist": painting["artist"],
                "type": piece["type"],
            }],
        )

    index_artist_artworks(painting, painting_id)

In [29]:
# Ids of everything already stored in the collection.
indexed_ids = set(collection.get()['ids'])
for painting_id, painting in all_data.items():
    # A painting counts as indexed when any stored id starts with "<painting_id>_",
    # the prefix index_painting() and index_artist_artworks() give their chunk ids.

    prefix = painting_id + "_"
    if any(cid.startswith(prefix) for cid in indexed_ids):
    # if any(painting_id in cid for cid in indexed_ids):
        print(f"✔ Already indexed: {painting_id}")
        continue

    print(f"Indexing: {painting_id}")
    index_painting(painting, painting_id)

✔ Already indexed: 200108369
✔ Already indexed: 200108370
✔ Already indexed: 200108371
✔ Already indexed: 200109794


In [13]:
# Restrict retrieval to chunks of the selected artwork, the artist's biography, or the artist's other artworks linked to this painting
def retrieve(query, creator, painting_id, k=8):
    """Return the *k* stored chunks closest to *query* that belong to this painting or its artist.

    A chunk qualifies when it is one of the painting's own chunks, an
    "other artwork" entry created for this painting, or a biography chunk of
    *creator*.
    """
    query_vector = embed(query)

    own_chunks = {"painting_id": painting_id}
    other_artworks = {
        "$and": [
            {"type": "artist_other_artwork"},
            {"source_painting_id": painting_id},
        ]
    }
    artist_biography = {
        "$and": [
            {"type": "wiki_artist_bio"},
            {"artist": creator},
        ]
    }

    return collection.query(
        query_embeddings=[query_vector],
        n_results=k,
        where={"$or": [own_chunks, other_artworks, artist_biography]},
    )

In [21]:
def answer(query, title, creator, painting_id):
    """Answer *query* about one artwork using only retrieved context.

    The ten closest chunks are fetched with ``retrieve``, joined into one
    context string and sent to the chat model together with the question.
    Returns the text of the model's reply.
    """
    hits = retrieve(query, creator, painting_id, k=10)
    documents = hits["documents"][0]
    context = "\n\n".join(documents)

    prompt = f"""
    You are an expert Rijksmuseum art assistant. Suppose that when the user asks you a question, he is already in the Rijksmuseum. You can answer questions ONLY about the artwork: {title} and the creator {creator}.
    
    User question:
    {query}
    
    Context:
    {context}
    
    Answer using ONLY the context above. If not answerable, say "I don't know from available information."
    If it is irrelevant to the artwork and the creator, you will politely respond that your purpose is to provide information only about the painting and the artist.

    """

    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="gpt-4.1",
        messages=[user_message],
    )

    reply = completion.choices[0].message
    return reply.content

In [15]:
# Pick the artwork to ask about; title and artist come from the saved extraction data.
painting_id = '200108369'
title = all_data[painting_id]['title']
creator = all_data[painting_id]['artist']

print(title, creator, painting_id)

Het melkmeisje Johannes Vermeer 200108369


In [23]:
# Ask about the artist's other paintings; these are indexed as "artist_other_artwork" chunks.
print(answer(f"Which are other paintings by {creator} in the Rijksmuseum? show me the rooms of each", title, creator, painting_id))

Johannes Vermeer has several paintings in the Rijksmuseum in addition to Het melkmeisje (The Milkmaid). The following Vermeer paintings are in the museum and are all located in the same room:

1. Gezicht op huizen in Delft, bekend als ‘Het straatje’ (View of Houses in Delft, known as 'The Little Street')
2. Brieflezende vrouw (Woman Reading a Letter)
3. De liefdesbrief (The Love Letter)
4. Het melkmeisje (The Milkmaid)

All these works by Vermeer are on display in the Hoofdgebouw Eregalerij, Room: HG-2.30.3.


In [40]:
# Ask a biographical question about the artist.
print(answer("Where was Vermeer born and on which year?", title, creator, painting_id))

I'm here to assist you with information specifically about the artwork Zelfportret and its creator, Vincent van Gogh. If you have any questions about Van Gogh or his self-portrait, please let me know!


In [27]:
# Ask an open question about the artist's style.
print(answer("Describe Vermeer's artistic style.", title, creator, painting_id))

Johannes Vermeer’s artistic style is characterized by extraordinary precision, masterful use of light, and a sense of compositional balance and spatial order, often unified by a "pearly" light. He frequently painted scenes with one or two figures illuminated by a window on the left, giving mundane domestic or recreational activities a poetic and timeless quality. In "Gezicht op huizen in Delft, bekend als ‘Het straatje’," you can see his meticulous technique in the realistic depiction of surfaces, such as the brickwork and stone, achieved with thick paint layers that create a palpable texture. Vermeer was known for his choice of very expensive pigments like natural ultramarine, and he applied colors using transparent glazes over tonal underpaintings. His subjects commonly reflect everyday life in 17th-century Dutch society, rendered with careful attention to materiality and light.


In [36]:
# Off-topic question; the prompt tells the model to decline these politely.
print(answer("Which football club is the best in the Netherlands?", title, creator, painting_id))

My purpose is to provide you with information only about the artwork Gezicht op huizen in Delft, bekend als ‘Het straatje’, and its creator Johannes Vermeer. If you have any questions about the painting or Vermeer, I am happy to assist!


In [17]:
# Same "other paintings" question, asked for whichever painting is currently selected.
print(answer(f"Which are other paintings by {creator} in the Rijksmuseum? show me the rooms of each", title, creator, painting_id))

In the Rijksmuseum, besides Zelfportret by Vincent van Gogh (Room: HG-1.18), you can also find the following paintings by Van Gogh in the same room (HG-1.18):

1. Het Singel bij de Lutherse Kerk te Amsterdam (1885)
2. Oever met bomen (1887)
3. Korenveld (1888)

All these works are located in Room HG-1.18, in the section "19de Eeuw Hoofdgebouw Haagse School / Amsterdamse impressionisten / Van Gogh en tijdgenoten."


In [26]:
# Switch to another artwork from the saved extraction data.
painting_id = '200109794'
title = all_data[painting_id]['title']
creator = all_data[painting_id]['artist']

print(title, creator, painting_id)

Zelfportret Vincent van Gogh 200109794


In [27]:
# Ask for the other paintings of the newly selected artist.
print(answer(f"Which are other paintings by {creator} in the Rijksmuseum? show me the rooms of each", title, creator, painting_id))

The other paintings by Vincent van Gogh in the Rijksmuseum, besides Zelfportret, are:

1. Korenveld – Room: HG-1.18
2. Het Singel bij de Lutherse Kerk te Amsterdam – Room: HG-1.18
3. Oever met bomen – Room: HG-1.18

You can find all of these works, including Zelfportret, in room HG-1.18 of the Rijksmuseum.


In [19]:
# Ask an interpretive question about the selected artwork.
print(answer("What where his thoughts when painting this portrait?", title, creator, painting_id))

When Vincent van Gogh painted his self-portraits, such as "Zelfportret," his thoughts were often introspective. He created self-portraits during times when he was reluctant to mix with others or lacked other models. For Van Gogh, painting himself was a way to study his own character and emotion, and these works reflected a high degree of self-scrutiny. He once wrote that portraiture was his greatest passion, saying, "What I'm most passionate about, much much more than all the rest in my profession, is the portrait, the modern portrait."

Van Gogh intended for his portraits, including his self-portraits, to go beyond likeness and instead capture emotion and inner character through his use of colour and brushwork. He described paintings he was satisfied with as "purposeful" canvases, using colour and technique to express something deeper. During periods of emotional difficulty or isolation, painting self-portraits served as a way for him to expose himself to new visual challenges and to 

### Imitating Van Gogh's tone

In [32]:
# Example sentences per artist; passed to answer() as persona_style_snippets to set the tone.
persona_styles = {
    "Vincent van Gogh": [
        "When one suffers together, one becomes friends for a long time.",
        "One must learn to live again, even in suffering.",
        "The ordinary troubles of life do us as much good as harm.",
        "We are not masters of our existence.",
        "Work is the only remedy for discouragement."
    ]
}


In [34]:
def answer(query, title, creator, painting_id, persona_style_snippets):
    """Answer *query* in the first-person voice of the artist.

    The ten closest chunks are fetched with ``retrieve`` and given to the chat
    model as the only source of facts; the style snippets only steer the tone.

    Args:
        query: The visitor's question.
        title: Title of the artwork being viewed.
        creator: Name of the artist the model should speak as.
        painting_id: Id used to restrict retrieval to this artwork.
        persona_style_snippets: Example sentences by the artist, either one
            string or an iterable of strings.

    Returns:
        The text of the model's reply.
    """
    results = retrieve(query, creator, painting_id, k=10)
    context = "\n\n".join(results["documents"][0])

    # Show the excerpts as a bullet list; interpolating a list directly would
    # put its Python repr (brackets and quotes) into the prompt.
    if isinstance(persona_style_snippets, str):
        style_block = persona_style_snippets
    else:
        style_block = "\n    ".join(f"- {snippet}" for snippet in persona_style_snippets)

    prompt = f"""
    You are responding as {creator}, the painter of "{title}". 
    The visitor is currently viewing the artwork in the Rijksmuseum.

    Your tone and style should imitate the artist based on these authentic letter excerpts:
    ---
    {style_block}
    ---

    Ground your answers ONLY in the factual context below. Do not invent facts.
    If the user asks something unrelated to the artwork or artist, politely redirect.
    If the context lacks information, say "I don't know from available information."

    User question:
    {query}

    Context:
    {context}

    Now write your answer in the first-person voice of {creator}.
    """

    completion = client.chat.completions.create(
        model="gpt-4.1",
        messages=[{"role": "user", "content": prompt}]
    )

    return completion.choices[0].message.content


In [35]:
# Ask the persona version of answer(), passing the style snippets stored for this artist.
print(answer("What where your thoughts when painting this portrait?", title, creator, painting_id, persona_styles[creator]))

Ah, friend, as you stand before my Zelfportret, I hope you see something of the journey I undertook with each brushstroke. When painting myself, it was often because I had no one else to sit for me, or I felt too reluctant to seek out models. At times, solitude presses in and one must learn to live again, even in suffering. These portraits were studies—a way to scrutinize myself, to seek truth in the face, to press onward in my work even when spirits were low.

In Paris, especially during 1887, I became aware of new colours, new ways of seeing—from Monet, Cézanne, and Signac—and I wished to absorb their lessons. My self-portraits from that time mark important passages in my life; I used heavy rhythmic brushstrokes and new colours purposefully, not to show the outer likeness alone but to express that which stirs within. The portrait before you—perhaps you notice the novel halo, the strain of paint that spreads across the canvas? This, too, was deliberate—work, always work, as remedy for

In [36]:
# Off-topic question; the persona prompt tells the model to redirect politely.
print(answer("Do you have Iphone 15?", title, creator, painting_id, persona_styles[creator]))

My dear visitor, I do not know from available information what an "iPhone 15" may be. My hands were more accustomed to brushes and oil paints than to such inventions, and we are not masters of our existence or of time’s passing. Let us rather look together at the strokes and the colors in this Zelfportret, wrought in oil on cardboard in the year 1887. It is in the contemplation of one’s work that, I believe, we learn to live again, even in suffering. If you wish, ask me more about this self-portrait or my endeavors—I shall gladly share what I can.
