In [1]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import os

In [3]:
def select_images(association_obj, limit=100, timeout=30):
    """
    Collect the image URLs of every collection record tagged with a given subject.

    The function pages through the whole collection API, ``limit`` records per
    request, and stops at the first page that contains no ``record`` element.
    For every record whose ``Associated_subject/association.subject`` values
    include ``association_obj``, one row is produced per non-empty
    ``Reproduction/reproduction.reference`` value. Each reference is turned
    into an image URL by wrapping it in the image-server prefix and a ``.jpg``
    suffix. All other child elements of a record are ignored.

    Parameters
    ----------
    association_obj : str
        Subject to look for; compared for exact equality against the text of
        each ``association.subject`` element (e.g. ``"schilderkunst"``).
    limit : int, default 100
        Number of records requested per API call (page size).
    timeout : float or tuple, default 30
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``"Association"`` and ``"Url"``.
        Both columns exist even when nothing matched.

    Raises
    ------
    ValueError
        If ``limit`` is smaller than 1 (the paging loop could never advance).
    requests.HTTPError
        If the API answers with an HTTP error status.
    xml.etree.ElementTree.ParseError
        If a response body is not well-formed XML.

    Notes
    -----
    As a side effect the pandas option ``display.max_colwidth`` is set to
    ``None`` globally, so the full URLs are shown when the frame is displayed.
    """
    if limit < 1:
        raise ValueError("limit must be a positive integer")

    img_prefix = "https://images.memorix.nl/rce/thumb/1600x1600/"
    img_postfix = ".jpg"
    base_url = "https://rcerijswijk.adlibhosting.com/api.wo2/wwwopac.ashx?database=collect"
    search_param = "&search=all"

    # NOTE(review): paging starts at 0 and advances by `limit`. Confirm against
    # the API documentation whether `startfrom` is 0- or 1-based; if it is
    # 1-based, page boundaries may overlap by one record.
    startfrom = 0

    all_data = []

    while True:
        url = f"{base_url}{search_param}&limit={limit}&startfrom={startfrom}"
        response = requests.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page as XML.
        response.raise_for_status()
        root = ET.fromstring(response.content)

        records = root.findall(".//record")
        if not records:
            # An empty page means we have walked past the last record.
            break

        for record in records:
            subjects = [
                node.text
                for node in record.findall(".//Associated_subject/association.subject")
            ]
            if association_obj not in subjects:
                continue

            for reference in record.findall(".//Reproduction/reproduction.reference"):
                image_id = reference.text
                # Skip empty <reproduction.reference/> elements (text is None or "").
                if image_id:
                    all_data.append({
                        "Association": association_obj,
                        "Url": img_prefix + image_id + img_postfix
                    })

        startfrom += limit

    # Kept for backward compatibility: show complete URLs when displaying the frame.
    pd.set_option('display.max_colwidth', None)
    # Explicit columns guarantee a usable schema even when `all_data` is empty.
    return pd.DataFrame(all_data, columns=["Association", "Url"])
    
# Build the DataFrame of image URLs for all records whose association subject
# is "schilderkunst". This walks the whole API page by page (one HTTP request
# per page), so the cell performs network I/O every time it is run.
images = select_images("schilderkunst")

In [4]:
def download_images(df, directory_name="nk_collection_schilderij",
                    file_prefix="schilderij", timeout=30):
    """
    Download every image listed in a DataFrame into a local directory.

    The target directory is created when it does not exist yet. Each URL in
    the ``"Url"`` column is streamed to ``<directory_name>/<file_prefix>_<n>.jpg``
    where ``n`` is the row's index label plus one, so a frame with the default
    integer index yields ``..._1.jpg``, ``..._2.jpg`` and so on. Existing
    files with the same name are overwritten.

    A download that fails (HTTP error status, timeout, connection problem) is
    reported with a printed message and skipped, so one bad URL neither aborts
    the batch nor leaves an error page saved under a ``.jpg`` name.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame as produced by ``select_images``; only the ``"Url"`` column is
        read. An empty frame (with or without columns) downloads nothing.
    directory_name : str, default "nk_collection_schilderij"
        Directory the images are written to.
    file_prefix : str, default "schilderij"
        Prefix of every generated file name.
    timeout : float or tuple, default 30
        Timeout in seconds handed to ``requests.get``.

    Returns
    -------
    None
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(directory_name, exist_ok=True)

    # An empty frame may have no "Url" column at all; nothing to do then.
    if df.empty:
        return

    for idx, img_url in df["Url"].items():
        img_name = os.path.join(directory_name, f"{file_prefix}_{idx+1}.jpg")
        try:
            # The context manager releases the streamed connection even on errors.
            with requests.get(img_url, stream=True, timeout=timeout) as response:
                # Check the status before opening the file so that an error
                # page is never written to disk as if it were an image.
                response.raise_for_status()
                with open(img_name, "wb") as file:
                    for chunk in response.iter_content(1024):
                        file.write(chunk)
        except requests.RequestException as exc:
            print(f"Skipping {img_url}: {exc}")
        
# Download every image listed in `images` into the function's default
# directory ("nk_collection_schilderij"); performs one HTTP request per row.
download_images(images)