# Download Met files

Helper to download all William Blake images from the [Met's open API](https://metmuseum.github.io/). This is just a cheap and cheerful script to generate a minimal sample data set. I'm not that familiar with the Met API, so I simply do a search for "William Blake" and download any images that have William Blake listed as the artist and are in the public domain.

## Usage

```python  
from DownloadMetFiles import fetch_works  

fetch_works(
    artist_exact_name="William Blake", 
    search_term="Songs of Innocence William Blake",
    image_dir="../images/innocence",
    limit=10)
```

## Import dependencies

In [1]:
import requests
import os

## Configure some defaults

In [2]:
# Default output directory for downloaded images (relative path).
IMAGE_DIR = "../images/met"

# Default artist display name; fetch_works uses it as artist_exact_name,
# which download_image compares exactly against 'artistDisplayName'.
ARTIST_NAME = "William Blake"

# Default free-text query sent to the search endpoint by fetch_works.
# Reusing the artist's name works fine here.
SEARCH_TERM = "William Blake"

In [3]:
def do_search(term):
    """Search the Met collection API and return the matching object IDs.

    Args:
        term: Free-text search query, e.g. an artist's name.

    Returns:
        A list of integer object IDs; empty when the search has no matches.

    Raises:
        requests.RequestException: If the HTTP request fails.
        KeyError: If the response JSON has no "objectIDs" field.
    """
    url = "https://collectionapi.metmuseum.org/public/collection/v1/search"
    # Pass the term via `params` so requests URL-encodes it; characters such
    # as '&' or '#' would otherwise corrupt the query string.
    response = requests.get(url, params={"q": term})
    # "objectIDs" can be null when nothing matches (see the Met API docs);
    # normalise that to an empty list so callers can always iterate / len().
    return response.json()["objectIDs"] or []

In [4]:
# :: ignore-cell ::
# Try the search with the default artist name; the cell output is the list
# of object IDs returned by do_search.
do_search(ARTIST_NAME)

[383459,
 383509,
 383470,
 383290,
 383456,
 383506,
 371141,
 435671,
 347414,
 348307,
 383462,
 383464,
 383457,
 383468,
 383469,
 383451,
 383458,
 383453,
 383507,
 383460,
 383508,
 383455,
 383466,
 383467,
 371142,
 347901,
 347939,
 347889,
 347938,
 340850,
 347974,
 348004,
 347977,
 348008,
 348040,
 348011,
 347905,
 347922,
 347923,
 347935,
 347918,
 347925,
 347931,
 347933,
 347928,
 347962,
 347971,
 348007,
 347972,
 347929,
 347932,
 347961,
 347907,
 347989,
 348043,
 347904,
 347906,
 347903,
 347924,
 347955,
 347927,
 347959,
 347936,
 347934,
 347969,
 347997,
 348006,
 347966,
 348001,
 347908,
 347973,
 347902,
 347987,
 347930,
 347926,
 347980,
 347968,
 347832,
 340851,
 340853,
 340852,
 347888,
 398714,
 347983,
 426771,
 383669,
 383682,
 383514,
 383515,
 383516,
 375911,
 367367,
 383530,
 383529,
 383531,
 383532,
 360052,
 383511,
 383510,
 383512,
 383513,
 383517,
 383518,
 383519,
 383523,
 383520,
 383526,
 383528,
 383527,
 383525,
 383665,
 

In [5]:
def get_object_metadata(object_id):
    """Fetch one object's record from the Met collection API.

    Args:
        object_id: Object ID to look up (interpolated into the request URL).

    Returns:
        The decoded JSON body of the response.
    """
    endpoint = f"https://collectionapi.metmuseum.org/public/collection/v1/objects/{object_id}"
    response = requests.get(endpoint)
    return response.json()

In [6]:
# Inspect the metadata record of a single object (ID 347901) to see which
# fields are available.
get_object_metadata(347901)

{'objectID': 347901,
 'isHighlight': False,
 'accessionNumber': '17.10.4',
 'accessionYear': '1917',
 'isPublicDomain': True,
 'primaryImage': 'https://images.metmuseum.org/CRDImages/dp/original/DP816577.jpg',
 'primaryImageSmall': 'https://images.metmuseum.org/CRDImages/dp/web-large/DP816577.jpg',
 'additionalImages': ['https://images.metmuseum.org/CRDImages/dp/original/MM4278.jpg'],
 'constituents': [{'constituentID': 166218,
   'role': 'Artist',
   'name': 'William Blake',
   'constituentULAN_URL': 'http://vocab.getty.edu/page/ulan/500012489',
   'constituentWikidata_URL': 'https://www.wikidata.org/wiki/Q41513',
   'gender': ''}],
 'department': 'Drawings and Prints',
 'objectName': 'Print',
 'title': 'Songs of Innocence: Introduction',
 'culture': '',
 'period': '',
 'dynasty': '',
 'reign': '',
 'portfolio': '',
 'artistRole': 'Artist',
 'artistPrefix': '',
 'artistDisplayName': 'William Blake',
 'artistDisplayBio': 'British, London 1757–1827 London',
 'artistSuffix': '',
 'artist

In [7]:
def download_image(object_id, image_dir, artist_name):
    """Download the primary image of one object if it passes the filters.

    The object is skipped (nothing is written) when its metadata cannot be
    fetched or lacks the expected fields, when it is not in the public
    domain, when its artist display name differs from ``artist_name``, when
    it has no primary image, when the target file already exists, or when
    the image download fails.

    Args:
        object_id: Met object ID to fetch.
        image_dir: Existing directory the image file is written into.
        artist_name: Exact artist display name the object must carry.

    Returns:
        1 if a new image file was written, otherwise 0, so callers can sum
        the results to count downloads.
    """
    try:
        metadata = get_object_metadata(object_id)
    except (requests.RequestException, ValueError):
        # ValueError covers a response body that is not valid JSON.
        print(f"Error getting metadata: <ObjectID: {object_id}>")
        return 0

    try:
        meta_artist_name = metadata['artistDisplayName']
        image_url = metadata['primaryImage']
        is_public_domain = metadata['isPublicDomain']
    except (KeyError, TypeError):
        # The payload is not a regular object record (e.g. an error body).
        print(f"Error getting metadata: <ObjectID: {object_id}>")
        return 0

    if not is_public_domain:
        print(f"Not public domain: {object_id}")
        return 0

    if meta_artist_name != artist_name:
        print(f"Not the artist we're looking for: {object_id}: {meta_artist_name} != {artist_name}")
        return 0

    # An empty URL cannot be downloaded and would yield a nameless file.
    if not image_url:
        print(f"No primary image: {object_id}")
        return 0

    out_file = os.path.join(image_dir, f"{object_id}-{os.path.basename(image_url)}")

    # Skip download if image already exists
    if os.path.exists(out_file):
        return 0

    try:
        image_data = requests.get(image_url)
        # Treat HTTP error statuses as failures instead of saving the error
        # body to disk as if it were an image.
        image_data.raise_for_status()
    except requests.RequestException:
        print(f"Error downloading image: <ObjectID: {object_id}>: {image_url}")
        return 0

    # The context manager guarantees the file is flushed and closed.
    with open(out_file, "wb") as image_file:
        image_file.write(image_data.content)

    return 1
        

In [8]:
def fetch_works(*, artist_exact_name=ARTIST_NAME, search_term=SEARCH_TERM, image_dir=IMAGE_DIR, limit=100):
    """Search the Met collection and download matching images.

    Runs a search for ``search_term`` and calls ``download_image`` on each
    returned object ID until ``limit`` new images have been written or the
    results are exhausted. Progress is reported with ``print``.

    Args:
        artist_exact_name: Artist display name each object must match exactly.
        search_term: Free-text query passed to ``do_search``.
        image_dir: Directory to save images in; created if missing.
        limit: Maximum number of new images to download.
    """
    # Create the image_dir if it doesn't exist; exist_ok avoids the race
    # between checking for the directory and creating it.
    os.makedirs(image_dir, exist_ok=True)

    # Run the free-text search. A search with no matches may yield None
    # rather than a list, so normalise to an empty list before len()/iteration.
    object_ids = do_search(search_term) or []
    print(f"Search found {len(object_ids)} potential matches")

    download_count = 0
    for object_id in object_ids:
        if download_count >= limit:
            break
        # download_image returns 1 for a newly written file, otherwise 0.
        download_count += download_image(object_id, image_dir, artist_exact_name)

    print(f"Downloaded {download_count} images")
        

## Demo

Search for "Songs of Innocence William Blake" and download up to 10 images with the artist display name of "William Blake."

In [10]:
# :: ignore-cell ::
# Demo run: search for the "Songs of Innocence" works and save at most 10
# images whose artist display name is exactly "William Blake".
fetch_works(
    artist_exact_name="William Blake", 
    search_term="Songs of Innocence William Blake",
    image_dir="../images/innocence",
    limit=10
)

Search found 54 potential matches
Downloaded 10 images
