In [1]:
import pandas as pd
import numpy as np
import requests
import os

# Project Description

### 1.- Defining Variables

The cell below contains code that searches for an artist using different resources of the Harvard Art Museum API - 

In [2]:
key = "38e8628b-5bf1-40c0-85dd-72c788315a1b"
artist = "Cezanne"

The cell below will use the title to find an object with the artist name

In [3]:
r = requests.get(f'https://api.harvardartmuseums.org/object?person={artist}&apikey={key}')
data = r.json()

In [4]:
# Extract the info and records
info = data['info']
records = data['records']

In [28]:
data

{'info': {'totalrecordsperquery': 10,
  'totalrecords': 32,
  'pages': 4,
  'page': 1,
  'next': 'https://api.harvardartmuseums.org/object?person=Cezanne&apikey=38e8628b-5bf1-40c0-85dd-72c788315a1b&page=2',
  'responsetime': '11 ms'},
 'records': [{'copyright': None,
   'contextualtextcount': 0,
   'creditline': 'Harvard Art Museums/Fogg Museum, Horace Swope Fund',
   'accesslevel': 1,
   'dateoflastpageview': '2023-02-09',
   'classificationid': 23,
   'division': 'European and American Art',
   'markscount': 0,
   'publicationcount': 1,
   'totaluniquepageviews': 118,
   'contact': 'am_europeanamerican@harvard.edu',
   'colorcount': 7,
   'rank': 220347,
   'id': 264367,
   'state': 'iii',
   'verificationleveldescription': 'Good. Object is well described and information is vetted',
   'period': None,
   'images': [{'date': '1998-11-01',
     'copyright': 'President and Fellows of Harvard College',
     'imageid': 515,
     'idsid': 20669228,
     'format': 'image/jpeg',
     'descri

## Results with Provenance

The code below will provide you with a dataframe (like an excel file with the artist information)

In [5]:
artist = "Cezanne"

In [16]:
def results_provenance(url, df=pd.DataFrame()):
    r = requests.get(url)
    data = r.json()
    info = data['info']
    records = data['records']
    
    # Extract relevant data from each record
    record_data = []
    for record in records:
        record_data.append({
            'title': record['title'],
            'classification': record['classification'],
            'century': record['century'],
            'provenance': record['provenance']
        })
    
    # Append record data to DataFrame
    df = df.append(pd.DataFrame(record_data))
    
    # Recursively call pagination function for next page, if it exists
    if 'next' in info:
        return results_provenance(info['next'], df)
    else:
        return df

In [17]:
url = f'https://api.harvardartmuseums.org/object?person={artist}&apikey={key}'
df = results_provenance(url)

  df = df.append(pd.DataFrame(record_data))
  df = df.append(pd.DataFrame(record_data))
  df = df.append(pd.DataFrame(record_data))
  df = df.append(pd.DataFrame(record_data))


In [18]:
df

Unnamed: 0,title,classification,century,provenance
0,Female Figure Ornamenting a Clock; verso: Frag...,Drawings,19th century,"[Ambroise Vollard, Paris]. [New Gallery, New Y..."
1,Study of an Ecorché and a Man's Face in Profil...,Drawings,19th century,"Paul Cézanne fils. [The New Gallery, New York]..."
2,"Sheet of Studies, including a Skull",Drawings,19th century,"[Ambroise Vollard, Paris]. [Waldimir Walter, P..."
3,"Rocky Landscape, after Cézanne",Drawings,20th century,"Lewis W. Rubenstein, gift; to Fogg Art Museum,..."
4,Tree Trunks,Drawings,19th century,"Paul Cézanne fils, Paris.\r\nPaul Guillaume[?]..."
5,Forest Interior,Drawings,19th century,"Paul Cézanne; his son, Paul Cézanne. [Georges ..."
6,House Among Trees,Drawings,19th century,"[Ambroise Vollard, Paris.] [Marcel Guiot, Pari..."
7,Portrait of a Man (Emile Zola?); verso: Study ...,Drawings,19th century,"Paul Cézanne fils, Paris.\r\nPaul Guillaume, P..."
8,Study of Trees,Paintings,19th century,"[Ambroise Vollard, Paris.] Estate of Ambroise ..."
9,Still Life with Game Birds,Paintings,19th century,"Paul Gachet, Auvers, France (c. 1873-1909); to..."


## Results with no Provenance

In [14]:
def results(url, df=pd.DataFrame()):
    r = requests.get(url)
    data = r.json()
    info = data['info']
    records = data['records']
    
    # Extract relevant data from each record
    record_data = []
    for record in records:
        record_data.append({
            'title': record['title'],
            'classification': record['classification'],
            'century': record['century']
        })
    
    # Append record data to DataFrame
    df = df.append(pd.DataFrame(record_data))
    
    # Recursively call pagination function for next page, if it exists
    if 'next' in info:
        return results(info['next'], df)
    else:
        return df

In [15]:
url = f'https://api.harvardartmuseums.org/object?person={artist}&apikey={key}'
results(url)

  df = df.append(pd.DataFrame(record_data))
  df = df.append(pd.DataFrame(record_data))
  df = df.append(pd.DataFrame(record_data))
  df = df.append(pd.DataFrame(record_data))


Unnamed: 0,title,classification,century
0,The Small Bathers,Prints,19th century
1,The Artist,Prints,19th-20th century
2,"Armand Guillaumin, Hanged",Prints,19th-20th century
3,The Large Bathers,Prints,19th century
4,Mont Sainte-Victoire (recto and verso),Drawings,19th century
5,Head of a Young Boy,Drawings,19th-20th century
6,Portrait of Ambroise Vollard,Drawings,19th century
7,Heads of Mme Cézanne and Louis-Auguste Cézanne...,Drawings,19th century
8,View of Mont Sainte Victoire; verso: Study of ...,Drawings,19th century
9,Study of a Nude Figure,Drawings,19th century


## Download Images 

The function below will download all images for a particular artist, now it could also be that is not only the artist, so we have to do some API research, but that is your job, you need to know what you want to show

You simply have to modify the artist

In [10]:
artist = "Cezanne"

In [11]:

def download_artist_paintings(artist,key):
    # Set up API endpoint and parameters
    endpoint = "https://api.harvardartmuseums.org/object"
    params = {
        "apikey": key,
        "person": artist,
        "classification": "Paintings",
    }

    # Send API request and extract relevant data from JSON response
    response = requests.get(endpoint, params=params)
    if response.status_code == 200:
        data = response.json()["records"]
    else:
        print(f"Error: {response.status_code}")
        data = []

    # Create directory for images if it doesn't exist
    directory_name = artist.lower().replace(' ', '_') + '_paintings'
    if not os.path.exists(directory_name):
        os.makedirs(directory_name)

    # Download and save images
    for record in data:
        object_number = record["objectnumber"]
        image_url = record["primaryimageurl"]
        if image_url:
            response = requests.get(image_url)
            if response.status_code == 200:
                image_data = response.content
                with open(f"{directory_name}/{object_number}.jpg", "wb") as f:
                    f.write(image_data)
                    print(f"Saved image for {object_number}")
            else:
                print(f"Error downloading image for {object_number}")
        else:
            print(f"No image found for {object_number}")

In [12]:
download_artist_paintings(artist,key)

Saved image for 1998.305
Saved image for 1976.70
Saved image for 1961.144
Saved image for 1964.72
Saved image for 1934.28
Saved image for 1951.46
