In [None]:
import pandas as pd
import numpy as np
import requests
import os
import json  


In [12]:
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load it so the variables it defines (such as the API key
# read from os.environ later on) are available to this kernel. The return value
# is bound to `_` so the cell produces no output.
_ = load_dotenv(find_dotenv()) # read local .env file


# Project Description

### 1.- Defining Variables

The cell below defines the variables used to search the Harvard Art Museums API: the API key (read from the environment) and the name of the artist to look for.

In [3]:
# API key taken from the environment; raises KeyError if HARVARD_API_KEY is not set
key  = os.environ['HARVARD_API_KEY']
# Artist name sent to the API as the `person` search parameter
artist = "Cezanne"



The cell below queries the `object` endpoint of the API for objects associated with the artist, passing the artist name in the `person` parameter.

In [4]:
# Let requests build and percent-encode the query string so artist names that
# contain spaces or special characters are sent correctly.
r = requests.get(
    'https://api.harvardartmuseums.org/object',
    params={'person': artist, 'apikey': key},
    timeout=30,  # do not let the cell hang forever on a stalled connection
)
# Stop here on an HTTP error instead of failing later on an unexpected payload
r.raise_for_status()
data = r.json()

In [5]:
# Extract the info and records
info = data['info']
records = data['records']

In [15]:
# Show every record returned on this page, one per line
for record in records:
    print(record)
    


{'copyright': None, 'contextualtextcount': 0, 'creditline': 'Harvard Art Museums/Fogg Museum, Bequest of Mary Gershinowitz', 'accesslevel': 1, 'dateoflastpageview': '2023-11-13', 'classificationid': 21, 'division': 'European and American Art', 'markscount': 11, 'publicationcount': 2, 'totaluniquepageviews': 474, 'contact': 'am_europeanamerican@harvard.edu', 'colorcount': 3, 'rank': 124223, 'id': 97993, 'state': None, 'verificationleveldescription': 'Best. Object is extensively researched, well described and information is vetted', 'period': None, 'images': [{'date': '2018-08-20', 'copyright': 'President and Fellows of Harvard College', 'imageid': 491094, 'idsid': 457568629, 'format': 'image/jpeg', 'description': None, 'technique': 'Make:Hasselblad;Model:Hasselblad H5D-50c MS;Orientation:1;Software:Adobe Photoshop CS6 (Macintosh);', 'renditionnumber': '768158', 'displayorder': 1, 'baseimageurl': 'https://nrs.harvard.edu/urn-3:HUAM:768158', 'alttext': None, 'width': 2550, 'publiccaption'

## Results with Provenance

The code below builds a DataFrame (a table, similar to an Excel sheet) with one row per object found for the artist, including its provenance.

In [6]:
# Artist to search for in this section; change it here to look up someone else
artist = "Cezanne"

In [7]:
def results_provenance(url, df=None):
    """Collect title, classification, century and provenance for every record.

    Starting at ``url``, each page of results is requested and the link in
    ``info['next']`` is followed until the API no longer returns one.

    Parameters
    ----------
    url : str
        URL of the first page of results.
    df : pandas.DataFrame, optional
        Existing rows to place in front of the newly fetched ones.

    Returns
    -------
    pandas.DataFrame
        One row per record with the columns ``title``, ``classification``,
        ``century`` and ``provenance``. A field that is missing from a record
        is stored as ``None`` instead of raising ``KeyError``.

    Raises
    ------
    requests.HTTPError
        If any page request comes back with an error status.
    """
    columns = ['title', 'classification', 'century', 'provenance']
    rows = []

    # Loop over the pages instead of recursing, so the number of pages is not
    # limited by Python's recursion depth.
    while url:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        data = r.json()

        # Extract relevant data from each record
        for record in data.get('records', []):
            rows.append({column: record.get(column) for column in columns})

        url = data.get('info', {}).get('next')

    # Build the frame once from all rows rather than concatenating per page
    fetched = pd.DataFrame(rows, columns=columns)
    if df is None:
        return fetched
    return pd.concat([df, fetched], ignore_index=True)

In [8]:
# First page of the object search for this artist; the key travels in the query string
url = 'https://api.harvardartmuseums.org/object?person={}&apikey={}'.format(artist, key)
# Table with one row per matching record, provenance included
df = results_provenance(url)

In [9]:
# Number of rows in the table, i.e. how many records were collected
len(df)

32

## Results with no Provenance

In [10]:
def results(url, df=None):
    """Collect title, classification and century for every record.

    Starting at ``url``, each page of results is requested and the link in
    ``info['next']`` is followed until the API no longer returns one.

    Parameters
    ----------
    url : str
        URL of the first page of results.
    df : pandas.DataFrame, optional
        Existing rows to place in front of the newly fetched ones.

    Returns
    -------
    pandas.DataFrame
        One row per record with the columns ``title``, ``classification``
        and ``century``. A field that is missing from a record is stored as
        ``None`` instead of raising ``KeyError``.

    Raises
    ------
    requests.HTTPError
        If any page request comes back with an error status.
    """
    columns = ['title', 'classification', 'century']
    rows = []

    # Loop over the pages instead of recursing, so the number of pages is not
    # limited by Python's recursion depth.
    while url:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        data = r.json()

        # Extract relevant data from each record
        for record in data.get('records', []):
            rows.append({column: record.get(column) for column in columns})

        url = data.get('info', {}).get('next')

    # Build the frame once from all rows rather than concatenating per page
    fetched = pd.DataFrame(rows, columns=columns)
    if df is None:
        return fetched
    return pd.concat([df, fetched], ignore_index=True)

In [11]:
# First page of the object search for this artist; the key travels in the query string
url = 'https://api.harvardartmuseums.org/object?person={}&apikey={}'.format(artist, key)
# Display the table of title, classification and century
results(url)

Unnamed: 0,title,classification,century
0,Tree Trunks,Drawings,19th century
1,Forest Interior,Drawings,19th century
2,House Among Trees,Drawings,19th century
3,Portrait of a Man (Emile Zola?); verso: Study ...,Drawings,19th century
4,Study of Trees,Paintings,19th century
5,Still Life with Game Birds,Paintings,19th century
6,Jules Peyron,Paintings,19th century
7,Plaster Cupid,Paintings,19th century
8,Small Houses in Pontoise,Paintings,19th century
9,The Small Bathers,Prints,19th century


## Download Images 

The function below downloads the primary image of each painting that the API returns for a particular artist. Keep in mind that a `person` search may also return works where the artist is not the only person involved, so check the API documentation and decide exactly what you want to show.

To download the paintings of a different artist, simply change the `artist` variable in the cell below.

In [21]:
# Artist whose paintings will be downloaded; change it here to pick someone else
artist = "Cezanne"

In [22]:

def _fetch_painting_records(endpoint, params):
    """Return the records of every result page, following ``info['next']``."""
    records = []
    url, query = endpoint, params
    while url:
        response = requests.get(url, params=query, timeout=30)
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            break
        payload = response.json()
        records.extend(payload.get("records", []))
        url = payload.get("info", {}).get("next")
        # The "next" link is requested as-is, so the params are only sent once
        query = None
    return records


def download_artist_paintings(artist,key):
    """Download the primary image of every painting found for ``artist``.

    The object endpoint is searched with ``person=artist`` and
    ``classification=Paintings``; all result pages are read, not only the
    first one. Each image is written to
    ``<artist in lower case, spaces as underscores>_paintings/<objectnumber>.jpg``
    relative to the current working directory, and a status line is printed
    for every record.

    Parameters
    ----------
    artist : str
        Artist name used as the ``person`` search parameter and to name the
        output directory.
    key : str
        API key sent as the ``apikey`` parameter.

    Returns
    -------
    None
    """
    # Send API request(s) and collect the records from all pages
    data = _fetch_painting_records(
        "https://api.harvardartmuseums.org/object",
        {
            "apikey": key,
            "person": artist,
            "classification": "Paintings",
        },
    )

    # Create directory for images if it doesn't exist
    directory_name = artist.lower().replace(' ', '_') + '_paintings'
    os.makedirs(directory_name, exist_ok=True)

    # Download and save images
    for record in data:
        object_number = record["objectnumber"]
        # A record without an image may omit the key entirely or carry None
        image_url = record.get("primaryimageurl")
        if not image_url:
            print(f"No image found for {object_number}")
            continue

        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            print(f"Error downloading image for {object_number}")
            continue

        # Keep the file inside the target directory even if the object number
        # happens to contain a path separator
        file_stem = str(object_number).replace("/", "_").replace(os.sep, "_")
        with open(os.path.join(directory_name, f"{file_stem}.jpg"), "wb") as f:
            f.write(response.content)
        print(f"Saved image for {object_number}")

In [23]:
# Save the artist's painting images into a local folder named after the artist
download_artist_paintings(artist,key)

Saved image for 1998.305
Saved image for 1976.70
Saved image for 1961.144
Saved image for 1964.72
Saved image for 1934.28
Saved image for 1951.46
