In [None]:


import os       # import `os` to read the api key from the environment

import pandas   # import pandas for clean data frame printing
import requests # import `requests` library for api request

# identifying url and query

# to get an api key, you will need to go to: https://github.com/harvardartmuseums/api-docs
# and 'Send a request' to obtain a key.

# in addition, the question is asking for british people in the collection
# since one of the keys in the `records` is `culture` (investigated later in code)
# we will query for culture = British

# do not paste the key into the notebook: everything saved here is shared with
# the file, including cell output (the api echoes the key back in its `next` url).
# export it in your shell before starting jupyter instead:
#   export HARVARD_ART_MUSEUMS_API_KEY='<your key>'

api_key = os.environ.get('HARVARD_ART_MUSEUMS_API_KEY')
if not api_key:
  raise RuntimeError('set the HARVARD_ART_MUSEUMS_API_KEY environment variable to your api key')

url = 'https://api.harvardartmuseums.org/person'
query = {'apikey': api_key, 'q': 'culture:British'}

# `params=` makes it explicit that `query` becomes the url query string;
# the timeout keeps the cell from hanging forever if the server never answers
response = requests.get(url, params=query, timeout=30) # send the request to the url

if response.ok: # make sure the response was successful
  print('able to make successful connection')
else:
  print('not able to make successful connection')


able to make successful connection


In [None]:
# investigate the data returned by the first request

data = response.json() # parse the json body of the response and keep it
data.keys()            # list the top-level keys (last expression, so the cell displays it)

dict_keys(['info', 'records'])

In [None]:
# look at the `info` section of the payload; as the output below shows, it holds
# the paging metadata (`page`, `pages`, `totalrecords`, ...) and a `next` url

data['info']

{'next': 'https://api.harvardartmuseums.org/person?apikey=7d20a900-deda-11e9-a0e4-89a9f1a6994d&q=culture%3ABritish&page=2',
 'page': 1,
 'pages': 187,
 'totalrecords': 1861,
 'totalrecordsperquery': 10}

In [None]:
# since we want to loop through the pages eventually,
# we will save the number of pages reported by the api
# (note: the paging cell further down reassigns `total_pages` to 5,
# so the value saved here is not the one the loop ends up using)

total_pages = data['info']['pages'] # save the number of pages from the info section of the json
total_pages


187

In [None]:
# look at the keys (field names) of the first record in `records`

data['records'][0].keys()

dict_keys(['dateend', 'alphasort', 'displaydate', 'url', 'deathplace', 'id', 'objectcount', 'lastupdate', 'datebegin', 'birthplace', 'personid', 'gender', 'culture', 'displayname'])

In [None]:

raw_records = data['records'] # keep the records section of this response
raw_records                   # last expression, so the cell displays the records

[{'alphasort': 'Wright, Alastair',
  'birthplace': None,
  'culture': 'British',
  'datebegin': 1965,
  'dateend': 0,
  'deathplace': None,
  'displaydate': None,
  'displayname': 'Alastair Wright',
  'gender': 'male',
  'id': 6247,
  'lastupdate': '2020-01-14T04:45:02-0500',
  'objectcount': 0,
  'personid': 6247,
  'url': 'https://www.harvardartmuseums.org/collections/person/6247'},
 {'alphasort': 'Barraud & Jerrard',
  'birthplace': None,
  'culture': 'British',
  'datebegin': 1870,
  'dateend': 1889,
  'deathplace': None,
  'displaydate': 'active1870s-1880s ',
  'displayname': 'Barraud & Jerrard',
  'gender': 'unknown',
  'id': 18421,
  'lastupdate': '2020-01-14T04:47:27-0500',
  'objectcount': 0,
  'personid': 18421,
  'url': 'https://www.harvardartmuseums.org/collections/person/18421'},
 {'alphasort': 'Barraud, Herbert',
  'birthplace': None,
  'culture': 'British',
  'datebegin': 1845,
  'dateend': 1896,
  'deathplace': None,
  'displaydate': '1845 - 1896',
  'displayname': 'Her

In [None]:
# `raw_records[0]` is the whole first record; a notebook cell only displays its
# last expression, so run this line in a cell of its own to actually see it

raw_records[0]

# just the name of the first record (this is the value the cell displays):

raw_records[0]['alphasort']

'Wright, Alastair'

In [None]:
# get the name, birth place and culture of every record on the first page

[{'name': record['alphasort'],        # get name
  'birthplace': record['birthplace'], # and birth place
  'culture': record['culture']        # and culture
 } for record in raw_records]         # for each record in the `raw_records`

# in other words, we loop through the list of records
# and build one small dictionary per record

[{'birthplace': None, 'culture': 'British', 'name': 'Wright, Alastair'},
 {'birthplace': None, 'culture': 'British', 'name': 'Barraud & Jerrard'},
 {'birthplace': None, 'culture': 'British', 'name': 'Barraud, Herbert'},
 {'birthplace': None, 'culture': 'British', 'name': 'Frost, John I.'},
 {'birthplace': 'Berlin', 'culture': 'British', 'name': 'Auerbach, Frank'},
 {'birthplace': 'Berlin', 'culture': 'British', 'name': 'Freud, Lucian'},
 {'birthplace': None, 'culture': 'British', 'name': 'Finn, Herbert John'},
 {'birthplace': None,
  'culture': 'British',
  'name': 'Finch, Heneage, Fourth Earl of Aylsford'},
 {'birthplace': None, 'culture': 'British', 'name': 'Gilpin, William'},
 {'birthplace': None, 'culture': 'British', 'name': 'Feake, John'}]

In [None]:
# define a function that will process the data for us

def process_data(raw_records):
  """Reduce raw person records to the three fields used in this notebook.

  Every input record must provide the keys `alphasort`, `birthplace` and
  `culture` (a missing key raises `KeyError`). Returns a new list holding
  one `name` / `birthplace` / `culture` dictionary per record, in the
  same order as the input.
  """
  slimmed = []
  for person in raw_records:
    slimmed.append({
      'name': person['alphasort'],         # the api's sortable name becomes `name`
      'birthplace': person['birthplace'],
      'culture': person['culture'],
    })
  return slimmed

# declare a list to store all results
british_folk = []

# deliberately cap the crawl at the first 5 pages to keep the demo quick;
# this overrides the page count read from the `info` section earlier
total_pages = 5

for page in range(1, total_pages + 1):

  # build a fresh query for this page instead of mutating the shared `query`
  # dict, so re-running this cell never depends on what a previous run left behind
  page_query = dict(query, page=page)

  # the timeout keeps one stuck request from hanging the whole loop
  response = requests.get(url, params=page_query, timeout=30) # request this page

  if response.ok: # only touch the body once we know the request succeeded

    print('able to make successful connection', 'on page', page)

    data = response.json()        # parse the json body of this page
    raw_records = data['records'] # the records of this page

    # process the page once and append in place (no repeated list copies)
    british_folk.extend(process_data(raw_records))

  else:
    # a failed page is reported and skipped; its body is never parsed
    print('not able to make successful connection', 'on page', page)


able to make successful connection on page 1
able to make successful connection on page 2
able to make successful connection on page 3
able to make successful connection on page 4
able to make successful connection on page 5


In [None]:
# show everything collected by the paging loop (still a plain list of dictionaries here)
british_folk

[{'birthplace': None, 'culture': 'British', 'name': 'Wright, Alastair'},
 {'birthplace': None, 'culture': 'British', 'name': 'Barraud & Jerrard'},
 {'birthplace': None, 'culture': 'British', 'name': 'Barraud, Herbert'},
 {'birthplace': None, 'culture': 'British', 'name': 'Frost, John I.'},
 {'birthplace': 'Berlin', 'culture': 'British', 'name': 'Auerbach, Frank'},
 {'birthplace': 'Berlin', 'culture': 'British', 'name': 'Freud, Lucian'},
 {'birthplace': None, 'culture': 'British', 'name': 'Finn, Herbert John'},
 {'birthplace': None,
  'culture': 'British',
  'name': 'Finch, Heneage, Fourth Earl of Aylsford'},
 {'birthplace': None, 'culture': 'British', 'name': 'Gilpin, William'},
 {'birthplace': None, 'culture': 'British', 'name': 'Feake, John'},
 {'birthplace': None, 'culture': 'British', 'name': 'Read, David Charles'},
 {'birthplace': None, 'culture': 'British', 'name': 'Folkingham, Thomas'},
 {'birthplace': None, 'culture': 'British', 'name': 'Messrs. Lucas'},
 {'birthplace': None, '

In [None]:
# turn the collected list of dictionaries into a data frame (one row per record)
british_folk = pandas.DataFrame(british_folk)

# display the three columns in a fixed, readable order
british_folk[['name', 'culture', 'birthplace']]

Unnamed: 0,name,birthplace,culture
0,"Wright, Alastair",,British
1,Barraud & Jerrard,,British
2,"Barraud, Herbert",,British
3,"Frost, John I.",,British
4,"Auerbach, Frank",Berlin,British
5,"Freud, Lucian",Berlin,British
6,"Finn, Herbert John",,British
7,"Finch, Heneage, Fourth Earl of Aylsford",,British
8,"Gilpin, William",,British
9,"Feake, John",,British
