In [1]:
import os # this module provides functions for interacting with the operating system
import pandas as pd # for data manipulation and analysis
import json # standard format inspired by JavaScript for data exchange and data transfer as text format over a network
from pprint import pprint # utility module that you can use to print data structures in a readable, pretty way
import re # regular expression syntax
import pyeuropeana.apis as apis # Europeana's search API

In [3]:
# Do not store the API key in the notebook. Use a key that is already exported
# as EUROPEANA_API_KEY; otherwise ask for one (the typed value is not echoed).
if not os.environ.get('EUROPEANA_API_KEY'):
    from getpass import getpass
    os.environ['EUROPEANA_API_KEY'] = getpass('Europeana API key: ')

In [4]:
# Maps each search term to the number of results reported for it
# (filled in by the query loop that iterates over `terms`).
termscounted = {}

# Contentious terms (English, Dutch and French) to look up, one query per entry.
# Every term appears exactly once so that no query is sent twice.
# NOTE(review): "handicaped" and "mûlatre" look like misspellings of
# "handicapped" / "mulâtre" - confirm before changing them, because the
# reported counts depend on the exact search string.
terms = [
    "aborigine", "aboriginal", "inboorling", "inlander", "indigène", "barbarian", "barbaar", "barbare", "barbares",
    "berber", "berbers", "berbère", "berbères", "Bombay", "Colored", "kleurling", "gekleurd", "Coolie", "Koelie",
    "disabled", "handicaped", "crippled", "cripple", "cripples", "handicapé", "handicapés", "gehandicapten", "infirme",
    "infirmes", "invalide", "invalides", "eskimo", "eskimos", "Gypsy", "gypsies", "Zigeuner", "Gitan", "gitans",
    "Bohémien", "bohémiens", "bohémienne", "bohémiennes", "Hottentot", "Indian", "Indians", "Indiaan", "Indien",
    "indiens", "Kaffir", "Kaffer", "Lappen", "Lapp", "Lapland", "Lapon", "Laponie", "Mohammedan", "Moor",
    "Maure", "Mauresque", "Mulatto", "mulat", "mûlatre", "negro", "negroes", "neger", "negers", "nikker", "nègre",
    "négresse", "Oriental", "oosters", "oosterling", "Orientale", "orientals", "orientales", "orientaux", "Planter",
    "primitive", "primitief", "primitif", "slave", "slaaf", "slaves", "savage", "sauvage", "Événements d'Algérie",
    "Politionele Actie", "amulet", "amulette", "talisman", "bedeltje",
]

In [5]:
# Run one search per term and record how many items each query matches.
for term in terms:
    response = apis.search(query=term)  # the result of the search is a dictionary
    total = response["totalResults"]  # the total number of items that the query returned
    termscounted[term] = total
    print(term)
    print(total)

aborigine
39
aboriginal
1076
inboorling
15
inlander
123
indigène
1979
barbarian
1143
barbaar
14
barbare
420
barbares
344
berber
2312
berbers
235
berbère
871
berbères
943
Bombay
6784
Colored
18510
kleurling
7
gekleurd
8282
Coolie
541
Koelie
62
disabled
3760
handicaped
3
crippled
542
cripple
1573
cripples
134
handicapé
185
handicapés
242
gehandicapten
1119
infirme
120
infirmes
79
invalide
1226
invalides
5064
eskimo
1386
eskimos
243
Gypsy
3565
gypsies
1243
Zigeuner
925
Gitan
148
gitans
433
Bohémien
104
bohémiens
695
bohémienne
260
bohémiennes
32
Hottentot
835
Indian
55507
Indians
4271
Indiaan
388
Indien
104449
indiens
2749
Kaffir
548
Kaffer
75
Lappen
1514
Lapp
9502
Lapland
37827
Lapon
1295
Laponie
34155
Lapp
9502
Mohammedan
133
Moor
26312
Maure
911
Mauresque
2026
Mulatto
82
mulat
330
mûlatre
217
negro
331462
negroes
158
neger
1304
negers
233
nikker
46
nègre
54547
négresse
310
Oriental
137834
oosters
1636
oosterling
212
Orientale
120715
orientals
1676
orientales
9300
orientaux
1230
Planter

In [5]:
# Display the per-term result counts ordered from the smallest count to the largest.
{term: hits for term, hits in sorted(termscounted.items(), key=lambda pair: pair[1])}

{'handicaped': 3,
 'barbaar': 14,
 'inboorling': 15,
 'bedeltje': 17,
 'bohémiennes': 32,
 'nikker': 46,
 'Koelie': 62,
 'Kaffer': 75,
 "Événements d'Algérie": 78,
 'infirmes': 79,
 'Mulatto': 82,
 'Bohémien': 104,
 'infirme': 120,
 'Mohammedan': 133,
 'cripples': 134,
 'Gitan': 148,
 'negroes': 158,
 'handicapé': 185,
 'mûlatre': 217,
 'berbers': 235,
 'handicapés': 242,
 'eskimos': 243,
 'bohémienne': 260,
 'négresse': 310,
 'barbares': 344,
 'Indiaan': 388,
 'barbare': 420,
 'gitans': 433,
 'Politionele Actie': 445,
 'Coolie': 541,
 'crippled': 542,
 'Kaffir': 548,
 'bohémiens': 695,
 'primitif': 775,
 'Hottentot': 835,
 'berbère': 871,
 'Maure': 911,
 'Zigeuner': 925,
 'berbères': 943,
 'aboriginal': 1076,
 'talisman': 1095,
 'barbarian': 1143,
 'invalide': 1226,
 'orientaux': 1230,
 'gypsies': 1242,
 'Lapon': 1295,
 'neger': 1304,
 'eskimo': 1386,
 'Lappen': 1525,
 'cripple': 1573,
 'amulette': 1613,
 'orientals': 1676,
 'indigène': 1979,
 'Mauresque': 2026,
 'savage': 2068,
 'ber

In [6]:
# Search for "slaves" (asking for up to 4000 rows) and tally the returned
# items per data provider.
response2 = apis.search(query="slaves", rows=4000)
count_data_provider = {}  # data provider name -> number of returned items

print(len(response2['items']))  # sanity check: how many items were actually returned
for item in response2['items']:
    print(item['country'])  # for each item we print the country
    print(item['dataProvider'])  # and we also print the data provider
    # 'dataProvider' is a list; only its first entry is used as the tally key
    dataprovider = item['dataProvider'][0]
    count_data_provider[dataprovider] = count_data_provider.get(dataprovider, 0) + 1

2451
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['Europe']
['The European Library']
['Slovenia']
['National and University Library of Slovenia']
['Netherlands']
['National Library of the Netherlands']
['Serbia']
['National Library of Serbia']
['Serbia']
['National Library of Serbia']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Hungary']
['National Szechenyi Library']
['United Kingdom']
['Wellcome Collection']
['United Kingdom']
['Wellcome Collection']
['United Kingdom']
['Wellcome Collection']
['United Kingdom']
['Wellcome Collection']

In [7]:
# Display the data providers ordered from the fewest matching items to the most.
{
    provider: n_items
    for provider, n_items in sorted(count_data_provider.items(), key=lambda entry: entry[1])
}

{'The European Library': 1,
 'National Szechenyi Library': 1,
 'Institute of Literary Research - PBI': 1,
 'Braginsky Collection': 1,
 'Royal Library of Belgium': 1,
 'University library Svetozar Markovic, Belgrade': 1,
 'The Museum of Mediterranean and Near Eastern Antiquities': 1,
 'Foundation Virtual Library Miguel de Cervantes': 1,
 'Leiden University Libraries': 1,
 'Maya Image Archive - Image database of the project "Text Database and Dictionary of Classic Maya" at the University of Bonn - Department of Ancient American Studies and Ethnology': 1,
 'RTVE': 1,
 "Atria, Institute on Gender Equality and Women's History": 1,
 'Cyprus University of Technology': 1,
 'Association Films Plans-Fixes': 1,
 'Polytechnic University of Valencia': 1,
 'The Moravian Library in Brno': 1,
 'Royal Museums of Art and History, Brussels': 1,
 'Greek National Opera': 1,
 'American School of Classical Studies at Athens': 1,
 'Folklife and Ethnological Museum of Macedonia and Thrace': 1,
 'Nationalmuseum

In [8]:
# Search for "slave" (asking for up to 4000 rows) and tally the returned
# items per data provider.
response3 = apis.search(query="slave", rows=4000)
count_data_provider = {}  # data provider name -> number of returned items
for item in response3['items']:
    print(item['country'])
    print(item['dataProvider'])
    # 'dataProvider' is a list; only its first entry is used as the tally key
    dataprovider = item['dataProvider'][0]
    count_data_provider[dataprovider] = count_data_provider.get(dataprovider, 0) + 1
print(len(response3['items']))  # number of items actually returned

['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries, University of Oxford']
['United Kingdom']
['Bodleian Libraries,

In [9]:
# Display the data providers ordered from the fewest matching items to the most.
{
    provider: n_items
    for provider, n_items in sorted(count_data_provider.items(), key=lambda entry: entry[1])
}

{'The European Library': 1,
 'National and University Library of Iceland': 1,
 'Russian State Library': 1,
 'National Library of Denmark': 1,
 'Lucian Blaga Central University Library, Cluj-Napoca, Romania': 1,
 'National and University Library in Zagreb': 1,
 'Royal Library of Belgium': 1,
 'University of Zagreb. Catholic Faculty of  Theology. Department of the History of Christian Literature and Christian Teaching.': 1,
 'Naval Museum': 1,
 'Institute of Research and Acoustic Coordination/Music': 1,
 'Leiden University Libraries': 1,
 'Museene for kystkultur og gjenreisning i Finnmark IKS': 1,
 'Zemský archiv Opava, pobočka Olomouc': 1,
 'Digital Memory of Catalonia': 1,
 'National Museum in Warsaw': 1,
 'C-DaRE (Centre for Dance Research), COVUNI and Early Dance Circle  (EDC)': 1,
 'Media library of Art and Visual History of Humboldt University of Berlin': 1,
 'Repositorio Institucional de la Universidad de Huelva': 1,
 'Association Films Plans-Fixes': 1,
 'Historical Museum': 1,
 '

In [10]:
# Wildcard search for every term starting with "bohémien" (asking for up to
# 5000 rows) and tally the returned items per data provider.
response4 = apis.search(query="bohémien*", rows=5000)
count_data_provider = {}  # data provider name -> number of returned items

print(len(response4['items']))  # number of items actually returned
for item in response4['items']:
    print(item['country'])
    print(item['dataProvider'])
    # 'dataProvider' is a list; only its first entry is used as the tally key
    dataprovider = item['dataProvider'][0]
    count_data_provider[dataprovider] = count_data_provider.get(dataprovider, 0) + 1

1090
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Netherlands']
['National Library of the Netherlands']
['Hungary']
['National Szechenyi Library']
['United Kingdom']
['Wellcome Collection']
['France']
['

In [11]:
# Display the data providers ordered from the fewest matching items to the most.
{
    provider: n_items
    for provider, n_items in sorted(count_data_provider.items(), key=lambda entry: entry[1])
}

{'National Szechenyi Library': 1,
 'Wellcome Collection': 1,
 'Lucian Blaga Central University Library, Cluj-Napoca, Romania': 1,
 'The University and National Library of Debrecen': 1,
 'Uppsala University': 1,
 'Örebro County Museum': 1,
 'Digital Memory of Catalonia': 1,
 'Association Films Plans-Fixes': 1,
 'Historical Museum': 1,
 'Central Institute for the Union Catalogue of Italian Libraries': 1,
 'Under and Tuglas Literature Centre': 1,
 'Bibliotheca Hertziana – Max Planck Institut for Art History': 1,
 'Sancho el Sabio Foundation': 1,
 'Turin Gallery for Modern and Contemporary Art': 1,
 'Meise Botanic Garden': 1,
 'Kunstbibliothek Berlin': 1,
 'Modern Art Museum': 1,
 'Luce Institute': 1,
 'Austrian Broadcasting Corporation': 1,
 'National Audiovisual Archive of Hungary': 1,
 'GESIS - Leibniz Institute for the Social Sciences. Library Cologne': 1,
 'National Heritage Institute, Bucharest': 1,
 'Luigi Sturzo Institute': 1,
 'Central Library of the Bulgarian Academy of Sciences'