# Met API Data Exploration

In [46]:
import os
import pandas as pd
from collections import Counter

## Import standardized CSV of countries and nationalities

In [2]:
countries_df = pd.read_csv("../../data/standards/countries.csv")

In [3]:
countries_df["Country"] = countries_df["Country"].str.strip()
countries_df["Country"] = countries_df["Country"].str.lower()

countries_df["Nationality"] = countries_df["Nationality"].str.strip()
countries_df["Nationality"] = countries_df["Nationality"].str.lower()

countries_df["Region"] = countries_df["Region"].str.strip()
countries_df["Region"] = countries_df["Region"].str.lower()

In [5]:
# Get standard list of countries for comparison
countries_list = countries_df.loc[:, "Country"].tolist()
print(f"Number of Countries: {len(countries_list)}")
print()
for c in countries_list:
    print(c)

Number of Countries: 251

afghanistan
aland islands
albania
algeria
american samoa
andorra
angola
anguilla
antarctica
antigua and barbuda
argentina
armenia
aruba
australia
austria
azerbaijan
bahamas
bahrain
bangladesh
barbados
belarus
belgium
belize
benin
bermuda
bhutan
bolivia
bonaire, sint eustatius and saba
bosnia and herzegovina
botswana
bouvet island
brazil
british indian ocean territory
brunei
bulgaria
burkina faso
burundi
cabo verde
cambodia
cameroon
canada
cayman islands
central african republic
chad
chile
china
christmas island
cocos (keeling) islands
colombia
comoros
republic of the congo
democratic republic of the congo
cook islands
costa rica
ivory coast
croatia
cuba
curacao
cyprus
czech republic
denmark
djibouti
dominica
dominican republic
ecuador
egypt
el salvador
equatorial guinea
eritrea
estonia
eswatini
ethiopia
falkland islands
faroe islands
fiji
finland
france
french guiana
french polynesia
french southern territories
gabon
gambia
georgia
germany
ghana
gibraltar
gree

In [6]:
# Get standard list of nationalities for comparison
nationalities_list = countries_df.loc[:, "Nationality"].tolist()
print(f"Number of Nationalities: {len(nationalities_list)}")
print()
for n in nationalities_list:
    print(n)

Number of Nationalities: 251

afghan
alander
albanian
algerian
american samoan
andorran
angolan
anguillan
antarctic
antiguan
argentinian
armenian
aruban
australian
austrian
azerbaijani
bahamian
bahraini
bangladeshi
barbadian
belarusian
belgian
belizean
beninese
bermudan
bhutanese
bolivian
bonaire, sint eustatius and saba
bosnian and herzegovinan
botswanan
bouvet island
brazilian
british indian ocean territory
bruneian
bulgarian
burkinabe
burundian
cabo verde
cambodian
cameroonian
canadian
cayman
central african republic
chadian
chilean
chinese
christmas island
cocos (keeling) islands
colombian
comoran
congolese
congolese
cook islands
costa rican
ivorian
croatian
cuban
curacaoan
cypriot
czech
danish
djiboutian
dominican
dominican
ecuadorean
egyptian
salvadorean
equatorial guinean
eritrean
estonian
eswatinian
ethiopian
falklander
faroese
fijian
finnish
french
french guianan
french polynesian
french southern territories
gabonese
gambian
georgian
german
ghanaian
gibraltarian
greek
greenlan

In [7]:
# Get standard list of regions for comparison
regions_list = countries_df.loc[:, "Region"].unique().tolist()
print(f"Number of Regions: {len(regions_list)}")
print()
for r in regions_list:
    print(r)

Number of Regions: 6

asia
europe
africa
oceania
americas
nan


## Import Met JSON data

In [11]:
# Create DF from JSON file content

# Declare empty DF for concatenation
df_all = None

# Get files from data folder
base_dir = "../../data/met/api"
files = os.listdir(base_dir)

# Iterate over files
for file in files:
    
    # Convert each file to DF
    df = pd.read_json(f"{base_dir}/{file}")
    
    # Add first DF to concatenation DF
    if df_all is None:
        df_all = df
    
    # Concatenate remaining DFs
    else:
        df_all = pd.concat([df_all, df], axis="rows", ignore_index=True)
    

In [12]:
print(f"Number of Objects: {len(df_all)}")
print()
df_all.info()

Number of Objects: 485744

<class 'pandas.core.frame.DataFrame'>
Index: 485744 entries, 0 to 185743
Data columns (total 57 columns):
 #   Column                 Non-Null Count   Dtype 
---  ------                 --------------   ----- 
 0   objectID               485744 non-null  int64 
 1   isHighlight            485744 non-null  bool  
 2   accessionNumber        485744 non-null  object
 3   accessionYear          485744 non-null  object
 4   isPublicDomain         485744 non-null  bool  
 5   primaryImage           485744 non-null  object
 6   primaryImageSmall      485744 non-null  object
 7   additionalImages       485744 non-null  object
 8   constituents           281598 non-null  object
 9   department             485744 non-null  object
 10  objectName             485744 non-null  object
 11  title                  485744 non-null  object
 12  culture                485744 non-null  object
 13  period                 485744 non-null  object
 14  dynasty                485744 

In [13]:
df_all.head()

Unnamed: 0,objectID,isHighlight,accessionNumber,accessionYear,isPublicDomain,primaryImage,primaryImageSmall,additionalImages,constituents,department,...,classification,rightsAndReproduction,linkResource,metadataDate,repository,objectURL,tags,objectWikidata_URL,isTimelineWork,GalleryNumber
0,1,False,1979.486.1,1979,False,,,[],"[{'constituentID': 164292, 'role': 'Maker', 'n...",The American Wing,...,,,,2021-04-06T04:41:04.967Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/search/1,,,False,
1,2,False,1980.264.5,1980,False,,,[],"[{'constituentID': 1079, 'role': 'Maker', 'nam...",The American Wing,...,,,,2021-04-06T04:41:04.967Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/search/2,,,False,
2,3,False,67.265.9,1967,False,,,[],,The American Wing,...,,,,2021-04-06T04:41:04.967Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/search/3,,,False,
3,4,False,67.265.10,1967,False,,,[],,The American Wing,...,,,,2021-04-06T04:41:04.967Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/search/4,,,False,
4,5,False,67.265.11,1967,False,,,[],,The American Wing,...,,,,2021-04-06T04:41:04.967Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/search/5,,,False,


In [14]:
doubles = df_all["objectID"].duplicated()
print(f"Number of objects: {len(doubles)}")
print()

i = 0

for d in doubles:
    if d == True:
        i += 1

print(f"Duplicate records: {i}")

Number of objects: 485744

Duplicate records: 4477


In [15]:
# Remove duplicate values

met_df = df_all.drop_duplicates(subset=['objectID'], keep="first")

In [16]:
doubles = met_df["objectID"].duplicated()
print(f"Number of objects: {len(doubles)}")
print()

i = 0

for d in doubles:
    if d == True:
        i += 1

print(f"Duplicate records: {i}")

Number of objects: 481267

Duplicate records: 0


In [17]:
# Item added Oct. 5, 2023

print(met_df.iloc[481266])
print(met_df.iloc[481266]["objectURL"])

objectID                                                            904013
isHighlight                                                          False
accessionNumber                                                  56.597.11
accessionYear                                                             
isPublicDomain                                                       False
primaryImage                                                              
primaryImageSmall                                                         
additionalImages                                                        []
constituents             [{'constituentID': 72709, 'role': 'Artist', 'n...
department                                             Drawings and Prints
objectName                                                           Print
title                                             Bacchus, Venus and Ceres
culture                                                                   
period                   

In [38]:
# List all column names in met df

met_df.columns

Index(['objectID', 'isHighlight', 'accessionNumber', 'accessionYear',
       'isPublicDomain', 'primaryImage', 'primaryImageSmall',
       'additionalImages', 'constituents', 'department', 'objectName', 'title',
       'culture', 'period', 'dynasty', 'reign', 'portfolio', 'artistRole',
       'artistPrefix', 'artistDisplayName', 'artistDisplayBio', 'artistSuffix',
       'artistAlphaSort', 'artistNationality', 'artistBeginDate',
       'artistEndDate', 'artistGender', 'artistWikidata_URL', 'artistULAN_URL',
       'objectDate', 'objectBeginDate', 'objectEndDate', 'medium',
       'dimensions', 'measurements', 'creditLine', 'geographyType', 'city',
       'state', 'county', 'country', 'region', 'subregion', 'locale', 'locus',
       'excavation', 'river', 'classification', 'rightsAndReproduction',
       'linkResource', 'metadataDate', 'repository', 'objectURL', 'tags',
       'objectWikidata_URL', 'isTimelineWork', 'GalleryNumber'],
      dtype='object')

In [41]:
# Create new dataframe with geographic data

met_df_exp = met_df[["objectID", "country", "artistNationality", "culture", "region", "subregion", "geographyType", "department", "artistGender", "medium", "classification", "objectDate", "GalleryNumber", "tags"]]

In [51]:
# Unique Country Values

met_countries = Counter(met_df_exp["country"]).most_common()

print(f"Unique country values: {len(met_countries)}")
print()
for key, value in met_countries:
    print(f"{key} - {value}")

Unique country values: 1042

 - 381043
Egypt - 30046
United States - 9586
France - 8052
Iran - 6553
Japan - 5529
Germany - 3972
Italy - 3494
Peru - 3423
England - 3260
India - 2112
Mexico - 1972
Indonesia - 1704
Byzantine Egypt - 1673
China - 1197
Turkey - 992
Spain - 984
Papua New Guinea - 880
Nigeria - 639
Belgium - 500
Democratic Republic of the Congo - 492
Syria - 460
Iraq - 430
Canada - 394
Mali - 378
The Netherlands - 372
Austria - 342
Colombia - 333
Côte d'Ivoire - 299
Russia - 283
United Kingdom - 276
America - 264
Morocco - 245
Cameroon - 235
Switzerland - 202
Costa Rica - 201
Philippines - 195
present-day France - 178
Bolivia - 165
present-day Uzbekistan - 160
Tibet - 159
Ottoman Empire - 154
Ecuador - 154
Egypt or Syria - 152
Ghana - 151
Northern France - 145
Netherlands - 141
Saudi Arabia - 127
Republic of Benin - 119
Czech Republic - 109
Panama - 109
Guatemala - 102
Algeria - 93
Burkina Faso - 92
Palestine - 92
Nubia (Sudan) - 90
Australia - 84
Thailand - 79
Scotland - 78


In [52]:
# Unique Nationality Values 

met_nationalities = Counter(met_df_exp["artistNationality"]).most_common()

print(f"Unique nationality values: {len(met_nationalities)}")
print()
for key, value in met_nationalities:
    print(f"{key} - {value}")

Unique nationality values: 654

 - 234571
American - 89053
French - 47059
Italian - 24436
British - 24081
German - 11991
Japanese - 7509
Netherlandish - 5640
Dutch - 4112
Spanish - 2293
British, Scottish - 2259
Austrian - 2187
Chinese - 2114
Mexican - 1810
Flemish - 1501
American, born Germany - 1421
Bohemian - 1280
Swiss - 1075
American, born England - 890
Russian - 772
Irish - 648
Iranian - 492
Danish - 463
American, born Hungary - 445
Belgian - 442
American, born Russia - 416
Swedish - 345
Indian - 334
American, born Ireland - 329
American, born France - 318
British and American - 270
French, born Russia - 267
British, born India - 260
American, born Austria - 259
American, born Poland - 257
American, born Cuba - 223
Hungarian - 221
Czech - 221
American, born The Netherlands - 208
French and American - 200
American, born Sweden - 185
Alsatian - 180
American, born Czechoslovakia - 179
Canadian - 170
American, born Switzerland - 168
American, born Scotland - 167
American, born Greece 

In [53]:
# Unique Culture Values

met_cultures = Counter(met_df_exp["culture"]).most_common()

print(f"Unique culture values: {len(met_cultures)}")
print()
for key, value in met_cultures:
    print(f"{key} - {value}")

Unique culture values: 5412

 - 316264
American - 28579
Greek, Attic - 17309
Japan - 16957
China - 13511
French - 10649
Japanese - 5956
Roman - 4858
British - 3096
Italian - 2984
Cypriot - 2774
German - 2148
American or European - 2050
Coptic - 1671
European - 1482
Etruscan - 1233
Greek - 1206
Iran - 1071
Sasanian - 919
Spanish - 862
Chinese - 854
Mexican - 838
Frankish - 753
Indonesia (Java) - 697
Moche - 669
Minoan - 663
Indian - 637
Asmat people - 618
Korea - 584
European or Middle Eastern - 540
probably American - 519
Byzantine - 491
probably French - 483
Assyrian - 480
Chinese, for American market - 452
India - 445
Paracas - 400
Thailand - 382
South Netherlandish - 361
Austrian - 348
Babylonian - 347
Peruvian - 321
Tibet - 281
Greek, South Italian, Apulian - 274
British (American market) - 268
Greek, Laconian - 261
Turkish - 257
Russian - 252
Javanese - 246
Belgian - 234
Bactria-Margiana Archaeological Complex - 222
Chimú - 219
Catalan - 213
Vicús - 209
Indonesia (Central Java) - 

In [54]:
# Unique Region Values

met_regions = Counter(met_df_exp["region"]).most_common()

print(f"Unique region values: {len(met_regions)}")
print()
for key, value in met_regions:
    print(f"{key} - {value}")

Unique region values: 799

 - 442245
Upper Egypt, Thebes - 11351
Memphite Region - 4652
Mid-Atlantic - 4445
New England - 2087
Mesopotamia - 1990
Iran - 1740
Mesoamerica - 1276
Middle Egypt - 1162
Northern Upper Egypt - 729
Eastern Delta - 553
Midwest - 351
West Midlands - 349
Syria - 301
Levant - 267
Anatolia - 224
Bactria-Margiana - 221
Iran or Mesopotamia - 219
Sumatra - 209
Southern Upper Egypt - 202
Fayum Entrance Area - 193
Île-de-France - 189
Southern - 187
Ica Valley - 182
Fayum - 168
Mid-Atlantic United States - 154
Central Asia or Iran - 152
Washkuk Hills - 145
New England United States - 139
Borneo - 129
Java - 116
Iran, probably from Luristan - 116
Western Desert; Kharga Oasis - 113
central Côte d'Ivoire - 109
North Coast - 104
Cyprus - 100
Baffin Island - 95
Central Asia - 94
Midwest United States - 85
Kashmir - 74
northern Côte d'Ivoire - 72
Iran, Luristan - 68
Deccan - 67
Central Anatolia - 63
Caucasus - 62
Lower Nubia - 62
Bohemia - 57
Northern region - 57
Pyrénées-Aqui

In [55]:
# Unique Subregion Values

met_subregions = Counter(met_df_exp["subregion"]).most_common()

print(f"Unique subregion values: {len(met_subregions)}")
print()
for key, value in met_subregions:
    print(f"{key} - {value}")

Unique subregion values: 355

 - 459345
Lisht North - 3006
Deir el-Bahri - 3006
Malqata - 2502
Asasif - 1496
Southern Asasif - 1307
Lisht South - 918
Amarna (Akhetaten) - 841
Valley of the Kings - 735
Sheikh Abd el-Qurna - 535
Qasr-i Abu Nasr - 531
Saqqara - 480
Hasanlu - 399
Abydos - 395
Qantir (Piramesse) - 375
Nimrud (ancient Kalhu) - 324
Ctesiphon - 237
Wadi Gabbanat el-Qurud, Wadi D - 209
Nippur - 194
probably from Sippar (modern Tell Abu Habba) - 143
Dra Abu el-Naga - 134
Hierakonpolis (Nekhen) - 131
Oxyrhynchus (Bahnasa) - 125
probably from Babylon (modern Hillah) - 114
probably from Acemhöyük - 113
Dendera - 109
Medinet Habu - 99
Nishapur - 93
Tell Basta (Bubastis) - 92
Tell Taya - 91
Khokha - 91
Deir el-Medina - 85
Memphis (Mit Rahina) - 85
Lachish (modern Tell ed-Duweir) - 80
Meir - 76
Lahun - 75
Memphite Necropolis - 72
said to be from Ziwiye - 71
Faras - 70
el-Khokha - 69
Teleilat Ghassul - 63
Hermopolis (Ashmunein; Khemenu) - 60
Shahr-i Qumis (ancient Hecatompylos) - 55
Hi

In [56]:
# Unique Geography Types

met_geography_types = Counter(met_df_exp["geographyType"]).most_common()

print(f"Unique geography types: {len(met_geography_types)}")
print()
for key, value in met_geography_types:
    print(f"{key} - {value}")

Unique geography types: 30

 - 390945
From - 26908
Made in - 18357
Country of Origin - 14321
Published in - 12879
Attributed to - 7198
Excavated in - 3781
Printed - 1086
Probably made in - 1003
Possibly made in - 883
Probably from - 658
Original from - 521
Found - 500
Found/excavated - 392
(none assigned) - 334
Possibly from - 324
Said to be from - 317
Object place - 258
Depicted - 208
Mint - 184
Probably originally from - 68
Presumably from - 46
Formerly considered made in - 30
Used - 26
Decorated in - 12
Current location - 10
Said to be found in - 6
Original - 5
Made for - 4
Designed in - 3


In [63]:
# Met Departments
met_departments = Counter(met_df_exp["department"]).most_common()

print(f"Met Departments: {len(met_departments)}")
print()
for key, value in met_departments:
    print(f"{key} - {value}")

Met Departments: 19

Drawings and Prints - 173004
European Sculpture and Decorative Arts - 42076
Asian Art - 37010
Photographs - 35087
Greek and Roman Art - 33532
Costume Institute - 31687
Egyptian Art - 27970
The American Wing - 18546
Islamic Art - 15178
Modern and Contemporary Art - 14722
Arms and Armor - 13624
The Michael C. Rockefeller Wing - 12362
Medieval Art - 7142
Ancient Near Eastern Art - 6224
Musical Instruments - 5229
Robert Lehman Collection - 2586
European Paintings - 2376
The Cloisters - 2340
The Libraries - 572


In [64]:
# Unique Art Mediums
met_mediums = Counter(met_df_exp["medium"]).most_common()

print(f"Unique Mediums: {len(met_mediums)}")
print()
for key, value in met_mediums:
    print(f"{key} - {value}")

Unique Mediums: 65102

Terracotta - 23348
Commercial color lithograph - 18391
Etching - 17524
Engraving - 11778
Albumen photograph - 10152
Gelatin silver print - 9966
Silk - 8633
 - 7232
Bronze - 7207
Lithograph - 7041
Glass - 6511
Faience - 4982
Albumen silver print from glass negative - 4731
Film negative - 4712
silk - 4684
Woodcut - 4683
Silver - 4666
Gold - 3841
Etching and engraving - 3606
Commercial color photolithograph - 3412
Oil on canvas - 3387
Commercial photolithograph - 3160
Cotton - 2983
Wood - 2705
Albumen silver print - 2530
cotton - 2498
Woodblock print; ink and color on paper - 2443
Hard-paste porcelain - 2391
Wood engraving - 2389
Stone - 2149
Porcelain - 2060
wool - 1916
Ceramic - 1908
Limestone - 1835
Bobbin lace - 1833
Hand-colored etching - 1762
Steel - 1717
Iron - 1704
[no medium available] - 1662
Ivory - 1652
Color lithograph - 1608
Limestone, paint - 1471
Soft-paste porcelain - 1449
Linen - 1422
Earthenware - 1419
Commercial lithograph - 1389
Pottery - 1357
Gr

In [59]:
# Unique Classifications

met_classifications = Counter(met_df_exp["classification"]).most_common()

print(f"Unique classifications: {len(met_classifications)}")
print()
for key, value in met_classifications:
    print(f"{key} - {value}")

Unique classifications: 706

Prints - 121426
 - 85030
Photographs - 38527
Drawings - 28368
Vases - 21223
Books - 20825
Ceramics - 11026
Textiles-Woven - 11016
Paintings - 8801
Glass - 6096
Textiles-Laces - 4973
Negatives - 4935
Textiles-Embroidered - 4182
Ceramics-Porcelain - 4135
Metalwork-Silver - 3939
Sculpture - 3587
Ceramics-Pottery - 3581
Textiles-Printed - 2562
Metalwork - 2336
Jewelry - 2330
Textiles - 2093
Gold and Silver - 1957
Textiles-Trimmings - 1950
Metal-Ornaments - 1890
Bronzes - 1823
Stucco - 1741
Terracottas - 1698
Jade - 1664
Gems - 1607
Albums - 1603
Wood-Sculpture - 1586
Woodwork-Furniture - 1471
Medals and Plaquettes - 1460
Textiles-Velvets - 1425
Codices - 1312
Metalwork-Iron - 1245
Sword Furniture-Tsuba - 1242
Sword Furniture-Kozuka - 1117
Ceramics-Containers - 1098
Swords - 1087
Coins - 1073
Cut Paper - 1070
Glass-Stained - 1016
Stone Sculpture - 1009
Shafted Weapons - 954
Netsuke - 942
Horology - 929
Metalwork-Gilt Bronze - 912
Ephemera - 896
Lacquer - 844
Dag

In [62]:
# Gallery Numbers

met_galleries = Counter(met_df_exp["GalleryNumber"]).most_common()

print(f"Unique Values: {len(met_classifications)}")
print()
for key, value in met_galleries:
    print(f"Gallery #: {key} - {value}")

Unique Values: 706

Gallery #:  - 431980
Gallery #: 774 - 7605
Gallery #: 122 - 4532
Gallery #: 171 - 3454
Gallery #: 109 - 2938
Gallery #: 117 - 2262
Gallery #: 130 - 2249
Gallery #: 106 - 1167
Gallery #: 126 - 873
Gallery #: 116 - 788
Gallery #: 170 - 558
Gallery #: 134 - 541
Gallery #: 113 - 494
Gallery #: 301 - 457
Gallery #: 114 - 431
Gallery #: 101 - 402
Gallery #: 706 - 390
Gallery #: 157 - 340
Gallery #: 453 - 334
Gallery #: 152 - 311
Gallery #: 707 - 306
Gallery #: 207 - 299
Gallery #: 704 - 289
Gallery #: 127 - 288
Gallery #: 304 - 281
Gallery #: 533 - 280
Gallery #: 151 - 273
Gallery #: 162 - 267
Gallery #: 118 - 263
Gallery #: 512 - 257
Gallery #: 681 - 244
Gallery #: 684 - 231
Gallery #: 103 - 224
Gallery #: 111 - 213
Gallery #: 166 - 191
Gallery #: 003 - 191
Gallery #: 169 - 190
Gallery #: 247 - 187
Gallery #: 556 - 185
Gallery #: 510 - 182
Gallery #: 373 - 180
Gallery #: 371 - 176
Gallery #: 158 - 176
Gallery #: 305 - 172
Gallery #: 161 - 169
Gallery #: 010 - 167
Gallery

In [49]:
# Artist Gender

met_gender = met_df_exp["artistGender"]

met_gender_count = Counter(met_gender).most_common()

for key, value in met_gender_count:
    print(f"Gender: {key} - {value}")

Gender:  - 468411
Gender: Female - 12856


In [37]:
# Object Tags

met_tags = met_df_exp["tags"]

met_tags_matrix = list(filter(lambda x: (x != None), met_tags))

print(f"Tagged Objects: {len(met_tags_matrix)}")
print()

# Create new list for tag terms
met_tag_list = []

for tag in met_tags_matrix:
    for t in tag:
        met_tag_list.append(t['term'])

# Count unique tags
met_tags_unique = Counter(met_tag_list).most_common()

print(f"Unique tags: {len(met_tags_unique)}")
print()
for key, value in met_tags_unique:
    print(f"{key} - {value}")

Tagged Objects: 189737

Unique tags: 1153

Men - 54633
Women - 37225
Portraits - 26900
Flowers - 16907
Birds - 8267
Actresses - 7102
Athletes - 6869
Horses - 6802
Trees - 6453
Landscapes - 5616
Human Figures - 5608
Baseball - 5505
Leaves - 5498
Profiles - 4743
Architecture - 4685
Animals - 4493
Buildings - 4370
Dogs - 3336
Female Nudes - 3287
Houses - 3230
Boats - 3173
Carriages - 3010
Coat of Arms - 3001
Actors - 2803
Boys - 2556
Christ - 2532
Heads - 2522
Soldiers - 2344
Girls - 2297
Abstraction - 2257
Ornament - 2188
Satire - 2168
Angels - 2152
Saints - 2072
Children - 2042
Lions - 2001
Male Nudes - 1931
Interiors - 1729
Mountains - 1699
Hieroglyphs - 1649
Dragons - 1585
Faces - 1574
Ships - 1552
Musical Instruments - 1515
Virgin Mary - 1444
Fish - 1412
Swords - 1344
Flags - 1332
Rivers - 1237
Bridges - 1161
Plants - 1141
Infants - 1110
Cities - 1109
Scarabs - 1075
Madonna and Child - 1041
Working - 1020
Deer - 1014
Cross - 998
Buddhism - 995
Putti - 988
New York City - 921
Streets 

In [101]:
# Count Unknown Values by Column
met_cols = ['isHighlight', 'accessionNumber', 'accessionYear',
       'isPublicDomain', 'primaryImage', 'primaryImageSmall',
       'department', 'objectName', 'title',
       'culture', 'period', 'dynasty', 'reign', 'portfolio', 'artistRole',
       'artistPrefix', 'artistDisplayName', 'artistDisplayBio', 'artistSuffix',
       'artistAlphaSort', 'artistNationality', 'artistBeginDate',
       'artistEndDate', 'artistGender', 'artistWikidata_URL', 'artistULAN_URL',
       'objectDate', 'objectBeginDate', 'objectEndDate', 'medium',
       'dimensions', 'creditLine', 'geographyType', 'city',
       'state', 'county', 'country', 'region', 'subregion', 'locale', 'locus',
       'excavation', 'river', 'classification', 'rightsAndReproduction',
       'linkResource', 'metadataDate', 'repository', 'objectURL',
       'objectWikidata_URL', 'isTimelineWork', 'GalleryNumber']


# Create new nested list to count empty cells by column
unknowns_by_col = []

# Iterate over each item in met_cols Counter list
for c in met_cols:
    col_count = Counter(met_df[c]).most_common()
    unknowns = 0
    
    # Update 'unknown' with the # of empty cells in column, if > 0
    for cc in col_count:
        if cc[0] == "" or cc[0] == " " or cc[0] == "NAN":
            unknowns = cc[1]
    
    # Append column name and # of empty cells in it
    unknowns_by_col.append([c, unknowns])

In [102]:
# Sort (Desc.) Total Blank Values per Column

unknowns_by_col.sort(key=lambda x: x[1], reverse=True)

for key, value in unknowns_by_col:
    print(f"{key} - {value}")

linkResource - 481267
county - 480439
river - 479175
artistSuffix - 475293
locus - 473831
reign - 470036
artistGender - 468411
locale - 465616
excavation - 464890
state - 464545
subregion - 459345
rightsAndReproduction - 458095
dynasty - 458070
portfolio - 453695
city - 446709
region - 442245
GalleryNumber - 431980
artistPrefix - 423155
objectWikidata_URL - 412546
geographyType - 390945
period - 390231
country - 381043
culture - 316264
artistWikidata_URL - 282397
artistULAN_URL - 277534
artistDisplayBio - 248225
artistEndDate - 244593
artistBeginDate - 244362
artistNationality - 234571
primaryImage - 233995
primaryImageSmall - 233995
artistAlphaSort - 201496
artistRole - 201396
artistDisplayName - 201396
classification - 85030
dimensions - 74394
objectDate - 13370
medium - 7232
accessionYear - 3833
objectName - 2316
creditLine - 690
title - 283
isHighlight - 0
accessionNumber - 0
isPublicDomain - 0
department - 0
objectBeginDate - 0
objectEndDate - 0
metadataDate - 0
repository - 0
obj