In [17]:
from google.colab import drive
# Mount Google Drive at /content/drive using the Colab `drive` helper imported above.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
import pandas as pd

# Parquet file read below; the path sits under the /content/drive mount point.
file_path = '/content/drive/MyDrive/met-objects/MetObjects.parquet'
df = pd.read_parquet(file_path)

# Show only the first row as the cell's output.
df.head(1)

Unnamed: 0,Object Number,Is Public Domain,Object ID,Department,AccessionYear,Object Name,Title,Culture,Period,Artist Display Name,...,Artist ULAN URL,Artist Wikidata URL,Object Date,Medium,City,Country,Classification,Link Resource,Object Wikidata URL,Tags
0,1979.486.1,False,1,The American Wing,1979.0,Coin,One-dollar Liberty Head Coin,,,James Barton Longacre,...,http://vocab.getty.edu/page/ulan/500011409,https://www.wikidata.org/wiki/Q3806459,1853,Gold,,,,http://www.metmuseum.org/art/collection/search/1,,


In [19]:
# Print one representative value per column, skipping entries that are
# missing or that merely spell out "nan" as text (in any letter case).
for col in df.columns:
    present = df[col].dropna()

    spelled_nan = present.astype(str).str.lower() == "nan"
    present = present[~spelled_nan]

    # Fall back to None when nothing usable is left in the column.
    example = present.iloc[0] if not present.empty else None

    print(f"{col}: {example}")

Object Number: 1979.486.1
Is Public Domain: False
Object ID: 1
Department: The American Wing
AccessionYear: 1979.0
Object Name: Coin
Title: One-dollar Liberty Head Coin
Culture: Mexican
Period: Edo period (1615–1868)
Artist Display Name: James Barton Longacre
Artist Display Bio: American, Delaware County, Pennsylvania 1794–1869 Philadelphia, Pennsylvania
Artist Alpha Sort: Longacre, James Barton
Artist Nationality: American
Artist Begin Date: 1794      
Artist End Date: 1869      
Artist ULAN URL: http://vocab.getty.edu/page/ulan/500011409
Artist Wikidata URL: https://www.wikidata.org/wiki/Q3806459
Object Date: 1853
Medium: Gold
City: Philadelphia
Country: Mexico
Classification: Inrō
Link Resource: http://www.metmuseum.org/art/collection/search/1
Object Wikidata URL: https://www.wikidata.org/wiki/Q116250677
Tags: Birds|Coins


In [20]:
import pandas as pd

def describe_column_distributions(df: pd.DataFrame, bins=10, top_n=10):
    """Print a short frequency summary for every column of ``df``.

    Numeric columns are cut into intervals with ``pd.cut`` and the count per
    interval is printed in interval order. Every other column prints its most
    frequent values, with missing values counted as their own category.

    Args:
        df: Frame whose columns are summarised.
        bins: Forwarded unchanged to ``pd.cut`` for numeric columns.
        top_n: Maximum number of rows printed per column. For numeric columns
            the rows are in interval order, so when there are more intervals
            than ``top_n`` only the lowest intervals are shown.

    Returns:
        None. The summary is written to standard output.
    """
    for col in df.columns:
        print(f"\n===== Column: {col} =====")
        series = df[col]

        if pd.api.types.is_numeric_dtype(series):
            # pd.cut raises ValueError when there is nothing to bin (an empty
            # or all-missing column); report that instead of aborting the
            # summary of the remaining columns.
            if series.count() == 0:
                print("(no non-null values to bin)")
                continue

            bin_counts = pd.cut(series, bins=bins).value_counts().sort_index()
            print(bin_counts.head(top_n))

        else:
            value_counts = series.value_counts(dropna=False)
            print(value_counts.head(top_n))

# Summarise only a handful of descriptive columns.
# To cover every column instead, call describe_column_distributions(df).
columns_to_describe = ["Department", "Object Name", "Tags", "Medium", "Culture"]
describe_column_distributions(df[columns_to_describe])


===== Column: Department =====
Department
Drawings and Prints                       172630
European Sculpture and Decorative Arts     43050
Photographs                                37459
Greek and Roman Art                        33726
Costume Institute                          31651
Egyptian Art                               27969
The American Wing                          16874
Asian Art                                  16479
Islamic Art                                15572
Modern and Contemporary Art                14696
Name: count, dtype: int64

===== Column: Object Name =====
Object Name
Print             100107
Photograph         29436
Drawing            25575
Book               13388
Kylix fragment      8926
Piece               8594
Fragment            7005
Negative            5928
Painting            5670
Bowl                2981
Name: count, dtype: int64

===== Column: Tags =====
Tags
nan                                273652
Flowers                              7602
Portr

In [21]:
# Print the first three collection links for rows whose Culture is 'Japan'
# and whose Object Name is 'Drawing'.
culture_matches = df['Culture'].eq('Japan')
name_matches = df['Object Name'].eq('Drawing')
mask = culture_matches & name_matches
sample_links = df.loc[mask, 'Link Resource'].head(3)
for link in sample_links:
    print(link)

http://www.metmuseum.org/art/collection/search/64115
http://www.metmuseum.org/art/collection/search/76926
http://www.metmuseum.org/art/collection/search/76927


In [22]:
# Count rows whose Culture is 'Japan' and whose Object Name is 'Drawing'.
culture_matches = df['Culture'].eq('Japan')
name_matches = df['Object Name'].eq('Drawing')
mask = culture_matches & name_matches
count = mask.sum()
print(count)

34


In [23]:
# Count rows whose Culture is exactly 'Japan'.
mask = df['Culture'].eq('Japan')
count = mask.sum()
print(count)

9265


In [24]:
# Lift pandas' row limit so the full listing below is not truncated.
pd.set_option('display.max_rows', None)

# Rows whose Culture is exactly 'Japan'.
mask = df['Culture'].eq('Japan')
sub = df.loc[mask]

# Only the 'Classification' column is reported; missing values are counted too.
for col in sub.columns:
    if col != 'Classification':
        continue
    print(f"\n--- {col} ---")
    print(sub[col].value_counts(dropna=False))


--- Classification ---
Classification
Textiles-Woven                    3506
Ceramics                           940
Netsuke                            938
Stencils                           555
Paintings                          517
Illustrated Books                  488
Prints                             440
Textiles-Costumes                  199
Metalwork                          162
Ojime                              149
Calligraphy                        113
Bone                               111
Textiles-Painted and Printed        96
Costumes                            94
Jewelry                             85
Bamboo                              82
Textiles-Embroidered                80
Basketry                            78
Stone                               69
Textiles-Dyed and Embroidered       56
Medals                              50
Lacquer                             39
Sculpture                           36
Textiles-Velvets                    34
Textiles                 

In [25]:
# Print the first three collection links for rows whose Culture is 'Japan'
# and whose Classification is 'Ceramics'.
culture_matches = df['Culture'].eq('Japan')
classification_matches = df['Classification'].eq('Ceramics')
mask = culture_matches & classification_matches
sample_links = df.loc[mask, 'Link Resource'].head(3)
for link in sample_links:
    print(link)

http://www.metmuseum.org/art/collection/search/60131
http://www.metmuseum.org/art/collection/search/60782
http://www.metmuseum.org/art/collection/search/60790


In [26]:
# Print the first three collection links for rows whose Culture is 'Japan'
# and whose Classification is 'Netsuke'.
culture_matches = df['Culture'].eq('Japan')
classification_matches = df['Classification'].eq('Netsuke')
mask = culture_matches & classification_matches
sample_links = df.loc[mask, 'Link Resource'].head(3)
for link in sample_links:
    print(link)

http://www.metmuseum.org/art/collection/search/59035
http://www.metmuseum.org/art/collection/search/59038
http://www.metmuseum.org/art/collection/search/59039


In [27]:
# Print the first three collection links for rows whose Culture is 'Japan'
# and whose Classification is 'Prints'.
culture_matches = df['Culture'].eq('Japan')
classification_matches = df['Classification'].eq('Prints')
mask = culture_matches & classification_matches
sample_links = df.loc[mask, 'Link Resource'].head(3)
for link in sample_links:
    print(link)

http://www.metmuseum.org/art/collection/search/61180
http://www.metmuseum.org/art/collection/search/62028
http://www.metmuseum.org/art/collection/search/62029


In [28]:
# Print the first three collection links for rows whose Culture is 'Japan'
# and whose Classification is 'Calligraphy'.
culture_matches = df['Culture'].eq('Japan')
classification_matches = df['Classification'].eq('Calligraphy')
mask = culture_matches & classification_matches
sample_links = df.loc[mask, 'Link Resource'].head(3)
for link in sample_links:
    print(link)

http://www.metmuseum.org/art/collection/search/60435
http://www.metmuseum.org/art/collection/search/60437
http://www.metmuseum.org/art/collection/search/60451


In [30]:
# Print the first three collection links for rows whose Culture is 'Japan'
# and whose Classification is 'Paintings'.
culture_matches = df['Culture'].eq('Japan')
classification_matches = df['Classification'].eq('Paintings')
mask = culture_matches & classification_matches
sample_links = df.loc[mask, 'Link Resource'].head(3)
for link in sample_links:
    print(link)

http://www.metmuseum.org/art/collection/search/59004
http://www.metmuseum.org/art/collection/search/59005
http://www.metmuseum.org/art/collection/search/59669
