# 📜 Harvard’s Artifacts Collection: ETL, SQL Analytics 

### API Integration & Data Extraction


In [1]:
import streamlit as st
harvard_api = st.secrets.get("harvard")["api_key"]
if not harvard_api:
    st.error("API key for Harvard not found in secrets.")
    st.stop()
url='https://api.harvardartmuseums.org/classification'
import requests
parameters={'apikey':harvard_api,
           'size':100}
response= requests.get(url, parameters)
response

<Response [200]>

In [2]:
data=response.json()
data

{'info': {'totalrecordsperquery': 100,
  'totalrecords': 64,
  'pages': 1,
  'page': 1,
  'responsetime': '5 ms'},
 'records': [{'objectcount': 1,
   'name': 'Natural History Specimens',
   'id': 1133,
   'lastupdate': '2025-10-04T04:03:31-04:00',
   'classificationid': 1133},
  {'objectcount': 1,
   'name': 'Paintings with Text',
   'id': 197,
   'lastupdate': '2025-10-04T04:03:31-04:00',
   'classificationid': 197},
  {'objectcount': 66,
   'name': 'Performance Artifacts',
   'id': 224,
   'lastupdate': '2025-10-04T04:03:31-04:00',
   'classificationid': 224},
  {'objectcount': 1,
   'name': 'Text',
   'id': 204,
   'lastupdate': '2025-10-04T04:03:31-04:00',
   'classificationid': 204},
  {'objectcount': 80,
   'name': 'Tokens',
   'id': 492,
   'lastupdate': '2025-10-04T04:03:31-04:00',
   'classificationid': 492},
  {'objectcount': 62,
   'name': 'Brick Stamps',
   'id': 304,
   'lastupdate': '2025-10-04T04:03:31-04:00',
   'classificationid': 304},
  {'objectcount': 46,
   'name':

In [3]:
from collections import Counter
classiresults= [record['name']
                for record in data['records']
                if record.get('objectcount',0) >= 2500]
print('name with object count >= 2500:')
for name in classiresults:
    print(name)

name with object count >= 2500:
Accessories (non-art)
Photographs
Drawings
Prints
Paintings
Sculpture
Coins
Vessels
Textile Arts
Archival Material
Fragments
Manuscripts
Seals
Straus Materials


In [4]:
url = 'https://api.harvardartmuseums.org/object'
all_records = []   
classifications = ["Paintings", "Sculpture", "Drawings", "Fragments", "Photographs"]
for classi in classifications:
    print(f"\nFetching classification: {classi}")
    for page in range(1, 26):
        parameters = {
            'apikey': harvard_api,
            'size': 100,
            'page': page,
            'classification': classi
        }
        response = requests.get(url, parameters)
        data = response.json()
        all_records.extend(data['records'])
    print(f"Total collected so far: {len(all_records)}")



Fetching classification: Paintings
Total collected so far: 2500

Fetching classification: Sculpture
Total collected so far: 5000

Fetching classification: Drawings
Total collected so far: 7500

Fetching classification: Fragments
Total collected so far: 10000

Fetching classification: Photographs
Total collected so far: 12500


In [5]:
data['records']

[{'copyright': '© Artists Rights Society (ARS), New York / VG Bild-Kunst, Bonn',
  'contextualtextcount': 0,
  'creditline': 'Harvard Art Museums/Busch-Reisinger Museum, Gift of T. Lux Feininger',
  'accesslevel': 1,
  'createdate': '1999-10-25T00:00:00-04:00',
  'dateoflastpageview': '2015-01-05',
  'classificationid': 17,
  'division': 'Modern and Contemporary Art',
  'markscount': 0,
  'publicationcount': 1,
  'totaluniquepageviews': 1,
  'contact': 'am_moderncontemporary@harvard.edu',
  'colorcount': 5,
  'rank': 182327,
  'id': 190237,
  'state': None,
  'verificationleveldescription': 'Adequate. Object is adequately described but information may not be vetted',
  'period': None,
  'images': [],
  'worktypes': [{'worktypeid': '268', 'worktype': 'photograph'},
   {'worktypeid': '241', 'worktype': 'negative'}],
  'imagecount': 2,
  'totalpageviews': 1,
  'accessionyear': None,
  'standardreferencenumber': None,
  'signed': None,
  'classification': 'Photographs',
  'relatedcount': 0

In [6]:
print(type(data['records']))

<class 'list'>


In [7]:
data['records'][0]

{'copyright': '© Artists Rights Society (ARS), New York / VG Bild-Kunst, Bonn',
 'contextualtextcount': 0,
 'creditline': 'Harvard Art Museums/Busch-Reisinger Museum, Gift of T. Lux Feininger',
 'accesslevel': 1,
 'createdate': '1999-10-25T00:00:00-04:00',
 'dateoflastpageview': '2015-01-05',
 'classificationid': 17,
 'division': 'Modern and Contemporary Art',
 'markscount': 0,
 'publicationcount': 1,
 'totaluniquepageviews': 1,
 'contact': 'am_moderncontemporary@harvard.edu',
 'colorcount': 5,
 'rank': 182327,
 'id': 190237,
 'state': None,
 'verificationleveldescription': 'Adequate. Object is adequately described but information may not be vetted',
 'period': None,
 'images': [],
 'worktypes': [{'worktypeid': '268', 'worktype': 'photograph'},
  {'worktypeid': '241', 'worktype': 'negative'}],
 'imagecount': 2,
 'totalpageviews': 1,
 'accessionyear': None,
 'standardreferencenumber': None,
 'signed': None,
 'classification': 'Photographs',
 'relatedcount': 0,
 'verificationlevel': 2,
 

In [8]:
artifact_metadata = []
artifact_media = []
artifact_colors = []
for i in all_records:
    artifact_metadata.append(dict(
        id = i['id'],
        title = i['title'],
        culture = i['culture'],
        period = i['period'],
        century = i['century'],
        medium = i['medium'],
        dimensions = i['dimensions'],
        description = i['description'],
        department = i['department'],
        classification = i['classification'],
        accessionyear = i['accessionyear'],
        accessionmethod = i['accessionmethod']))
    artifact_media.append(dict(
        objectid = i['objectid'],
        imagecount = i['imagecount'],
        mediacount = i['mediacount'],
        colorcount = i['colorcount'],
        rank_value = i['rank'],
        datebegin = i['datebegin'],
        dateend = i['dateend']))
    color_details = i.get('colors')
    if color_details:
        for j in color_details:
            artifact_colors.append(dict(
                objectid = i['objectid'],
                color = j['color'],
                spectrum = j['spectrum'],
                hue = j['hue'],
                percent = j['percent'],
                css3 = j['css3']))


In [9]:
len(artifact_metadata)

12500

In [10]:
len(artifact_media)

12500

In [11]:
len(artifact_colors)

91223

In [12]:
artifact_metadata

[{'id': 165005,
  'title': 'Rocky Bay South of Salerno with View of Capri',
  'culture': 'French',
  'period': None,
  'century': '19th century',
  'medium': 'Oil on canvas',
  'dimensions': 'sight: 44.6 x 73 cm (17 9/16 x 28 3/4 in.)\r\nframe: 58.7 x 86.6 cm (23 1/8 x 34 1/8 in.)',
  'description': None,
  'department': 'Department of Paintings, Sculpture & Decorative Arts',
  'classification': 'Paintings',
  'accessionyear': 2001,
  'accessionmethod': 'Gift'},
 {'id': 165401,
  'title': 'The Bay of Estaque',
  'culture': 'French',
  'period': None,
  'century': '20th century',
  'medium': 'Oil on canvas',
  'dimensions': '33 x 41 cm (13 x 16 1/8 in.)\r\nframe: 49.8 x 58.1 x 3.8 cm (19 5/8 x 22 7/8 x 1 1/2 in.)',
  'description': None,
  'department': 'Department of Modern & Contemporary Art',
  'classification': 'Paintings',
  'accessionyear': 2001,
  'accessionmethod': 'Purchase'},
 {'id': 165462,
  'title': 'Landscape with Windmill and Wayside Chapel',
  'culture': 'Flemish',
  'pe

In [13]:
artifact_media

[{'objectid': 165005,
  'imagecount': 1,
  'mediacount': 0,
  'colorcount': 10,
  'rank_value': 123329,
  'datebegin': 1842,
  'dateend': 1852},
 {'objectid': 165401,
  'imagecount': 3,
  'mediacount': 0,
  'colorcount': 7,
  'rank_value': 122555,
  'datebegin': 1908,
  'dateend': 1908},
 {'objectid': 165462,
  'imagecount': 1,
  'mediacount': 0,
  'colorcount': 10,
  'rank_value': 123271,
  'datebegin': 1623,
  'dateend': 1633},
 {'objectid': 167757,
  'imagecount': 1,
  'mediacount': 0,
  'colorcount': 9,
  'rank_value': 123077,
  'datebegin': 100,
  'dateend': 200},
 {'objectid': 167925,
  'imagecount': 1,
  'mediacount': 0,
  'colorcount': 7,
  'rank_value': 121511,
  'datebegin': 1999,
  'dateend': 1999},
 {'objectid': 169024,
  'imagecount': 1,
  'mediacount': 0,
  'colorcount': 5,
  'rank_value': 122483,
  'datebegin': 1600,
  'dateend': 1700},
 {'objectid': 169327,
  'imagecount': 1,
  'mediacount': 0,
  'colorcount': 8,
  'rank_value': 127701,
  'datebegin': 0,
  'dateend': 0}

In [14]:
artifact_colors

[{'objectid': 165005,
  'color': '#321919',
  'spectrum': '#3db657',
  'hue': 'Brown',
  'percent': 0.3517562724014337,
  'css3': '#000000'},
 {'objectid': 165005,
  'color': '#c8c8c8',
  'spectrum': '#8c5fa8',
  'hue': 'Grey',
  'percent': 0.17218637992831543,
  'css3': '#c0c0c0'},
 {'objectid': 165005,
  'color': '#7d4b19',
  'spectrum': '#6cbd45',
  'hue': 'Brown',
  'percent': 0.13512544802867382,
  'css3': '#8b4513'},
 {'objectid': 165005,
  'color': '#afc8e1',
  'spectrum': '#7b65ad',
  'hue': 'Blue',
  'percent': 0.07354838709677419,
  'css3': '#b0c4de'},
 {'objectid': 165005,
  'color': '#966432',
  'spectrum': '#e46867',
  'hue': 'Orange',
  'percent': 0.05684587813620072,
  'css3': '#a0522d'},
 {'objectid': 165005,
  'color': '#e1e1c8',
  'spectrum': '#e9715f',
  'hue': 'Green',
  'percent': 0.05003584229390681,
  'css3': '#dcdcdc'},
 {'objectid': 165005,
  'color': '#96afc8',
  'spectrum': '#7b65ad',
  'hue': 'Blue',
  'percent': 0.04637992831541218,
  'css3': '#a9a9a9'},
 {

In [15]:
import mysql.connector

connection = mysql.connector.connect(
    host=st.secrets["mysql"]["host"],
    user=st.secrets["mysql"]["user"],
    password=st.secrets["mysql"]["password"],
    database=st.secrets["mysql"]["database"]
)
cursor = connection.cursor()
print("MySQL server established")

MySQL server established


In [16]:
cursor.execute('CREATE DATABASE IF NOT EXISTS harvard_artifacts;')
cursor.execute("USE harvard_artifacts;")
print("Database ready: harvard_artifacts")

Database ready: harvard_artifacts


In [17]:
cursor.execute("""
CREATE TABLE IF NOT EXISTS artifact_metadata (
  id INT PRIMARY KEY,
  title TEXT,
  culture TEXT,
  period TEXT,
  century TEXT,
  medium TEXT,
  dimensions TEXT,
  description TEXT,
  department TEXT,
  classification TEXT,
  accessionyear INT,
  accessionmethod TEXT
);
""")

cursor.execute("""
CREATE TABLE IF NOT EXISTS artifact_media (
  objectid INT PRIMARY KEY,
  imagecount INT,
  mediacount INT,
  colorcount INT,
  rank_value INT,
  datebegin INT,
  dateend INT,
  CONSTRAINT fk1_id FOREIGN KEY (objectid) REFERENCES artifact_metadata(id)
);
""")

cursor.execute("""
CREATE TABLE IF NOT EXISTS artifact_colors (
  objectid INT,
  color TEXT,
  spectrum TEXT,
  hue TEXT,
  percent REAL,
  css3 TEXT,
  CONSTRAINT fk2_id FOREIGN KEY (objectid) REFERENCES artifact_metadata(id)
);
""")
connection.commit()
print('tables created sucessfully')

tables created sucessfully


In [18]:
cursor.execute("DELETE FROM artifact_colors;")
cursor.execute("DELETE FROM artifact_media;")
cursor.execute("DELETE FROM artifact_metadata;")
connection.commit()
print('Tables emptied Ready for fresh insert')

Tables emptied Ready for fresh insert


In [19]:
insert_metadata = """ 
INSERT INTO artifact_metadata ( id, title, culture, period, century, medium, dimensions, description, 
department, classification, accessionyear, accessionmethod ) 
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) 
"""
cursor.executemany(insert_metadata, [(
    r.get('id'), 
    r.get('title'), 
    r.get('culture'), 
    r.get('period'), 
    r.get('century'),
    r.get('medium'), 
    r.get('dimensions'), 
    r.get('description'), 
    r.get('department'),
    r.get('classification'), 
    r.get('accessionyear'), 
    r.get('accessionmethod')
) for r in artifact_metadata])
connection.commit()
print("artifact_metadata inserted")

artifact_metadata inserted


In [20]:
insert_media = """
INSERT INTO artifact_media (
    objectid, imagecount, mediacount, colorcount, rank_value, datebegin, dateend
) VALUES (%s, %s, %s, %s, %s, %s, %s)
"""
cursor.executemany(insert_media, [(
            r.get('objectid'),
            r.get('imagecount'),
            r.get('mediacount'),
            r.get('colorcount'),
            r.get('rank_value'),  
            r.get('datebegin'),
            r.get('dateend')
        )for r in artifact_media])
connection.commit()
print("artifact_media inserted")

artifact_media inserted


In [21]:
insert_colors = """
INSERT INTO artifact_colors (
    objectid, color, spectrum, hue, percent, css3
) VALUES (%s, %s, %s, %s, %s, %s)
"""
cursor.executemany(insert_colors,[(
            r.get('objectid'),
            r.get('color'),
            r.get('spectrum'),
            r.get('hue'),
            r.get('percent'),
            r.get('css3')
        )for r in artifact_colors])
connection.commit()
print("artifact_colors inserted")

artifact_colors inserted


# SQL QUERIES

### artifact_metadata Table


#### 1. List all artifacts from the 11th century belonging to Byzantine culture.

In [22]:
cursor.execute(""" 
SELECT id, title, century, culture 
FROM artifact_metadata 
WHERE century LIKE '%11th century%' AND culture LIKE '%Byzantine%'; """) 
results = cursor.fetchall() 
for row in results: 
    print(row)

#### 2. What are the unique cultures represented in the artifacts?

In [23]:
cursor.execute("""
    SELECT DISTINCT culture
    FROM artifact_metadata
    WHERE culture IS NOT NULL
    ORDER BY culture;
""")
results = cursor.fetchall()
print("Unique cultures in artifacts:")
for row in results:
    print(row[0])


Unique cultures in artifacts:
Afghan
African
Aksumite
American
American?
Amlash
Anatolian
Arab
Australian
Austrian
Austrian?
Aztec
Baule
Belgian
Bohemian
British
British?
Bulgarian
Burmese
Byzantine
Cambodian
Canadian
Central American
Central Asian
Chinese
Colombian
Coptic
Coptic?
Costa Rican
Croatian
Cycladic
Cypriot
Czech
Dalmatian
Danish
Dogon?
Dutch
Dutch?
Edo
Egyptian
Egyptian?
Elamite
English
Etruscan
European
Fang
Flemish
Flemish?
Franco-Flemish
French
French?
Gandharan
German
German?
Graeco-Roman
Greek
Haitian
Helladic
Hellenistic
Hellenistic or Early Roman
Hispano-Philippine
Honduran
Hungarian
Indian
Indonesian
Iranian
Islamic
Italian
Italian, Emilian, Bolognese
Italian, Emilian, Ferrarese
Italian, Emilian, Parmesan
Italian, Ligurian, Genoese
Italian, Lombard
Italian, Lombard, Bergamasque
Italian, Lombard, Milanese
Italian, Lombard, Pavese
Italian, Marchigian
Italian, Neapolitan
Italian, Northern Italian
Italian, Roman
Italian, Tuscan
Italian, Tuscan, Florentine
Italian, Tusca

#### 3. List all artifacts from the Archaic Period

In [24]:
cursor.execute("""
SELECT id, title, period
FROM artifact_metadata
WHERE period = 'Archaic Period';
""")
results = cursor.fetchall()
print("Artifacts from Archaic Period:")
for row in results:
    print(row)


Artifacts from Archaic Period:
(76741, 'Body Sherd: Face of Woman Looking Left', 'Archaic period')
(77014, 'Body Sherd: Palmette', 'Archaic period')
(77063, 'Body Sherd: Black Gloss', 'Archaic period')
(77064, 'Body Sherd: Dot Register', 'Archaic period')
(77065, 'Body Sherd: Tendrils', 'Archaic period')
(77072, 'Body Sherd: Black-Figure Decoration', 'Archaic period')
(77095, "Rim Sherd: Part of Woman's Head", 'Archaic period')
(77096, 'Body Sherd: Waist of Warrior', 'Archaic period')
(77108, 'Body Sherd: Lower Torso of Draped Figure', 'Archaic period')
(77169, 'Body Sherd: Seated Figure Facing Right', 'Archaic period')
(77411, 'Body Sherd', 'Archaic period')
(77412, 'Body Sherd', 'Archaic period')
(77413, 'Body Sherd: Black Gloss', 'Archaic period')
(77415, 'Body Sherd: Lower Body of Man Carrying Animal', 'Archaic period')
(77416, 'Body Sherd', 'Archaic period')
(77417, 'Body Sherd: Figure Decoration', 'Archaic period')
(77418, 'Body Sherd: Bands on Interior', 'Archaic period')
(77422

#### 4. List artifact titles ordered by accession year in descending order

In [25]:
cursor.execute("""
    SELECT title, accessionyear
    FROM artifact_metadata
    WHERE accessionyear IS NOT NULL
    ORDER BY accessionyear DESC;
""")
results = cursor.fetchall()
print("Artifact titles ordered by accession year (newest first):")
for row in results:
    print(row)


Artifact titles ordered by accession year (newest first):
('Purchasing Fruit [A French Mulatress Purchasing Fruit from a Negro Wench]', 2023)
('Women of Color Bathing and a Voyeur [Mulatresses and Negro Woman Bathing]', 2023)
('Fragments of a Kantharos (drinking cup): Komast Dancers', 2023)
('Fragments of a Siana Cup: Procession of Men with Drinking Horns', 2023)
('Francophone Women of Color and a Young Attendant, Dominica [French Mulatresses of Dominica in their Proper Dress]', 2023)
('Two Women of Color, Dominica [French Mulatress of Dominica and a Negro Woman in their Proper Dress]', 2023)
('Mrs. (Asa) Samuel Hammond (Sarah Dawes Hammond)', 2022)
('(Asa) Samuel Hammond', 2022)
('The Moon (constructed)', 2020)
('Derek Bjorkman, Perkins School', 2016)
('Facing History Class, Boston Latin School', 2016)
('Head of a Bodhisattva', 2016)
('Still Life with Oranges, Pomegranates, Grapes, Chestnuts, Medlars and Ears of Wheat', 2015)
('Nahan (Chinese, Luohan) Riding a Spotted White Deer in a 

#### 5. How many artifacts are there per department

In [26]:
cursor.execute("""
SELECT department, COUNT(*) AS artifact_count
FROM artifact_metadata
GROUP BY department
ORDER BY artifact_count DESC;
""")
results = cursor.fetchall()
print("Number of artifacts per department:")
for department, count in results:
    print(f"{department}: {count}")


Number of artifacts per department:
Busch-Reisinger Museum: 3541
Department of Asian Art: 2494
Department of Ancient and Byzantine Art & Numismatics: 2404
Department of Photographs: 2045
Department of Paintings, Sculpture & Decorative Arts: 1075
Department of Modern & Contemporary Art: 474
Department of American Paintings, Sculpture & Decorative Arts: 228
Department of Islamic & Later Indian Art: 160
Harvard University Portrait Collection: 41
Department of Drawings: 35
Straus Center for Conservation and Technical Studies: 2
Department of Prints: 1


### artifact_media Table

#### 6. Which artifacts have more than 1 image

In [27]:
cursor.execute("""
SELECT m.title, md.imagecount
FROM artifact_metadata m
JOIN artifact_media md ON m.id = md.objectid
WHERE md.imagecount > 1;
""")
results = cursor.fetchall()
print("Artifacts with more than 1 image:")
for title, imagecount in results:
    print(f"Title: {title}, Images: {imagecount}")

Artifacts with more than 1 image:
Title: 23 fragments of Greek pottery - black varnish, Images: 23
Title: CASTS FROM ARRETINE POTTERY AT THE MUSEUM OF FINE ARTS, BOSTON, Images: 3
Title: Body Sherd: Black Gloss, Images: 2
Title: Rim Sherd: Part of tondo, Images: 2
Title: Body Sherd: From Cup, Images: 2
Title: Body Sherd: Part of Tondo (?), Images: 2
Title: Rim Sherd: Black Gloss, Images: 2
Title: Body Sherd: Face of Woman Looking Left, Images: 2
Title: Body Sherd: Part of Garment (?), Images: 2
Title: Body Sherd: From Cup, Images: 2
Title: 27 Ceramic Fragments, Images: 27
Title: Body Sherd: Palmette, Images: 2
Title: Body Sherd: Black Gloss, Images: 2
Title: Body Sherd: Dot Register, Images: 2
Title: Body Sherd: Tendrils, Images: 2
Title: Body Sherd: Black gloss, Images: 2
Title: Kylix Fragment: Frontal Torso and Right Hand, Images: 2
Title: Body Sherd: Black-Figure Decoration, Images: 2
Title: Rim Sherd: Part of Woman's Head, Images: 2
Title: Body Sherd: Waist of Warrior, Images: 2
Ti

#### 7. What is the average rank of all artifacts

In [28]:
cursor.execute("""
SELECT AVG(rank_value) AS average_rank
FROM artifact_media
WHERE rank_value IS NOT NULL;
""")
result = cursor.fetchone()
print(f"Average rank of all artifacts: {result[0]}")

Average rank of all artifacts: 139514.2169


#### 8. Which artifacts have a higher colorcount than mediacount

In [29]:
cursor.execute("""
SELECT m.title, am.colorcount, am.mediacount
FROM artifact_media am
JOIN artifact_metadata m ON am.objectid = m.id
WHERE am.colorcount > am.mediacount;
""")
results = cursor.fetchall()
print("Artifacts where color count is higher than media count:")
for title, colorcount, mediacount in results:
    print(f"Title: {title}, Color Count: {colorcount}, Media Count: {mediacount}")


Artifacts where color count is higher than media count:
Title: Fragments to Standing Madonna and Child, Color Count: 10, Media Count: 0
Title: One of three fragments of geometric ware from Argive Heraeum, Color Count: 9, Media Count: 0
Title: One of three fragments of geometric ware from Argive Heraeum, Color Count: 8, Media Count: 0
Title: MARKED ROME, PALATINE STEPD OF CAVES., Color Count: 9, Media Count: 0
Title: Fragment of Etruscan bucchero ware, Color Count: 2, Media Count: 0
Title: Twenty Fragments from Mount Kyeryong, Color Count: 7, Media Count: 0
Title: Apulian Krater Fragment in Gnathian Style, Color Count: 10, Media Count: 0
Title: Fragment of red clay vase, rippled surface, Color Count: 9, Media Count: 0
Title: 23 fragments of Greek pottery - black varnish, Color Count: 9, Media Count: 0
Title: Fragment of coarse black pottery, Color Count: 9, Media Count: 0
Title: Half of shallow bowl, Color Count: 10, Media Count: 0
Title: Apulian Krater Fragment in Gnathian Style with P

#### 9. List all artifacts created between 1500 and 1600

In [30]:
cursor.execute("""
    SELECT title, datebegin, dateend
    FROM artifact_metadata m
    JOIN artifact_media me ON m.id = me.objectid
    WHERE (me.datebegin BETWEEN 1500 AND 1600)
       OR (me.dateend BETWEEN 1500 AND 1600);
""")
results = cursor.fetchall()
print("Artifacts created between 1500 and 1600:")
for row in results:
    print(row)


Artifacts created between 1500 and 1600:
('Fragments to Standing Madonna and Child', 1515, 1525)
('Blossoming Branch of an Old, Weathered Plum Tree', 1600, 1700)
('The Blessed Lorenzo Giustiniani', 1495, 1505)
('Allegorical Portrait of a Young Man in the Guise of Mercury Slaying Argus', 1575, 1580)
('Portrait of Madonna', 1595, 1605)
('Virgin and Child with Saints Sebastian, Francis, John the Baptist, Jerome, an unidentified female saint, Saint  Anthony of Padua and two donors', 1510, 1520)
('Carved Altarpiece with Madonna and Child in Glory, St. Erasmus, St. Catherine; the Twelve Apostles; St. Anthony, St. Anne with Madonna and Child, St. Vitus, St. Margaret, St. Sebastian', 1524, 1524)
("A Gathering of Scholars in a Garden ('Touhu tu')", 1550, 1599)
('Illustrated Scroll of Bunshō, the Honest Man (Bunshō emaki)', 1600, 1732)
('Illustrated Scroll of Bunshō, the Honest Man (Bunshō emaki)', 1600, 1732)
('Illustrated Scroll of Bunshō, the Honest Man (Bunshō emaki)', 1600, 1732)
('Origin o

#### 10. How many artifacts have no media files

In [31]:
cursor.execute("""
    SELECT COUNT(*)
    FROM artifact_media
    WHERE mediacount = 0 OR mediacount IS NULL;
""")
no_media_count = cursor.fetchone()[0]
print(f"Artifacts with no media files: {no_media_count}")


Artifacts with no media files: 12470


### artifact_colors Table

#### 11. What are all the distinct hues used in the dataset

In [32]:
cursor.execute("""
    SELECT DISTINCT hue
    FROM artifact_colors
    WHERE hue IS NOT NULL
    ORDER BY hue;
""")
results = cursor.fetchall()
print("Distinct hues:")
for row in results:
    print(row[0])


Distinct hues:
Black
Blue
Brown
Green
Grey
Orange
Red
Violet
White
Yellow


#### 12. What are the top 5 most used colors by frequency

In [33]:
cursor.execute("""
SELECT color, COUNT(*) AS frequency
FROM artifact_colors
WHERE color IS NOT NULL
GROUP BY color
ORDER BY frequency DESC
LIMIT 5;
""")
results = cursor.fetchall()
print("Top 5 most used colors:")
for color, freq in results:
    print(f"{color}: {freq}")


Top 5 most used colors:
#c8c8c8: 5637
#afafaf: 5233
#e1e1e1: 5108
#fafafa: 4203
#969696: 3942


#### 13. What is the average coverage percentage for each hue

In [34]:
cursor.execute("""
    SELECT hue, AVG(percent) AS avg_percent
    FROM artifact_colors
    WHERE percent IS NOT NULL
    GROUP BY hue
    ORDER BY avg_percent DESC;
""")
results = cursor.fetchall()
print("Average coverage percentage per hue:")
for hue, avg in results:
    print(f"{hue}: {avg:.2f}%")


Average coverage percentage per hue:
White: 0.25%
Black: 0.18%
Green: 0.17%
Orange: 0.15%
Violet: 0.13%
Grey: 0.12%
Yellow: 0.09%
Brown: 0.09%
Blue: 0.08%
Red: 0.07%


#### 14. List all colors used for a given artifact ID

In [None]:
artifact_id = int(input("Enter Artifact ID: "))
cursor.execute("""
SELECT color, hue, percent, spectrum, css3
FROM artifact_colors
WHERE objectid = %s;
""", (artifact_id,))
results = cursor.fetchall()
print(f"Colors used for artifact ID {artifact_id}:")
for color, hue, percent, spectrum, css3 in results:
    print(f"{color} | Hue: {hue} | Percent: {percent} | Spectrum: {spectrum} | CSS3: {css3}")

#### 15. What is the total number of color entries in the dataset

In [None]:
cursor.execute("""
SELECT COUNT(*) 
FROM artifact_colors;
""")
total_colors = cursor.fetchone()[0]
print(f"Total number of color entries in the dataset: {total_colors}")


### Join-Based Queries

#### 16. List artifact titles and hues for all artifacts belonging to the Byzantine culture.

In [None]:
cursor.execute("""
SELECT m.title, c.hue
FROM artifact_metadata m
JOIN artifact_colors c ON m.id = c.objectid
WHERE m.culture = 'Byzantine';
""")
results = cursor.fetchall()
print("Artifacts from Byzantine culture with their hues:")
for row in results:
    print(f"Title: {row[0]}, Hue: {row[1]}")


#### 17. List each artifact title with its associated hues. 

In [None]:
cursor.execute("""
SELECT md.title, GROUP_CONCAT(ac.hue SEPARATOR ', ') AS hues
FROM artifact_metadata md
JOIN artifact_colors ac ON md.id = ac.objectid
GROUP BY md.title;
""")
results = cursor.fetchall()
print("Artifact titles with their associated hues:")
for title, hues in results:
    print(f"{title}: {hues}")


#### 18. Get artifact titles, cultures, and media ranks where the period is not null.

In [None]:
cursor.execute("""
SELECT md.title, md.culture, am.rank_value
FROM artifact_metadata md
JOIN artifact_media am ON md.id = am.objectid
WHERE md.period IS NOT NULL;
""")
results = cursor.fetchall()
for title, culture, rank in results:
    print(f"{title} | Culture: {culture} | Rank: {rank}")

#### 19. Find artifact titles ranked in the top 10 that include the color hue "Grey".

In [None]:
cursor.execute("""
SELECT md.title, am.rank_value, ac.hue
FROM artifact_metadata md
JOIN artifact_media am ON md.id = am.objectid
JOIN artifact_colors ac ON md.id = ac.objectid
WHERE ac.hue = 'Grey'
ORDER BY am.rank_value ASC
LIMIT 10;
""")
results = cursor.fetchall()
print("Top 10 ranked artifacts with hue 'Grey':")
for title, rank, hue in results:
    print(f"{title} | Rank: {rank} | Hue: {hue}")


#### 20. How many artifacts exist per classification, and what is the average media count for each?

In [None]:
cursor.execute("""
SELECT md.classification, 
       COUNT(*) AS artifact_count, 
       AVG(am.mediacount) AS avg_media_count
FROM artifact_metadata md
JOIN artifact_media am ON md.id = am.objectid
GROUP BY md.classification
ORDER BY artifact_count DESC;
""")
results = cursor.fetchall()
print("Artifacts per classification and average media count:")
for classification, count, avg_media in results:
    print(f"{classification}: Count = {count}, Average Media Count = {avg_media:.2f}")


### Extra SQL queries

#### 21. Check how many records you have in each table

In [None]:
tables = ['artifact_metadata', 'artifact_media', 'artifact_colors']
for table in tables:
    cursor.execute(f"SELECT COUNT(*) FROM {table};")
    print(f"{table}: {cursor.fetchone()[0]} rows")

#### 22.  Find the most common classifications of artifacts

In [None]:
cursor.execute("""
    SELECT classification, COUNT(*) AS count
    FROM artifact_metadata
    GROUP BY classification
    ORDER BY count DESC
    LIMIT 10;
""")
print("Top 10 classifications:")
for row in cursor.fetchall():
    print(row)

#### 23. How many artifacts belong to each century

In [None]:
cursor.execute("""
    SELECT century, COUNT(*) AS count
    FROM artifact_metadata
    GROUP BY century
    ORDER BY count DESC
    LIMIT 10;
""")
print("Artifacts per century:")
for row in cursor.fetchall():
    print(row)

#### 24. Show top artifacts with the highest number of images

In [None]:
cursor.execute("""
    SELECT m.title, me.imagecount
    FROM artifact_metadata m
    JOIN artifact_media me ON m.id = me.objectid
    ORDER BY me.imagecount DESC
    LIMIT 10;
""")
print("Top 10 artifacts by image count:")
for row in cursor.fetchall():
    print(row)

#### 25.  Find most frequent colors

In [None]:
cursor.execute("""
    SELECT color, COUNT(*) AS count
    FROM artifact_colors
    GROUP BY color
    ORDER BY count DESC
    LIMIT 10;
""")
print("Top colors:")
for row in cursor.fetchall():
    print(row)