In [1]:
"""
Wikidata Ancient Writings Query Script
All Q-IDs verified on December 26, 2025
"""
import requests
import time
import pandas as pd

ENDPOINT = "https://query.wikidata.org/sparql"

# CORRECTED AND VERIFIED Q-IDs
TYPES = {
    "inscription": "Q1640824",           # ✓ words, texts, lettering, or symbols marked on a work
    "clay tablet": "Q1570005",           # ✓ writing implement (cuneiform)
    "manuscript": "Q87167",              # ✓ document written by hand
    "codex": "Q213924",                  # ✓ book with handwritten content
    "illuminated manuscript": "Q48498",  # ✓ manuscript with decoration
    "papyrus": "Q125576",                # ✓ ancient writing material from papyrus plant
    "oracle bone": "Q283127",            # ✓ CORRECTED: pieces of ox scapula or turtle plastron
    "bamboo and wooden slips": "Q905725",# ✓ CORRECTED: writing medium in ancient China
    "ostracon": "Q834459",               # ✓ CORRECTED: broken piece of pottery with inscription
    "stele": "Q178743",                  # ✓ CORRECTED: stone or wooden slab for funerals/commemorative
    "scroll": "Q16355570",               # ✓ CORRECTED: roll of papyrus, parchment, or paper
    "parchment": "Q226697",              # ✓ animal skin processed for writing
    "palm-leaf manuscript": "Q1641020",  # ✓ manuscripts made out of dried palm leaves
    "birch bark manuscript": "Q865595",  # ✓ CORRECTED: birch bark scroll
    "wax tablet": "Q1428312",            # ✓ CORRECTED: tablets of wood filled with wax
    "written work": "Q47461344",         # ✓ any work expressed in writing
    "literary work": "Q7725634",         # ✓ written work read for enjoyment
    "book": "Q571",                      # ✓ medium for recording information
}

def count(qid):
    query = f"SELECT (COUNT(DISTINCT ?item) AS ?count) WHERE {{ ?item wdt:P31/wdt:P279* wd:{qid} . }}"
    r = requests.get(ENDPOINT, params={"query": query, "format": "json"}, timeout=120)
    return int(r.json()["results"]["bindings"][0]["count"]["value"])

results = []
for name, qid in TYPES.items():
    print(f"Querying {name}...", end=" ", flush=True)
    try:
        c = count(qid)
        print(f"{c:,}")
        results.append({"type": name, "qid": qid, "count": c})
    except Exception as e:
        print(f"TIMEOUT/ERROR")
        results.append({"type": name, "qid": qid, "count": None})
    time.sleep(1)

df = pd.DataFrame(results).sort_values("count", ascending=False, na_position='last')
print("\n" + "=" * 60)
print("RESULTS TABLE")
print("=" * 60)
print(df.to_string(index=False))

Querying inscription... 9,496
Querying clay tablet... 2,137
Querying manuscript... 190,297
Querying codex... 530
Querying illuminated manuscript... 1,618
Querying papyrus... 32
Querying oracle bone... 12,845
Querying bamboo and wooden slips... 9
Querying ostracon... 449
Querying stele... 11,500
Querying scroll... 0
Querying parchment... 12
Querying palm-leaf manuscript... 1,118
Querying birch bark manuscript... 11
Querying wax tablet... 36
Querying written work... TIMEOUT/ERROR
Querying literary work... 1,347,367
Querying book... 54,536

RESULTS TABLE
                   type       qid     count
          literary work  Q7725634 1347367.0
             manuscript    Q87167  190297.0
                   book      Q571   54536.0
            oracle bone   Q283127   12845.0
                  stele   Q178743   11500.0
            inscription  Q1640824    9496.0
            clay tablet  Q1570005    2137.0
 illuminated manuscript    Q48498    1618.0
   palm-leaf manuscript  Q1641020    1118.0
  

In [5]:
#!/usr/bin/env python3
import requests
import time
import pandas as pd

ENDPOINT = "https://query.wikidata.org/sparql"

FORMATS = [
    ("literary work", "Q7725634"),
    ("manuscript", "Q87167"),
    ("book", "Q571"),
    ("oracle bone", "Q283127"),
    ("stele", "Q178743"),
    ("inscription", "Q1640824"),
    ("clay tablet", "Q1570005"),
    ("illuminated manuscript", "Q48498"),
    ("palm-leaf manuscript", "Q1641020"),
    ("codex", "Q213924"),
    ("ostracon", "Q834459"),
    ("wax tablet", "Q1428312"),
    ("papyrus", "Q125576"),
    ("parchment", "Q226697"),
    ("birch bark manuscript", "Q865595"),
    ("bamboo and wooden slips", "Q905725"),
    ("scroll", "Q16355570"),
    ("tablet", "Q16744570"),
    ("Writing surface", "Q3327760"),
]

def query_total(qid):
    """Query total count for a QID"""
    sparql = f"""
    SELECT (COUNT(DISTINCT ?item) AS ?count) WHERE {{
      ?item wdt:P31/wdt:P279* wd:{qid} .
    }}"""
    r = requests.get(ENDPOINT, params={"query": sparql}, 
                     headers={"Accept": "application/sparql-results+json"}, timeout=180)
    if r.ok:
        v = r.json()["results"]["bindings"][0].get("count", {}).get("value")
        return int(v) if v else 0
    return None

def query_before_1800(qid):
    """Query count before 1800 for a QID"""
    sparql = f"""
    SELECT (COUNT(DISTINCT ?item) AS ?count) WHERE {{
      ?item wdt:P31/wdt:P279* wd:{qid} .
      OPTIONAL {{ ?item wdt:P577 ?pubDate }}
      OPTIONAL {{ ?item wdt:P571 ?incDate }}
      BIND(COALESCE(?pubDate, ?incDate) AS ?date)
      FILTER(YEAR(?date) < 1800)
    }}"""
    r = requests.get(ENDPOINT, params={"query": sparql}, 
                     headers={"Accept": "application/sparql-results+json"}, timeout=180)
    if r.ok:
        v = r.json()["results"]["bindings"][0].get("count", {}).get("value")
        return int(v) if v else 0
    return None

# Collect data
data = []
print(f"{'Name':<30} {'QID':<12} {'Total':>10} {'Pre-1800':>10}")
print("="*65)

for name, qid in FORMATS:
    total = query_total(qid)
    time.sleep(2)  # Be nice to the API
    
    before_1800 = query_before_1800(qid)
    time.sleep(2)  # Be nice to the API
    
    print(f"{name:<30} {qid:<12} {total:>10} {before_1800:>10}")
    
    data.append({
        'name': name,
        'id': qid,
        'total_count': total,
        'total_count_before_1800': before_1800
    })

# Create DataFrame
df = pd.DataFrame(data)

print("\n" + "="*65)
print("DATAFRAME:")
print("="*65)
print(df.to_string(index=False))

# Optionally save to CSV


Name                           QID               Total   Pre-1800
literary work                  Q7725634        1347369      18709
manuscript                     Q87167           190297      30886
book                           Q571              54746       2052
oracle bone                    Q283127           12845          1
stele                          Q178743           11500       1712
inscription                    Q1640824           9496       1039
clay tablet                    Q1570005           2137         77
illuminated manuscript         Q48498             1618       1031
palm-leaf manuscript           Q1641020           1118         16
codex                          Q213924             530        313
ostracon                       Q834459             449         95
wax tablet                     Q1428312             36          8
papyrus                        Q125576              32          7
parchment                      Q226697              12          6
birch bark

In [12]:
#!/usr/bin/env python3
import requests
import time
import pandas as pd

ENDPOINT = "https://query.wikidata.org/sparql"

FORMATS = [
    ("literary work", "Q7725634"),
    ("manuscript", "Q87167"),
    ("book", "Q571"),
    ("oracle bone", "Q283127"),
    ("stele", "Q178743"),
    ("inscription", "Q1640824"),
    ("clay tablet", "Q1570005"),
    ("illuminated manuscript", "Q48498"),
    ("palm-leaf manuscript", "Q1641020"),
    ("codex", "Q213924"),
    ("ostracon", "Q834459"),
    ("wax tablet", "Q1428312"),
    ("papyrus", "Q125576"),
    ("parchment", "Q226697"),
    ("birch bark manuscript", "Q865595"),
    ("bamboo and wooden slips", "Q905725"),
    ("scroll", "Q16355570"),
    ("tablet", "Q16744570"),
    ("Writing surface", "Q3327760"),
]

def query_total(qid):
    """Query total count for a QID"""
    sparql = f"""
    SELECT (COUNT(DISTINCT ?item) AS ?count) WHERE {{
      ?item wdt:P31/wdt:P279* wd:{qid} .
    }}"""
    try:
        r = requests.get(ENDPOINT, params={"query": sparql}, 
                         headers={"Accept": "application/sparql-results+json"}, timeout=180)
        if r.ok:
            v = r.json()["results"]["bindings"][0].get("count", {}).get("value")
            return int(v) if v else 0
    except Exception as e:
        print(f"Error in query_total: {e}")
    return 0

def query_with_date(qid):
    """Query count with inception or publication date for a QID"""
    sparql = f"""
    SELECT (COUNT(DISTINCT ?item) AS ?count) WHERE {{
      ?item wdt:P31/wdt:P279* wd:{qid} .
      {{ ?item wdt:P577 ?pubDate }} UNION {{ ?item wdt:P571 ?incDate }}
    }}"""
    try:
        r = requests.get(ENDPOINT, params={"query": sparql}, 
                         headers={"Accept": "application/sparql-results+json"}, timeout=180)
        if r.ok:
            v = r.json()["results"]["bindings"][0].get("count", {}).get("value")
            return int(v) if v else 0
    except Exception as e:
        print(f"Error in query_with_date: {e}")
    return 0

def query_before_1800(qid):
    """Query count before 1800 for a QID"""
    sparql = f"""
    SELECT (COUNT(DISTINCT ?item) AS ?count) WHERE {{
      ?item wdt:P31/wdt:P279* wd:{qid} .
      OPTIONAL {{ ?item wdt:P577 ?pubDate }}
      OPTIONAL {{ ?item wdt:P571 ?incDate }}
      BIND(COALESCE(?pubDate, ?incDate) AS ?date)
      FILTER(YEAR(?date) < 1800)
    }}"""
    try:
        r = requests.get(ENDPOINT, params={"query": sparql}, 
                         headers={"Accept": "application/sparql-results+json"}, timeout=180)
        if r.ok:
            v = r.json()["results"]["bindings"][0].get("count", {}).get("value")
            return int(v) if v else 0
    except Exception as e:
        print(f"Error in query_before_1800: {e}")
    return 0

def query_wikisource_before_1800(qid):
    """Query count with Wikisource link before 1800 for a QID"""
    sparql = f"""
    SELECT (COUNT(DISTINCT ?item) AS ?count) WHERE {{
      ?item wdt:P31/wdt:P279* wd:{qid} .
      ?item ?wikisource ?article .
      FILTER(CONTAINS(STR(?wikisource), "wikisource"))
      OPTIONAL {{ ?item wdt:P577 ?pubDate }}
      OPTIONAL {{ ?item wdt:P571 ?incDate }}
      BIND(COALESCE(?pubDate, ?incDate) AS ?date)
      FILTER(YEAR(?date) < 1800)
    }}"""
    try:
        r = requests.get(ENDPOINT, params={"query": sparql}, 
                         headers={"Accept": "application/sparql-results+json"}, timeout=180)
        if r.ok:
            v = r.json()["results"]["bindings"][0].get("count", {}).get("value")
            return int(v) if v else 0
    except Exception as e:
        print(f"Error in query_wikisource_before_1800: {e}")
    return 0

# Collect data
data = []
print(f"{'Name':<30} {'QID':<12} {'Total':>10} {'With Date':>10} {'Pre-1800':>10} {'%':>8} {'Wikisrc':>8}")
print("="*100)

for name, qid in FORMATS:
    print(f"Processing {name}...", end=" ")
    
    total = query_total(qid)
    time.sleep(2)
    
    with_date = query_with_date(qid)
    time.sleep(2)
    
    before_1800 = query_before_1800(qid)
    time.sleep(2)
    
    wikisource = query_wikisource_before_1800(qid)
    time.sleep(2)
    
    # Calculate percentage
    percentage = (before_1800 / with_date * 100) if with_date > 0 else 0
    
    print(f"\r{name:<30} {qid:<12} {total:>10} {with_date:>10} {before_1800:>10} {percentage:>7.2f}% {wikisource:>8}")
    
    data.append({
        'name': name,
        'id': qid,
        'total_count': total,
        'has_inception_or_publication_date': with_date,
        'total_count_before_1800': before_1800,
        'percentage_pre_1800': round(percentage, 2),
        'wikisource_before_1800': wikisource
    })

# Create DataFrame
df = pd.DataFrame(data)

print("\n" + "="*100)
print("DATAFRAME:")
print("="*100)
print(df.to_string(index=False))


print("\nDataFrame saved to 'wikidata_results.csv'")

Name                           QID               Total  With Date   Pre-1800        %  Wikisrc
literary work                  Q7725634        1347370     362042      18709    5.17%        0
manuscript                     Q87167           190297      40348      30886   76.55%        0
book                           Q571              54746      35925       2052    5.71%        0
oracle bone                    Q283127           12845          1          1  100.00%        0
stele                          Q178743           11500       2131       1712   80.34%        0
inscription                    Q1640824           9496       1468       1039   70.78%        0
clay tablet                    Q1570005           2137         77         77  100.00%        0
illuminated manuscript         Q48498             1618       1083       1031   95.20%        0
palm-leaf manuscript           Q1641020           1118        178         16    8.99%        0
codex                          Q213924            

In [20]:
df.sort_values('total_count_before_1800', ascending=False).set_index('name')

Unnamed: 0_level_0,id,total_count,has_inception_or_publication_date,total_count_before_1800,percentage_pre_1800,wikisource_before_1800
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
manuscript,Q87167,190297,40348,30886,76.55,0
literary work,Q7725634,1347370,362042,18709,5.17,0
book,Q571,54746,35925,2052,5.71,0
stele,Q178743,11500,2131,1712,80.34,0
inscription,Q1640824,9496,1468,1039,70.78,0
illuminated manuscript,Q48498,1618,1083,1031,95.2,0
codex,Q213924,530,334,313,93.71,0
Writing surface,Q3327760,2931,410,273,66.59,0
tablet,Q16744570,2431,185,170,91.89,0
ostracon,Q834459,449,216,95,43.98,0


## Libvrary identifiers

In [26]:
#!/usr/bin/env python3
"""
Query Wikidata to count literary works/manuscripts with identifiers
from historical text databases, filtered by date before 1800.
"""

import requests
import time
import pandas as pd

SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"

# Historical text database identifiers: "Name": "Property ID"
# ALL VERIFIED against Wikidata
IDENTIFIERS = {
    "ESTC": "P3939",                 # English Short Title Catalogue ID
    "Project Gutenberg": "P2034",    # Project Gutenberg ebook ID
    "VIAF": "P214",                  # Virtual International Authority File
    "GND": "P227",                   # Gemeinsame Normdatei (German)
    "BnF": "P268",                   # Bibliothèque nationale de France
    "Library of Congress": "P244",   # Library of Congress authority ID
    "ISNI": "P213",                  # International Standard Name Identifier
    "Open Library": "P648",          # Open Library ID
    "Internet Archive": "P724",      # Internet Archive ID
    "HathiTrust": "P1844",           # HathiTrust ID
    "Google Books": "P675",          # Google Books ID
    "DBNL": "P723",                  # Digitale Bibliotheek Nederlandse Letteren
    "BNE": "P950",                   # Biblioteca Nacional de España
    "ICCU": "P396",                  # Istituto Centrale Catalogo Unico (Italy)
    "Europeana": "P7704",            # Europeana entity
    "Perseus": "P7041",              # Perseus author ID
    "TLG": "P3576",                  # Thesaurus Linguae Graecae author ID
}

# Item types to filter for
ITEM_TYPES = """
  { ?item wdt:P31/wdt:P279* wd:Q7725634. }   # literary work
  UNION { ?item wdt:P31/wdt:P279* wd:Q87167. }    # manuscript

"""

def query(sparql):
    """Execute SPARQL query."""
    headers = {"Accept": "application/sparql-results+json", "User-Agent": "Bot/1.0"}
    r = requests.get(SPARQL_ENDPOINT, params={"query": sparql}, headers=headers, timeout=120)
    r.raise_for_status()
    return int(r.json()["results"]["bindings"][0]["count"]["value"])

def count_total(prop):
    """Count all items with identifier."""
    return query(f"SELECT (COUNT(?item) AS ?count) WHERE {{ ?item wdt:{prop} ?id. }}")

def count_pre1800(prop):
    """Count literary works/manuscripts with identifier and date < 1800."""
    return query(f"""
        SELECT (COUNT(DISTINCT ?item) AS ?count) WHERE {{
          ?item wdt:{prop} ?id.
          {ITEM_TYPES}
          {{ ?item wdt:P577 ?date. }} UNION {{ ?item wdt:P571 ?date. }}
          FILTER(YEAR(?date) < 1800)
        }}
    """)

def count_pre1800_with_wikisource(prop):
    """Count literary works/manuscripts with identifier, date < 1800, AND Wikisource link."""
    return query(f"""
        SELECT (COUNT(DISTINCT ?item) AS ?count) WHERE {{
          ?item wdt:{prop} ?id.
          {ITEM_TYPES}
          {{ ?item wdt:P577 ?date. }} UNION {{ ?item wdt:P571 ?date. }}
          FILTER(YEAR(?date) < 1800)
          ?article schema:about ?item ;
                   schema:isPartOf/wikibase:wikiGroup "wikisource" .
        }}
    """)

# Run queries
results = []
for name, prop in IDENTIFIERS.items():
    print(f"Querying {name} ({prop})...")
    try:
        total = count_total(prop)
        time.sleep(1)
        pre1800 = count_pre1800(prop)
        time.sleep(1)
        time.sleep(1)
        results.append({"name": name, "property": prop, "total": total, "pre1800": pre1800})
        print(f"  Total: {total:,} | Pre-1800: {pre1800:,}")
    except Exception as e:
        print(f"  Error: {e}")
        results.append({"name": name, "property": prop, "total": 0, "pre1800": 0, "wikisource": 0})

# Create DataFrame
df = pd.DataFrame(results)
df = df.sort_values("pre1800", ascending=False).set_index("name")

print("\n" + "="*60)
print(df)
print("="*60)

df.to_csv("wikidata_library_counts.csv")
print("\nSaved to wikidata_library_counts.csv")

Querying ESTC (P3939)...
  Total: 263 | Pre-1800: 10
Querying Project Gutenberg (P2034)...
  Total: 4,051 | Pre-1800: 158
Querying VIAF (P214)...
  Total: 4,418,266 | Pre-1800: 3,040
Querying GND (P227)...
  Total: 2,688,504 | Pre-1800: 1,719
Querying BnF (P268)...
  Total: 742,074 | Pre-1800: 1,329
Querying Library of Congress (P244)...
  Total: 1,718,591 | Pre-1800: 1,053
Querying ISNI (P213)...
  Total: 2,323,318 | Pre-1800: 0
Querying Open Library (P648)...
  Total: 475,689 | Pre-1800: 444
Querying Internet Archive (P724)...
  Total: 139,194 | Pre-1800: 692
Querying HathiTrust (P1844)...
  Total: 14,937 | Pre-1800: 60
Querying Google Books (P675)...
  Error: 504 Server Error: Gateway Timeout for url: https://query.wikidata.org/sparql?query=%0A++++++++SELECT+%28COUNT%28DISTINCT+%3Fitem%29+AS+%3Fcount%29+WHERE+%7B%0A++++++++++%3Fitem+wdt%3AP675+%3Fid.%0A++++++++++%0A++%7B+%3Fitem+wdt%3AP31%2Fwdt%3AP279%2A+wd%3AQ7725634.+%7D+++%23+literary+work%0A++UNION+%7B+%3Fitem+wdt%3AP31%2Fwdt%3A

Unnamed: 0_level_0,property,total,pre1800,wikisource
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
VIAF,P214,4418266,3040,
GND,P227,2688504,1719,
BnF,P268,742074,1329,
Library of Congress,P244,1718591,1053,
Internet Archive,P724,139194,692,
Open Library,P648,475689,444,
BNE,P950,300778,327,
Project Gutenberg,P2034,4051,158,
HathiTrust,P1844,14937,60,
ESTC,P3939,263,10,


df