In [64]:
import getpass
import gzip
import io
import math
import os
import urllib.request

import numpy as np
import pandas as pd
import pymysql.cursors
import rdflib
from rdflib import Namespace

import matplotlib.pyplot as plt
plt.style.use('ggplot')

%matplotlib inline

# Connect to the database

# The password is not stored in the notebook: it is read from the
# ADSQ_DB_PASSWORD environment variable, and when that is unset or empty the
# user is prompted for it interactively.
connection = pymysql.connect(host='hosting.nyu.edu',
                             user='cmrougha_adsq',
                             password=(os.environ.get('ADSQ_DB_PASSWORD')
                                       or getpass.getpass('Database password: ')),
                             db='cmrougha_adsq2017',
                             charset='utf8mb4',
                             # DictCursor returns each row as a dict keyed by column name.
                             cursorclass=pymysql.cursors.DictCursor)

# RDF and Queries

Search for the header 'Some Queries' to skip to the queries run on the resultant RDF graph.

Initial work experimented with converting a number of different tables from the SQL database into RDF, but this notebook focuses on one table newly created this week, `17.3.9_pinAstr_all`. This table combines all information drawn from Pinakes with new, randomly generated identifiers (columns starting with `astrID_`) for each author, work, witness of that work, and manuscript. This allows for easier work with the hundreds of non-Little Astronomy texts that have been included in the database (formerly only Little Astronomy texts and authors had received unique identifiers).

This table, `17.3.9_pinAstr_all`, is not in any normal form, so the notebook uses various SQL queries to pull out columns with distinct values, which are then input into the RDF graph.

The full table has 2,871 rows and 26 columns. A sample of the table is below:

In [65]:
# Load every column of the combined Pinakes table into a DataFrame.
sql = """
SELECT * FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,witness,astrID_wi,MS,astrID_ms,Pays,Ville,Depot,Fonds,Cote,Folios,...,Identifiant,Recension,BHG,DateLit.,Siècle,Date,Commentaire,Contenu,Rev.,Bibliographies
0,6769_11734,4512229,11734,26250,Magyarország,Budapest,Országos Széchényi Könyvtár,fonds principal,4° Gr. 01 (Kubinyi 10),002-30*,...,,,,,16,,,,,
1,1484_11734,4739938,11734,26250,Magyarország,Budapest,Országos Széchényi Könyvtár,fonds principal,4° Gr. 01 (Kubinyi 10),030-53v*,...,,,,,16,,,,,
2,913_11734,9876248,11734,26250,Magyarország,Budapest,Országos Széchényi Könyvtár,fonds principal,4° Gr. 01 (Kubinyi 10),066v-69v,...,,,,,16,,,,,
3,969_11734,6792546,11734,26250,Magyarország,Budapest,Országos Széchényi Könyvtár,fonds principal,4° Gr. 01 (Kubinyi 10),"074v-140 , p. 280-288",...,,,,,16,,,,,
4,12764_11895,3068424,11895,34044,United Kingdom,Cambridge,Queen's College Libr.,fonds principal,19,001-233,...,,,,,16,,Libb 1-9,,,


## A Function to convert to RDF

In [66]:
def convertToRDF(g,df,primaryKey):
    """Add the contents of ``df`` to the graph ``g`` as triples and return ``g``.

    ``df`` is melted so that every non-key cell becomes one row of
    (``primaryKey`` value, column name, cell value). The key value becomes the
    subject, the column name selects the predicate and the cell value becomes
    the object. Every subject is also typed as ``skos:Concept``.

    The mapping is driven by module-level lookup tables that must exist
    before the call:

    * ``resourceSyntax`` -- URI prefix -> column names minted under that prefix
    * ``verbs``          -- predicate URI -> column names it applies to
    * ``literals``       -- columns whose values become literals
    * ``languages``      -- column name -> language tag for its literals
    * ``numerics``       -- columns whose values become integer literals
    * ``resources``      -- columns whose values become URI references

    Parameters
    ----------
    g : graph exposing ``add((s, p, o))``
    df : pandas.DataFrame containing the ``primaryKey`` column
    primaryKey : str
        Column whose values identify the subject of every triple.

    Returns
    -------
    The graph ``g`` that was passed in, after the triples have been added.

    Raises
    ------
    ValueError
        If ``df`` yields rows but ``primaryKey`` is not listed under any
        prefix in ``resourceSyntax``.

    Notes
    -----
    Cells that are ``None``, the empty string or NaN produce no value triple.
    A column with no entry in ``verbs`` is reported and skipped; it no longer
    reuses the predicate resolved for the previous row.
    """
    rdf_type = rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
    skos_concept = rdflib.URIRef('http://www.w3.org/2004/02/skos/core#Concept')

    # The subject prefix depends only on primaryKey, so resolve it once.
    # When several prefixes list the key, the last one wins (as before).
    subject_prefix = None
    for resourceKey in resourceSyntax:
        if primaryKey in resourceSyntax[resourceKey]:
            subject_prefix = resourceKey

    pseudo_triples = pd.melt(df,id_vars=primaryKey)
    if subject_prefix is None and len(pseudo_triples) > 0:
        raise ValueError('No URI prefix in resourceSyntax lists the key column %r.' % primaryKey)

    for _, row in pseudo_triples.iterrows():
        variable = row['variable']
        value = row['value']

        # str() mirrors the object branch below and lets numeric keys work.
        s = rdflib.URIRef(subject_prefix + str(row[primaryKey]))
        g.add((s, rdf_type, skos_concept))

        # Skip missing values: None, NaN and the empty string.
        if value is None or (isinstance(value, float) and math.isnan(value)):
            continue
        if value == '':
            continue

        # Resolve the predicate afresh for every row so that a column without
        # a verb can never inherit the predicate of the preceding row.
        p = None
        for verbKey in verbs:
            if variable in verbs[verbKey]:
                p = rdflib.URIRef(verbKey)

        if variable in literals:
            if variable in languages:
                objects = [rdflib.Literal(value,lang=languages[variable])]
            else:
                objects = [rdflib.Literal(value)]
        elif variable in numerics:
            objects = [rdflib.Literal(int(value))]
        elif variable in resources:
            # One object per prefix that lists this column.
            objects = [rdflib.URIRef(resourceKey + str(value))
                       for resourceKey in resourceSyntax
                       if variable in resourceSyntax[resourceKey]]
        else:
            print('Error: Unknown whether %s is a literal or resource.' % variable)
            continue

        if p is None:
            print('Error: No verb is defined for %s.' % variable)
            continue

        for o in objects:
            g.add((s,p,o))
    return g
    
    

# RDF Conversion

## Geographic Information

### MS to Institution

In [67]:
# Distinct (manuscript id, institution) pairs. The institution identifier is
# Depot with parentheses, apostrophes, full stops, commas and spaces removed.
sql = """
SELECT DISTINCT astrID_ms, REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Depot,')',''),'(',''),"'",""),'.',''),',',''),' ','') AS institution
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,astrID_ms,institution
0,26250,OrszágosSzéchényiKönyvtár
1,34044,QueensCollegeLibr
2,65283,TrinityCollege
3,48846,TrinityCollege
4,34813,UniversityLibrary


In [68]:
# astrID_ms, Depot from `17.3.9_pinAstr_all`
#
# Lookup tables read by convertToRDF. All of them, including `languages`,
# are created here so that they exist before the first conversion runs.

# Columns stored as literals (none yet).
literals = []

# Columns stored as URI references.
resources = ['astrID_ms', 'institution']

# Columns stored as integer literals (none yet).
numerics = []

# Column name -> language tag for literal values (none yet).
languages = {}

# Predicate URI -> columns that use it.
verbs = {
    'http://www.w3.org/2004/02/skos/core#prefLabel': [],
    'http://example.org/vocab/locatedIn': ['institution'],
}

# URI prefix -> columns whose values are minted under it.
resourceSyntax = {
    'http://example.org/id/': ['astrID_ms', 'institution'],
}

In [69]:
# Start from an empty graph; the conversion cells below add triples to it.
g = rdflib.Graph()

In [70]:
# Use the manuscript id column as the subject of each triple.
primaryKey = 'astrID_ms'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [71]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/54405,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
1,http://example.org/id/62037,http://example.org/vocab/locatedIn,http://example.org/id/BibliotheekderRijksunive...
2,http://example.org/id/57839,http://example.org/vocab/locatedIn,http://example.org/id/TopkapiSarayiMüzesiKütüp...
3,http://example.org/id/65456,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
4,http://example.org/id/68396,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept


### Institution to Pinakes Label

In [72]:
# Distinct (Depot, institution) pairs: the original Depot text next to the
# identifier derived from it by stripping punctuation and spaces.
sql = """
SELECT DISTINCT Depot, REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Depot,')',''),'(',''),"'",""),'.',''),',',''),' ','') AS institution
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,Depot,institution
0,Országos Széchényi Könyvtár,OrszágosSzéchényiKönyvtár
1,Queen's College Libr.,QueensCollegeLibr
2,Trinity College,TrinityCollege
3,University Library,UniversityLibrary
4,"Thirlestaine House, coll. Phillipps",ThirlestaineHousecollPhillipps


In [73]:
# Depot from `17.3.9_pinAstr_all`

# Depot values are stored as literals under skos:prefLabel.
literals.extend(['Depot'])
verbs['http://www.w3.org/2004/02/skos/core#prefLabel'].extend(['Depot'])

# No column carries a language tag.
languages = {}

In [74]:
# Use the institution column as the subject of each triple.
primaryKey = 'institution'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [75]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/54405,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
1,http://example.org/id/62037,http://example.org/vocab/locatedIn,http://example.org/id/BibliotheekderRijksunive...
2,http://example.org/id/57839,http://example.org/vocab/locatedIn,http://example.org/id/TopkapiSarayiMüzesiKütüp...
3,http://example.org/id/65456,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
4,http://example.org/id/68396,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept


### Institution to City

In [76]:
# Distinct (city, institution) pairs, both derived from Ville and Depot by
# stripping parentheses, apostrophes, full stops, commas and spaces.
sql = """
SELECT DISTINCT REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Ville,')',''),'(',''),"'",""),'.',''),',',''),' ','') AS city, REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Depot,')',''),'(',''),"'",""),'.',''),',',''),' ','') AS institution
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,city,institution
0,Budapest,OrszágosSzéchényiKönyvtár
1,Cambridge,QueensCollegeLibr
2,Cambridge,TrinityCollege
3,Cambridge,UniversityLibrary
4,Cheltenham,ThirlestaineHousecollPhillipps


In [77]:
# Depot, Ville from `17.3.9_pinAstr_all`

# `city` values are URI references, linked with locatedIn and minted under
# the shared id prefix.
resources.extend(['city'])
verbs['http://example.org/vocab/locatedIn'].extend(['city'])
resourceSyntax['http://example.org/id/'].extend(['city'])

In [78]:
# Use the institution column as the subject of each triple.
primaryKey = 'institution'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [79]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/BibliotecacomunaledegliI...,http://example.org/vocab/locatedIn,http://example.org/id/Siena
1,http://example.org/id/54405,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
2,http://example.org/id/62037,http://example.org/vocab/locatedIn,http://example.org/id/BibliotheekderRijksunive...
3,http://example.org/id/57839,http://example.org/vocab/locatedIn,http://example.org/id/TopkapiSarayiMüzesiKütüp...
4,http://example.org/id/65456,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept


### City to Pinakes Label

In [80]:
# Distinct (Ville, city) pairs: the original Ville text next to the
# identifier derived from it by stripping punctuation and spaces.
sql = """
SELECT DISTINCT Ville, REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Ville,')',''),'(',''),"'",""),'.',''),',',''),' ','') AS city
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,Ville,city
0,Budapest,Budapest
1,Cambridge,Cambridge
2,Cheltenham,Cheltenham
3,Chicago (IL),ChicagoIL
4,Cologny (Genève),ColognyGenève


In [81]:
# Ville from `17.3.9_pinAstr_all`

# Ville values are stored as literals under skos:prefLabel.
literals.extend(['Ville'])
verbs['http://www.w3.org/2004/02/skos/core#prefLabel'].extend(['Ville'])

# Reset the language table: no column carries a language tag.
languages = {}

In [82]:
# Use the city column as the subject of each triple.
primaryKey = 'city'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [83]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/BibliotecacomunaledegliI...,http://example.org/vocab/locatedIn,http://example.org/id/Siena
1,http://example.org/id/54405,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
2,http://example.org/id/62037,http://example.org/vocab/locatedIn,http://example.org/id/BibliotheekderRijksunive...
3,http://example.org/id/57839,http://example.org/vocab/locatedIn,http://example.org/id/TopkapiSarayiMüzesiKütüp...
4,http://example.org/id/65456,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept


### City to Country

In [84]:
# Distinct (city, country) pairs, both derived from Ville and Pays by
# stripping parentheses, apostrophes, full stops, commas and spaces.
sql = """
SELECT DISTINCT REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Ville,')',''),'(',''),"'",""),'.',''),',',''),' ','') AS city, REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Pays,')',''),'(',''),"'",""),'.',''),',',''),' ','') AS country
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,city,country
0,Budapest,Magyarország
1,Cambridge,UnitedKingdom
2,Cheltenham,UnitedKingdom
3,ChicagoIL,UnitedStatesofAmerica
4,ColognyGenève,Suisse


In [85]:
# Pays, Ville from `17.3.9_pinAstr_all`

# `country` values are URI references, linked with locatedIn and minted under
# the shared id prefix.
resources.extend(['country'])
verbs['http://example.org/vocab/locatedIn'].extend(['country'])
resourceSyntax['http://example.org/id/'].extend(['country'])

In [86]:
# Use the city column as the subject of each triple.
primaryKey = 'city'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [87]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/BibliotecacomunaledegliI...,http://example.org/vocab/locatedIn,http://example.org/id/Siena
1,http://example.org/id/54405,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
2,http://example.org/id/62037,http://example.org/vocab/locatedIn,http://example.org/id/BibliotheekderRijksunive...
3,http://example.org/id/68396,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
4,http://example.org/id/65456,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept


### Country to Pinakes Label

In [88]:
# Distinct (Pays, country) pairs: the original Pays text next to the
# identifier derived from it by stripping punctuation and spaces.
sql = """
SELECT DISTINCT Pays, REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Pays,')',''),'(',''),"'",""),'.',''),',',''),' ','') AS country
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,Pays,country
0,Magyarország,Magyarország
1,United Kingdom,UnitedKingdom
2,United States of America,UnitedStatesofAmerica
3,Suisse,Suisse
4,Hellas,Hellas


In [89]:
# Pays from `17.3.9_pinAstr_all`

# Pays values are stored as literals under skos:prefLabel.
literals.extend(['Pays'])
verbs['http://www.w3.org/2004/02/skos/core#prefLabel'].extend(['Pays'])


In [90]:
# Use the country column as the subject of each triple.
primaryKey = 'country'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [91]:
# Select every subject that has a skos:prefLabel, together with that label,
# and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s <http://www.w3.org/2004/02/skos/core#prefLabel> ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,3o
0,http://example.org/id/LincolnCollege,Lincoln College
1,http://example.org/id/Milano,Milano
2,http://example.org/id/Madrid,Madrid
3,http://example.org/id/Berlin,Berlin
4,http://example.org/id/BibliotecaUniversitaria,Biblioteca Universitaria


## The Manuscripts

### Astr to Pinakes ID and Label

* Perhaps adjust SQL query to give not Pinakes 'ID' but Pinakes URLs instead.

In [92]:
# Distinct manuscripts: the manuscript id, the MS column, and a shelfmark
# built by joining Fonds and Cote with a space.
sql = """
SELECT DISTINCT astrID_ms, MS, CONCAT(Fonds, ' ', Cote) AS shelfmark
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,astrID_ms,MS,shelfmark
0,26250,11734,fonds principal 4° Gr. 01 (Kubinyi 10)
1,34044,11895,fonds principal 19
2,65283,12000,fonds principal O.05.15 (1296)
3,48846,12028,fonds principal O.10.12 (1464)
4,34813,12151,Fonds ancien Dd. III. 86 (173)


In [93]:
# MS, astrID_ms, Fonds, Cote from `17.3.9_pinAstr_all`

# Both manuscript columns are stored as literals.
literals.extend(['MS', 'shelfmark'])

# New predicates: the shelfmark is the Pinakes label, MS is the Pinakes id.
verbs['http://example.org/vocab/pinakesLabel'] = ['shelfmark']
verbs['http://example.org/vocab/pinakesID'] = ['MS']

In [94]:
# Use the manuscript id column as the subject of each triple.
primaryKey = 'astrID_ms'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [95]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/BibliotecacomunaledegliI...,http://example.org/vocab/locatedIn,http://example.org/id/Siena
1,http://example.org/id/54405,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
2,http://example.org/id/68396,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
3,http://example.org/id/62037,http://example.org/vocab/locatedIn,http://example.org/id/BibliotheekderRijksunive...
4,http://example.org/id/65456,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept


### Manuscript to Witness

In [96]:
# Distinct (manuscript id, witness id) pairs.
sql = """
SELECT DISTINCT astrID_ms, astrID_wi
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,astrID_ms,astrID_wi
0,26250,4512229
1,26250,4739938
2,26250,9876248
3,26250,6792546
4,34044,3068424


In [97]:
# astrID_ms, astrID_wi from `17.3.9_pinAstr_all`

# Witness ids are URI references minted under the shared id prefix.
resources.extend(['astrID_wi'])

# New predicate pointing from a manuscript to a witness it contains.
verbs['http://example.org/vocab/msContains'] = ['astrID_wi']

resourceSyntax['http://example.org/id/'].extend(['astrID_wi'])

In [98]:
# Use the manuscript id column as the subject of each triple.
primaryKey = 'astrID_ms'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [99]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/80751,http://example.org/vocab/msContains,http://example.org/id/8252583
1,http://example.org/id/65456,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
2,http://example.org/id/93364,http://example.org/vocab/msContains,http://example.org/id/5459803
3,http://example.org/id/63517,http://example.org/vocab/msContains,http://example.org/id/4945563
4,http://example.org/id/41625,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept


In [100]:
# REVERSE
# astrID_ms, astrID_wi from `17.3.9_pinAstr_all`

# New predicate for the opposite direction: from a witness to its manuscript.
verbs['http://example.org/vocab/inMS'] = ['astrID_ms']


In [101]:
# Use the witness id column as the subject of each triple.
primaryKey = 'astrID_wi'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [102]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/3814921,http://example.org/vocab/inMS,http://example.org/id/40084
1,http://example.org/id/65456,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
2,http://example.org/id/1616905,http://example.org/vocab/inMS,http://example.org/id/84028
3,http://example.org/id/93364,http://example.org/vocab/msContains,http://example.org/id/5459803
4,http://example.org/id/63517,http://example.org/vocab/msContains,http://example.org/id/4945563


### Witness to Pinakes ID, Folios, and Century

* Note: `witness` is not a real Pinakes ID; it was constructed from the work and MS IDs. It may be removed later.

In [103]:
# Distinct witnesses with their constructed witness key, folio range and century.
sql = """
SELECT DISTINCT astrID_wi, witness, Folios, `Siècle`
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,astrID_wi,witness,Folios,Siècle
0,4512229,6769_11734,002-30*,16
1,4739938,1484_11734,030-53v*,16
2,9876248,913_11734,066v-69v,16
3,6792546,969_11734,"074v-140 , p. 280-288",16
4,3068424,12764_11895,001-233,16


In [104]:
# astrID_wi, witness, Folios, Siècle from `17.3.9_pinAstr_all`

# All three witness columns are stored as literals.
literals.extend(['witness', 'Folios', 'Siècle'])

# The witness key reuses the existing pinakesID predicate.
verbs['http://example.org/vocab/pinakesID'].extend(['witness'])

# New predicates for the folio range and the century.
verbs['http://example.org/vocab/pinakesFolios'] = ['Folios']
verbs['http://example.org/vocab/pinakesCentury'] = ['Siècle']

In [105]:
# Use the witness id column as the subject of each triple.
primaryKey = 'astrID_wi'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [106]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/1248448,http://example.org/vocab/pinakesFolios,055v-61v*
1,http://example.org/id/3814921,http://example.org/vocab/inMS,http://example.org/id/40084
2,http://example.org/id/93364,http://example.org/vocab/msContains,http://example.org/id/5459803
3,http://example.org/id/1854084,http://example.org/vocab/pinakesID,12764_66260
4,http://example.org/id/2556153,http://example.org/vocab/inMS,http://example.org/id/69719


### Witness to Text

In [107]:
# Distinct (witness id, work id) pairs.
sql = """
SELECT DISTINCT astrID_wi, astrID_wo
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,astrID_wi,astrID_wo
0,4512229,963140
1,4739938,698551
2,9876248,709263
3,6792546,473477
4,3068424,697381


In [108]:
# astrID_wo, astrID_wi from `17.3.9_pinAstr_all`

# Work ids are URI references minted under the shared id prefix.
resources.extend(['astrID_wo'])

# New predicate pointing from a witness to the work it attests.
verbs['http://example.org/vocab/witnessOf'] = ['astrID_wo']

resourceSyntax['http://example.org/id/'].extend(['astrID_wo'])

In [109]:
# Use the witness id column as the subject of each triple.
primaryKey = 'astrID_wi'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [110]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/1248448,http://example.org/vocab/pinakesFolios,055v-61v*
1,http://example.org/id/3814921,http://example.org/vocab/inMS,http://example.org/id/40084
2,http://example.org/id/93364,http://example.org/vocab/msContains,http://example.org/id/5459803
3,http://example.org/id/1854084,http://example.org/vocab/pinakesID,12764_66260
4,http://example.org/id/2556153,http://example.org/vocab/inMS,http://example.org/id/69719


In [111]:
# REVERSE
# astrID_wo, astrID_wi from `17.3.9_pinAstr_all`

# New predicate for the opposite direction: from a work to its witnesses.
verbs['http://example.org/vocab/hasWitness'] = ['astrID_wi']

In [112]:
# Use the work id column as the subject of each triple.
primaryKey = 'astrID_wo'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [113]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/1248448,http://example.org/vocab/pinakesFolios,055v-61v*
1,http://example.org/id/3814921,http://example.org/vocab/inMS,http://example.org/id/40084
2,http://example.org/id/93364,http://example.org/vocab/msContains,http://example.org/id/5459803
3,http://example.org/id/1854084,http://example.org/vocab/pinakesID,12764_66260
4,http://example.org/id/2556153,http://example.org/vocab/inMS,http://example.org/id/69719


## Texts and Authors
### astrID_wo to Pinakes Info, astrID_au

In [114]:
# Distinct works with their workID, title (Oeuvre) and author id.
sql = """
SELECT DISTINCT astrID_wo, workID, Oeuvre, astrID_au
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,astrID_wo,workID,Oeuvre,astrID_au
0,963140,6769,Hermippus Siue De Astrologia,8487
1,698551,1484,Commentaria in Ptolemaei canones,3114
2,709263,913,Canon urbium insignium,8572
3,473477,969,"Canones Prompti (manuale : Ed. Heiberg Ii, P. ...",8572
4,697381,12764,Syntaxis mathematica (Almageste) (Lib. I-XIII ...,8572


In [115]:
# astrID_wo, workID, Oeuvre, astrID_au from `17.3.9_pinAstr_all`

# The work id and title are literals; the author id is a URI reference.
literals.extend(['workID', 'Oeuvre'])
resources.extend(['astrID_au'])

# workID and Oeuvre reuse the existing Pinakes id and label predicates.
verbs['http://example.org/vocab/pinakesID'].extend(['workID'])
verbs['http://example.org/vocab/pinakesLabel'].extend(['Oeuvre'])

# New predicate linking a work to its author.
verbs['http://purl.org/dc/elements/1.1/creator'] = ['astrID_au']

resourceSyntax['http://example.org/id/'].extend(['astrID_au'])

In [116]:
# Use the work id column as the subject of each triple.
primaryKey = 'astrID_wo'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [117]:
# Select every triple in the graph and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s ?2p ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,2p,3o
0,http://example.org/id/1248448,http://example.org/vocab/pinakesFolios,055v-61v*
1,http://example.org/id/212037,http://purl.org/dc/elements/1.1/creator,http://example.org/id/2092
2,http://example.org/id/3814921,http://example.org/vocab/inMS,http://example.org/id/40084
3,http://example.org/id/93364,http://example.org/vocab/msContains,http://example.org/id/5459803
4,http://example.org/id/1854084,http://example.org/vocab/pinakesID,12764_66260


### Author to Pinakes Info

In [118]:
# Distinct authors with their AuthorID and name (Auteur).
sql = """
SELECT DISTINCT astrID_au, AuthorID, Auteur
 FROM `17.3.9_pinAstr_all`
 """
with connection.cursor() as cursor:
    cursor.execute(sql)
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]
    result = cursor.fetchall()
df = pd.DataFrame(data=result, columns=names)

df.head()

Unnamed: 0,astrID_au,AuthorID,Auteur
0,8487,635,Iohannes Catrones
1,3114,2723,Theon Alexandrinus
2,8572,2416,Ptolemaeus Claudius
3,6152,857,Damianus
4,1282,2785,Triclinius Demetrius


In [119]:
# astrID_au, AuthorID, Auteur from `17.3.9_pinAstr_all`

# Both author columns are stored as literals.
literals.extend(['AuthorID', 'Auteur'])

# They reuse the existing Pinakes id and label predicates.
verbs['http://example.org/vocab/pinakesID'].extend(['AuthorID'])
verbs['http://example.org/vocab/pinakesLabel'].extend(['Auteur'])

In [120]:
# Use the author id column as the subject of each triple.
primaryKey = 'astrID_au'
g = convertToRDF(g=g, df=df, primaryKey=primaryKey)

In [121]:
# Select every subject that has a pinakesLabel, together with that label,
# and preview the first few bindings.
result = g.query("""SELECT * WHERE {
            ?1s <http://example.org/vocab/pinakesLabel> ?3o .
                }""")

pd.DataFrame(result.bindings).head()

Unnamed: 0,1s,3o
0,http://example.org/id/4611,Catalogi bibliothecarum
1,http://example.org/id/619979,Daphnis et Chloe
2,http://example.org/id/641010,Opera
3,http://example.org/id/924275,Hom In Assumptionem Domini
4,http://example.org/id/6934,Antigonus Carystius


## Some Queries

Finding all texts attributed to Euclid in the Pinakes database.

In [122]:
# Find the author labelled 'Euclides', every work whose creator is that
# author, and each such work's Pinakes label.
result = g.query("""SELECT * WHERE {
            ?au <http://example.org/vocab/pinakesLabel> 'Euclides' .
            ?wo <http://purl.org/dc/elements/1.1/creator> ?au .
            ?wo <http://example.org/vocab/pinakesLabel> ?la .
                }""")

pd.DataFrame(result.bindings)

Unnamed: 0,au,la,wo
0,http://example.org/id/5215,Optica,http://example.org/id/154837
1,http://example.org/id/5215,Opera,http://example.org/id/132659
2,http://example.org/id/5215,Phaenomena,http://example.org/id/721862
3,http://example.org/id/5215,Catoptrica (spur.),http://example.org/id/544877
4,http://example.org/id/5215,Introductio harmonica,http://example.org/id/360545
5,http://example.org/id/5215,Data,http://example.org/id/613097
6,http://example.org/id/5215,Elementa,http://example.org/id/777790
7,http://example.org/id/5215,Sectio canonis,http://example.org/id/425216
8,http://example.org/id/5215,Epigramma,http://example.org/id/422006


Count how many texts by a given author (Euclid, below) appear in each manuscript.

In [123]:
pinakes_authorName = 'Euclides'
# For this query it is necessary to know how the author is named
# in the Pinakes database.

# The author name is supplied through initBindings (as ?authorName) rather
# than spliced into the query text, so names containing quotes or backslashes
# cannot break the query. Every projected non-aggregate variable is listed in
# GROUP BY, as SPARQL 1.1 requires.
result = g.query(
        """
        PREFIX ex: <http://example.org/vocab/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        SELECT ?ms ?shelfmark (COUNT(*) AS ?c) WHERE {
            ?au ex:pinakesLabel ?authorName .
            ?wo dc:creator ?au ;
                ex:pinakesLabel ?l_wo ;
                ex:hasWitness/ex:inMS ?ms .
            ?ms ex:pinakesLabel ?l_ms ;
                ex:locatedIn/skos:prefLabel ?l_inst .
            BIND(CONCAT(?l_inst, " ", ?l_ms) AS ?shelfmark)
                }
                GROUP BY ?ms ?shelfmark
                ORDER BY DESC(?c)
                """,
        initBindings={'authorName': rdflib.Literal(pinakes_authorName)})

pd.DataFrame(result.bindings)

Unnamed: 0,c,ms,shelfmark
0,6,http://example.org/id/60423,Staatsbibliothek zu Berlin (Preussischer Kultu...
1,5,http://example.org/id/38820,Biblioteca Angelica gr. 95
2,5,http://example.org/id/54144,Biblioteca comunale degli Intronati fonds prin...
3,5,http://example.org/id/53234,Biblioteca Ambrosiana fonds principal A 101 su...
4,5,http://example.org/id/51986,Bibliothèque nationale de France (BNF) gr. 2347
5,5,http://example.org/id/27520,Biblioteca Apostolica Vaticana Vat. gr. 192
6,5,http://example.org/id/38870,Bibliotheek der Rijksuniversiteit BPG 7
7,5,http://example.org/id/48620,Bibliothèque nationale de France (BNF) gr. 2342
8,4,http://example.org/id/10018,Biblioteca Universitaria fonds principal 2048
9,4,http://example.org/id/17320,Biblioteca Apostolica Vaticana Vat. gr. 191


In [124]:
pinakes_authorName = 'Euclides'

# Authors to compare; one result row (the top manuscript) is produced for each.
pinakes_authorNames = [
    pinakes_authorName,
    'Theodosius Tripolita',
    'Autolycus astronomus',
    'Ptolemaeus Claudius',
]

# One sub-select per author, generated from a single template instead of
# being copied by hand. The author resource is matched as ?author so that
# ?au can be bound to the author's name without rebinding a variable that the
# pattern has already used. Every projected non-aggregate variable is listed
# in GROUP BY, as SPARQL 1.1 requires.
author_subquery = """
        {
        SELECT ?au ?ms ?shelfmark (COUNT(*) AS ?c)  WHERE {
            ?author ex:pinakesLabel '%(name)s' .
            ?wo dc:creator ?author ;
                ex:pinakesLabel ?l_wo ;
                ex:hasWitness/ex:inMS ?ms .
            ?ms ex:pinakesLabel ?l_ms ;
                ex:locatedIn/skos:prefLabel ?l_inst .
            BIND(CONCAT(?l_inst, " ", ?l_ms) AS ?shelfmark)
            BIND('%(name)s' AS ?au)
                }
                GROUP BY ?au ?ms ?shelfmark
                ORDER BY DESC(?c)
                LIMIT 1
        }"""

# Escape backslashes and single quotes so a name cannot end the SPARQL
# string literal it is placed in.
author_subqueries = [
    author_subquery % {'name': name.replace('\\', '\\\\').replace("'", "\\'")}
    for name in pinakes_authorNames
]

query_text = """
        PREFIX ex: <http://example.org/vocab/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        SELECT ?au ?ms ?shelfmark ?c WHERE {""" + " UNION ".join(author_subqueries) + """
        }
                """

result = g.query(query_text)

pd.DataFrame(result.bindings)

Unnamed: 0,au,c,ms,shelfmark
0,Euclides,6,http://example.org/id/60423,Staatsbibliothek zu Berlin (Preussischer Kultu...
1,Theodosius Tripolita,4,http://example.org/id/40159,Bibliothèque nationale de France (BNF) suppl. ...
2,Autolycus astronomus,3,http://example.org/id/54144,Biblioteca comunale degli Intronati fonds prin...
3,Ptolemaeus Claudius,6,http://example.org/id/92514,Bodleian Library Cromwell 12


Find all manuscripts containing at least one text from any given grouping of texts. The case below is the 11 Little Astronomy texts. `pinID`s provided to check (`pinakes.irht.cnrs.fr/notices/cote/?pinID/` -- replace `?pinID` with the given number).

In [125]:
pinakes_works = ('154837','544877','613097','721862','791205','926906','314341','966647','708053','399200','559345','150679')
# Above are the 11 LA works (technically 12 because of Theon's recension of Euclid's Optics)

# Opening of the query. The dc prefix is not used by this query; it is
# declared with the same namespace as in the other query cells.
query_prologue = """
PREFIX ex: <http://example.org/vocab/>
        PREFIX exi: <http://example.org/id/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        
        SELECT ?ms (COUNT(*) AS ?c) ?pinID WHERE {
"""

# One sub-select per work id: the witnesses of that work, the manuscript each
# witness is in, and that manuscript's pinakesID.
work_subquery = """
            {
                SELECT ?ms ?wit ?pinID WHERE {
                    ?wit ex:inMS ?ms ;
                         ex:witnessOf exi:%s ;
                         ex:inMS/ex:pinakesID ?pinID . 
                }
            }
        """

# Joining with UNION places the keyword between every pair of sub-selects,
# including when the first id occurs more than once in pinakes_works.
# ?pinID is added to GROUP BY because it is projected without an aggregate.
query = (query_prologue
         + " UNION".join(work_subquery % work for work in pinakes_works)
         + """
        }
        GROUP BY ?ms ?pinID
        ORDER BY DESC(?c)
        """)

result = g.query(query)

pd.DataFrame(result.bindings)

Unnamed: 0,c,ms,pinID
0,11,http://example.org/id/53234,42210
1,11,http://example.org/id/84028,66447
2,11,http://example.org/id/17320,66822
3,11,http://example.org/id/48620,51974
4,9,http://example.org/id/51392,66835
5,8,http://example.org/id/89276,66833
6,8,http://example.org/id/14930,51998
7,7,http://example.org/id/19133,66834
8,7,http://example.org/id/54144,58281
9,6,http://example.org/id/40159,52784


Count how many texts appear in each manuscript. `pinID`s provided to check (`pinakes.irht.cnrs.fr/notices/cote/?pinID/` -- replace `?pinID` with the given number).

In [126]:
# For every manuscript, count the solutions that link it to a work through a
# witness, and report the manuscript's pinakesID and a shelfmark built from
# the institution label and the manuscript label. Every projected
# non-aggregate variable is listed in GROUP BY, as SPARQL 1.1 requires.
result = g.query(
        """
        PREFIX ex: <http://example.org/vocab/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        SELECT ?ms ?pinID ?shelfmark (COUNT(*) AS ?c) WHERE {
            ?wo dc:creator ?au ;
                ex:pinakesLabel ?l_wo ;
                ex:hasWitness/ex:inMS ?ms .
            ?ms ex:pinakesLabel ?l_ms ;
                ex:locatedIn/skos:prefLabel ?l_inst ;
                ex:pinakesID ?pinID .
            BIND(CONCAT(?l_inst, " ", ?l_ms) AS ?shelfmark)
                }
                GROUP BY ?ms ?pinID ?shelfmark
                ORDER BY DESC(?c)
                """)

pd.DataFrame(result.bindings)

Unnamed: 0,c,ms,pinID,shelfmark
0,138,http://example.org/id/63517,14971,Real Biblioteca fonds principal ?. I. 13 (Andr...
1,67,http://example.org/id/81231,47363,Bodleian Library Barocci 76
2,57,http://example.org/id/37794,52051,Bibliothèque nationale de France (BNF) gr. 2419
3,55,http://example.org/id/58834,65435,Biblioteca Apostolica Vaticana Ottob. gr. 192
4,54,http://example.org/id/57004,68511,Biblioteca Apostolica Vaticana Vat. gr. 1882
5,47,http://example.org/id/33726,52013,Bibliothèque nationale de France (BNF) gr. 2381
6,45,http://example.org/id/40321,68527,Biblioteca Apostolica Vaticana Vat. gr. 1898
7,38,http://example.org/id/82731,52183,Bibliothèque nationale de France (BNF) gr. 2551
8,37,http://example.org/id/52320,66547,Biblioteca Apostolica Vaticana Urb. gr. 80
9,34,http://example.org/id/17320,66822,Biblioteca Apostolica Vaticana Vat. gr. 191
