It might be useful to run `pip3 install <library> --upgrade` to make sure you have the latest versions of pandas, matplotlib, etc.

In [1]:
import gzip            
import io              
import numpy as np
import pandas as pd    
import pymysql.cursors 
import rdflib
from rdflib import Namespace
import urllib.request  

import matplotlib.pyplot as plt
plt.style.use('ggplot')

%matplotlib inline

In [2]:
# Load the database connection parameters.
# I will email you the file sebastia_adsqro_params.txt . It's just a four line text file
# (the next cell uses the values as host, user, password and database name, in that order).
# Put that in the working directory and the following lines will read it.
# The `with` block closes the file handle as soon as the contents are read.
with open("sebastia_adsqro_params.txt") as params_file:
    db_params = params_file.read().split()

In [3]:
# Open the MySQL connection used by every SQL query in this notebook.
# db_params supplies host, user, password and database name (positions 0-3).
# DictCursor is pymysql's cursor class that returns each row as a dict.
connection_settings = {
    'host': db_params[0],
    'user': db_params[1],
    'password': db_params[2],
    'db': db_params[3],
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor,
}

connection = pymysql.connect(**connection_settings)

## Turn 'ramphs' into Triples

In [4]:
# Read the whole 'ramphs' table into a DataFrame, one row per database record.
sql = "select * from ramphs"

with connection.cursor() as cur:
    cur.execute(sql)
    result = cur.fetchall()
    # The first field of each cursor.description entry is the column name.
    names = [column_info[0] for column_info in cur.description]

df = pd.DataFrame(result, columns=names)

# Preview the first few records.
df.head()

Unnamed: 0,id,title,label,pleiades,welch,golvin,type,capacity,modcountry,province,...,certainty,youtube,extmajor,extminor,arenamajor,arenaminor,latitude,longitude,elevation,pleiades_id
0,duraEuroposAmphitheater,Amphitheater at Dura Europos,Dura,http://pleiades.stoa.org/places/893989,,129.0,amphitheater,1000.0,Syria,syria,...,,,50.0,44.0,31.0,25.0,34.749855,40.728926,223,893989
1,arlesAmphitheater,Amphitheater at Arles,Arles,http://pleiades.stoa.org/places/148217,,154.0,amphitheater,20000.0,France,narbonensis,...,,https://www.youtube.com/watch?v=oCz-76hb1LU,136.0,107.0,47.0,32.0,43.677778,4.631111,21,148217
2,lyonAmphitheater,Amphitheater at Lyon,Lyon,http://pleiades.stoa.org/places/167717,,,amphitheater,20000.0,France,lugdunensis,...,,,105.0,,67.6,42.0,45.770556,4.830556,206,167717
3,ludusMagnusArena,Ludus Magnus Arena,Ludus Magnus,http://pleiades.stoa.org/places/423025,,,practice-arena,,Italy,,...,,,,,,,41.88995,12.494913,22,423025
4,romeFlavianAmphitheater,Flavian Amphitheater at Rome,Colosseum,http://pleiades.stoa.org/places/423025,,152.0,amphitheater,50000.0,Italy,,...,,https://www.youtube.com/watch?v=kxwenPo1grc,189.0,156.0,83.0,48.0,41.890169,12.492269,22,423025


In [5]:
# Reshape the wide table into long form: one (id, variable, value) row per
# cell, which already has the subject / predicate / object shape of a triple.
# Empty strings are treated as missing, and rows with any missing field are
# discarded so that no triple is emitted for them.
pseudo_triples = (
    pd.melt(df, id_vars='id')
    .replace('', np.nan)
    .dropna()
)
# To inspect the result: pseudo_triples.head()

In [6]:
# URI prefixes for the terms minted below: the W3C RDF vocabulary plus three
# placeholder example.org namespaces (classes, subject ids, properties).
RDF = rdflib.Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
EXclass = rdflib.Namespace("http://example.org/class/")
EXid = rdflib.Namespace("http://example.org/id/")
EXproperty = rdflib.Namespace("http://example.org/property/")

# The graph that collects the triples generated from the database tables.
g = rdflib.Graph()

In [7]:
# Turn every melted (id, variable, value) row of 'ramphs' into a triple in g.
#   subject   -> http://example.org/id/<id>
#   predicate -> http://example.org/property/<variable>
#   object    -> numeric literal for the measurement columns listed below,
#                a URI reference for the 'pleiades' link,
#                a plain string literal for everything else.
numeric_variables = {'arenamajor', 'arenaminor', 'capacity', 'elevation',
                     'extmajor', 'extminor', 'longitude', 'latitude'}

for _, row in pseudo_triples.iterrows():
    variable = row['variable']
    value = row['value']

    s = rdflib.URIRef('http://example.org/id/%s' % row['id'])
    p = rdflib.URIRef('http://example.org/property/%s' % variable)

    # Every subject from this table is typed as an amphitheater. This runs once
    # per row, but re-adding an existing triple leaves the graph unchanged.
    g.add((s, RDF.type, EXclass.amphitheater))

    if variable in numeric_variables:
        o = rdflib.Literal(float(value))
    elif variable == 'pleiades':
        o = rdflib.URIRef(value)
    else:
        o = rdflib.Literal(str(value))

    g.add((s, p, o))

In [8]:
# Spot check: fetch every (subject, value) pair stored under the capacity property.
capacity_query = """SELECT * WHERE {
            ?s <http://example.org/property/capacity> ?o .
                } """
result = g.query(capacity_query)
# To view the bindings as a table: pd.DataFrame(result.bindings)

## Turn 'chronogroups' into Triples

In [9]:
# Read the whole 'ramphs_chronogrps' table into a DataFrame, replacing the
# amphitheater frame previously bound to df.
sql = "select * from ramphs_chronogrps"

with connection.cursor() as cur:
    cur.execute(sql)
    result = cur.fetchall()
    # The first field of each cursor.description entry is the column name.
    names = [column_info[0] for column_info in cur.description]

df = pd.DataFrame(result, columns=names)
# To preview the records: df.head()

In [10]:
# Keep the original 'id' text as a human-readable 'label', then derive a new
# 'id' with the spaces removed so it can be embedded in a URI.
# index=str also converts the row labels to strings.
df = df.rename(index=str, columns={"id": "label"})
df = df.assign(id=df['label'].replace(r' ', '', regex=True))
# To preview the result: df.head()

In [11]:
# Reshape the chronological groups into long (id, variable, value) rows,
# ordered by id. Empty strings count as missing, and rows with any missing
# field are dropped so that no triple is emitted for them.
pseudo_triples = (
    pd.melt(df, id_vars='id')
    .sort_values('id')
    .replace('', np.nan)
    .dropna()
)

# Turn every remaining row into a triple in g.
#   subject   -> http://example.org/id/<id>
#   predicate -> http://example.org/property/<variable>
#   object    -> numeric literal for 'start' / 'end', string literal otherwise.
# The former `elif variable in ['']` URI branch was removed: 'variable' holds
# column names and empty strings are dropped above, so it could never run.
numeric_variables = {'start', 'end'}

for _, row in pseudo_triples.iterrows():
    variable = row['variable']
    value = row['value']

    s = rdflib.URIRef('http://example.org/id/%s' % row['id'])
    p = rdflib.URIRef('http://example.org/property/%s' % variable)

    # Every subject from this table is typed as a chronogrp. This runs once
    # per row, but re-adding an existing triple leaves the graph unchanged.
    g.add((s, RDF.type, EXclass.chronogrp))

    if variable in numeric_variables:
        o = rdflib.Literal(float(value))
    else:
        o = rdflib.Literal(str(value))

    g.add((s, p, o))

In [12]:
# Spot check: list up to ten property/value pairs stored for the FirstCentury subject.
first_century_query = """SELECT ?p ?o WHERE {
            <http://example.org/id/FirstCentury> ?p ?o .
                } LIMIT 10"""
result = g.query(first_century_query)
# To view the bindings as a table: pd.DataFrame(result.bindings)

In [13]:
# Join the two data sets inside the graph: each amphitheater's 'chronogrp'
# value is matched against a chronological group's 'label', and the rows are
# ordered by that group's 'end' value.
ramph_chrono_query = """SELECT * WHERE {
            ?ramph a <http://example.org/class/amphitheater> ;
                    <http://example.org/property/chronogrp> ?rchrono .
                    
            ?chronogrp <http://example.org/property/label> ?rchrono ;
                       <http://example.org/property/end> ?end
                } ORDER BY ?end"""
result = g.query(ramph_chrono_query)

# Show the first few joined rows.
pd.DataFrame(result.bindings).head()

Unnamed: 0,chronogrp,end,ramph,rchrono
0,http://example.org/id/Caesarean,-44.0,http://example.org/id/espejoAmphitheater,Caesarean
1,http://example.org/id/Caesarean,-44.0,http://example.org/id/carmonaAmphitheater,Caesarean
2,http://example.org/id/Caesarean,-44.0,http://example.org/id/corinthAmphitheater,Caesarean
3,http://example.org/id/Republican,-31.0,http://example.org/id/pozzuoliEarlyAmphitheater,Republican
4,http://example.org/id/Republican,-31.0,http://example.org/id/sutriumAmphitheater,Republican


In [14]:
# Write the complete graph to 'ramphs.ttl' in Turtle syntax.
g.serialize(format='turtle', destination='ramphs.ttl')