It might be useful to run `pip3 install <library> --upgrade` to make sure you have the latest versions of pandas, matplotlib, etc.

In [None]:
import gzip            
import io              
import numpy as np
import pandas as pd    
import pymysql.cursors 
import rdflib
from rdflib import Namespace
import urllib.request  

import matplotlib.pyplot as plt
plt.style.use('ggplot')

%matplotlib inline

In [None]:
# Load the database connection parameters.
# I will email you the file sebastia_adsqro_params.txt . It's just a four line text file
# (host, user, password and database name, in that order).
# Put that in this directory and the following lines will read it.
with open("sebastia_adsqro_params.txt") as params_file:
    # split() breaks on any whitespace, so none of the four values may contain spaces.
    db_params = params_file.read().split()

In [None]:
# Open the database connection from the four values held in db_params
# (host, user, password, database name, in that order).
connection_settings = {
    'host': db_params[0],
    'user': db_params[1],
    'password': db_params[2],
    'db': db_params[3],
    'charset': 'utf8mb4',
    # DictCursor hands back each row as a dict keyed by column name.
    'cursorclass': pymysql.cursors.DictCursor,
}
connection = pymysql.connect(**connection_settings)

## Turn 'ramphs' into Triples

In [None]:
# Pull every row of the 'ramphs' table into a DataFrame.
sql = "select * from ramphs"

with connection.cursor() as cursor:
    cursor.execute(sql)
    result = cursor.fetchall()
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]

df = pd.DataFrame(data=result, columns=names)
df.head()

In [None]:
# Reshape the wide table into long (id, variable, value) rows, then discard
# every row that contains an empty string or a missing value.
pseudo_triples = (
    pd.melt(df, id_vars='id')
    .replace('', np.nan)
    .dropna()
)
# pseudo_triples.head()

In [None]:
# Namespaces for the RDF vocabulary and for the example.org classes, ids and properties.
RDF = rdflib.Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
EXclass = rdflib.Namespace("http://example.org/class/")
EXid = rdflib.Namespace("http://example.org/id/")
EXproperty = rdflib.Namespace("http://example.org/property/")

# Graph that collects all generated triples.
g = rdflib.Graph()

In [None]:
# Columns whose values are stored as float literals.
numeric_properties = {'arenamajor', 'arenaminor', 'capacity', 'elevation',
                      'extmajor', 'extminor', 'longitude', 'latitude'}
# Columns whose values are stored as URIs rather than literals.
uri_properties = {'pleiades'}

# Add one triple per (id, variable, value) row; every subject is also typed
# as an amphitheater.
for _, row in pseudo_triples.iterrows():
    variable = row['variable']
    value = row['value']

    s = rdflib.URIRef('http://example.org/id/%s' % row['id'])
    p = rdflib.URIRef('http://example.org/property/%s' % variable)

    g.add((s, RDF.type, EXclass.amphitheater))

    if variable in numeric_properties:
        o = rdflib.Literal(float(value))
    elif variable in uri_properties:
        o = rdflib.URIRef(value)
    else:
        o = rdflib.Literal(str(value))
    g.add((s, p, o))

In [None]:
# Fetch every subject that has a 'capacity' property, together with its value.
capacity_query = """SELECT * WHERE {
            ?s <http://example.org/property/capacity> ?o .
                } """
result = g.query(capacity_query)
# pd.DataFrame(result.bindings)

## Turn 'chronogroups' into Triples

In [None]:
# Pull every row of the 'ramphs_chronogrps' table into a DataFrame.
sql = "select * from ramphs_chronogrps"

with connection.cursor() as cursor:
    cursor.execute(sql)
    result = cursor.fetchall()
    # The first item of each cursor.description entry is used as the column name.
    names = [column[0] for column in cursor.description]

df = pd.DataFrame(data=result, columns=names)
# df.head()

In [None]:
# Keep the original identifier as 'label' and add a new 'id' column holding
# the same text with all spaces removed. index=str turns the row labels into
# strings, as before.
df = df.rename(index=str, columns={"id": "label"})
df = df.assign(id=df['label'].replace(r' ', '', regex=True))
# df.head()

In [None]:
# Reshape the chronogroup table into long (id, variable, value) rows ordered
# by id, then discard every row that contains an empty string or a missing value.
pseudo_triples = pd.melt(df, id_vars='id').sort_values('id')
pseudo_triples = pseudo_triples.replace('', np.nan).dropna()

# Columns whose values are stored as float literals.
numeric_properties = {'start', 'end'}

# Add one triple per (id, variable, value) row; every subject is also typed
# as a chronogrp.
for _, row in pseudo_triples.iterrows():
    variable = row['variable']
    value = row['value']

    s = rdflib.URIRef('http://example.org/id/%s' % row['id'])
    p = rdflib.URIRef('http://example.org/property/%s' % variable)

    g.add((s, RDF.type, EXclass.chronogrp))

    # No column is treated as a URI here: a test against an empty column name
    # could never match, because '' is replaced by NaN and dropped above.
    if variable in numeric_properties:
        o = rdflib.Literal(float(value))
    else:
        o = rdflib.Literal(str(value))
    g.add((s, p, o))

In [None]:
# List up to ten (property, value) pairs recorded for the FirstCentury subject.
first_century_query = """SELECT ?p ?o WHERE {
            <http://example.org/id/FirstCentury> ?p ?o .
                } LIMIT 10"""
result = g.query(first_century_query)
# pd.DataFrame(result.bindings)

In [None]:
# Match each amphitheater's 'chronogrp' value against a subject carrying the
# same value as its 'label', and return the matches ordered by that subject's 'end'.
chronology_query = """SELECT * WHERE {
            ?ramph a <http://example.org/class/amphitheater> ;
                    <http://example.org/property/chronogrp> ?rchrono .
                    
            ?chronogrp <http://example.org/property/label> ?rchrono ;
                       <http://example.org/property/end> ?end
                } ORDER BY ?end"""
result = g.query(chronology_query)

pd.DataFrame(result.bindings).head()

In [None]:
# Write the whole graph to 'ramphs.ttl' in Turtle syntax.
g.serialize(format='turtle', destination='ramphs.ttl')