In [432]:
import gzip            
import io              
import numpy as np
import pandas as pd    
import pymysql.cursors 
import rdflib
from rdflib import Namespace
import urllib.request 
import math

import matplotlib.pyplot as plt
plt.style.use('ggplot')

%matplotlib inline

# Connect to the database

connection = pymysql.connect(host='hosting.nyu.edu',
                             user='cmrougha_adsq',
                             password='###REPLACE###',
                             db='cmrougha_adsq2017',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# Date

In [433]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = pd.DataFrame(result, columns = names)
print(len(df))
df.head()

4727


Unnamed: 0,astrID_wi,DateLit,Siecle,Date
0,9331503,,16,Fragmentum
1,9593102,,10,925
2,8490393,,19,1898
3,5911876,,19,1868
4,7772967,,19,1855


## Date: Simplest Case (925)

In [434]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date, CAST(Date AS DECIMAL(4,0)) AS hasExactDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '% %' )
AND ( Date NOT LIKE '%.%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = pd.DataFrame(result, columns = names)
print(len(df))
df.head()

2413


Unnamed: 0,astrID_wi,DateLit,Siecle,Date,hasExactDate
0,9593102,,10,925,925
1,8490393,,19,1898,1898
2,5911876,,19,1868,1868
3,7772967,,19,1855,1855
4,9792452,,19,1855,1855


## Date: Simple Range (1849-1853)

In [435]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date, CAST(SUBSTRING_INDEX(Date,'-',1) AS DECIMAL(4,0)) AS hasExactStartDate, CAST(SUBSTRING_INDEX(Date,'-',-1) AS DECIMAL(4,0)) AS hasExactEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE '%-%' )
AND ( Date NOT LIKE '% %' )
AND ( Date NOT LIKE '%.%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

3558


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasExactDate,hasExactEndDate,hasExactStartDate
0,925,,10,9593102,925,,
1,1898,,19,8490393,1898,,
2,1868,,19,5911876,1868,,
3,1855,,19,7772967,1855,,
4,1855,,19,9792452,1855,,


In [436]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date, CAST(SUBSTRING_INDEX(Date,'/',1) AS DECIMAL(4,0)) AS hasExactStartDate, CAST(SUBSTRING_INDEX(Date,'/',-1) AS DECIMAL(4,0)) AS hasExactEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '% %' )
AND ( Date NOT LIKE '%.%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

3566


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasExactDate,hasExactEndDate,hasExactStartDate
0,925,,10,9593102,925,,
1,1898,,19,8490393,1898,,
2,1868,,19,5911876,1868,,
3,1855,,19,7772967,1855,,
4,1855,,19,9792452,1855,,


In [437]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date, CAST(SUBSTRING_INDEX(Date,', ',1) AS DECIMAL(4,0)) AS hasExactStartDate, CAST(SUBSTRING_INDEX(Date,', ',-1) AS DECIMAL(4,0)) AS hasExactEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE '%, %' )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '%.%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

3577


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasExactDate,hasExactEndDate,hasExactStartDate
0,925,,10,9593102,925,,
1,1898,,19,8490393,1898,,
2,1868,,19,5911876,1868,,
3,1855,,19,7772967,1855,,
4,1855,,19,9792452,1855,,


## Date: Simple Approximation (1845 ca OR 1785 ca. OR 1500 circa)

In [438]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date, CAST(SUBSTRING_INDEX(Date,' ',1) AS DECIMAL(4,0)) AS hasApproxDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE '% ca%' OR Date LIKE '%circa%' )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

4367


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasExactDate,hasExactEndDate,hasExactStartDate
0,925,,10,9593102,,925,,
1,1898,,19,8490393,,1898,,
2,1868,,19,5911876,,1868,,
3,1855,,19,7772967,,1855,,
4,1855,,19,9792452,,1855,,


In [439]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(Date,' ',1),'-',1) AS DECIMAL(4,0)) AS hasApproxStartDate, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(Date,' ',1),'-',-1) AS DECIMAL(4,0)) AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE '% ca%' OR Date LIKE '%circa%' )
AND ( Date LIKE '%-%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

4484


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate
0,925,,10,9593102,,,,925,,
1,1898,,19,8490393,,,,1898,,
2,1868,,19,5911876,,,,1868,,
3,1855,,19,7772967,,,,1855,,
4,1855,,19,9792452,,,,1855,,


In [440]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(Date,' ',1),'-',1) AS DECIMAL(4,0)) AS hasUncertainStartDate, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(Date,' ',1),'-',-1) AS DECIMAL(4,0)) AS hasUncertainEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE '%-%' )
AND ( Date LIKE '%?%' )
AND ( Date NOT LIKE '%c%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

4485


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,
1,1898,,19,8490393,,,,1898,,,,
2,1868,,19,5911876,,,,1868,,,,
3,1855,,19,7772967,,,,1855,,,,
4,1855,,19,9792452,,,,1855,,,,


In [441]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date, CAST(SUBSTRING_INDEX(Date,' ',1) AS DECIMAL(4,0)) AS hasUncertainDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date NOT LIKE '% c%' )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

4569


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [442]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(Siecle,"00") AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(SUBSTRING_INDEX(Date,' ',1) AS DECIMAL(4,0)) AS hasExactEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE "%ante%" )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '%.%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )AND ( Siecle != "" )
AND ( Siecle NOT LIKE "?" )
AND ( Siecle NOT LIKE "% %" )
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%/%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%.%" )
AND ( Siecle NOT LIKE "%?%" )
AND ( Siecle NOT LIKE "%(%" )
AND ( Siecle NOT LIKE "%)%" )
AND ( Siecle NOT LIKE "%i%" )
AND ( Siecle NOT LIKE "%e%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

4697


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [443]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Siecle,' ',''),'(1/2)','00'),'(2/2)','50'),'(1/3)','00'),'(2/3)','33'),'(3/3)','66'),'(1/4)','00'),'(2/4)','25'),'(3/4)','50'),'(4/4)','75'), 'mex.', 'mex'), 'mex', '40'), 'in.', 'in'), 'in', '00'), 'med.', 'med'), 'med','40'), 'ex.', 'ex'), 'ex','85') AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(SUBSTRING_INDEX(Date,' ',1) AS DECIMAL(4,0)) AS hasExactEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE "%ante%" )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '%.%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
AND ( ( Siecle LIKE "%in%" )
OR ( Siecle LIKE "%med%" )
OR ( Siecle LIKE "%ex%" )
OR ( Siecle LIKE "%(1/2)%" )
OR ( Siecle LIKE "%(2/2)%" )
OR ( Siecle LIKE "%(1/3)%" )
OR ( Siecle LIKE "%(2/3)%" )
OR ( Siecle LIKE "%(3/3)%" )
OR ( Siecle LIKE "%(1/4)%" )
OR ( Siecle LIKE "%(2/4)%" )
OR ( Siecle LIKE "%(3/4)%" )
OR ( Siecle LIKE "%(4/4)%" )
)
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%?%" )

ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

4699


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [444]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(SUBSTRING_INDEX(Date,' ',1) AS DECIMAL(4,0)) AS hasExactStartDate,
 CAST(CONCAT(Siecle,"00") AS DECIMAL(4,0)) - 1 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE "%post%" )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '%.%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
AND ( Siecle NOT LIKE "?" )
AND ( Siecle NOT LIKE "% %" )
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%/%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%.%" )
AND ( Siecle NOT LIKE "%?%" )
AND ( Siecle NOT LIKE "%(%" )
AND ( Siecle NOT LIKE "%)%" )
AND ( Siecle NOT LIKE "%i%" )
AND ( Siecle NOT LIKE "%e%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

4701


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [445]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(SUBSTRING_INDEX(Date,' ',1) AS DECIMAL(4,0)) AS hasExactStartDate,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Siecle,' ',''),'(1/2)','49'),'(2/2)','99'),'(1/3)','32'),'(2/3)','65'),'(3/3)','99'),'(1/4)','24'),'(2/4)','49'),'(3/4)','74'),'(4/4)','99'), 'mex.', 'mex'), 'mex', '60'), 'in.', 'in'), 'in', '15'), 'med.', 'med'), 'med','60'), 'ex.', 'ex'), 'ex','99') AS DECIMAL(4,0)) - 100 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE "%post%" )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '%.%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
AND ( ( Siecle LIKE "%in%" )
OR ( Siecle LIKE "%med%" )
OR ( Siecle LIKE "%ex%" )
OR ( Siecle LIKE "%(1/2)%" )
OR ( Siecle LIKE "%(2/2)%" )
OR ( Siecle LIKE "%(1/3)%" )
OR ( Siecle LIKE "%(2/3)%" )
OR ( Siecle LIKE "%(3/3)%" )
OR ( Siecle LIKE "%(1/4)%" )
OR ( Siecle LIKE "%(2/4)%" )
OR ( Siecle LIKE "%(3/4)%" )
OR ( Siecle LIKE "%(4/4)%" )
)
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%?%" )

ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

4723


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [446]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(Siecle,"00") AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(Date,' ',1),'/',-1) AS DECIMAL(4,0)) AS hasExactEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE "%ante%" )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '%.%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
AND ( Siecle != "" )
AND ( Siecle NOT LIKE "?" )
AND ( Siecle NOT LIKE "% %" )
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%/%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%.%" )
AND ( Siecle NOT LIKE "%?%" )
AND ( Siecle NOT LIKE "%(%" )
AND ( Siecle NOT LIKE "%)%" )
AND ( Siecle NOT LIKE "%i%" )
AND ( Siecle NOT LIKE "%e%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

4724


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [447]:
# There is no instance of eg 1488/1489 post in the table
sql = """
SELECT astrID_wi, DateLit, Siecle, Date, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(Date,' ',1),'/',1) AS DECIMAL(4,0)) AS hasExactStartDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date LIKE "%post%" )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '%.%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

#df = pd.DataFrame(result, columns = names)
#print(len(df))
#df.head()
print("no results")

no results


In [448]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date, Date AS hasExactDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NOT NULL )
AND ( Date != "" )
AND ( Date NOT LIKE '%-%' )
AND ( Date NOT LIKE '% %' )
AND ( Date LIKE '%.%' )
AND ( Date NOT LIKE '%,%' )
AND ( Date NOT LIKE '%?%' )
AND ( Date NOT LIKE '%/%' )
AND ( Date NOT LIKE '%F%' )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

4726


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


# Siecle

In [449]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle NOT LIKE "?" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

#df = pd.DataFrame(result, columns = names)
#print(len(df))
#df.head()

In [450]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(Siecle,"00") AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(CONCAT(Siecle,"00") AS DECIMAL(4,0)) - 1 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle NOT LIKE "?" )
AND ( Siecle NOT LIKE "% %" )
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%/%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%.%" )
AND ( Siecle NOT LIKE "%?%" )
AND ( Siecle NOT LIKE "%(%" )
AND ( Siecle NOT LIKE "%)%" )
AND ( Siecle NOT LIKE "%i%" )
AND ( Siecle NOT LIKE "%e%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

#for index, row in df.iterrows():
#    if len(str(row['hasApproxDateStart'])) < 3 or len(str(row['hasApproxDateStart'])) > 4:
#        print(row)
#    elif len(str(row['hasApproxDateEnd'])) < 3 or len(str(row['hasApproxDateEnd'])) > 4:
#        print(row)

24338


  self._do_get_result()


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [451]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(REPLACE(REPLACE(Siecle,' ?',''),'?',''),"00") AS DECIMAL(4,0)) - 100 AS hasUncertainStartDate,
 CAST(CONCAT(REPLACE(REPLACE(Siecle,' ?',''),'?',''),"00") AS DECIMAL(4,0)) - 1 AS hasUncertainEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle NOT LIKE "?" )
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%/%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%.%" )
AND ( Siecle LIKE "%?%" )
AND ( Siecle NOT LIKE "%(%" )
AND ( Siecle NOT LIKE "%)%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()



24351


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [452]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Siecle,' ',''),'(1/2)','00'),'(2/2)','50'),'(1/3)','00'),'(2/3)','33'),'(3/3)','66'),'(1/4)','00'),'(2/4)','25'),'(3/4)','50'),'(4/4)','75'), 'mex.', 'mex'), 'mex', '40'), 'in.', 'in'), 'in', '00'), 'med.', 'med'), 'med','40'), 'ex.', 'ex'), 'ex','85') AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Siecle,' ',''),'(1/2)','49'),'(2/2)','99'),'(1/3)','32'),'(2/3)','65'),'(3/3)','99'),'(1/4)','24'),'(2/4)','49'),'(3/4)','74'),'(4/4)','99'), 'mex.', 'mex'), 'mex', '60'), 'in.', 'in'), 'in', '15'), 'med.', 'med'), 'med','60'), 'ex.', 'ex'), 'ex','99') AS DECIMAL(4,0)) - 100 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( ( Siecle LIKE "%in%" )
OR ( Siecle LIKE "%med%" )
OR ( Siecle LIKE "%ex%" )
OR ( Siecle LIKE "%(1/2)%" )
OR ( Siecle LIKE "%(2/2)%" )
OR ( Siecle LIKE "%(1/3)%" )
OR ( Siecle LIKE "%(2/3)%" )
OR ( Siecle LIKE "%(3/3)%" )
OR ( Siecle LIKE "%(1/4)%" )
OR ( Siecle LIKE "%(2/4)%" )
OR ( Siecle LIKE "%(3/4)%" )
OR ( Siecle LIKE "%(4/4)%" )
)
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%?%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()



30889


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [453]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Siecle,' ',''),'?',''),'(1/2)','00'),'(2/2)','50'),'(1/3)','00'),'(2/3)','33'),'(3/3)','66'),'(1/4)','00'),'(2/4)','25'),'(3/4)','50'),'(4/4)','75'), 'mex.', 'mex'), 'mex', '40'), 'in.', 'in'), 'in', '00'), 'med.', 'med'), 'med','40'), 'ex.', 'ex'), 'ex','85') AS DECIMAL(4,0)) - 100 AS hasUncertainStartDate,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(Siecle,' ',''),'?',''),'(1/2)','49'),'(2/2)','99'),'(1/3)','32'),'(2/3)','65'),'(3/3)','99'),'(1/4)','24'),'(2/4)','49'),'(3/4)','74'),'(4/4)','99'), 'mex.', 'mex'), 'mex', '60'), 'in.', 'in'), 'in', '15'), 'med.', 'med'), 'med','60'), 'ex.', 'ex'), 'ex','99') AS DECIMAL(4,0)) - 100 AS hasUncertainEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( ( Siecle LIKE "%in%" )
OR ( Siecle LIKE "%med%" )
OR ( Siecle LIKE "%ex%" )
OR ( Siecle LIKE "%(1/2)%" )
OR ( Siecle LIKE "%(2/2)%" )
OR ( Siecle LIKE "%(1/3)%" )
OR ( Siecle LIKE "%(2/3)%" )
OR ( Siecle LIKE "%(3/3)%" )
OR ( Siecle LIKE "%(1/4)%" )
OR ( Siecle LIKE "%(2/4)%" )
OR ( Siecle LIKE "%(3/4)%" )
OR ( Siecle LIKE "%(4/4)%" )
)
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle LIKE "%?%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

30890


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [454]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(SUBSTRING_INDEX(Siecle,'-',1),"00") AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(CONCAT(SUBSTRING_INDEX(Siecle,'-',-1),"00") AS DECIMAL(4,0)) - 1 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle LIKE "__-__" OR Siecle LIKE "_-__" OR Siecle LIKE "_-_" )
AND ( Siecle NOT LIKE "%/%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%.%" )
AND ( Siecle NOT LIKE "%(%" )
AND ( Siecle NOT LIKE "%)%" )
AND ( Siecle NOT LIKE "%*%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()



35294


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [455]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(SUBSTRING_INDEX(REPLACE(REPLACE(Siecle,' ?',''),'?',''),'-',1),"00") AS DECIMAL(4,0)) - 100 AS hasUncertainStartDate,
 CAST(CONCAT(SUBSTRING_INDEX(REPLACE(REPLACE(Siecle,' ?',''),'?',''),'-',-1),"00") AS DECIMAL(4,0)) - 1 AS hasUncertainEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle LIKE "__-__?" OR Siecle LIKE "__-__ ?"
OR Siecle LIKE "_-__?" OR Siecle LIKE "_-__ ?"
OR Siecle LIKE "_-_?" OR Siecle LIKE "_-_ ?")
AND ( Siecle NOT LIKE "%/%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%.%" )
AND ( Siecle NOT LIKE "%(%" )
AND ( Siecle NOT LIKE "%)%" )
AND ( Siecle NOT LIKE "%*%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

35296


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [456]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(SUBSTRING_INDEX(REPLACE(Siecle,'*',''),'-',1),"00") - 100 AS DECIMAL(4,0)) AS hasUncertainStartDate,
 CAST(CONCAT(SUBSTRING_INDEX(REPLACE(Siecle,'*',''),'-',-1),"00") - 1 AS DECIMAL(4,0)) AS hasUncertainEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle LIKE "%*%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

35300


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [457]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(SUBSTRING_INDEX(Siecle,'-',1),' ',''),'(1/2)','00'),'(2/2)','50'),'(1/3)','00'),'(2/3)','33'),'(3/3)','66'),'(1/4)','00'),'(2/4)','25'),'(3/4)','50'),'(4/4)','75'), 'mex.', 'mex'), 'mex', '40'), 'in.', 'in'), 'in', '00'), 'med.', 'med'), 'med','40'), 'ex.', 'ex'), 'ex','85') AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(SUBSTRING_INDEX(Siecle,'-',-1),' ',''),'(1/2)','49'),'(2/2)','99'),'(1/3)','32'),'(2/3)','65'),'(3/3)','99'),'(1/4)','24'),'(2/4)','49'),'(3/4)','74'),'(4/4)','99'), 'mex.', 'mex'), 'mex', '60'), 'in.', 'in'), 'in', '15'), 'med.', 'med'), 'med','60'), 'ex.', 'ex'), 'ex','99') AS DECIMAL(4,0)) - 100 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( ( Siecle LIKE "%in%" )
OR ( Siecle LIKE "%med%" )
OR ( Siecle LIKE "%ex%" )
OR ( Siecle LIKE "%(1/2)%" )
OR ( Siecle LIKE "%(2/2)%" )
OR ( Siecle LIKE "%(1/3)%" )
OR ( Siecle LIKE "%(2/3)%" )
OR ( Siecle LIKE "%(3/3)%" )
OR ( Siecle LIKE "%(1/4)%" )
OR ( Siecle LIKE "%(2/4)%" )
OR ( Siecle LIKE "%(3/4)%" )
OR ( Siecle LIKE "%(4/4)%" )
)
AND ( Siecle LIKE "%-%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle NOT LIKE "%?%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()



35507


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [458]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date#,
 #CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(SUBSTRING_INDEX(Siecle,'-',1),' ',''),'(1/2)','00'),'(2/2)','50'),'(1/3)','00'),'(2/3)','33'),'(3/3)','66'),'(1/4)','00'),'(2/4)','25'),'(3/4)','50'),'(4/4)','75'), 'mex.', 'mex'), 'mex', '40'), 'in.', 'in'), 'in', '00'), 'med.', 'med'), 'med','40'), 'ex.', 'ex'), 'ex','85') AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 #CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(SUBSTRING_INDEX(Siecle,'-',-1),' ',''),'?',''),'(1/2)','49'),'(2/2)','99'),'(1/3)','32'),'(2/3)','65'),'(3/3)','99'),'(1/4)','24'),'(2/4)','49'),'(3/4)','74'),'(4/4)','99'), 'mex.', 'mex'), 'mex', '60'), 'in.', 'in'), 'in', '15'), 'med.', 'med'), 'med','60'), 'ex.', 'ex'), 'ex','99') AS DECIMAL(4,0)) - 100 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( ( Siecle LIKE "%in%" )
OR ( Siecle LIKE "%med%" )
OR ( Siecle LIKE "%ex%" )
OR ( Siecle LIKE "%(1/2)%" )
OR ( Siecle LIKE "%(2/2)%" )
OR ( Siecle LIKE "%(1/3)%" )
OR ( Siecle LIKE "%(2/3)%" )
OR ( Siecle LIKE "%(3/3)%" )
OR ( Siecle LIKE "%(1/4)%" )
OR ( Siecle LIKE "%(2/4)%" )
OR ( Siecle LIKE "%(3/4)%" )
OR ( Siecle LIKE "%(4/4)%" )
)
AND ( Siecle LIKE "%-%" )
AND ( Siecle NOT LIKE "%,%" )
AND ( Siecle LIKE "%?%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

#df = pd.DataFrame(result, columns = names)
#print(len(df))
#df.head()
print("No results")

No results


In [459]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(SUBSTRING_INDEX(Siecle,', ',1),"00") AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(CONCAT(SUBSTRING_INDEX(Siecle,', ',-1),"00") AS DECIMAL(4,0)) - 1 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle NOT LIKE "?" )
AND ( Siecle LIKE "%_, __" )
AND ( Siecle NOT LIKE "%.%" )
AND ( Siecle NOT LIKE "%?%" )
AND ( Siecle NOT LIKE "%-%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()



35931


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [460]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(SUBSTRING_INDEX(Siecle,', ',1),"00") AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(CONCAT(REPLACE(SUBSTRING_INDEX(Siecle,', ',-1),'?',''),"00") AS DECIMAL(4,0)) - 1 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle NOT LIKE "?" )
AND ( Siecle LIKE "%_, __" OR Siecle LIKE "%_, __?" OR Siecle LIKE "%_, __ ?" )
AND ( Siecle NOT LIKE "%.%" )
AND ( Siecle LIKE "%?%" )
AND ( Siecle NOT LIKE "%-%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

#df = pd.DataFrame(result, columns = names)
#print(len(df))
#df.head()
print("No results")

No results


In [461]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(SUBSTRING_INDEX(SUBSTRING_INDEX(Siecle,'-',1),', ',1),"00") AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(CONCAT(SUBSTRING_INDEX(SUBSTRING_INDEX(Siecle,'-',-1),', ',-1),"00") AS DECIMAL(4,0)) - 1 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle NOT LIKE "?" )
AND ( Siecle NOT LIKE "." )
AND ( Siecle LIKE "%,%" )
AND ( Siecle NOT LIKE "__, __" )
AND ( Siecle LIKE "%-%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()



36007


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [462]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(REPLACE(Siecle,' 1/2','00') AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(REPLACE(Siecle,' 1/2','50') AS DECIMAL(4,0)) - 100 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle NOT LIKE "?" )
AND ( Siecle LIKE "% 1/2%" )
AND ( Siecle NOT LIKE "%(%" )
AND ( Siecle NOT LIKE "%)%" )
AND ( Siecle NOT LIKE "%,%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

36009


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [463]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(REPLACE(Siecle,' (2-2)','50') AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(REPLACE(Siecle,' (2-2)','99') AS DECIMAL(4,0)) - 100 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( Siecle LIKE "%(%" )
AND ( Siecle NOT LIKE "%/%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

36010


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [464]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(CONCAT(SUBSTRING_INDEX(Siecle,',',1),"00") AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(SUBSTRING_INDEX(Siecle,',',-1),' ',''),'(1/2)','49'),'(2/2)','99'),'(1/3)','32'),'(2/3)','65'),'(3/3)','99'),'(1/4)','24'),'(2/4)','49'),'(3/4)','74'),'(4/4)','99'), 'mex.', 'mex'), 'mex', '60'), 'in.', 'in'), 'in', '15'), 'med.', 'med'), 'med','60'), 'ex.', 'ex'), 'ex','99') AS DECIMAL(4,0)) - 100 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( ( Siecle LIKE "%in%" )
OR ( Siecle LIKE "%med%" )
OR ( Siecle LIKE "%ex%" )
OR ( Siecle LIKE "%(1/2)%" )
OR ( Siecle LIKE "%(2/2)%" )
OR ( Siecle LIKE "%(1/3)%" )
OR ( Siecle LIKE "%(2/3)%" )
OR ( Siecle LIKE "%(3/3)%" )
OR ( Siecle LIKE "%(1/4)%" )
OR ( Siecle LIKE "%(2/4)%" )
OR ( Siecle LIKE "%(3/4)%" )
OR ( Siecle LIKE "%(4/4)%" )
)
AND ( Siecle LIKE "__,%" OR Siecle LIKE "_,%" )
AND ( Siecle NOT LIKE "__, __" )
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%?%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

36016


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [465]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(SUBSTRING_INDEX(Siecle,',',1),' ',''),'(1/2)','00'),'(2/2)','50'),'(1/3)','00'),'(2/3)','33'),'(3/3)','66'),'(1/4)','00'),'(2/4)','25'),'(3/4)','50'),'(4/4)','75'), 'mex.', 'mex'), 'mex', '40'), 'in.', 'in'), 'in', '00'), 'med.', 'med'), 'med','40'), 'ex.', 'ex'), 'ex','85') AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(CONCAT(SUBSTRING_INDEX(Siecle,', ',-1),"00") AS DECIMAL(4,0)) - 1 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( ( Siecle LIKE "%in%" )
OR ( Siecle LIKE "%med%" )
OR ( Siecle LIKE "%ex%" )
OR ( Siecle LIKE "%(1/2)%" )
OR ( Siecle LIKE "%(2/2)%" )
OR ( Siecle LIKE "%(1/3)%" )
OR ( Siecle LIKE "%(2/3)%" )
OR ( Siecle LIKE "%(3/3)%" )
OR ( Siecle LIKE "%(1/4)%" )
OR ( Siecle LIKE "%(2/4)%" )
OR ( Siecle LIKE "%(3/4)%" )
OR ( Siecle LIKE "%(4/4)%" )
)
AND ( Siecle LIKE "%, __" OR Siecle LIKE "%, _" )
AND ( Siecle NOT LIKE "__, __" )
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%?%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

36027


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [466]:
sql = """
SELECT astrID_wi, DateLit, Siecle, Date,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(SUBSTRING_INDEX(Siecle,',',1),' ',''),'(1/2)','00'),'(2/2)','50'),'(1/3)','00'),'(2/3)','33'),'(3/3)','66'),'(1/4)','00'),'(2/4)','25'),'(3/4)','50'),'(4/4)','75'), 'mex.', 'mex'), 'mex', '40'), 'in.', 'in'), 'in', '00'), 'med.', 'med'), 'med','40'), 'ex.', 'ex'), 'ex','85') AS DECIMAL(4,0)) - 100 AS hasApproxStartDate,
 CAST(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(SUBSTRING_INDEX(Siecle,',',-1),' ',''),'(1/2)','49'),'(2/2)','99'),'(1/3)','32'),'(2/3)','65'),'(3/3)','99'),'(1/4)','24'),'(2/4)','49'),'(3/4)','74'),'(4/4)','99'), 'mex.', 'mex'), 'mex', '60'), 'in.', 'in'), 'in', '15'), 'med.', 'med'), 'med','60'), 'ex.', 'ex'), 'ex','99') AS DECIMAL(4,0)) - 100 AS hasApproxEndDate
FROM `17.4.12_pinAstr_all`
WHERE ( Date IS NULL OR Date = "" )
AND ( Siecle != "" )
AND ( ( Siecle LIKE "%in%" )
OR ( Siecle LIKE "%med%" )
OR ( Siecle LIKE "%ex%" )
OR ( Siecle LIKE "%(1/2)%" )
OR ( Siecle LIKE "%(2/2)%" )
OR ( Siecle LIKE "%(1/3)%" )
OR ( Siecle LIKE "%(2/3)%" )
OR ( Siecle LIKE "%(3/3)%" )
OR ( Siecle LIKE "%(1/4)%" )
OR ( Siecle LIKE "%(2/4)%" )
OR ( Siecle LIKE "%(3/4)%" )
OR ( Siecle LIKE "%(4/4)%" )
)
AND ( Siecle LIKE "%, %" )
AND ( Siecle NOT LIKE "__, __" )
AND ( Siecle NOT LIKE "%, __" )
AND ( Siecle NOT LIKE "__, %" )
AND ( Siecle NOT LIKE "%-%" )
AND ( Siecle NOT LIKE "%?%" )
ORDER BY Date DESC
"""

with connection.cursor() as cursor:

    cursor.execute(sql)
    names = [ x[0] for x in cursor.description]
    result = cursor.fetchall()

df = df.append(pd.DataFrame(result, columns = names))
print(len(df))
df.head()

36033


Unnamed: 0,Date,DateLit,Siecle,astrID_wi,hasApproxDate,hasApproxEndDate,hasApproxStartDate,hasExactDate,hasExactEndDate,hasExactStartDate,hasUncertainDate,hasUncertainEndDate,hasUncertainStartDate
0,925,,10,9593102,,,,925,,,,,
1,1898,,19,8490393,,,,1898,,,,,
2,1868,,19,5911876,,,,1868,,,,,
3,1855,,19,7772967,,,,1855,,,,,
4,1855,,19,9792452,,,,1855,,,,,


In [480]:
df = df.reset_index()

In [491]:
for index, row in df.iterrows():
    if not math.isnan(row['hasApproxEndDate']) and not math.isnan(row['hasApproxStartDate']):
        if int(row['hasApproxEndDate']) < int(row['hasApproxStartDate']):
            print(index, row)
    if not math.isnan(row['hasExactEndDate']) and not math.isnan(row['hasExactStartDate']):
        if int(row['hasExactEndDate']) < int(row['hasExactStartDate']):
            print(index, row)
    if not math.isnan(row['hasUncertainEndDate']) and not math.isnan(row['hasUncertainStartDate']):
        if int(row['hasUncertainEndDate']) < int(row['hasUncertainStartDate']):
            print(index, row)
    if not math.isnan(row['hasApproxEndDate']) and not math.isnan(row['hasExactStartDate']):
        if int(row['hasApproxEndDate']) < int(row['hasExactStartDate']):
            print(index, row)
    if not math.isnan(row['hasExactEndDate']) and not math.isnan(row['hasApproxStartDate']):
        if int(row['hasExactEndDate']) < int(row['hasApproxStartDate']):
            print(index, row)

In [489]:
df = df.drop([4722,36026])

In [513]:
g = rdflib.Graph()
print("graph has %s statements." % len(g))

graph has 0 statements.


In [548]:
resourceKey = 'http://www.astronomoumenos.com/id/'
verbKey = 'http://www.astronomoumenos.com/ontologies/astr.owl#'

def convertToRDF(g,df,convType):
    cols = list(df)
    for t in df.iterrows():
        # s will always be a resource
        s = rdflib.URIRef(resourceKey + "wi" + str(t[1][cols[0]]) + "_pinakes")
        
        # p might have different prefixes when using ontologies
        # external to the project
        p = rdflib.URIRef(verbKey + cols[1])
        
        # o can be a resource or literal
        if convType == "resource-numeric":
            if '.' in str(t[1][cols[1]]):
                t[1][cols[1]] = float(str(t[1][cols[1]]).split('.')[0])
                o = rdflib.Literal(int(t[1][cols[1]]))
                g.add((s,p,o))
            elif not math.isnan(float(t[1][cols[1]])):
                o = rdflib.Literal(int(t[1][cols[1]]))
                g.add((s,p,o))
                
    return g


In [514]:
hasApproxDate = df[['astrID_wi','hasApproxDate']]

In [515]:
g = convertToRDF(g,hasApproxDate,'resource-numeric')
print("graph has %s statements." % len(g))

graph has 744 statements.


In [521]:
hasApproxEndDate = df[['astrID_wi','hasApproxEndDate']]
g = convertToRDF(g,hasApproxEndDate,'resource-numeric')
print("graph has %s statements." % len(g))

graph has 30490 statements.


In [522]:
hasApproxStartDate = df[['astrID_wi','hasApproxStartDate']]
g = convertToRDF(g,hasApproxStartDate,'resource-numeric')
print("graph has %s statements." % len(g))

graph has 60302 statements.


In [549]:
hasExactDate = df[['astrID_wi','hasExactDate']]
g = convertToRDF(g,hasExactDate,'resource-numeric')
print("graph has %s statements." % len(g))

graph has 65049 statements.


In [524]:
hasExactEndDate = df[['astrID_wi','hasExactEndDate']]
g = convertToRDF(g,hasExactEndDate,'resource-numeric')
print("graph has %s statements." % len(g))

graph has 63814 statements.


In [525]:
hasExactStartDate = df[['astrID_wi','hasExactStartDate']]
g = convertToRDF(g,hasExactStartDate,'resource-numeric')
print("graph has %s statements." % len(g))

graph has 64930 statements.


In [526]:
hasUncertainDate = df[['astrID_wi','hasUncertainDate']]
g = convertToRDF(g,hasUncertainDate,'resource-numeric')
print("graph has %s statements." % len(g))

graph has 65005 statements.


In [527]:
hasUncertainEndDate = df[['astrID_wi','hasUncertainEndDate']]
g = convertToRDF(g,hasUncertainEndDate,'resource-numeric')
print("graph has %s statements." % len(g))

graph has 65026 statements.


In [528]:
hasUncertainStartDate = df[['astrID_wi','hasUncertainStartDate']]
g = convertToRDF(g,hasUncertainStartDate,'resource-numeric')
print("graph has %s statements." % len(g))

graph has 65047 statements.


In [551]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT DISTINCT ?2p WHERE {
            ?1s ?2p ?3o .
            }
            ORDER BY RAND() LIMIT 20
            """)

b = pd.DataFrame(result.bindings)

In [557]:
pd.set_option('display.max_colwidth', -1)
print(dict(b))

{rdflib.term.Variable('2p'): 0    http://www.astronomoumenos.com/ontologies/astr.owl#hasExactEndDate      
1    http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxStartDate   
2    http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxEndDate     
3    http://www.astronomoumenos.com/ontologies/astr.owl#hasExactDate         
4    http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxDate        
5    http://www.astronomoumenos.com/ontologies/astr.owl#hasUncertainDate     
6    http://www.astronomoumenos.com/ontologies/astr.owl#hasExactStartDate    
7    http://www.astronomoumenos.com/ontologies/astr.owl#hasUncertainStartDate
8    http://www.astronomoumenos.com/ontologies/astr.owl#hasUncertainEndDate  
Name: 2p, dtype: object}


In [625]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT DISTINCT * WHERE {
            ?1s astr:hasApproxStartDate ?3o .
            }
            ORDER BY ?3o LIMIT 20
            """)

pd.DataFrame(result.bindings)

Unnamed: 0,1s,3o
0,http://www.astronomoumenos.com/id/wi4482192_pinakes,0
1,http://www.astronomoumenos.com/id/wi7854853_pinakes,100
2,http://www.astronomoumenos.com/id/wi5547372_pinakes,100
3,http://www.astronomoumenos.com/id/wi4577722_pinakes,100
4,http://www.astronomoumenos.com/id/wi8677755_pinakes,100
5,http://www.astronomoumenos.com/id/wi1422242_pinakes,100
6,http://www.astronomoumenos.com/id/wi8283893_pinakes,100
7,http://www.astronomoumenos.com/id/wi2936248_pinakes,300
8,http://www.astronomoumenos.com/id/wi6138478_pinakes,300
9,http://www.astronomoumenos.com/id/wi4594605_pinakes,300


In [None]:
# 9738159, 2991919

In [623]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT *
        WHERE {
            ?1 ?2 ?3 .
            FILTER( ?1 = <http://www.astronomoumenos.com/id/wi2991919_pinakes> )
            }
            """)

b = pd.DataFrame(result.bindings)

In [622]:
b

Unnamed: 0,1,2,3
0,http://www.astronomoumenos.com/id/wi2991919_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxStartDate,-86
1,http://www.astronomoumenos.com/id/wi2991919_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxEndDate,1499
2,http://www.astronomoumenos.com/id/wi2991919_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxStartDate,1325


In [624]:
cols = list(b)
print(b[cols[0]][0])
print(b[cols[1]][0])
print(b[cols[2]][0])
g.remove( (b[cols[0]][0],b[cols[1]][0],b[cols[2]][0]) )
print("graph has %s statements." % len(g))

http://www.astronomoumenos.com/id/wi2991919_pinakes
http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxStartDate
-86
graph has 65047 statements.


In [560]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT DISTINCT * WHERE {
            ?1s astr:hasExactStartDate ?3o .
            }
            ORDER BY ?3o LIMIT 20
            """)

pd.DataFrame(result.bindings)

Unnamed: 0,1s,3o
0,http://www.astronomoumenos.com/id/wi2772246_pinakes,802
1,http://www.astronomoumenos.com/id/wi1957187_pinakes,1250
2,http://www.astronomoumenos.com/id/wi7205810_pinakes,1250
3,http://www.astronomoumenos.com/id/wi3792728_pinakes,1250
4,http://www.astronomoumenos.com/id/wi6931483_pinakes,1250
5,http://www.astronomoumenos.com/id/wi3609466_pinakes,1250
6,http://www.astronomoumenos.com/id/wi5004143_pinakes,1250
7,http://www.astronomoumenos.com/id/wi8721615_pinakes,1250
8,http://www.astronomoumenos.com/id/wi6796919_pinakes,1250
9,http://www.astronomoumenos.com/id/wi7250712_pinakes,1250


In [None]:
# 3912493

In [561]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT DISTINCT * WHERE {
            ?1s astr:hasUncertainStartDate ?3o .
            }
            ORDER BY ?3o LIMIT 20
            """)

pd.DataFrame(result.bindings)

Unnamed: 0,1s,3o
0,http://www.astronomoumenos.com/id/wi3912493_pinakes,-90
1,http://www.astronomoumenos.com/id/wi3912493_pinakes,985
2,http://www.astronomoumenos.com/id/wi1513403_pinakes,1000
3,http://www.astronomoumenos.com/id/wi2132688_pinakes,1000
4,http://www.astronomoumenos.com/id/wi5849439_pinakes,1000
5,http://www.astronomoumenos.com/id/wi3885465_pinakes,1100
6,http://www.astronomoumenos.com/id/wi7155908_pinakes,1100
7,http://www.astronomoumenos.com/id/wi5649107_pinakes,1100
8,http://www.astronomoumenos.com/id/wi6634161_pinakes,1200
9,http://www.astronomoumenos.com/id/wi7333568_pinakes,1200


In [633]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT *
        WHERE {
            ?1 ?2 ?3 .
            FILTER( ?1 = <http://www.astronomoumenos.com/id/wi3912493_pinakes> && ?3 = -90 )
            }
            """)

b = pd.DataFrame(result.bindings)

In [634]:
b

Unnamed: 0,1,2,3
0,http://www.astronomoumenos.com/id/wi3912493_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasUncertainStartDate,-90


In [635]:
cols = list(b)
print(b[cols[0]][0])
print(b[cols[1]][0])
print(b[cols[2]][0])
g.remove( (b[cols[0]][0],b[cols[1]][0],b[cols[2]][0]) )
print("graph has %s statements." % len(g))

http://www.astronomoumenos.com/id/wi3912493_pinakes
http://www.astronomoumenos.com/ontologies/astr.owl#hasUncertainStartDate
-90
graph has 65045 statements.


In [649]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT DISTINCT * WHERE {
            ?1s astr:hasApproxEndDate ?3o .
            }
            ORDER BY DESC(?3o) LIMIT 20
            """)

pd.DataFrame(result.bindings)

Unnamed: 0,1s,3o
0,http://www.astronomoumenos.com/id/wi5662937_pinakes,1899
1,http://www.astronomoumenos.com/id/wi5824064_pinakes,1899
2,http://www.astronomoumenos.com/id/wi9625711_pinakes,1899
3,http://www.astronomoumenos.com/id/wi8595578_pinakes,1899
4,http://www.astronomoumenos.com/id/wi3860200_pinakes,1899
5,http://www.astronomoumenos.com/id/wi9775969_pinakes,1899
6,http://www.astronomoumenos.com/id/wi8330212_pinakes,1899
7,http://www.astronomoumenos.com/id/wi6460952_pinakes,1899
8,http://www.astronomoumenos.com/id/wi6854593_pinakes,1899
9,http://www.astronomoumenos.com/id/wi6839780_pinakes,1899


In [None]:
# 7565275

In [645]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT *
        WHERE {
            ?1 ?2 ?3 .
            FILTER( ?1 = <http://www.astronomoumenos.com/id/wi7565275_pinakes> && ?3 = 9899 )
            }
            """)

b = pd.DataFrame(result.bindings)

In [646]:
b

Unnamed: 0,1,2,3
0,http://www.astronomoumenos.com/id/wi7565275_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxStartDate,9899


In [647]:
cols = list(b)
print(b[cols[0]][0])
print(b[cols[1]][0])
print(b[cols[2]][0])
g.remove( (b[cols[0]][0],b[cols[1]][0],b[cols[2]][0]) )
print("graph has %s statements." % len(g))
g.add( (b[cols[0]][0],b[cols[1]][0],rdflib.Literal(1400)) )
print("graph has %s statements." % len(g))

http://www.astronomoumenos.com/id/wi7565275_pinakes
http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxStartDate
9899
graph has 65044 statements.
graph has 65045 statements.


In [651]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT DISTINCT * WHERE {
            ?1s astr:hasExactEndDate ?3o .
            }
            ORDER BY DESC(?3o) LIMIT 20
            """)

pd.DataFrame(result.bindings)

Unnamed: 0,1s,3o
0,http://www.astronomoumenos.com/id/wi4407381_pinakes,1853
1,http://www.astronomoumenos.com/id/wi9215795_pinakes,1853
2,http://www.astronomoumenos.com/id/wi4753180_pinakes,1853
3,http://www.astronomoumenos.com/id/wi3752645_pinakes,1853
4,http://www.astronomoumenos.com/id/wi7137684_pinakes,1853
5,http://www.astronomoumenos.com/id/wi2397495_pinakes,1853
6,http://www.astronomoumenos.com/id/wi4324702_pinakes,1853
7,http://www.astronomoumenos.com/id/wi9224126_pinakes,1851
8,http://www.astronomoumenos.com/id/wi5215729_pinakes,1851
9,http://www.astronomoumenos.com/id/wi5856078_pinakes,1851


In [653]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT DISTINCT * WHERE {
            ?1s astr:hasUncertainEndDate ?3o .
            }
            ORDER BY DESC(?3o) LIMIT 20
            """)

pd.DataFrame(result.bindings)

Unnamed: 0,1s,3o
0,http://www.astronomoumenos.com/id/wi9097714_pinakes,1709
1,http://www.astronomoumenos.com/id/wi7901870_pinakes,1599
2,http://www.astronomoumenos.com/id/wi5124969_pinakes,1599
3,http://www.astronomoumenos.com/id/wi5649107_pinakes,1499
4,http://www.astronomoumenos.com/id/wi4361012_pinakes,1499
5,http://www.astronomoumenos.com/id/wi3885465_pinakes,1499
6,http://www.astronomoumenos.com/id/wi8242410_pinakes,1399
7,http://www.astronomoumenos.com/id/wi9044219_pinakes,1399
8,http://www.astronomoumenos.com/id/wi2967800_pinakes,1399
9,http://www.astronomoumenos.com/id/wi4460342_pinakes,1399


In [654]:
g.serialize(destination='astr_dates.ttl', format='turtle')

In [657]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT ?1 (COUNT(*) AS ?c) WHERE {
            ?1 ?2 ?3 .
            }
            GROUP BY ?1
            ORDER BY DESC(?c)
            LIMIT 20
            """)

pd.DataFrame(result.bindings)

Unnamed: 0,1,c
0,http://www.astronomoumenos.com/id/wi6919791_pinakes,6
1,http://www.astronomoumenos.com/id/wi8587253_pinakes,5
2,http://www.astronomoumenos.com/id/wi7045022_pinakes,4
3,http://www.astronomoumenos.com/id/wi4228880_pinakes,4
4,http://www.astronomoumenos.com/id/wi8737006_pinakes,4
5,http://www.astronomoumenos.com/id/wi1498844_pinakes,4
6,http://www.astronomoumenos.com/id/wi6578745_pinakes,4
7,http://www.astronomoumenos.com/id/wi6508146_pinakes,4
8,http://www.astronomoumenos.com/id/wi8346690_pinakes,4
9,http://www.astronomoumenos.com/id/wi6058005_pinakes,4


In [658]:
result = g.query(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        SELECT * WHERE {
            ?1 ?2 ?3 .
            FILTER( ?1 = <http://www.astronomoumenos.com/id/wi6919791_pinakes> )
            }
            """)

pd.DataFrame(result.bindings)

Unnamed: 0,1,2,3
0,http://www.astronomoumenos.com/id/wi6919791_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxEndDate,1299
1,http://www.astronomoumenos.com/id/wi6919791_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasExactEndDate,1270
2,http://www.astronomoumenos.com/id/wi6919791_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasExactStartDate,1250
3,http://www.astronomoumenos.com/id/wi6919791_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasApproxStartDate,1200
4,http://www.astronomoumenos.com/id/wi6919791_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasExactStartDate,1260
5,http://www.astronomoumenos.com/id/wi6919791_pinakes,http://www.astronomoumenos.com/ontologies/astr.owl#hasExactEndDate,1280


## Notes

Going to have to somehow filter for only highest and lowest date to limit things to two points rather than 4, 5, or 6.

In [661]:
print("graph has %s statements." % len(g))
result = g.update(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        
        INSERT { ?1 astr:hasStartDate ?3 }
        WHERE
          { ?1 ?2 ?3 .
            FILTER( ?2 = astr:hasApproxStartDate )
          }
            """)

#pd.DataFrame(result.bindings)
print("graph has %s statements." % len(g))

graph has 65045 statements.
graph has 94855 statements.


In [662]:
print("graph has %s statements." % len(g))
result = g.update(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        
        INSERT { ?1 astr:hasStartDate ?3 }
        WHERE
          { ?1 ?2 ?3 .
            FILTER( ?2 = astr:hasExactStartDate )
          }
            """)

#pd.DataFrame(result.bindings)
print("graph has %s statements." % len(g))

graph has 94855 statements.
graph has 95971 statements.


In [663]:
print("graph has %s statements." % len(g))
result = g.update(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        
        INSERT { ?1 astr:hasStartDate ?3 }
        WHERE
          { ?1 ?2 ?3 .
            FILTER( ?2 = astr:hasUncertainStartDate )
          }
            """)

#pd.DataFrame(result.bindings)
print("graph has %s statements." % len(g))

graph has 95971 statements.
graph has 95991 statements.


In [664]:
print("graph has %s statements." % len(g))
result = g.update(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        
        INSERT { ?1 astr:hasEndDate ?3 }
        WHERE
          { ?1 ?2 ?3 .
            FILTER( ?2 = astr:hasApproxEndDate )
          }
            """)

#pd.DataFrame(result.bindings)
print("graph has %s statements." % len(g))

graph has 95991 statements.
graph has 125737 statements.


In [665]:
print("graph has %s statements." % len(g))
result = g.update(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        
        INSERT { ?1 astr:hasEndDate ?3 }
        WHERE
          { ?1 ?2 ?3 .
            FILTER( ?2 = astr:hasExactEndDate )
          }
            """)

#pd.DataFrame(result.bindings)
print("graph has %s statements." % len(g))

graph has 125737 statements.
graph has 126952 statements.


In [666]:
print("graph has %s statements." % len(g))
result = g.update(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        
        INSERT { ?1 astr:hasEndDate ?3 }
        WHERE
          { ?1 ?2 ?3 .
            FILTER( ?2 = astr:hasUncertainEndDate )
          }
            """)

#pd.DataFrame(result.bindings)
print("graph has %s statements." % len(g))

graph has 126952 statements.
graph has 126972 statements.


In [667]:
print("graph has %s statements." % len(g))
result = g.update(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        
        INSERT { ?1 astr:hasDate ?3 }
        WHERE
          { ?1 ?2 ?3 .
            FILTER( ?2 = astr:hasApproxDate )
          }
            """)

#pd.DataFrame(result.bindings)
print("graph has %s statements." % len(g))

graph has 126972 statements.
graph has 127716 statements.


In [668]:
print("graph has %s statements." % len(g))
result = g.update(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        
        INSERT { ?1 astr:hasDate ?3 }
        WHERE
          { ?1 ?2 ?3 .
            FILTER( ?2 = astr:hasExactDate )
          }
            """)

#pd.DataFrame(result.bindings)
print("graph has %s statements." % len(g))

graph has 127716 statements.
graph has 130013 statements.


In [669]:
print("graph has %s statements." % len(g))
result = g.update(
        """
        PREFIX astr: <http://www.astronomoumenos.com/ontologies/astr.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        
        INSERT { ?1 astr:hasDate ?3 }
        WHERE
          { ?1 ?2 ?3 .
            FILTER( ?2 = astr:hasUncertainDate )
          }
            """)

#pd.DataFrame(result.bindings)
print("graph has %s statements." % len(g))

graph has 130013 statements.
graph has 130088 statements.


In [670]:
g.serialize(destination='astr_dates.ttl', format='turtle')