# Working with ISMI project dates

## Load date samples from RDF

In [1]:
from rdflib import Graph, RDF, URIRef
from rdflib.namespace import Namespace, RDFS
from undate.undate import Undate

# CIDOC-CRM namespace; the cells below use it to address classes and properties
crmNs = Namespace("http://www.cidoc-crm.org/cidoc-crm/")

# Fresh graph with the 'crm' prefix registered before any data is read,
# so SPARQL queries against it can use the crm: prefix.
g = Graph()
g.bind(prefix="crm", namespace=crmNs)

# Read the ISMI sample dates from the Turtle file
g.parse(source="data/ismi-crm-date-samples.ttl")

# Sanity check: the notebook displays the number of triples loaded
len(g)

78

In [2]:
# Every subject typed as crm:E52_Time-Span is one ISMI date sample.
date_uris = list(g.subjects(RDF.type, crmNs['E52_Time-Span']))

# The query text is identical for every date; only the pre-bound ?uri changes.
q = '''SELECT ?uri ?label ?note 
           WHERE { 
             ?uri crm:P3_has_note ?note ;
               crm:P1_is_identified_by / rdfs:label ?label .
            } limit 10'''

# Print the label and the descriptive note of each time-span.
for uri in date_uris:
    for r in g.query(q, initBindings={'uri': uri}):
        print(f"uri={uri!s} label={r.label} note={r.note}")

uri=http://content.mpiwg-berlin.mpg.de/ns/ismi/date1 label=901 Rabīʿ I 14 (islamic) note=day-precision date in islamic calendar
uri=http://content.mpiwg-berlin.mpg.de/ns/ismi/date2 label=884 (islamic) note=year-precision date in islamic calendar
uri=http://content.mpiwg-berlin.mpg.de/ns/ismi/date3 label=900 Muḥarram 1 - 999 Ḏu al-Ḥijjaẗ 29 (islamic) note=range-type (century in islamic calendar) date in islamic calendar
uri=http://content.mpiwg-berlin.mpg.de/ns/ismi/date4 label=1830 February 8 (gregorian) note=day-precision date in gregorian calendar
uri=http://content.mpiwg-berlin.mpg.de/ns/ismi/date5 label=1796 (gregorian) note=year-precision date in gregorian calendar
uri=http://content.mpiwg-berlin.mpg.de/ns/ismi/date6 label=1600 January 1 - 1699 December 31 (gregorian) note=range-type (century in gregorian calendar) date in gregorian calendar
uri=http://content.mpiwg-berlin.mpg.de/ns/ismi/date7 label=1035 May 29 (julian) note=day-precision date in julian calendar
uri=http://content

## Convert RDF dates to Undate manually

In [3]:
from undate.date import DatePrecision, Date
import datetime

# Convert a single sample time-span by hand (in the recorded listing above,
# index 1 is the year-precision islamic date '884 (islamic)').
uri = date_uris[1]

#
# read date type
#
# Compare every P2_has_type object of the time-span with the known ISMI
# date type URIs; the last match wins.
date_type = None
date_type_uri = None  # stays None if the node has no type triple at all
for date_type_uri in g.objects(uri, crmNs.P2_has_type):
    for dt in ['day', 'year', 'range']:
        if str(date_type_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/date/' + dt:
            date_type = dt

if not date_type:
    raise RuntimeError(f"Unknown datetype URI {date_type_uri}")

#
# read label and calendar
#
# The label lives on the appellation node linked via P1_is_identified_by;
# the calendar is given as the type of that same node.
date_label_uri = next(g.objects(uri, crmNs.P1_is_identified_by))
date_label = str(next(g.objects(date_label_uri, RDFS.label)))
# Initialise explicitly: without this an unmatched calendar would fail with
# a NameError below instead of the intended RuntimeError.
calendar_type = None
date_label_calendar_uri = None
for date_label_calendar_uri in g.objects(date_label_uri, crmNs.P2_has_type):
    for ct in ['gregorian', 'julian', 'islamic']:
        if str(date_label_calendar_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/calendar/' + ct:
            calendar_type = ct

if not calendar_type:
    raise RuntimeError(f"Unknown calendar type URI {date_label_calendar_uri}")

#
# create undate
#
# date_type is guaranteed to be one of the three values handled here,
# so exactly one branch runs and `date` is always assigned.
if date_type == 'day':
    # a single day: stored as one xsd date
    xsd_date = next(g.objects(uri, crmNs.P82_at_some_time_within))
    date = Undate.parse(str(xsd_date), 'ISO8601')
    date.precision = DatePrecision.DAY
    date.label = date_label

elif date_type == 'year':
    # a year: stored as first and last day of that year
    xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))
    xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))
    date_from = datetime.date.fromisoformat(str(xsd_date_from))
    if calendar_type == 'gregorian':
        # this should be fine
        date = Undate(year=date_from.year)

    else:
        # non-gregorian years do not line up with gregorian year boundaries:
        # create day precision Undate from end date
        date = Undate.parse(str(xsd_date_until), 'ISO8601')
        # change earliest date
        date.earliest = Date(year=date_from.year, month=date_from.month, day=date_from.day)

    # change precision and label
    # NOTE(review): this is the year branch but the precision is set to DAY,
    # same as the other branches -- confirm whether DatePrecision.YEAR was meant.
    date.precision = DatePrecision.DAY
    date.label = date_label

else:  # date_type == 'range'
    xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))
    xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))
    # create day precision Undate from start date
    date = Undate.parse(str(xsd_date_from), 'ISO8601')
    # change latest date
    date_until = datetime.date.fromisoformat(str(xsd_date_until))
    date.latest = Date(year=date_until.year, month=date_until.month, day=date_until.day)
    # change precision and label
    date.precision = DatePrecision.DAY
    date.label = date_label


print(f"{date_label=} {date_type=} {calendar_type=} {date=}")

date_label='884 (islamic)' date_type='year' calendar_type='islamic' date=<Undate '884 (islamic)' (1479-04-03)>


## Convert RDF dates to Undate using Hijri parser

In [18]:
# For every sample date: parse the human-readable label with the matching
# undate parser and compare the result with the xsd dates stored in the RDF.
for uri in date_uris:

    # load ISMI date parts
    date_label_uri = next(g.objects(uri, crmNs.P1_is_identified_by))
    date_label = str(next(g.objects(date_label_uri, RDFS.label)))
    xsd_date = next(g.objects(uri, crmNs.P82_at_some_time_within), None)
    # explicit None check: only the "property missing" case should fall
    # through to the begin/end pair, independent of the literal's truthiness
    if xsd_date is not None:
        # single-day dates carry one xsd date used for both ends
        ismi_from = Undate.parse(str(xsd_date), 'ISO8601')
        ismi_until = ismi_from
    else:
        xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))
        xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))
        ismi_from = Undate.parse(str(xsd_date_from), 'ISO8601')
        ismi_until = Undate.parse(str(xsd_date_until), 'ISO8601')

    # use Hijri parser and compare results
    try:
        if date_label.endswith('(islamic)'):
            # drop the calendar suffix and the space that preceded it
            label = date_label.replace('(islamic)', '').strip()
            date = Undate.parse(label, 'Hijri')
            print(f"'{date_label}' -> {date!r}")
            if ismi_from.earliest != date.earliest:
                print(f"  different earliest dates: ismi={ismi_from.earliest} parser={date.earliest}")
            if ismi_until.latest != date.latest:
                print(f"  different latest dates: ismi={ismi_until.latest} parser={date.latest}")

        elif date_label.endswith('(gregorian)'):
            label = date_label.replace('(gregorian)', '').strip()
            date = Undate.parse(label, 'ISO8601')
            print(repr(date))

        else:
            print(f"unknown calendar type for date {date_label}")
            date = None

    except Exception as e:
        # deliberately best-effort: report labels the parsers cannot handle
        # and continue with the next sample instead of aborting the loop
        print(e)
        

'901 Rabīʿ I 14 (islamic) -> <Undate '901 Rabīʿ I 14  Hijrī' (1495-12-11)>
'884 (islamic) -> <UndateInterval '884  Hijrī' (1479-04-03/1480-03-21)>
Could not parse '900 Muḥarram 1 - 999 Ḏu al-Ḥijjaẗ 29 ' as a Hijri date
invalid literal for int() with base 10: '1830 February 8 '
<Undate 1796>
invalid literal for int() with base 10: '1600 January 1 '
unknown calendar type for date 1035 May 29 (julian)
unknown calendar type for date 1013 (julian)
unknown calendar type for date 1200 January 1 - 1299 December 31 (julian)
