In [None]:
import os

import lxml.etree as ET
import pandas as pd

from apis_core.apis_entities.models import Person as APerson
from apis_core.apis_entities.models import Place as APlace
from apis_core.apis_entities.models import Work as AWork
from apis_core.apis_metainfo.models import Collection as ACollection
from apis_core.apis_metainfo.models import Uri
from apis_core.apis_vocabularies.models import PersonWorkRelation, TextType, WorkType
from apis_core.helper_functions.RDFparsers import GenericRDFParser
from teimporter.teimodule.tei import TeiPersonList

In [None]:
# Path of the TEI document handed to CPASParser below.
# The default is the original workstation-specific location; set the
# CPAS_TEI_FILE environment variable to run the notebook on another machine
# without editing this cell.
file = os.environ.get(
    "CPAS_TEI_FILE",
    r"C:\Users\pandorfer\Documents\Redmine\konde\cpas\102_derived_tei\cp-TEI.xml",
)

In [None]:
# Base URL that is prefixed to an entry's xml:id to form its legacy URI.
default_domain = "https://schnitzler-kino.acdh.oeaw.ac.at"

# Fetch-or-create the shared records referenced by the import cells.
col, _ = ACollection.objects.get_or_create(name="CPAS")
text_type, _ = TextType.objects.get_or_create(name="xml/tei transcription")
# NOTE(review): "mentiones" looks like a typo for "mentions". It is kept
# verbatim because it is part of the get_or_create lookup; changing it here
# would alter which record is matched or created.
rel_type, _ = PersonWorkRelation.objects.get_or_create(
    name="mentioned in",
    name_reverse="mentiones",
)
work_kind, _ = WorkType.objects.get_or_create(name="diary")

In [None]:
class CPASParser(TeiPersonList):
    """XPath-based accessors for the CPAS diary TEI document.

    Every query is evaluated with ``self.nsmap`` as the namespace map, either
    against ``self.tree`` or against a node passed in by the caller (both
    attributes are presumably provided by ``TeiPersonList`` -- confirm).
    Accessors that return a single value take the first XPath match and raise
    ``IndexError`` when the expression matches nothing.
    """

    def get_entries(self):
        """Return every ``tei:div`` whose ``type`` is ``tagebucheintrag``."""
        return self.tree.xpath(
            "//tei:div[@type='tagebucheintrag']", namespaces=self.nsmap
        )

    def get_id(self, node):
        """Return the ``xml:id`` attribute value of ``node``."""
        matches = node.xpath("./@xml:id", namespaces=self.nsmap)
        return matches[0]

    def get_ev_id(self, node):
        """Return the part of ``node``'s ``corresp`` attribute after the first ``#``.

        Raises ``IndexError`` when the attribute is missing or contains no ``#``.
        """
        corresp = node.xpath("./@corresp", namespaces=self.nsmap)[0]
        pieces = corresp.split('#')
        return pieces[1]

    def get_titles(self):
        """Return the text nodes of all ``tei:title`` elements below ``tei:titleStmt``."""
        return self.tree.xpath(
            "//tei:titleStmt//tei:title/text()", namespaces=self.nsmap
        )

    def get_title_str(self):
        """Return the second title text (index 1 of ``get_titles()``) as a string."""
        return "{}".format(self.get_titles()[1])

    def mentioned_pers(self, uri_base="https://bahrschnitzler.acdh.oeaw.ac.at/"):
        """Return one URI per ``tei:persName/@key``, each prefixed with ``uri_base``."""
        keys = self.tree.xpath("//tei:persName/@key", namespaces=self.nsmap)
        uris = []
        for key in keys:
            uris.append("{}{}".format(uri_base, key))
        return uris

    def get_idno(self):
        """Return the first ``tei:idno`` of type ``URI`` in ``tei:publicationStmt`` as a string."""
        matches = self.tree.xpath(
            "//tei:publicationStmt/tei:idno[@type='URI']/text()",
            namespaces=self.nsmap,
        )
        return "{}".format(matches[0])

    def get_date(self):
        """Return the ``when`` attribute of the first ``tei:date`` that has one."""
        matches = self.tree.xpath("//tei:date[@when]/@when", namespaces=self.nsmap)
        return matches[0]

    def get_written_date(self):
        """Return the first text node found in a ``tei:date`` element carrying ``when``."""
        matches = self.tree.xpath("//tei:date[@when]/text()", namespaces=self.nsmap)
        return matches[0]
    
    

In [None]:
class ListEvent(TeiPersonList):
    """Lookup helper for ``tei:event`` elements of a TEI document."""

    def get_date(self, ev_id):
        """Return the ``key`` attribute of the event whose ``xml:id`` equals ``ev_id``.

        ``ev_id`` is bound to the XPath variable ``$name`` instead of being
        formatted into the expression. Raises ``IndexError`` when no event
        matches.

        NOTE(review): the import loop uses the returned value as a date, so
        ``@key`` presumably holds the event's date -- confirm against the data.
        """
        matches = self.tree.xpath(
            "//tei:event[@xml:id=$name]/@key",
            namespaces=self.nsmap,
            name=ev_id,
        )
        return matches[0]

In [None]:
# Event list TEI ("synopse") used by the import loop to look up the event
# each diary entry points to via @corresp.
# The default is the original workstation-specific location; set the
# CPAS_SYNOPSE_TEI_FILE environment variable to use a different file without
# editing this cell.
list_event = ListEvent(
    os.environ.get(
        "CPAS_SYNOPSE_TEI_FILE",
        r"C:\Users\pandorfer\Documents\Redmine\konde\cpas\102_derived_tei\synopse-TEI.xml",
    )
)

In [None]:
# Instantiate the CPAS parser on the TEI path stored in `file`; the import
# loop below reads the diary entries from this object.
doc = CPASParser(file)

In [None]:
# Create (or reuse) one APIS Work per diary entry and attach its legacy URI,
# work type, dates and collection.
for entry in doc.get_entries():
    # Legacy URI = project domain + the entry's xml:id.
    legacy_id = "{}/{}".format(default_domain, doc.get_id(entry))
    # The entry's @corresp points at an event in the event list; that event's
    # @key is used as the entry's date.
    date = list_event.get_date(doc.get_ev_id(entry))
    title = "{}, Tagebucheintrag der Clara Katharina Pollaczek".format(date)
    # The APIS Work model is imported as `AWork`; the bare name `Work` is never
    # imported in this notebook and raises NameError on a fresh kernel.
    work, _ = AWork.objects.get_or_create(name=title)
    work.kind = work_kind
    Uri.objects.get_or_create(uri=legacy_id, entity=work)
    work.start_date_written = date
    work.end_date_written = date
    work.start_date = date
    work.end_date = date
    # NOTE(review): parse_dates=False presumably stops save() from deriving
    # start_date/end_date from the *_written strings, which is why all four
    # fields are assigned explicitly above -- confirm against the APIS model.
    work.save(parse_dates=False)
    work.collection.add(col)