In [None]:
import os
from django.core.exceptions import ObjectDoesNotExist
from acdh_tei_pyutils.tei import TeiReader
from acdh_tei_pyutils.utils import nsmap, normalize_string

In [None]:
# Shared configuration and lookup objects for the import cells below.
# NOTE(review): the model classes used here (Collection, PersonWorkRelation,
# LabelType, WorkWorkRelation, Work) are not imported in this file; presumably
# the notebook kernel provides them (e.g. a Django shell) — confirm.

# TEI source document, loaded directly from the given URL.
doc = TeiReader("https://raw.githubusercontent.com/csae8092/insel-data-for-pmb/main/schoeffling_anhang_pmb.xml")
# Local file the keyed TEI tree is written to and then re-read from.
tmp_file = "hansi.xml"
# Value stored in the `domain` field of Uri rows created by the import.
domain = "insel"
# Collection every imported Work is added to; created on first run.
col, _ = Collection.objects.get_or_create(name="Die Insel")
# The following `.get()` lookups raise if no matching row exists, so the
# relation types, the label type and the parent Work must already be present.
wrote = PersonWorkRelation.objects.get(name="hat geschaffen")
translated = PersonWorkRelation.objects.get(name="hat übersetzt")
label_type = LabelType.objects.get(name="Werk_Bibliografische-Angabe")
work_work_type = WorkWorkRelation.objects.get(name="enthält")
# Work that every imported Work is linked to via `work_work_type`.
insel = Work.objects.get(id="40598")

In [None]:
# Give every <biblStruct> that has an author a sequential, zero-padded
# `key` attribute (insel-werk_0001, insel-werk_0002, ...), then write the
# tree to `tmp_file` and reload it from there.
authored_entries = doc.any_xpath(".//tei:biblStruct[.//tei:author]")
for position, bibl_struct in enumerate(authored_entries, start=1):
    bibl_struct.attrib["key"] = f"insel-werk_{position:04}"

doc.tree_to_file(tmp_file)
doc = TeiReader(tmp_file)

In [None]:
# Import every keyed <biblStruct> that has an author as a Work entity.
#
# For each entry the Work registered under the entry's URI is reused, or a new
# Work is created. The Work is then added to the "Die Insel" collection and
# receives its bibliographic labels, author/translator relations and the link
# to the `insel` Work. All relation/label writes use get_or_create, so
# re-running the cell does not duplicate them.
for bibl in doc.any_xpath(".//tei:biblStruct[.//tei:author]"):
    werk_id = bibl.attrib["key"]
    werk_uri = f"https://die-insel.foo.bar/{werk_id}"
    title = normalize_string(" ".join(bibl.xpath(".//tei:title[1]//text()", namespaces=nsmap)))

    # The first four characters of each @ref are dropped and the remainder is
    # used as the Person primary key further down.
    # NOTE(review): presumably this strips a fixed-length prefix — confirm
    # against the TEI data.
    authors = {ref[4:] for ref in bibl.xpath(".//tei:author/@ref", namespaces=nsmap)}
    translators = {
        ref[4:]
        for ref in bibl.xpath(".//tei:title//tei:persName[@role='translator']/@ref", namespaces=nsmap)
    }

    # One bibliographic label per <monogr>. A missing biblScope raises
    # IndexError here, i.e. before anything is written to the database for
    # this entry.
    labels = []
    for monogr in bibl.xpath(".//tei:monogr", namespaces=nsmap):
        jg = monogr.xpath(".//tei:biblScope[@unit='jg']/text()", namespaces=nsmap)[0]
        volume = monogr.xpath(".//tei:biblScope[@unit='volume']/text()", namespaces=nsmap)[0]
        issue = monogr.xpath(".//tei:biblScope[@unit='issue']/text()", namespaces=nsmap)[0]
        page = monogr.xpath(".//tei:biblScope[@unit='page']/text()", namespaces=nsmap)[0]
        labels.append(f"Die Insel, Jg. {jg}, Bd. {volume}, Nr. {issue}, S. {page}")

    # Publication year/month of the first and last matching biblScope.
    # These values are currently only printed, not stored on the Work.
    year_nodes = bibl.xpath(".//tei:biblScope[@unit='publ-year']", namespaces=nsmap)
    if year_nodes:
        start_year, end_year = year_nodes[0].text, year_nodes[-1].text
    else:
        start_year, end_year = None, None
    if start_year:
        print(start_year)

    month_nodes = bibl.xpath(".//tei:biblScope[@unit='publ-month']", namespaces=nsmap)
    if month_nodes:
        # `.text` is None for an empty element; fall back to "" so that
        # `.split` cannot raise AttributeError. For values like "3/4" only the
        # part before the slash is kept.
        start_month = (month_nodes[0].text or "").split("/")[0]
        end_month = (month_nodes[-1].text or "").split("/")[0]
    else:
        start_month, end_month = None, None
    if start_month:
        print(start_month)

    # Reuse the Work already linked to this URI. AttributeError covers a Uri
    # row that exists but has no entity attached.
    try:
        uri = Uri.objects.get(uri=werk_uri)
        entity = Work.objects.get(id=uri.entity.id)
    except (ObjectDoesNotExist, AttributeError):
        entity = None
    if entity is None:
        uri, _ = Uri.objects.get_or_create(uri=werk_uri, domain=domain)
        entity = Work.objects.create(name=title[:250])

    # Persist the Uri -> Work link right away. If a later step fails (e.g. an
    # unknown Person id), the next run finds this Work through its Uri instead
    # of creating a duplicate.
    uri.entity = entity
    uri.save()

    entity.collection.add(col)

    # Werk_Bibliografische-Angabe
    for label_text in labels:
        Label.objects.get_or_create(label=label_text, label_type=label_type, temp_entity=entity)

    for person_id in authors:
        person = Person.objects.get(id=person_id)
        PersonWork.objects.get_or_create(related_person=person, related_work=entity, relation_type=wrote)
    for person_id in translators:
        person = Person.objects.get(id=person_id)
        PersonWork.objects.get_or_create(related_person=person, related_work=entity, relation_type=translated)

    # link to "Die Insel"
    WorkWork.objects.get_or_create(related_worka=insel, related_workb=entity, relation_type=work_work_type)