In [None]:
from django.core.exceptions import ObjectDoesNotExist
from acdh_tei_pyutils.tei import TeiReader
from acdh_tei_pyutils.utils import get_xmlid
from tqdm import tqdm

In [None]:
# Location of the TEI person index (listperson.xml) that this notebook imports from.
source_file = "https://github.com/semantic-kraus/fa-data/raw/main/data/indices/listperson.xml"

In [None]:
# Wrap the source in a TeiReader; its `nsmap` and `any_xpath` are used by the cells below.
doc = TeiReader(source_file)

In [None]:
# Namespace map of the parsed document; handed to every lxml xpath call in the import loop.
nsmap = doc.nsmap
# Prefix of the per-person URIs; part of each person's xml:id is appended to it.
base_url = "https://fackel.oeaw.ac.at/?p=fackelp"
# Value stored in the `domain` field of every Uri created by the import.
domain = "fackel"
# Collection every newly created person is added to (created if it does not exist yet).
col, _ = Collection.objects.get_or_create(name="neue fackel personen")
# Relation types used for birth/death places; `.first()` yields None when no
# relation with the given name exists.
geboren_in = PersonPlaceRelation.objects.filter(name="geboren in").first()
gestorben_in = PersonPlaceRelation.objects.filter(name="gestorben in").first()

In [None]:
# Never populated in this cell; kept in case a later cell still refers to it.
to_create = []


def _first(node, path, default=""):
    """Return the first result of evaluating ``path`` on ``node``.

    ``default`` is returned when the xpath expression matches nothing.
    """
    hits = node.xpath(path, namespaces=nsmap)
    return hits[0] if hits else default


# Create one Person (plus Uri, professions and birth/death place relations)
# for every tei:person whose URI is not in the database yet.
for x in tqdm(doc.any_xpath(".//tei:person[@xml:id]")):
    xml_id = get_xmlid(x)
    # The first two characters of the xml:id are dropped before building the URI
    # (presumably an id prefix -- confirm against the source data).
    fackel_uri = f"{base_url}{xml_id[2:]}"

    # Already imported on an earlier run: nothing to do for this person.
    if Uri.objects.filter(uri=fackel_uri).exists():
        continue

    # NOTE: raises IndexError when the person has no tei:persName text at all.
    name = x.xpath(".//tei:persName/text()", namespaces=nsmap)[0]
    first_name = ""
    # Only "Surname, Forename" with exactly one separator is split; anything
    # else is stored unchanged in `name`.
    name_parts = name.split(", ")
    if len(name_parts) == 2:
        name, first_name = name_parts

    start_date_written = _first(x, ".//tei:birth/@when")
    end_date_written = _first(x, ".//tei:death/@when")

    person = Person.objects.create(
        name=name,
        first_name=first_name,
        start_date_written=start_date_written,
        end_date_written=end_date_written,
        gender=_first(x, ".//tei:sex/@value"),
    )
    person.collection.add(col)
    Uri.objects.create(uri=fackel_uri, domain=domain, entity=person)

    # Attach only professions that already exist; unknown ones are ignored.
    for job in x.xpath(".//tei:occupation/text()", namespaces=nsmap):
        profession = ProfessionType.objects.filter(name__startswith=job).first()
        if profession:
            person.profession.add(profession)

    # Link birth and death place. Each relation carries its own date: the
    # birth date for "geboren in", the death date for "gestorben in".
    for place_path, date_written, relation_type in (
        (".//tei:birth/tei:placeName/text()", start_date_written, geboren_in),
        (".//tei:death/tei:placeName/text()", end_date_written, gestorben_in),
    ):
        place_name = _first(x, place_path)
        if not place_name:
            continue
        # Places are matched by exact name; persons whose place is unknown
        # to the database simply get no relation.
        place = Place.objects.filter(name=place_name).first()
        if place:
            PersonPlace.objects.create(
                related_person=person,
                related_place=place,
                start_date_written=date_written,
                relation_type=relation_type,
            )