In [None]:
# 2025-12-08 run against production
from csae_pyutils import gsheet_to_df
from django.core.exceptions import ObjectDoesNotExist

from tqdm import tqdm
import pandas as pd
from apis_core.apis_entities.models import Person, Place, Institution
from apis_core.apis_metainfo.models import Uri, Collection
from apis_core.apis_vocabularies.models import CollectionType
from apis_core.apis_vocabularies.models import PersonPlaceRelation, InstitutionType
from apis_core.apis_relations.models import PersonPlace

In [None]:
# Identifier of the sheet handed to gsheet_to_df; the result is the frame
# every later cell iterates over.
WEDEKIND_SHEET_ID = "1-oRC9J8VjHvt2jym3AdZBWS3klZv3T_uT8rkUZHkb6I"
df = gsheet_to_df(WEDEKIND_SHEET_ID)

In [None]:
# Preview the first rows of the loaded frame before importing anything.
df.head()

In [None]:
# Collection that every entity created by this notebook is added to.
col, _ = Collection.objects.get_or_create(name="Briefedition Wedekind")
# Domain label stored on the Uri rows created by the import loops.
domain = "wedekind"
col_type, _ = CollectionType.objects.get_or_create(name="Projekt")
# The anchor must be closed with </a>; the previous text ended in a second
# opening <a>, which left the link unclosed wherever the description is
# rendered as HTML.
col.description = (
    'Frank Wedekinds Korrespondenz digital. '
    '<a href="https://briefedition.wedekind.fernuni-hagen.de">'
    'https://briefedition.wedekind.fernuni-hagen.de</a>'
)
col.collection_type = col_type
col.save()

In [None]:
# process those without GND first
# Collect, as plain dicts, every row whose gender is "männlich" or "weiblich"
# and whose "gnd" cell is missing. Missing values are detected with pd.isna,
# matching the pd.notna(row["gnd"]) check used for the non-person rows,
# instead of formatting the value and looking for a trailing "nan".
data = []
# iterrows() has no length, so the total is passed explicitly to give the
# progress bar a percentage.
for _, row in tqdm(df.iterrows(), total=len(df)):
    if row["gender"] in ("männlich", "weiblich") and pd.isna(row["gnd"]):
        data.append(row.to_dict())

In [None]:
# Relation types for the birth and death links, fetched by primary key.
# NOTE(review): 88 and 89 are database-specific ids; confirm they are the
# intended "born in" / "died in" records on the target instance.
born_in = PersonPlaceRelation.objects.get(id=88)
died_in = PersonPlaceRelation.objects.get(id=89)

# Sheet gender label -> value passed as Person.gender.
GENDER_BY_LABEL = {"männlich": "male", "weiblich": "female"}


def _link_person_to_place(person, place_name, relation_type, date_written):
    """Relate `person` to the Place named `place_name`.

    The place is fetched or created by name. When several places share the
    name, one of the existing ones is picked instead of creating another.
    The PersonPlace relation is fetched or created with `date_written` used
    for both its written start and end date (may be None).

    Returns the PersonPlace instance.
    """
    try:
        place, _ = Place.objects.get_or_create(name=place_name)
    except Place.MultipleObjectsReturned:
        # Only the "name is ambiguous" case is handled here; any other error
        # (database failure, interrupt, ...) is allowed to propagate rather
        # than being hidden by a bare except.
        place = Place.objects.filter(name=place_name).reverse()[0]
    relation, _ = PersonPlace.objects.get_or_create(
        related_person=person,
        related_place=place,
        relation_type=relation_type,
        start_date_written=date_written,
        end_date_written=date_written,
    )
    return relation


for row in tqdm(data, total=len(data)):
    domain_uri = row["URI"]
    # Empty date cells become None so they are neither stored on the person
    # nor used as relation dates.
    birth_date = row["birth_date"] if pd.notna(row["birth_date"]) else None
    death_date = row["death_date"] if pd.notna(row["death_date"]) else None

    try:
        # Reuse the person already registered under this URI, if any.
        uri = Uri.objects.get(uri=domain_uri)
        entity = uri.entity
        entity = Person.objects.get(id=entity.id)
    except ObjectDoesNotExist:
        # Unknown URI (or the URI does not resolve to a Person): create the
        # person from the sheet row and register the URI for it.
        item = {}
        if pd.notna(row["firstname"]):
            item["first_name"] = row["firstname"]
        item["name"] = row["lastname"]
        if birth_date is not None:
            item["start_date_written"] = birth_date
        if death_date is not None:
            item["end_date_written"] = death_date
        gender = GENDER_BY_LABEL.get(row["gender"])
        if gender is not None:
            item["gender"] = gender
        entity = Person.objects.create(**item)
        entity.collection.add(col)
        uri, _ = Uri.objects.get_or_create(uri=domain_uri, domain=domain)
        uri.entity = entity
        uri.save()

    # Birth and death places are linked for both existing and new persons.
    if pd.notna(row["birth_place"]):
        _link_person_to_place(entity, row["birth_place"], born_in, birth_date)
    if pd.notna(row["death_place"]):
        _link_person_to_place(entity, row["death_place"], died_in, death_date)
    

In [None]:
# Import every row that has a GND value and whose entity type is not "Person"
# as an Institution, typed by the row's "entity_type".
for _, row in df.iterrows():
    if pd.isna(row["gnd"]) or row["entity_type"] == "Person":
        continue

    domain_uri = row["URI"]
    # The try covers only the lookup. Previously it also wrapped the type
    # assignment and save, so an ObjectDoesNotExist raised there would have
    # fallen through to the creation branch and produced a duplicate.
    try:
        uri = Uri.objects.get(uri=domain_uri)
        entity = Institution.objects.get(id=uri.entity.id)
    except ObjectDoesNotExist:
        # Unknown URI (or it does not resolve to an Institution): create the
        # institution and register the URI for it.
        entity = Institution.objects.create(name=row["name"])
        entity.collection.add(col)
        uri, _ = Uri.objects.get_or_create(uri=domain_uri, domain=domain)
        uri.entity = entity
        uri.save()

    # Shared tail for existing and newly created institutions: set the type
    # taken from the sheet and persist it.
    inst_type, _ = InstitutionType.objects.get_or_create(name=row["entity_type"])
    entity.kind = inst_type
    entity.save()

In [None]:
# Set the domain of every Uri whose address contains the fragment below
# (case-insensitive match) to "wedekind-korrespondenz". Rows are saved one by
# one through save().
uri_fragment = "https://briefedition.wedekind.h-da.de/view"
matching_uris = Uri.objects.filter(uri__icontains=uri_fragment)
for matched_uri in matching_uris:
    matched_uri.domain = "wedekind-korrespondenz"
    matched_uri.save()