In [None]:
import pandas as pd

from csae_pyutils import gsheet_to_df
from django.core.exceptions import ObjectDoesNotExist

from tqdm import tqdm
from normdata.utils import import_from_normdata
from apis_core.apis_entities.models import Person
from apis_core.apis_metainfo.models import Uri, Collection
from apis_core.apis_vocabularies.models import CollectionType

In [None]:
# Identifier handed to gsheet_to_df (presumably the Google Sheets document ID
# of the person list -- TODO confirm).
SHEET_ID = "1EkKAYAydBGoTCJePwZAbLr9aRd7WYwyDg3Iq6Njd7aM"
df = gsheet_to_df(SHEET_ID)

In [None]:
# Fetch (or create on first run) the project collection that the persons
# handled in this notebook are added to.
col, _ = Collection.objects.get_or_create(name="Conrad Ansorge")
# Value stored in the `domain` field of the Uri objects created for this project.
domain = "ansorge"
col_type, _ = CollectionType.objects.get_or_create(name="Projekt")
# The description carries an HTML link; the anchor has to be terminated with
# </a>, otherwise a second, unclosed anchor is opened after the link text.
col.description = 'Conrad Ansorge 1862–1930. Ein Pianist des Fin de siècle, herausgegeben von Eike Rathgeber, Christian Heitler, Manuela Schwartz, 2025. <a href="https://conrad-ansorge.github.io/ansorge-static/">https://conrad-ansorge.github.io/ansorge-static/</a>'
col.collection_type = col_type
col.save()

In [None]:
# process those with GND first
# Raw "gnd" cell per project URI; if a URI occurs in several rows the last
# row wins.
gnd_by_uri = {row["URI"]: row["gnd"] for _, row in df.iterrows()}

# `data` maps every project URI to the GND URI built from its "gnd" cell
# (rows without a GND end up as ".../gnd/nan").
data = {
    project_uri: f"https://d-nb.info/gnd/{gnd}"
    for project_uri, gnd in gnd_by_uri.items()
}

# `cleaned_data` keeps only the URIs that really have a GND. The cell itself
# is tested for a missing value instead of searching for the text "nan" in
# the formatted URL, which ties the filter to how NaN happens to be printed.
cleaned_data = {
    project_uri: data[project_uri]
    for project_uri, gnd in gnd_by_uri.items()
    if pd.notna(gnd)
}

In [None]:
# Import every person that has a GND and link it to its project URI.
# broken_gnd collects the [project URI, GND URI] pairs for which
# import_from_normdata returned nothing; pmb_uris collects the pairs that
# were imported and linked.
broken_gnd = []
pmb_uris = []
for project_uri, gnd_uri in tqdm(cleaned_data.items()):
    person = import_from_normdata(gnd_uri, "person")
    if not person:
        broken_gnd.append([project_uri, gnd_uri])
        continue
    # Attach the project URI to the imported entity and put the entity into
    # the project collection.
    uri_obj, _ = Uri.objects.get_or_create(uri=project_uri, domain=domain)
    uri_obj.entity = person
    uri_obj.save()
    person.collection.add(col)
    pmb_uris.append([project_uri, gnd_uri])

In [None]:
# List the [project URI, GND URI] pairs collected in broken_gnd, one per line.
for failed_pair in broken_gnd:
    print(failed_pair)

In [None]:
# Create a Person for every row without GND, unless the row's project URI
# already resolves to an existing Person.
for _, row in tqdm(df.iterrows(), total=len(df)):
    if pd.notna(row["gnd"]):
        # Only rows without a GND are handled here.
        continue

    domain_uri = row["URI"]
    entity = None
    try:
        linked = Uri.objects.get(uri=domain_uri).entity
        # A Uri can exist without an entity attached (it is created first and
        # linked afterwards); treat that like a missing Uri instead of failing
        # with an AttributeError on `linked.id`.
        if linked is not None:
            entity = Person.objects.get(id=linked.id)
    except ObjectDoesNotExist:
        # Either no Uri with this value exists or its entity is not a Person.
        entity = None

    if entity is not None:
        continue

    # Build the Person from the sheet columns; optional cells are only set
    # when they are filled in.
    item = {}
    if pd.notna(row["firstname"]):
        item["first_name"] = row["firstname"]
    item["name"] = row["lastname"]
    if pd.notna(row["birth_date"]):
        item["start_date_written"] = int(row["birth_date"])
    if pd.notna(row["death_date"]):
        item["end_date_written"] = int(row["death_date"])
    entity = Person.objects.create(**item)
    entity.collection.add(col)

    # Point the project URI at the newly created Person.
    uri, _ = Uri.objects.get_or_create(uri=domain_uri, domain=domain)
    uri.entity = entity
    uri.save()