In [None]:
import json
import pandas as pd
from django.core.exceptions import MultipleObjectsReturned
from apis_core.apis_metainfo.models import Collection as ACollection
from apis_core.apis_entities.models import Person as APerson
from apis_core.apis_entities.models import Place as APlace
from apis_core.apis_entities.models import Institution as AInstitution
from apis_core.apis_entities.models import Work as AWork

In [None]:
# Display every Collection currently stored; this cell only reads, it does not modify anything.
ACollection.objects.all()

In [None]:
# Fetch the collection named after the source site, creating it on the first run.
# Every entity imported further down is attached to this collection.
col, _ = ACollection.objects.get_or_create(name="https://schnitzler.ub.uni-freiburg.de")

In [None]:
# Location of the JSON file to import (a relative path, resolved against the kernel's working directory).
file = "data/schnitzler.json"

In [None]:
# Parse the JSON file into plain Python objects.
# The text encoding is handled by open(); json.load() no longer accepts an
# `encoding` keyword (removed in Python 3.9, where passing it raises TypeError).
with open(file, encoding="utf-8") as data_file:
    data = json.load(data_file)

In [None]:
# Pull the list of documents out of the parsed response.
docs = data["response"]["docs"]

# One row per document. Cells that are missing for a document are filled with
# the string 'False', which the import loop uses as its "no value" marker.
df = pd.DataFrame(docs)
df = df.fillna("False")

In [None]:
# Show the column labels of the documents frame (the field names used by the import loop).
df.keys()

In [None]:
# Vocabulary entries used by the import loop. Each one is looked up by its
# label and created on the first run, so re-running this cell is harmless.
# NOTE(review): TextType and the *Relation classes are not imported in the
# visible cells; presumably the kernel provides them -- confirm.

# Text type under which abstracts are stored.
text_kind, _ = TextType.objects.get_or_create(name='abstract')

# Generic person <-> work relation; also the parent of the editor relation below.
basic_perswork_rel, _ = PersonWorkRelation.objects.get_or_create(
    name_reverse='bibliographical relation to person',
    name='bibliographical relation to work',
)

# Editor ("Herausgeber") relation, a child of the generic relation.
hrsg, _ = PersonWorkRelation.objects.get_or_create(
    parent_class=basic_perswork_rel,
    name_reverse='wurde herausgegeben von',
    name='herausgeber von',
)

# Institution (publisher) <-> work relation.
pub_org_rel_kind, _ = InstitutionWorkRelation.objects.get_or_create(
    name_reverse="work published by",
    name="published work",
)

# Place of publication <-> work relation.
pl_wo_rel, _ = PlaceWorkRelation.objects.get_or_create(
    name_reverse="work published in",
    name="publication place of",
)

In [None]:
# Import every document row as a work, together with its URIs, abstract,
# persons, publishers and publication places.
#
# NOTE(review): Work, WorkType, Uri, Text, PersonWork, InstitutionWork and
# PlaceWork are not imported in the visible cells, while AWork is imported but
# unused -- confirm that `Work` resolves to the intended model.

# Marker that fillna('False') put into cells with no value.
MISSING = 'False'


def _split_person_name(raw):
    """Return ``(name, first_name)`` parsed from a raw person string.

    Everything from the first "[" onwards is dropped. A comma-separated value
    yields its first part as ``name`` and its second part as ``first_name``;
    a value without a comma is used for both fields.
    """
    base = raw.split('[')[0].strip()
    if "," in base:
        parts = base.split(',')
        return parts[0].strip(), parts[1].strip()
    return base, base


def _get_or_create_person(name, first_name):
    """Return a person with the given names, creating one if necessary.

    If several matching persons already exist, a new one is created, as the
    original cell did.
    NOTE(review): reusing the first existing match might be preferable here,
    since every re-run adds another duplicate -- confirm the intent.
    """
    try:
        person, _ = APerson.objects.get_or_create(name=name, first_name=first_name)
    except MultipleObjectsReturned:
        # create() returns the instance itself, not an (object, created) pair.
        person = APerson.objects.create(name=name, first_name=first_name)
    return person


for i, row in df.iterrows():
    titles = row['ti_all_string']
    if titles == MISSING or not titles:
        # Without this guard the marker string would be indexed and a work named "F" created.
        print("row {}: no title, skipped".format(i))
        continue
    work, _ = Work.objects.get_or_create(name=titles[0])

    if row['id_all_facet'] != MISSING:
        uri_id = "https://schnitzler.ub.uni-freiburg.de/{}".format(row['id_all_facet'][0])
        Uri.objects.get_or_create(
            uri=uri_id,
            domain="https://schnitzler.ub.uni-freiburg.de",
            entity=work
        )
    if row['uri_all_string'] != MISSING:
        for uri in row['uri_all_string']:
            try:
                Uri.objects.get_or_create(uri=uri, entity=work)
            except Exception as exc:
                # Best effort: report the failing URI and carry on with the import.
                print("could not store URI {!r} from {}: {}".format(uri, row['uri_all_string'], exc))

    work.collection.add(col)
    if row['doctype_all_string'] != MISSING:
        work_kind, _ = WorkType.objects.get_or_create(name=row['doctype_all_string'][0])
        work.kind = work_kind
    if row['date_string'] != MISSING:
        # Only write a date when the row has one; otherwise the marker string would be stored.
        work.start_date_written = row['date_string']
    if row['abstract_string'] != MISSING:
        abstract, _ = Text.objects.get_or_create(text=row['abstract_string'], kind=text_kind)
        work.text.add(abstract)

    # NOTE(review): work.start_date is read below before work.save() is called.
    # If start_date is derived from start_date_written on save, the relations of
    # a newly created work get an empty date -- confirm.

    if row['person_all_string'] != MISSING:
        for raw_person in row['person_all_string']:
            name, first_name = _split_person_name(raw_person)
            person = _get_or_create_person(name, first_name)
            person.collection.add(col)
            # A "[" in the raw string selects the editor relation; all other
            # persons get the generic bibliographical relation.
            PersonWork.objects.get_or_create(
                relation_type=hrsg if "[" in raw_person else basic_perswork_rel,
                related_person=person,
                related_work=work,
                start_date=work.start_date
            )

    if row['pu_all_string'] != MISSING:
        for publisher_name in row['pu_all_string']:
            pub, _ = AInstitution.objects.get_or_create(name=publisher_name)
            pub.collection.add(col)
            InstitutionWork.objects.get_or_create(
                relation_type=pub_org_rel_kind,
                related_institution=pub,
                related_work=work,
                start_date=work.start_date
            )

    if row['pp_all_text'] != MISSING:
        for raw_places in row['pp_all_text']:
            # One value may list several places separated by commas.
            for place_name in (part.strip() for part in raw_places.split(",")):
                if not place_name:
                    continue
                pl, _ = APlace.objects.get_or_create(name=place_name)
                # Done per place so every place of a multi-place value joins the collection.
                pl.collection.add(col)
                PlaceWork.objects.get_or_create(
                    relation_type=pl_wo_rel,
                    related_place=pl,
                    related_work=work,
                    start_date=work.start_date
                )

    # Persist the kind and written date assigned above.
    work.save()
        

In [None]:
# Flatten a pandas.Series whose items are lists into the distinct values it contains:
# apply(pd.Series) spreads each list over columns, stack() folds those columns back
# into a single Series, reset_index(drop=True) discards the resulting multi-level
# index, and unique() removes duplicates.
df['ti_all_string'].apply(pd.Series).stack().reset_index(drop=True).unique()