#### This notebook parses the original object metadata retrieved from Collectie Nederland and writes a single JSON file (`all_objects.json`) with a simplified metadata structure

In [None]:
import json
import os

In [None]:
# folder that holds the original object metadata files
path_to_objects = 'MetadataEditorWebApp/ch_objects/'
# file names (without the folder) of every JSON file found in that folder
ch_json = list(filter(lambda name: name.endswith('.json'), os.listdir(path_to_objects)))

In [None]:
# NOTE: parse_object is defined in a later cell of this notebook; run that cell first
list_of_objects = []
for f in ch_json:
    # read with an explicit encoding instead of relying on the platform default
    with open(os.path.join(path_to_objects, f), 'r', encoding='utf-8') as jf:
        single_object = json.load(jf)
    # objects from CN are stored as a list of nodes; anything else
    # (objects from Europeana) is skipped
    if isinstance(single_object, list):
        list_of_objects.append(parse_object(single_object))

In [None]:
# collect the simplified objects under a single user and write them to disk
all_objects = {"user_id": "00000", "objects": list_of_objects}
with open('all_objects.json', 'w') as jf:
    jf.write(json.dumps(all_objects))

In [None]:
def _editable_text(entries) -> str:
    '''
    Concatenating the "@value" of every entry that is Dutch or has no language tag
    entries: list of dicts, the values of one editable property (title, description)
    '''
    # an entry without "@language" is treated the same way as a Dutch one
    return "".join(e["@value"] for e in entries if e.get("@language", "nl") == "nl")


def _dutch_values(entries) -> list:
    '''
    Collecting the values of the entries that are Dutch or have no language tag
    entries: list of dicts, the values of one metadata property
    '''
    values = []
    for entry in entries:
        if "@language" in entry:
            # language-tagged entry: keep the Dutch variant only
            if entry["@language"] == "nl":
                values.append(entry["@value"])
        else:
            # untagged entry: take its first value, whatever its key is
            values.append(next(iter(entry.values())))
    return values


def _editable_field(name, prop, value) -> dict:
    '''
    Building the dict of one editable field with its default flags
    name: str, field name shown to the user
    prop: str, prefixed property, e.g. "dc:title"
    value: str, text of the field
    '''
    return {"name": name,
            "property": prop,
            "value": value,
            "type": "editable",
            "hidden": "False",
            "removed": "False",
            "has_note": "",
            "has_warning": "",
            "by_user": "False"}


def _mapped_fields(node, field_mappings, namespaces, skip=()) -> list:
    '''
    Building the non-editable fields of one node, in the order of the mapping
    node: dict, one node of the original object metadata
    field_mappings: dict, prefixed property -> field name
    namespaces: dict, short prefix (e.g. "dc") -> full namespace URI
    skip: collection of prefixed properties that are handled elsewhere
    '''
    fields = []
    for prop, field_name in field_mappings.items():
        if prop in skip:
            continue
        # match on the exact prefix, so that "dc" does not also catch "dcterms"
        short_prefix, separator, local_name = prop.partition(":")
        namespace = namespaces.get(short_prefix)
        if not separator or namespace is None:
            continue
        entries = node.get(f"{namespace}{local_name}")
        if entries:
            fields.append({"name": field_name,
                           "property": prop,
                           # multiple values are joined into one string
                           "value": ", ".join(_dutch_values(entries)),
                           "type": "non-editable"})
    return fields


def parse_object(object_json, field_mappings=None) -> dict:
    '''
    Parsing the metadata fields of one object from the original json file
    object_json: list of dicts, the nodes of the original object metadata from Collectie Nederland
    field_mappings: dict (optional), prefixed property -> field name;
                    loaded from fields_properties_mapping.json when not given
    returns: dict with the keys "object_id", "img" and "fields"
    '''
    # export mappings
    if field_mappings is None:
        with open("MetadataEditorWebApp/fields_properties_mapping.json", 'r', encoding='utf-8') as jf:
            field_mappings = json.load(jf)

    # define prefixes
    edm_prefix = "http://www.europeana.eu/schemas/edm/"
    dc_prefix = "http://purl.org/dc/elements/1.1/"
    dc_terms_prefix = "http://purl.org/dc/terms/"
    nave_prefix = "http://schemas.delving.eu/nave/terms/"

    object_dict = {
        "object_id": "",
        "img": "",
        "fields": []
    }
    fields = object_dict["fields"]

    for node in object_json:
        # only the first type of a node is inspected; a node without a type is ignored
        node_type = (node.get("@type") or [""])[0]

        if "ProvidedCHO" in node_type:
            # position of the next editable field; title, description and keywords
            # are kept, in this order, in front of all the other fields
            lead_index = 0

            # title (multiple values are concatenated without a separator)
            title_entries = node.get(f"{dc_prefix}title")
            if title_entries:
                fields.insert(lead_index, _editable_field("Titel", "dc:title",
                                                          _editable_text(title_entries)))
                lead_index += 1

            # description (multiple values are concatenated without a separator)
            description_entries = node.get(f"{dc_prefix}description")
            if description_entries:
                fields.insert(lead_index, _editable_field("Beschrijving", "dc:description",
                                                          _editable_text(description_entries)))
                lead_index += 1

            # subject
            subject_entries = node.get(f"{dc_prefix}subject")
            if subject_entries:
                keywords_dict = {"name": "Onderwerp: Wat",
                                 "property": "dc:subject",
                                 "value": {},
                                 "type": "keywords"}
                # subjects tagged with another language than Dutch are skipped
                for keyword in _dutch_values(subject_entries):
                    keywords_dict["value"][keyword] = {"hidden": "False",
                                                       "removed": "False",
                                                       "has_note": "",
                                                       "by_user": "False"}
                fields.insert(lead_index, keywords_dict)

            # other non-editable fields (dcterms and dc)
            fields.extend(_mapped_fields(node, field_mappings,
                                         {"dcterms": dc_terms_prefix, "dc": dc_prefix},
                                         skip=("dc:title", "dc:description", "dc:subject")))

        if "Aggregation" in node_type:
            # object ID
            object_dict["object_id"] = node["@id"]

            # img: first value of the first edm:object entry
            if node.get(f"{edm_prefix}object"):
                object_dict["img"] = list(node[f"{edm_prefix}object"][0].values())[0]

            # other non-editable fields (edm)
            fields.extend(_mapped_fields(node, field_mappings, {"edm": edm_prefix}))

            # only the first dc:rights value is used
            if node.get(f"{dc_prefix}rights"):
                fields.append({"name": "Rechthebbende(n)",
                               "property": "dc:rights",
                               "value": node[f"{dc_prefix}rights"][0]["@value"],
                               "type": "non-editable"})

        if "DcnResource" in node_type:
            # other non-editable fields (nave)
            fields.extend(_mapped_fields(node, field_mappings, {"nave": nave_prefix}))

    return object_dict