# ISO 19139 GML 3.2 metadata format

Steps:
1. See what's in the filled-out XML.
1. See what's in the sparse XML.
1. Which sections need to be lifted to become part of `default.xml`?
1. Which parts belong to a dataset and need to follow it, even after the template is applied?
1. Which parts need to be overwritten?

In [1]:
import xml.etree.ElementTree as ET
import xmltodict 

In [2]:
# Metadata XML that has already been filled out; the reference side of the comparison.
FILLED_OUT_XML_FILE = "ca_hq_transit_stops.xml"
# Sparse metadata XML that is compared against the filled-out one.
SPARSE_XML_FILE = "ca_transit_routes.xml"

In [3]:
def xml_to_json(path: str) -> dict:
    """Load an XML file and return its contents as a plain nested ``dict``.

    The file is parsed with ElementTree, re-serialized, and handed to
    ``xmltodict.parse``. Attributes are stored without a prefix
    (``attr_prefix=""``) and element text that sits next to attributes is
    stored under the ``"text"`` key (``cdata_key="text"``). Namespace
    processing is left off, so keys keep their ``ns0:``/``ns1:`` prefixes.

    Args:
        path: Location of the XML file to read.

    Returns:
        The parsed document, or an empty ``dict`` when the file cannot be
        read or is not well-formed XML. The failure reason is printed.
    """
    # Imported locally so the helper carries its own dependency.
    from xml.parsers.expat import ExpatError

    print(f"Loading XML as JSON from {path}")
    try:
        # Round-trip through ElementTree first: its serializer rewrites the
        # namespace prefixes to ns0/ns1/..., which gives both documents the
        # same key names regardless of the prefixes used in the files.
        xml = ET.tostring(ET.parse(path).getroot())
        return xmltodict.parse(
            xml,
            attr_prefix="",
            cdata_key="text",
            dict_constructor=dict,
        )
    except (OSError, ET.ParseError, ExpatError) as err:
        # Only read/parse failures are treated as "no metadata"; anything
        # else (including KeyboardInterrupt) is allowed to propagate.
        print(f"Loading failed for {path}: {err}")
        return {}

In [4]:
# Parse both metadata files into nested dicts (xml_to_json returns {} when loading fails).
filled_meta = xml_to_json(FILLED_OUT_XML_FILE)
sparse_meta = xml_to_json(SPARSE_XML_FILE)

Loading XML as JSON from ca_hq_transit_stops.xml
Loading XML as JSON from ca_transit_routes.xml


In [7]:
# Step inside the root `ns0:MD_Metadata` element so the two documents
# can be compared child by child.
filled_meta2 = filled_meta["ns0:MD_Metadata"]
sparse_meta2 = sparse_meta["ns0:MD_Metadata"]

In [16]:
def compare_dict_items(my_dict1: dict, my_dict2: dict) -> None:
    """Print each entry of ``my_dict1`` next to its counterpart in ``my_dict2``.

    For every key of ``my_dict1`` a banner with the key is printed, followed
    by the value from ``my_dict1`` (labelled "filled meta") and the value
    from ``my_dict2`` (labelled "sparse meta"). When ``my_dict2`` does not
    contain the key, ``MISSING KEY <key>`` is printed instead.

    Only the top level is compared, and keys that exist solely in
    ``my_dict2`` are not reported.

    Args:
        my_dict1: Dictionary whose keys drive the comparison.
        my_dict2: Dictionary looked up for each of those keys.
    """
    for key, value in my_dict1.items():
        print(f"**********{key}**********")
        print("filled meta")
        print(value)
        print("sparse meta")
        # Membership test rather than `.get(key) is not None`: a key that is
        # present with a None value (xmltodict produces None for empty
        # elements) exists and must not be reported as missing.
        if key in my_dict2:
            print(my_dict2[key])
        else:
            print(f"MISSING KEY {key}")

In [17]:
# Walk the filled-out metadata's top-level keys and print each value beside the sparse one.
compare_dict_items(filled_meta2, sparse_meta2)

**********xmlns:ns0**********
filled meta
http://www.isotc211.org/2005/gmd
sparse meta
http://www.isotc211.org/2005/gmd
**********xmlns:ns1**********
filled meta
http://www.isotc211.org/2005/gco
sparse meta
http://www.isotc211.org/2005/gco
**********xmlns:ns2**********
filled meta
http://www.opengis.net/gml/3.2
sparse meta
MISSING KEY xmlns:ns2
**********ns0:language**********
filled meta
{'ns0:LanguageCode': {'codeList': 'http://www.loc.gov/standards/iso639-2/php/code_list.php', 'codeListValue': 'eng', 'codeSpace': 'ISO639-2', 'text': 'eng'}}
sparse meta
{'ns0:LanguageCode': {'codeList': 'http://www.loc.gov/standards/iso639-2/php/code_list.php', 'codeListValue': 'eng', 'codeSpace': 'ISO639-2', 'text': 'eng'}}
**********ns0:characterSet**********
filled meta
{'ns0:MD_CharacterSetCode': {'codeList': 'http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_CharacterSetCode', 'codeListValue': 'utf8', 'codeSpace': 'ISOTC211/19115', 'text': 'utf8'}}
sparse meta
{'ns0:MD_Characte

In [9]:
# Display the sparse metadata (everything under its `ns0:MD_Metadata` root).
sparse_meta2

{'xmlns:ns0': 'http://www.isotc211.org/2005/gmd',
 'xmlns:ns1': 'http://www.isotc211.org/2005/gco',
 'ns0:language': {'ns0:LanguageCode': {'codeList': 'http://www.loc.gov/standards/iso639-2/php/code_list.php',
   'codeListValue': 'eng',
   'codeSpace': 'ISO639-2',
   'text': 'eng'}},
 'ns0:characterSet': {'ns0:MD_CharacterSetCode': {'codeList': 'http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_CharacterSetCode',
   'codeListValue': 'utf8',
   'codeSpace': 'ISOTC211/19115',
   'text': 'utf8'}},
 'ns0:hierarchyLevel': {'ns0:MD_ScopeCode': {'codeList': 'http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_ScopeCode',
   'codeListValue': 'dataset',
   'codeSpace': 'ISOTC211/19115',
   'text': 'dataset'}},
 'ns0:hierarchyLevelName': {'ns1:CharacterString': 'dataset'},
 'ns0:contact': {'ns1:nilReason': 'missing'},
 'ns0:dateStamp': {'ns1:Date': '2022-10-06'},
 'ns0:metadataStandardName': {'ns1:CharacterString': 'ISO 19139 Geographic Information - Metadata - 

In [None]:
# Lift necessary stuff from 1st time through shp to file gdb
def lift_missing_dataset_elements(metadata_json):
    """Collect the dataset-specific entries that have to be carried over.

    Looks under the ``ns0:MD_Metadata`` root of ``metadata_json`` and
    returns a new dict holding two of its entries, in this order:

    * ``ns0:dataQualityInfo`` (data quality section)
    * ``xmlns:ns2`` (namespace declaration)

    The values are the same objects as in the input, not copies. A
    ``KeyError`` is raised when the root or either entry is absent.
    """
    root = metadata_json["ns0:MD_Metadata"]
    lifted_keys = ("ns0:dataQualityInfo", "xmlns:ns2")
    return {name: root[name] for name in lifted_keys}

# `meta` is never assigned in this notebook, so use the filled-out document:
# it is the one that carries `xmlns:ns2` (the sparse one reports it missing).
lift_missing_dataset_elements(filled_meta)

In [None]:
# Display everything under the root element of the filled-out metadata
# (`meta` is never assigned in this notebook; `filled_meta` is the parsed document).
filled_meta['ns0:MD_Metadata']




In [None]:
# Draft specification of the inputs needed to fill in a dataset's metadata.
# Each entry maps a field name to ``(type, default)``; an Ellipsis (``...``)
# default marks a field with no default, i.e. one that must be supplied.
# The annotated-assignment form (``name: str = value``) is not valid inside a
# dict literal, so the same information is spelled out as tuples here.
# The pydantic model in the next cell declares these same fields and rebinds
# this name.
metadata_input = {
    "dataset_name": (str, ...),
    "publish_entity": (str, "California Integrated Travel Project"),
    "abstract": (str, ...),
    "purpose": (str, ...),
    "beginning_date": (str, ...),
    "end_date": (str, ...),
    "place": (str, "California"),
    "status": (str, "Complete"),
    "frequency": (str, "Monthly"),
    "theme_topics": (dict, ...),
    "methodology": (str, ...),
    "data_dict_type": (str, ...),
    "data_dict_url": (str, ...),
    "contact_organization": (str, "Caltrans"),
    "contact_person": (str, ...),
    "contact_email": (str, "hello@calitp.org"),
    "horiz_accuracy": (str, "0.00004 decimal degrees"),
}

In [None]:
from pydantic import BaseModel
from typing import Dict, List

# Pydantic model describing the inputs collected for one dataset's metadata.
# Fields declared without a default must be supplied when the model is
# instantiated; the others fall back to the default shown.
class metadata_input(BaseModel):
    # Identification and description of the dataset.
    dataset_name: str
    publish_entity: str = "California Integrated Travel Project"
    abstract: str
    purpose: str
    # NOTE(review): dates are typed as plain strings, so no date format is
    # enforced here — confirm the format the template expects.
    beginning_date: str
    end_date: str
    place: str = "California"
    status: str = "Complete"
    frequency: str = "Monthly"
    # NOTE(review): bare `Dict`, so key/value types are unconstrained —
    # confirm the intended shape.
    theme_topics: Dict
    methodology: str
    # presumably describes the data dictionary and where to find it — verify
    data_dict_type: str
    data_dict_url: str
    # Contact details.
    contact_organization: str = "Caltrans"
    contact_person: str
    contact_email: str = "hello@calitp.org"
    horiz_accuracy: str = "0.00004 decimal degrees"

In [None]:
# List the sub-sections of the data quality block in the filled-out metadata
# (`meta` is never assigned in this notebook; `filled_meta` is the parsed document).
filled_meta['ns0:MD_Metadata']['ns0:dataQualityInfo']["ns0:DQ_DataQuality"].keys()

In [None]:
# This entire dict item needs to be lifted and appended
# because it's not in default.
# Read from `filled_meta`: `meta` is never assigned in this notebook, and the
# filled-out document is the one that has `xmlns:ns2`.
filled_meta['ns0:MD_Metadata']['xmlns:ns2']