In [1]:
import os
import arcpy
import json

from arcpy import metadata as md
# Name of the folder under C:\Users used to build the ArcGIS workspace path below.
# NOTE(review): presumably the Windows account ID of whoever runs this — update per user.
S_NUMBER = "s156485"

In [2]:
# Build the per-user ArcGIS folder once, then use it as the arcpy workspace.
working_dir = os.path.join("C:\\", "Users", S_NUMBER, "Documents", "ArcGIS")
arcpy.env.workspace = working_dir
working_dir

'C:\\Users\\s156485\\Documents\\ArcGIS'

In [3]:
# Look up the ArcGIS install directory (displayed for reference).
install_info = arcpy.GetInstallInfo("desktop")
directory = install_info["InstallDir"]
directory

'c:\\program files\\arcgis\\pro\\'

In [4]:
# Datasets to update in this run; keep in sync with `update_vars.RUN_ME`.
in_features = [
    "ca_hq_transit_areas",
    "ca_hq_transit_stops",
    "ca_transit_routes",
    "ca_transit_stops",
    "speeds_by_stop_segments",
    "speeds_by_route_time_of_day",
]

# Names of the file geodatabases used for staging and for the final output.
staging_location = "staging.gdb"
out_location = "open_data.gdb"

def feature_class_in_gdb_path(my_gdb, file_name):
    """
    Return the path of `file_name` inside the geodatabase `my_gdb`.

    The two parts are simply joined with the OS path separator.
    """
    gdb_member_path = os.path.join(my_gdb, file_name)
    return gdb_member_path


### Unzip zipped shapefiles, download metadata.json into local path

### Set FGDC field defs for each dataset and export XML (do once when new dataset added)

Only the FGDC standard keeps field names and definitions.
See if we can use this and combine it with our ISO 19139 standard later.

In [5]:
# Read in json with all the changes we need for each layer.
# os.path.join avoids hand-written backslashes in the path (an unescaped
# "\m" inside a string literal is an invalid escape sequence).
metadata_json_path = os.path.join(working_dir, "metadata.json")

# JSON is read as UTF-8 explicitly rather than relying on the platform default encoding.
with open(metadata_json_path, encoding="utf-8") as metadata_file:
    meta_dict = json.load(metadata_file)

    
def update_metadata_class(this_feature_class, meta_dict_for_dataset: dict):
    """
    Copy dataset-level values from the JSON dict onto the feature class's
    arcpy.metadata object and save it.

    Only the elements exposed as attributes of the metadata class are set:
    title, tags, summary, description and accessConstraints.
    """
    # (metadata attribute, key in the dataset's JSON entry), applied in this order
    attribute_to_json_key = (
        ("title", "dataset_name"),
        ("tags", "theme_keywords"),
        ("summary", "summary_purpose"),
        ("description", "description"),
        ("accessConstraints", "public_access"),
    )

    feature_metadata = md.Metadata(this_feature_class)
    for attribute_name, json_key in attribute_to_json_key:
        setattr(feature_metadata, attribute_name, meta_dict_for_dataset[json_key])

    feature_metadata.save()

In [6]:
def export_fgdc_metadata(one_feature_class):
    """
    Export the metadata of one staged feature class as an FGDC XML file.

    FGDC is used because it is the only format that keeps field names
    and definitions available.

    The dataset's entry in the module-level `meta_dict` is first applied to
    the feature class through `update_metadata_class`, then the metadata is
    exported to `<working_dir>/<one_feature_class>_fgdc.xml`.

    Parameters
    ----------
    one_feature_class : str
        Name of the feature class inside the staging gdb; also the key
        looked up in `meta_dict`.

    Raises
    ------
    KeyError
        If `one_feature_class` has no entry in `meta_dict`.
    """
    this_feature_class = feature_class_in_gdb_path(
        staging_location,
        one_feature_class
    )

    subset_meta_dict = meta_dict[one_feature_class]

    update_metadata_class(this_feature_class, subset_meta_dict)

    # Open a fresh Metadata object after the update above has been saved
    source_metadata = md.Metadata(this_feature_class)

    # Export metadata XML in FGDC.
    # No "./" prefix on the file name: os.path.join already supplies the
    # separator, and this matches how the ISO export builds its output path.
    meta_output = os.path.join(working_dir, f"{one_feature_class}_fgdc.xml")

    TRANSLATOR = "FGDC_CSDGM"

    source_metadata.exportMetadata(
        outputPath = meta_output,
        metadata_export_option = TRANSLATOR
    )
    print(f"Exported FGDC XML for {one_feature_class}")


### Do field data dictionary updates in Jupyter Hub
### Use shapefile and write it to a file gdb layer

In [7]:
# Clean up last run (if applicable): remove each dataset's previously
# exported `<dataset>.xml` from the working directory.
for f in in_features:
    # os.path.join instead of a hand-written backslash: "\{" inside an
    # f-string is an invalid escape sequence.
    feature_path = os.path.join(working_dir, f"{f}.xml")
    if os.path.exists(feature_path):
        os.remove(feature_path)

In [8]:
# Create the staging file geodatabase inside the working directory.
arcpy.management.CreateFileGDB(out_folder_path=working_dir, out_name="staging")

In [9]:
def shp_to_feature_class(file_name: str):
    """
    Load one shapefile into the staging gdb as a feature class.

    The shapefile is expected at `<file_name>/<file_name>.shp`
    (e.g. routes_assembled/routes_assembled.shp). A feature class of the
    same name already in the staging gdb is deleted first. The resulting
    field names are printed, just in case any of them needs renaming.
    """
    source_shp = os.path.join(file_name, f"{file_name}.shp")
    staged_feature_class = os.path.join(staging_location, file_name)

    # Drop the copy left over from an earlier run, if there is one
    if arcpy.Exists(staged_feature_class):
        arcpy.management.Delete(staged_feature_class)

    # Import the shapefile into the staging geodatabase
    arcpy.FeatureClassToGeodatabase_conversion(source_shp, staging_location)

    # Collect the field names of the new feature class, then print them
    field_names = [
        field.name for field in arcpy.ListFields(staged_feature_class)
    ]

    print(staged_feature_class)
    for name in field_names:
        print(name)


# Stage every dataset listed in `in_features`
for dataset_name in in_features:
    shp_to_feature_class(dataset_name)

staging.gdb\ca_hq_transit_areas
OBJECTID
Shape
agency_pri
agency_sec
hqta_type
hqta_detai
route_id
base64_url
base64_u_1
org_id_pri
org_id_sec
mpo
plan_name
stop_id
avg_trips_
Shape_Length
Shape_Area
staging.gdb\ca_hq_transit_stops
OBJECTID
Shape
agency_pri
hqta_type
stop_id
route_id
hqta_detai
agency_sec
base64_url
base64_u_1
org_id_pri
org_id_sec
avg_trips_
mpo
plan_name
staging.gdb\ca_transit_routes
OBJECTID
Shape
org_id
agency
route_id
route_type
route_name
shape_id
n_trips
base64_url
Shape_Length
staging.gdb\ca_transit_stops
OBJECTID
Shape
org_id
agency
stop_id
stop_name
n_routes
route_ids_
routetypes
n_arrivals
n_hours_in
meters_to_
base64_url
district_n
staging.gdb\speeds_by_stop_segments
OBJECTID
Shape
route_id
direction_
stop_pair
stop_pair_
time_perio
p50_mph
n_trips
p20_mph
p80_mph
district_n
org_id
agency
base64_url
Shape_Length
staging.gdb\speeds_by_route_time_of_day
OBJECTID
Shape
route_id
direction_
time_perio
speed_mph
district_n
org_id
agency
base64_url
route_name
Shap

In [10]:
def rename_columns_with_dict(this_feature_class, rename_dict: dict):
    """
    Rename fields of a feature class according to `rename_dict`.

    `rename_dict` maps an existing field name to its new name; the new name
    is also used as the field alias. Fields not present in the dict are
    left untouched. Renaming must go through AlterField_management,
    because changing a name in the XML won't carry through when you sync.
    """
    existing_names = [
        field.name for field in arcpy.ListFields(this_feature_class)
    ]

    for old_name in existing_names:
        if old_name not in rename_dict:
            continue

        new_name = rename_dict[old_name]
        arcpy.AlterField_management(
            this_feature_class,
            old_name,
            new_name,  # new_field_name
            new_name,  # new_field_alias
        )

In [12]:
# Peek at the column renames configured for one dataset
meta_dict["ca_hq_transit_areas"]["rename_cols"]

{'agency_pri': 'agency_primary',
 'agency_sec': 'agency_secondary',
 'hqta_detai': 'hqta_details',
 'base64_url': 'base64_url_primary',
 'base64_u_1': 'base64_url_secondary',
 'org_id_pri': 'org_id_primary',
 'org_id_sec': 'org_id_secondary',
 'avg_trips_': 'avg_trips_per_peak_hr'}

In [13]:
def update_feature_class_with_json(one_feature_class, meta_json_dict: dict):
    """
    Apply the JSON-driven updates to one feature class in the staging gdb.

    Renames columns when the dataset's entry has a "rename_cols" mapping,
    prints the resulting field names as a check, then updates the metadata
    elements that can be accessed through the arcpy.metadata class.
    """
    staged_feature_class = feature_class_in_gdb_path(
        staging_location, one_feature_class
    )
    dataset_meta = meta_json_dict[one_feature_class]

    if "rename_cols" in dataset_meta:
        rename_columns_with_dict(
            staged_feature_class, dataset_meta["rename_cols"]
        )

    # Check that renaming is done
    print(staged_feature_class)
    for field in arcpy.ListFields(staged_feature_class):
        print(field.name)

    # TODO: FGDC field definitions are not synced here. An
    # `import_fgdc_metadata_and_sync(one_feature_class)` step (operating on
    # one_feature_class, which sits outside of staging/) doesn't seem to
    # exist anywhere; it or something similar should be (re)created to
    # properly update FGDC. For now, it can be done semi-manually (Hub scripts
    # update field-level data, manually edit summary/descriptions).

    # Now update the rest of the metadata elements
    update_metadata_class(staged_feature_class, dataset_meta)

    
# Rename columns and refresh metadata for every staged dataset
for dataset_name in in_features:
    update_feature_class_with_json(dataset_name, meta_dict)


staging.gdb\ca_hq_transit_areas
OBJECTID
Shape
agency_primary
agency_secondary
hqta_type
hqta_details
route_id
base64_url_primary
base64_url_secondary
org_id_primary
org_id_secondary
mpo
plan_name
stop_id
avg_trips_per_peak_hr
Shape_Length
Shape_Area
staging.gdb\ca_hq_transit_stops
OBJECTID
Shape
agency_primary
hqta_type
stop_id
route_id
hqta_details
agency_secondary
base64_url_primary
base64_url_secondary
org_id_primary
org_id_secondary
avg_trips_per_peak_hr
mpo
plan_name
staging.gdb\ca_transit_routes
OBJECTID
Shape
org_id
agency
route_id
route_type
route_name
shape_id
n_trips
base64_url
Shape_Length
staging.gdb\ca_transit_stops
OBJECTID
Shape
org_id
agency
stop_id
stop_name
n_routes
route_ids_served
routetypes
n_arrivals
n_hours_in_service
meters_to_ca_state_highway
base64_url
district_name
staging.gdb\speeds_by_stop_segments
OBJECTID
Shape
route_id
direction_id
stop_pair
stop_pair_name
time_period
p50_mph
n_trips
p20_mph
p80_mph
district_name
org_id
agency
base64_url
Shape_Length
st

In [14]:
# Re-run this whenever data_dictionary.yml is updated,
# so the fields reflect the new definitions.
for dataset_name in in_features:
    export_fgdc_metadata(dataset_name)

Exported FGDC XML for ca_hq_transit_areas
Exported FGDC XML for ca_hq_transit_stops
Exported FGDC XML for ca_transit_routes
Exported FGDC XML for ca_transit_stops
Exported FGDC XML for speeds_by_stop_segments
Exported FGDC XML for speeds_by_route_time_of_day


In [15]:
# In ArcGIS Pro, instead of FGDC for Desktop, use ISO 19139 GML 3.2
# https://sv03tmcpo.ct.dot.ca.gov/portal/apps/sites/#/geep/pages/open-data-request
# Migrating to Pro: https://pro.arcgis.com/en/pro-app/latest/arcpy/metadata/migrating-from-arcmap-to-arcgis-pro.htm
TRANSLATOR = "ISO19139_GML32"

for f in in_features:
    this_feature_class = feature_class_in_gdb_path(staging_location, f)

    # Where this dataset's exported XML goes
    meta_output = os.path.join(working_dir, f"{f}.xml")

    # Export the feature class's current (original) metadata as XML
    source_metadata = md.Metadata(this_feature_class)
    source_metadata.exportMetadata(
        outputPath=meta_output,
        metadata_export_option=TRANSLATOR,
    )

    print(f"successful export: {f}")


successful export: ca_hq_transit_areas
successful export: ca_hq_transit_stops
successful export: ca_transit_routes
successful export: ca_transit_stops
successful export: speeds_by_stop_segments
successful export: speeds_by_route_time_of_day


### Update XML in JupyterHub

Run `python metadata_update_pro.py`

### Import FGDC metadata for each dataset manually by opening ArcGIS Pro
The **Metadata > Import** button in ArcGIS Pro (with the metadata type set to FGDC) does something different from the `metadata.importMetadata` method, which does not produce the same result. Importing manually works for the fgdb metadata, but only one dataset at a time.

Import the FGDC metadata first to get the field descriptions populated. If we do this second, certain items in the metadata will get overwritten and set to blank.

Somewhere along the way, once the FGDC metadata is applied first, the tags we included get erased. Sad.

### With new XML, finish up workflow

### Write layers to open_data gdb

In [7]:
# Create the output file geodatabase inside the working directory.
arcpy.management.CreateFileGDB(out_folder_path=working_dir, out_name="open_data")

In [8]:
# Write layers to open_data (with the overwritten and updated XML already)
def write_feature_class_to_open_data(
    one_feature_class,
    staging_gdb = staging_location,
    output_gdb = out_location,
):
    """
    Copy one feature class from the staging gdb into the output gdb.

    A feature class of the same name already in the output gdb is
    deleted first, because we don't want _1 appended to the end.
    """
    source_path = feature_class_in_gdb_path(staging_gdb, one_feature_class)
    destination_path = feature_class_in_gdb_path(output_gdb, one_feature_class)

    # Clear out the previous copy so the name stays free
    if arcpy.Exists(destination_path):
        arcpy.management.Delete(destination_path)

    # Copy over the feature class from staging.gdb to open_data.gdb
    arcpy.conversion.FeatureClassToFeatureClass(
        source_path, output_gdb, one_feature_class
    )
    

# Copy every dataset from the staging gdb to the output gdb, reporting each one
for f in in_features:
    write_feature_class_to_open_data(one_feature_class=f)
    print(f"in open_data.gdb: {f}")

in open_data.gdb: ca_hq_transit_areas
in open_data.gdb: ca_hq_transit_stops
in open_data.gdb: ca_transit_routes
in open_data.gdb: ca_transit_stops
in open_data.gdb: speeds_by_stop_segments
in open_data.gdb: speeds_by_route_time_of_day


### Exit and restart ArcPro to clear locks on layers in overwriting

If we don't exit, the layers stay locked because ArcGIS Pro still shows them as in use (from copying staging to open_data), and that lock prevents writing from open_data to the enterprise gdb.

License Select must be set to `Advanced` for this to work

In [7]:
# Destination passed as `out_path` when the finished layers are copied out of open_data.gdb.
# NOTE(review): looks like an .sde connection file resolved relative to the arcpy workspace — confirm.
ENTERPRISE_DATABASE = "HQrail(edit)@sv03tmcsqlprd1.sde"

In [8]:
# Copy each finished layer from open_data.gdb to the enterprise database
for layer_name in in_features:
    arcpy.FeatureClassToFeatureClass_conversion(
        in_features=feature_class_in_gdb_path(out_location, layer_name),
        out_path=ENTERPRISE_DATABASE,
        out_name=layer_name,
    )