# Add metadata to zipped shapefile

* `pyshp`: https://github.com/GeospatialPython/pyshp
* `pyshp` doesn't appear to support writing metadata: https://github.com/GeospatialPython/pyshp/issues/77, https://gis.stackexchange.com/questions/57635/updating-attributes-using-pyshp
* `arcpy_metadata`: https://github.com/ucd-cws/arcpy_metadata

Metadata is stored as an XML file in the same folder as the shapefile's other component files.
https://desktop.arcgis.com/en/arcmap/10.3/manage-data/metadata/the-arcgis-metadata-format.htm

https://chrishavlin.com/tag/pyshp/
Sample XML: https://water.usgs.gov/GIS/metadata/usgswrd/XML/physio.xml
https://github.com/chrishavlin/learning_shapefiles/blob/master/src/inspect_shapefile.py

In [1]:
#!pip install pyshp -- works, but can't deal with metadata
#!pip install arcpy_metadata -- we don't have arcpy
#!pip install dbfpy -- works, but then the module won't import
# import dbfpy works. but dbfpy.dbf doesn't work


#https://github.com/GeospatialPython/pyshp/blob/master/changelog.txt
# Use a much older version to get the Editor class
!pip install pyshp==1.2.12



In [2]:
import geopandas as gpd
import pandas as pd
import os
import shutil

def make_shapefile(df, path):
    """
    Export a GeoDataFrame as an ESRI shapefile inside its own folder.

    The folder is named after `path` with any extension stripped, and the
    shapefile inside it carries the same base name. An existing folder of
    that name is deleted first. The folder is NOT zipped here; compressing
    it is left to the caller.

    Parameters
    ==========
    df: gpd.GeoDataFrame to be saved as a shapefile
    path: str, local path used to name the output folder.
            Ex: "folder_name/census_tracts"
                "folder_name/census_tracts.zip"
            Both write folder_name/census_tracts/census_tracts.shp

    Remember: ESRI only takes 10 character column names!!
    """
    # Strip the extension (can input filename.zip or filename)
    dirname = os.path.splitext(path)[0]
    print(f"Path name: {path}")
    print(f"Dirname (1st element of path): {dirname}")
    # Make sure there's no folder with the same name
    shutil.rmtree(dirname, ignore_errors=True)
    # makedirs rather than mkdir: a nested path such as
    # "folder_name/census_tracts" must work even when folder_name is missing
    os.makedirs(dirname)
    shapefile_name = f"{os.path.basename(dirname)}.shp"
    print(f"Shapefile name: {shapefile_name}")
    # Export shapefile into its own folder with the same name
    shapefile_path = f"{dirname}/{shapefile_name}"
    df.to_file(driver="ESRI Shapefile", filename=shapefile_path)
    print(f"Shapefile component parts folder: {shapefile_path}")
    

# Load the assembled stops, keep only the rows whose itp_id equals 182,
# and renumber the index from zero before exporting them as a shapefile.
all_stops = gpd.read_parquet("./stops_assembled.parquet")
keep_row = all_stops.itp_id == 182
stops = all_stops.loc[keep_row].reset_index(drop=True)

make_shapefile(stops, "test_stops")



Path name: test_stops
Dirname (1st element of path): test_stops
Shapefile name: test_stops.shp
Shapefile component parts folder: test_stops/test_stops.shp


In [2]:
import shapefile
import pprint as p

def modify(key, value, e, key_index=0, value_index=2):
    """
    Overwrite one field of the first record whose key field equals `key`.

    Parameters
    ==========
    key: value looked for in position `key_index` of each record
    value: new value stored in position `value_index` of the matching record
    e: object exposing `records`, a sequence of mutable indexable records
       (called here with a shapefile.Editor)
    key_index: int, position of the field compared with `key` (default 0)
    value_index: int, position of the field that is overwritten (default 2)

    Only the first matching record is changed, in place. Nothing happens
    when no record matches.
    """
    for record in e.records:
        if record[key_index] == key:
            record[value_index] = value
            break

def editShape():
    """
    Open the test_stops .dbf in a shapefile.Editor, print field 1 of
    records 1, 3 and 7, run three modify() calls, then print the same
    fields again so the before/after values can be compared.
    """
    SHP_PATH = "./test_stops/"
    editor = shapefile.Editor(f"{SHP_PATH}test_stops.dbf")
    sample_rows = (1, 3, 7)

    def show_samples():
        # Pretty-print field 1 of each sampled record
        for row in sample_rows:
            p.pprint(editor.records[row][1])

    show_samples()
    for key in ("1131", "11606", "11647"):
        modify(key, "1130", editor)
    print("--------- AFTER EDIT ---------")
    show_samples()
    # Saving is commented out, so the edits are not written back to disk
    #editor.save(f"{SHP_PATH}test_stops.dbf")

editShape()


'1131'
'11606'
'11647'
--------- AFTER EDIT ---------
'1131'
'11606'
'11647'


In [18]:
import shapefile
# xml.etree.cElementTree was removed in Python 3.9; xml.etree.ElementTree
# offers the same API and picks up the C accelerator on its own.
import xml.etree.ElementTree as ET

# Folder holding the shapefile parts, and the base name they share
SHP_PATH = "./test_stops/"
basename = "test_stops"
# The reader is opened by base name (no file extension)
sf = shapefile.Reader(f"{SHP_PATH}{basename}")

In [14]:
# Root <metadata> element of the XML document being built
metadata = ET.Element("metadata")

In [21]:
# Nest <eainfo> -> <detailed name=basename> -> <attr> under the root element
eainfo = ET.SubElement(metadata, "eainfo")
detailed = ET.SubElement(eainfo,"detailed",name=basename)
attr = ET.SubElement(detailed,"attr")

In [25]:
# Add the three children of <attr>, each holding a fixed literal string
ET.SubElement(attr, "attrlbl").text = "label"
ET.SubElement(attr, "attrtype").text = "type"
# if string with small number of types
ET.SubElement(attr, "attrrange").text = "range"

In [26]:
# Wrap the root element in a tree and write it to <SHP_PATH><basename>_auto.xml
tree = ET.ElementTree(metadata)
tree.write(f"{SHP_PATH}{basename}_auto.xml")

In [4]:
def create_xml_file(sf, savedir, basename):
    """
    Write a skeleton metadata XML file "<basename>_auto.xml" into `savedir`.

    The document is
    <metadata><eainfo><detailed name=basename><attr>...</attr></detailed></eainfo></metadata>
    where <attr> holds <attrlbl>, <attrtype> and <attrrange>, each filled
    with a fixed literal string ("label", "type", "range").

    Parameters
    ==========
    sf: not used by this function; kept so existing calls keep working
    savedir: str, folder to write into, with or without a trailing separator
    basename: str, used for the `name` attribute and the output file name
    """
    # Local import so the function does not rely on another cell having run
    import os

    metadata = ET.Element("metadata")
    eainfo = ET.SubElement(metadata, "eainfo")
    detailed = ET.SubElement(eainfo, "detailed", name=basename)
    attr = ET.SubElement(detailed, "attr")

    ET.SubElement(attr, "attrlbl").text = "label"
    ET.SubElement(attr, "attrtype").text = "type"
    # if string with small number of types
    ET.SubElement(attr, "attrrange").text = "range"
    # if a number, just use min/max

    # Join instead of concatenating: plain savedir+basename writes to a
    # wrongly named file whenever savedir lacks a trailing separator
    out_path = os.path.join(savedir, basename + "_auto.xml")
    tree = ET.ElementTree(metadata)
    tree.write(out_path)

#if raw_input("Create XML file? (Y/N) ")=='Y':
#      create_xml_file(sf,dat_dir,shp_file_base)

The code above works, but it does not produce the metadata we want to provide.

https://gis.stackexchange.com/questions/209324/editing-arcgis-metadata-elements-using-python

```
import arcpy, os, sys, datetime
import xml
import xml.etree.ElementTree as ET
# Folder to scan; the same value is also set as the arcpy workspace
ws = arcpy.env.workspace = r"path/to/folder"
today = datetime.date.today()
# Today's date as YYYYMMDD (not used again in this snippet)
date = today.strftime("%Y%m%d")

# Update the MCMS polygon metadata titles, update date and edition date
for f in os.listdir(ws):
    # Find the polygon xml file
    if f.endswith("A.shp.xml"):
        fpath = os.path.join(ws, f)
        # Identify the metadata tree
        tree = ET.parse(fpath)
        root = tree.getroot()
        # Set the title and date variables to the relevant metadata tag index.
        # These are positional child indices, so they only hit the right
        # tags when the XML file has exactly this layout.
        editiondate = root[4][0][0]
        reviseddate = root[4][0][5][0]
        title = root[4][0][7]
        # Update the tags with the new data
        editiondate.text = today.strftime("%Y-%m-%d")
        reviseddate.text = today.strftime("%Y-%m-%d") + "T00:00:00"
        title.text = "MCMS (polygon)"
        # Write the updates to the xml file
        tree.write(fpath)

```

https://github.com/ucd-cws/arcpy_metadata -- find metadata elements

Metadata for Caltrans
DATASET_NAME
TAGS
DESCRIPTION
METHODOLOGY
TOPIC
PUBLISHER_ORGANIZATION
PLACE
FREQUENCY
NEXT_UPDATE
CREATION_DATE
LAST_UPDATE
STATUS
TEMPORAL_COVERAGE_BEGIN
TEMPORAL_COVERAGE_END
DATA_DICTIONARY
DATA_DICTIONARY_TYPE
CONTACT_ORGANIZATION
CONTACT_POSITION
CONTACT_NAME
CONTACT_EMAIL
PUBLIC_ACCESS_LEVEL
ACCESS_CONSTRAINTS
USE_CONSTRAINTS
DATA_LIFE_SPAN
CALTRANS_LINK
DATA_STANDARD
NOTES
GIS_THEME
GIS_HORIZ_ACCURACY
GIS_VERT_ACCURACY
GIS_COORDINATE_SYSTEM_EPSG
GIS_VERT_DATUM_EPSG


In [None]:
# Update the MCMS polygon metadata titles, update date and edition date.
# NOTE: `ws` (folder to scan) and `today` (a datetime.date) are not defined
# in this cell and must already exist, e.g.
#   ws = r"path/to/folder"; today = datetime.date.today()
for f in os.listdir(ws):
    # Find the polygon xml file
    if f.endswith("A.shp.xml"):
        fpath = os.path.join(ws, f)
        # Identify the metadata tree
        tree = ET.parse(fpath)
        root = tree.getroot()
        # Map each tag name to its element. The lookups are positional child
        # indices, so they depend on the exact layout of the XML file.
        metadata_dict = {
            "editiondate": root[4][0][0],
            "reviseddate": root[4][0][5][0],
            "title": root[4][0][7],
        }

        overwrite_values = {
            "editiondate": today.strftime("%Y-%m-%d"),
            "reviseddate": today.strftime("%Y-%m-%d") + "T00:00:00",
            "title":  "MCMS (polygon)",
        }

        # Update the tags with the new data. An element's text lives in its
        # .text attribute; indexing an Element with a string raises TypeError.
        for key, element in metadata_dict.items():
            element.text = overwrite_values[key]

        # Write the updates to the xml file once, after every tag is set
        tree.write(fpath)

https://gis.stackexchange.com/questions/71182/programmatically-converting-arbitrary-xml-data-to-shapefile

In [None]:
# https://stackoverflow.com/questions/23013236/how-to-encode-xml-into-esri-shapefiles-using-python
# Requires pyshp: https://pypi.python.org/pypi/pyshp
#
# Conversion for http://daten.berlin.de/datensaetze/liste-der-gedenktafeln-berlin
# File: http://gedenktafeln-in-berlin.de/index.php?id=31&type=123
#

from xml.etree import ElementTree
from datetime import datetime
import shapefile
import os


def get_value(list, index, default):
    """
    Return the text of the element at `index`, or `default` when there is none.

    Parameters
    ==========
    list: sequence whose entries are XML elements (objects with a `.text`
          attribute) or None. The name shadows the builtin, but is kept so
          the signature stays unchanged.
    index: int, position to read
    default: returned when the position does not exist, the entry is None,
             or the entry's text is None

    The text is returned as str. It is not encoded to UTF-8 bytes, because on
    Python 3 bytes break datetime.strptime and string concatenation
    downstream.
    """
    try:
        element = list[index]
    except IndexError:
        # A record with fewer children than expected falls back to the default
        return default
    if element is None or element.text is None:
        return default
    return element.text



def add_shape(writer, attributes):
    """
    Add one point and its attribute record to `writer`.

    Parameters
    ==========
    writer: object with `point(x, y)` and `record(*values)` methods
            (called here with a shapefile.Writer)
    attributes: sequence of XML elements read by position:
            0 uid, 1 url, 2 tstamp (dd.mm.YYYY), 3 ortsteil, 4 strasse,
            5 longitude, 6 latitude, 7 Name, 8 inhalt, 9 erlauterung,
            10 swo, 11 literatur, 12 personen, 13 entfernt

    A point is added only when both coordinates are present. The record is
    always added.
    """
    uid = int(get_value(attributes, 0, 0))
    url = get_value(attributes, 1, "")
    tstamp = get_value(attributes, 2, None)
    if tstamp is not None:
        tstamp = datetime.strptime(tstamp, '%d.%m.%Y')
    ortsteil = get_value(attributes, 3, "")
    strasse = get_value(attributes, 4, "")
    longitude = get_value(attributes, 5, None)
    latitude = get_value(attributes, 6, None)
    name = get_value(attributes, 7, "")
    inhalt = get_value(attributes, 8, "")
    erlauterung = get_value(attributes, 9, "")
    swo = get_value(attributes, 10, "")
    literatur = get_value(attributes, 11, "")
    personen = get_value(attributes, 12, "")
    entfernt = int(get_value(attributes, 13, 0))

    # Both coordinates are needed; with `or`, a single missing one reached
    # float(None) and raised TypeError.
    if longitude is not None and latitude is not None:
        longitude = float(longitude)
        latitude = float(latitude)

        # Fix interchanged coordinates: the smaller value is taken as the
        # longitude (presumably valid because the data is around Berlin,
        # where longitude < latitude -- confirm before reusing elsewhere).
        if longitude > latitude:
            longitude, latitude = latitude, longitude

        # Add coordinates
        writer.point(longitude, latitude)
    # NOTE(review): without coordinates the record below is still written
    # with no matching shape, so shape and record counts can diverge.

    # Add attributes
    writer.record(uid, url, tstamp, ortsteil, strasse, name, inhalt, erlauterung, swo, literatur, personen, entfernt)




# Input XML, output shapefile, and the WKT written to the .prj file
xml_file = 'gedenktafeln.xml'
shape_file = 'gedenktafeln.shp'
projection = 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]'

tree = ElementTree.parse(xml_file)
writer = shapefile.Writer(shapefile.POINT)

# One (name, options) pair per attribute field, in the order that
# writer.record() receives the values.
field_specs = (
    ('uid', dict(fieldType='N', size=5, decimal=0)),
    ('url', dict(fieldType='C', size=255)),
    ('tstamp', dict(fieldType='C', size=19)),  # Type 'D' seems to be not working here.
    ('ortsteil', dict(fieldType='C', size=200)),
    ('strasse', dict(fieldType='C', size=200)),
    ('Name', dict(fieldType='C', size=255)),
    ('inhalt', dict(fieldType='C', size=255)),
    ('erlauterung', dict(fieldType='C', size=255)),
    ('swo', dict(fieldType='C', size=255)),
    ('literatur', dict(fieldType='C', size=255)),
    ('personen', dict(fieldType='C', size=255)),
    ('entfernt', dict(fieldType='N', size=1, decimal=0)),
)
for field_name, field_options in field_specs:
    writer.field(field_name, **field_options)

root = tree.getroot()
# Element.getchildren() was removed in Python 3.9; list(element) returns the
# same list of direct children.
shapes = list(root)

# Each child of the root is one record; its own children are the attributes
for shape in shapes:
    attributes = list(shape)
    add_shape(writer, attributes)

# Save the shapefile. Writer.save() belongs to the pyshp 1.x API.
try:
    writer.save(shape_file)
except Exception as e:
    # The per-record values (ortsteil, strasse, ...) are locals of add_shape
    # and are not visible here, so report the failure itself and re-raise.
    print(f"Failed to save {shape_file}: {e}")
    raise

# create the PRJ file next to the shapefile (same base name, .prj extension)
prj_path = os.path.splitext(shape_file)[0] + os.extsep + 'prj'
with open(prj_path, 'w') as prj:
    prj.write(projection)

In [None]:
#https://gis.stackexchange.com/questions/115833/getting-value-from-metadata-in-python-script-for-attribute
# https://pro.arcgis.com/en/pro-app/latest/arcpy/metadata/metadata-class.htm
#https://pro.arcgis.com/en/pro-app/latest/arcpy/metadata/migrating-from-arcmap-to-arcgis-pro.htm
#https://desktop.arcgis.com/en/arcmap/10.3/analyze/executing-tools/saving-loading-and-recalling-at-the-python-window.htm