In [53]:
import csv
import os
import time
import urllib.request

import requests
from arcgis.gis import GIS
from IPython.display import display
from pyproj import Proj, transform

### STEP 1. Find all data links from search result

In [16]:
# Download the ArcGIS Online search page for items owned by "mapref_umn" and parse it.
# NOTE(review): `soup` is not read by any later cell visible in this notebook; the
# gis.content.search() cell appears to supersede this scrape — confirm before removing.
# NOTE(review): BeautifulSoup is not imported by this notebook's import cell, so this
# cell raises NameError on a fresh kernel unless it is imported elsewhere.
search_url = 'https://umn.maps.arcgis.com/home/search.html?q=owner%3A%22mapref_umn%22'
# Use the response as a context manager so the HTTP connection is closed after reading.
with urllib.request.urlopen(search_url) as search_response:
    search_page = search_response.read()
soup = BeautifulSoup(search_page, "html.parser")

In [65]:
# Connect without credentials (GIS() is called with no arguments).
gis = GIS()

# Query content owned by "mapref_umn": any item type, at most 100 results.
# NOTE(review): if the owner has more than 100 items the result is capped — confirm.
search_result = gis.content.search(
    query="owner:mapref_umn",
    item_type="*",
    max_items=100,
)

### STEP 2. Extract metadata from each content page

In [54]:
# transform coordinates from projected coordinates to geographic coordinates
def convert_coords(extent):
    """Convert an ArcGIS extent dict to a WGS84 bounding-box string.

    Args:
        extent: dict with 'xmin', 'ymin', 'xmax', 'ymax' and a
            'spatialReference' dict carrying 'latestWkid' and/or 'wkid'.

    Returns:
        str: 'xmin,ymin,xmax,ymax' in EPSG:4326, each value rounded to
        4 decimal places.

    Raises:
        KeyError: if a coordinate key is missing or the spatial reference
            carries neither 'latestWkid' nor 'wkid'.
    """
    # spatial reference: prefer 'latestWkid', but fall back to 'wkid' because a
    # spatial reference may carry only the latter.
    spatial_ref = extent['spatialReference']
    wkid = spatial_ref.get('latestWkid') or spatial_ref.get('wkid')
    if not wkid:
        raise KeyError('latestWkid')

    # NOTE(review): Proj(init=...) and transform() are pyproj's legacy interface;
    # check the installed pyproj version before upgrading the environment.
    inProj = Proj(init='epsg:{}'.format(wkid))
    outProj = Proj(init='epsg:4326')   # WGS84

    # Only the lower-left and upper-right corners are reprojected.
    xmin, ymin = transform(inProj, outProj, extent['xmin'], extent['ymin'])
    xmax, ymax = transform(inProj, outProj, extent['xmax'], extent['ymax'])

    return '{},{},{},{}'.format(round(xmin, 4), round(ymin, 4),
                                round(xmax, 4), round(ymax, 4))

In [57]:
# convert file size from integer in bytes to a human readable string
def convert_bytes(size):
    """Format a byte count as a human-readable string.

    Args:
        size: number of bytes (int or float).

    Returns:
        str: the size with one decimal place and a unit, e.g. '1.5 KB'.
        Values of 1024 TB or more are reported in PB, so the result is
        always a string.
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB']
    for unit in units[:-1]:
        if size < 1024.0:
            return "%3.1f %s" % (size, unit)
        size /= 1024.0

    # Anything that did not fit in TB is expressed in the largest unit.
    return "%3.1f %s" % (size, units[-1])

In [63]:
def construct_metadata(item_id, gis=None):
    """Build one metadata row for an ArcGIS Online item.

    Args:
        item_id: ID of the item to look up.
        gis: optional existing GIS connection to reuse; when omitted a new
            GIS() connection is opened for this call.

    Returns:
        list: 33 values, in the same order as the CSV header row
        (Title ... File Size). Bounding box and spatial coverage are empty
        strings when the item's service description cannot be fetched or
        converted.
    """
    # load content page
    if gis is None:
        gis = GIS()
    content = gis.content.get(item_id)
    display(content)

    # extract values from arcgis online
    title = alternativeTitle = content.title
    description = content.snippet
    language = 'eng'
    creator = content.owner
    resourceClass = 'Datasets'  #?
    isoTopCat = ''
    keyword = '|'.join(content.tags or [])   # tolerate items without tags
    # content.created is divided by 1000 before being passed to time.localtime()
    dateIssued = time.strftime('%Y-%m-%d', time.localtime(content.created / 1000))
    temporalCoverage = ''
    dateRange = ''
    resourceType = 'Vector data'
    formatElement = 'Shapefile'
    # Link to the item page on the same portal this notebook harvests from.
    information = 'https://umn.maps.arcgis.com/home/item.html?id=' + content.id
    downloadURL = ''
    mapServer = content.url
    featureServer = ''
    imageServer = ''
    idElement = item_id
    identifier = information
    provider = 'University of Minnesota'
    code = '05 xxx ?'
    memberOf = code
    status = 'Active'
    accrualMethod = 'ArcGIS Python API'
    dateAccessioned = time.strftime("%Y-%m-%d")
    rights = ''
    accessRights = 'Public'
    suppressed = 'FALSE'
    child = 'FALSE'
    fileSize = convert_bytes(content.size)

    # extract spatial values from ArcGIS REST API (best effort)
    try:
        q = content.url + '?f=pjson'
        # A timeout keeps one unresponsive service from stalling the whole harvest.
        response = requests.get(q, timeout=30)
        data = response.json()

        extent = data['fullExtent']
        bbox = convert_coords(extent)
        spatialCoverage = '?'   # ?
    except Exception:
        # Missing URL, network failure, non-JSON reply, absent extent or an
        # unconvertible spatial reference: leave the spatial fields blank.
        # (Exception, not a bare except, so Ctrl-C still interrupts the run.)
        bbox = ''
        spatialCoverage = ''

    metadata = [title, alternativeTitle, description, language, creator,
                resourceClass, isoTopCat, keyword, dateIssued, temporalCoverage,
                dateRange, spatialCoverage, bbox, resourceType,
                formatElement, information, downloadURL, mapServer, featureServer,
                imageServer, idElement, identifier, provider, code, memberOf, status,
                accrualMethod, dateAccessioned, rights, accessRights, suppressed, child, fileSize]

    return metadata
    



In [64]:
# Collect one metadata row per search hit. Rows are appended one at a time so
# that anything gathered before a failure stays available in All_Metadata.
All_Metadata = []
for item in search_result:
    All_Metadata.append(construct_metadata(item.id))


### STEP 3. Write to a CSV file

In [66]:
# CSV header row; the order must match the list returned by construct_metadata().
fieldnames = [
    # descriptive
    'Title', 'Alternative Title', 'Description', 'Language', 'Creator',
    'Resource Class', 'ISO Topic Categories', 'Keyword',
    # temporal / spatial
    'Date Issued', 'Temporal Coverage', 'Date Range',
    'Spatial Coverage', 'Bounding Box',
    # format and links
    'Resource Type', 'Format', 'Information', 'Download',
    'MapServer', 'FeatureServer', 'ImageServer',
    # administrative
    'ID', 'Identifier', 'Provider', 'Code', 'Member Of', 'Status',
    'Accrual Method', 'Date Accessioned', 'Rights', 'Access Rights',
    'Suppressed', 'Child Record', "File Size",
]

# Date stamp (YYYYMMDD) used in the output file name.
actionDate = time.strftime('%Y%m%d')

In [68]:
# Create the output folder if needed so the cell works on a fresh checkout.
os.makedirs('reports', exist_ok=True)

# newline='' lets the csv module control line endings (otherwise Windows gets a
# blank line after every row); utf-8 keeps non-ASCII titles and descriptions from
# failing on platforms whose default encoding cannot represent them.
with open('reports/metadata_{}.csv'.format(actionDate), 'w',
          newline='', encoding='utf-8') as fw:
    writer = csv.writer(fw)
    writer.writerow(fieldnames)
    writer.writerows(All_Metadata)