# Publish gender data to ArcGIS Online

In [14]:
import copy
import html
import json
import os
import sys

import requests
from arcgis.gis import GIS

# see: https://stackoverflow.com/questions/4383571/importing-files-from-different-folder
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '../scripts')

import utils
import utils_arcgis

# https://volderette.de/jupyter-notebook-tip-multiple-outputs/
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Get data files

In [15]:
# os.listdir() returns entries in arbitrary order. Later cells address files
# by position (e.g. data_files2[70]), so sort the listing to make those
# positions reproducible across machines and runs.
data_files = sorted(os.listdir('../data/processed/series'))
data_dir = 'data/processed/series'

# Everything that is not a CSV file (the publishing loop below expects
# these to be the .xlsx series workbooks).
data_files2 = [f for f in data_files if not f.endswith(".csv")]

# Series metadata records; each record is looked up by its 'code' key below.
series_metadata = utils.open_json(
    '../data/external/seriesMetadata.json')

In [16]:
# Optional: uncomment to list every entry found in the series directory.
#for f in data_files:
#    print(f)

# Spot-check a single metadata record (position 69) to see the record layout.
print(series_metadata[69])

{'code': 'S_0670', 'name': 'Prevalence of obesity among adults, BMI greater than or equal to 30  (age-standardized estimate)', 'themes': [{'code': 'HEA', 'name': 'Health and related services', 'subthemes': [{'code': 'HEA_SUB6', 'name': 'Health risk factors'}]}], 'narratives': ['NH6'], 'tags': ['health']}


## Establish ArcGIS Online connection

In [17]:
# Open the ArcGIS Online session through the project helper. It returns the
# account name and the connection object used by every cell below.
# NOTE(review): judging by the saved output, the helper prompts for
# username/password interactively - confirm in utils_arcgis.
online_username, gis_online_connection = utils_arcgis.connect_to_arcGIS()
print(gis_online_connection)


Username:  unstats_admin
Password:  ············


GIS @ https://undesa.maps.arcgis.com version:8.3


In [18]:
# Show which account is connected; this name is passed as the item owner
# when searching for existing items in the publishing loop.
print(online_username)

unstats_admin


## Test publishing algorithm

In [19]:
def build_series_card(s):
    """Build the item-properties "card" for one series.

    Parameters
    ----------
    s : dict
        Series metadata record. Must provide ``'name'`` (str) and ``'tags'``.

    Returns
    -------
    dict or None
        A new dict with the keys ``'title'``, ``'layer_title'``,
        ``'snippet'``, ``'description'`` and ``'tags'``. ``None`` is returned
        (after printing the error) when ``s`` is missing or malformed, so
        callers must check the result before using it.
    """

    try:
        name = s['name']

        # Replace characters that are not wanted in titles.
        title = name.replace('%', 'percent').replace(
            ',', ' ').replace('/', ' ')
        # Layer titles additionally drop semicolons.
        layer_title = title.replace(';', ' ')

        s_card = dict()

        s_card['title'] = (title[:250] + '..') if len(title) > 250 else title

        # Layer titles are capped at 89 characters (this is very important!!!).
        # Slicing is a no-op for shorter strings, so no length test is needed.
        s_card['layer_title'] = layer_title[:89]

        s_card['snippet'] = s_card['title']

        # The name is inserted as HTML text content, so escape &, < and >
        # to keep the surrounding markup well-formed.
        s_card['description'] =  \
            '<div style="background-color: #f78b33; color:#fff; padding: 15px">' + \
            '<p><strong>Series Name:</strong>' + html.escape(name, quote=False) + '</p>' + \
            '</div>' + \
            '<div style="background-color: #f4f4f4; padding: 15px">' + \
            '<p> </p>' + \
            '</div>'

        # Copy so later edits to the card do not mutate the metadata record.
        s_card['tags'] = s['tags'][:]

        return s_card
    except Exception as err:
        # Best-effort contract: report and return None. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        print('Unexpected error:', repr(err))
        return None

In [20]:
def find_online_item(title, owner, gis_online_connection, force_find=True,
                     folder='Open Data', max_items=800):
    """Look up an item by exact title for a given owner.

    A content search is tried first. If it yields no exact title match and
    ``force_find`` is true, the owner's items in ``folder`` are listed and
    scanned instead.

    Parameters
    ----------
    title : str
        Exact item title to match.
    owner : str
        Username owning the item.
    gis_online_connection : object
        Connection exposing ``content.search(query)`` and ``users.get(owner)``.
    force_find : bool, optional
        Whether to fall back to scanning the owner's folder (default True).
    folder : str, optional
        Folder scanned by the fallback. Defaults to ``'Open Data'``, the
        previously hard-coded value; pass the folder items are uploaded to
        when it differs.
    max_items : int, optional
        Maximum number of folder items requested by the fallback (default 800).

    Returns
    -------
    object or None
        The first item whose ``'title'`` equals ``title``; ``None`` when
        nothing matches or when an error occurs (the error is printed).
    """

    try:

        # Search for this ArcGIS Online Item
        query_string = "title:'{}' AND owner:{}".format(title, owner)
        print('Searching for ' + title)
        # The search() method returns a list of Item objects that match the
        # search criteria; it may also contain partial matches, hence the
        # exact-title comparison.
        search_results = gis_online_connection.content.search(query_string)

        for item in search_results or []:
            if item['title'] == title:
                print(' -- Item ' + title + ' found (simple find)')
                return item

        if not force_find:
            print(' -- Item ' + title + ' not found (simple find)')
            return None

        user = gis_online_connection.users.get(owner)
        for item in user.items(folder=folder, max_items=max_items):
            if item['title'] == title:
                print(' -- Item ' + title + ' found (force find)')
                return item
        print(' -- Item ' + title + ' not found (force find)')
        return None

    except Exception as err:
        # Best-effort lookup: report and return None, but let
        # KeyboardInterrupt/SystemExit propagate (no bare except).
        print('Unexpected error:', repr(err))
        return None

In [21]:

def set_field_alias(field_name):
    """Return the human-readable alias for a dataset field name.

    Well-known field names map to fixed aliases. Any other name is derived
    generically: underscores become spaces, the result goes through
    ``utils.camel_case_split``, a ``' DESC'`` part becomes ``' Description'``
    and the whole string is title-cased.
    """
    fixed_aliases = {
        'SERIES': 'Series Code',
        'SERIES_DESC': 'Series Name',
        'REF_AREA': 'Geographic Area Code',
        'REF_AREA_DESC': 'Geographic Area Name',
        'GEOLEVEL': 'Geographic Area Level',
        'GEOLEVEL_DESC': 'Geographic Area Level Description',
        'OBS_VALUE': 'Value',
        'UNIT_MEASURE': 'Measurement Unit Code',
        'UNIT_MEASURE_DESC': 'Measurement Unit Description',
    }

    if field_name in fixed_aliases:
        return fixed_aliases[field_name]

    # Fallback for names without a dedicated alias.
    spaced_name = field_name.replace('_', ' ')
    split_name = utils.camel_case_split(spaced_name)
    return split_name.replace(' DESC', ' Description').title()


In [22]:

def analyze_csv(item_id, gis_online_connection):
    """Run the portal's ``content/features/analyze`` operation on a CSV item.

    The publish parameters in the response are adjusted before being
    returned: every field gets an alias from ``set_field_alias`` and the
    layer's display field is set to ``'OBS_VALUE'``.

    Parameters
    ----------
    item_id : str
        Id of the uploaded CSV item.
    gis_online_connection : object
        Connection providing the portal URL (``_url``) and a token
        (``_con.token``).
        NOTE(review): both are private attributes of the connection object;
        verify they remain available when upgrading the arcgis package.

    Returns
    -------
    dict or None
        The ``publishParameters`` dict, or ``None`` when the request fails or
        the response carries an error (the problem is printed). Callers must
        check for ``None`` before using the result.
    """
    try:
        sharing_url = gis_online_connection._url + \
            '/sharing/rest/content/features/analyze'

        analyze_params = {'f': 'json',
                          'token': gis_online_connection._con.token,
                          'sourceLocale': 'en-us',
                          'filetype': 'csv',
                          'itemid': item_id}

        response = requests.post(sharing_url, data=analyze_params)
        # Fail on HTTP-level errors instead of trying to parse an error page.
        response.raise_for_status()

        analyze_json_data = response.json()

        # A failed request may still be answered with a JSON body carrying an
        # 'error' member; report it rather than failing later on a missing
        # 'publishParameters' key.
        if 'error' in analyze_json_data:
            print('Analyze request failed:', analyze_json_data['error'])
            return None

        publish_parameters = analyze_json_data['publishParameters']
        layer_info = publish_parameters['layerInfo']

        for field in layer_info['fields']:
            field['alias'] = set_field_alias(field['name'])

        # set up some of the layer information for display
        layer_info['displayField'] = 'OBS_VALUE'

        return publish_parameters
    except Exception as err:
        # Best-effort contract: report and return None, but let
        # KeyboardInterrupt/SystemExit propagate (no bare except).
        print('Unexpected error:', repr(err))
        return None


In [53]:
# Optional: uncomment to list every entry found in the series directory.
#for d in data_files:
#    print(d)
# Show the file at position 70, the index the publishing loop starts from.
data_files2[70]

'S_0680.xlsx'

In [52]:
# Position in data_files2 from which publishing starts/resumes.
# NOTE(review): this relies on the ordering of data_files2 - confirm the
# index before re-running.
START_INDEX = 70

# ArcGIS Online folder that receives the uploaded CSV items.
# NOTE(review): find_online_item's force-find fallback scans its own folder,
# which may differ from this one - confirm that existing uploads are detected.
TARGET_FOLDER = "World's Women 2020 Data"

for d in data_files2[START_INDEX:]:
    # data_files2 only excludes .csv files, so skip anything else that is
    # not a series workbook (hidden files, temporary files, ...).
    if not d.endswith('.xlsx'):
        print(f'Skipping non-xlsx file: {d}')
        continue

    series = os.path.splitext(d)[0]

    # Metadata record whose code matches the file name (first match).
    s = next((m for m in series_metadata if m['code'] == series), None)
    print(f's: {s}')
    if s is None:
        print(f' -- No metadata found for series {series}; skipping')
        continue

    xlsx_dataset = '../data/processed/series/' + d

    data_dict = utils.xlsx2dict(xlsx_dataset, 'Sheet1')

    # Show the first record as a sanity check (the sheet may be empty).
    if data_dict:
        print(data_dict[0])

    # csv file to be uploaded: same path with only the extension swapped
    # (str.replace would also rewrite any 'xlsx' elsewhere in the path).
    # NOTE(review): the helper is named dictList2tsv but the output is
    # uploaded as type 'CSV' - confirm the delimiter it writes.
    file = os.path.splitext(xlsx_dataset)[0] + '.csv'
    utils.dictList2tsv(data_dict, file)
    print(file)

    s_card = build_series_card(s)
    if s_card is None:
        # build_series_card already printed the reason.
        print(f' -- Could not build metadata card for {series}; skipping')
        continue

    if not os.path.isfile(file):
        continue

    # Title/name with the characters that are replaced in item titles.
    clean_name = s['name'].replace('%', 'percent').replace(
        ',', ' ').replace('/', ' ')

    csv_item_properties = copy.deepcopy(s_card)
    csv_item_properties['name'] = clean_name
    csv_item_properties['title'] = clean_name
    csv_item_properties['type'] = 'CSV'
    csv_item_properties['url'] = ''

    print(f'csv_item_properties = {csv_item_properties}')

    csv_item = find_online_item(
                csv_item_properties['title'], online_username, gis_online_connection)

    # Only upload and publish when the item is not online yet.
    if csv_item is None:
        print('Adding CSV File to ArcGIS Online....')

        csv_item = gis_online_connection.content.add(item_properties=csv_item_properties,
                                                     #thumbnail=thumbnail,
                                                     data=file,
                                                     folder=TARGET_FOLDER)

        print('Analyze Feature Service....')

        publish_parameters = analyze_csv(csv_item['id'], gis_online_connection)
        if publish_parameters is None:
            # analyze_csv already printed the reason; without parameters
            # there is nothing to publish.
            print(f' -- Analyze failed for {series}; not publishing')
            continue

        publish_parameters['name'] = csv_item_properties['title'][0:80]
        publish_parameters['layerInfo']['name'] = csv_item_properties['layer_title'][0:80]

        print('Publishing Feature Service....')

        # NOTE(review): the saved output of this cell ends in
        # "KeyError: 'type'"; the failing call is not visible here. Check
        # whether overwrite=True is appropriate for a freshly added item.
        csv_lyr = csv_item.publish(publish_parameters=publish_parameters, overwrite=True)

        #print('.......call generate renderer within publish_csv')


s: {'code': 'S_0680', 'name': 'Age-standardized mortality rate attributed to ambient air pollution (deaths per 100,000 population), by sex', 'themes': [{'code': 'ENV', 'name': 'Environment', 'subthemes': [{'code': 'ENV_SUB5', 'name': ''}, {'code': 'HEA_SUB6', 'name': 'Health risk factors'}]}, {'code': 'HEA', 'name': 'Health and related services', 'subthemes': [{'code': 'ENV_SUB5', 'name': ''}, {'code': 'HEA_SUB6', 'name': 'Health risk factors'}]}], 'narratives': ['NN4'], 'tags': ['health', 'environment', 'risk factors']}
{'SERIES': 'S_0680', 'SERIES_DESC': 'Age-standardized mortality rate attributed to ambient air pollution (deaths per 100,000 population), by sex', 'REPORTING_TYPE': 'G', 'REPORTING_TYPE_DESC': 'Global', 'GEOLEVEL': '4', 'GEOLEVEL_DESC': 'Country or Area', 'X': '25.23763153', 'Y': '42.75731323', 'REF_AREA': '100', 'REF_AREA_DESC': 'Bulgaria', 'TIME_PERIOD': '2016', 'SEX': 'M', 'SEX_DESC': 'Male', 'OBS_VALUE': '71', 'UNIT_MEASURE': 'PER_100000_POP', 'UNIT_MEASURE_DESC': 

KeyError: 'type'

In [14]:
# NOTE(review): scratch cell that does not take part in the publishing
# workflow; consider deleting it before sharing the notebook.
print('hello')

hello
