## Pushing all DDH data to GeoWB

In [None]:
from IPython.display import display
from arcgis.gis import GIS
import credentials as crd
import os
import ddh
import urllib
import zipfile
import arcgis

In [None]:
# DDH host name handed to the ddh client below (the "stg" in the name
# suggests the staging site -- confirm before running against real data).
host = 'ddh1stg.prod.acquia-sites.com'

# Initialise the ddh module for that host.
ddh.load(host)

In [None]:
# Log in to the geosdndev portal with the credentials stored under "Staging".
username, password = crd.get_credentials("Staging")
gis = GIS("https://geosdndev.worldbank.org/portal", username, password)

In [None]:
# Log in to the geowb portal with the credentials stored under "Production".
# NOTE: this rebinds the module-level `gis`; every later cell talks to
# whichever portal was connected last, so run only the login cell you need.
user, pword = crd.get_credentials("Production")
gis = GIS("https://geowb.worldbank.org/portal", user, pword)

### Data from DDH

In [None]:
def get_tags(node_id):
    """Build the tag list for a DDH dataset from its topic and its own tags.

    The keyword of the dataset's first topic is mapped to one or more short
    tags through a fixed lookup table; the keyword of every entry in the
    dataset's ``field_tags`` is appended after them.

    Args:
        node_id: DDH node id of the dataset, passed to ``ddh.dataset.get``.

    Returns:
        list: the topic-derived tags followed by the dataset's own tags.

    Raises:
        KeyError: if the topic keyword is not a key of the lookup table.
    """
    topics = {
        'agriculture and food security': ['agriculture', 'food security'],
        'climate change': ['climate change'],
        'economic growth': ['economic growth'],
        'education': ['education'],
        'energy and extractives': ['energy', 'extractives'],
        'environment and natural resources': ['environment', 'natural resources'],
        'financial sector development': ['financial sector development'],
        'fragility, conflict and violence': ['fragility', 'conflict', 'violence'],
        'gender': ['gender'],
        'health, nutrition and population': ['health', 'nutrition', 'population'],
        'information and communication technologies': ['information', 'communication', 'ICT'],
        'jobs': ['jobs'],
        'macroeconomic and structural policies': ['macroeconomic', 'structural policies'],
        'macroeconomic vulnerability and debt': ['macroeconomic vulnerability', 'debt'],
        'poverty': ['poverty'],
        'private sector development': ['private sector', 'private sector development'],
        'public sector management': ['public sector management'],
        'public-private partnerships': ['public-private partnerships'],
        'social development': ['social development'],
        'social protection and labor': ['social protection', 'labor'],
        'trade': ['trade'],
        # Was misspelt 'trabsport', which tagged every transport dataset wrongly.
        'transport': ['transport'],
        'urban development': ['urban development', 'urban'],
        'water': ['water'],
    }

    ds = ddh.dataset.get(node_id)

    topic_tid = ds['field_topic']['und'][0]['tid']
    topic_name = ddh.taxonomy.get_keywords('field_topic', topic_tid)[0]
    # Copy the entry so that extending the result never mutates the table.
    tags = list(topics[topic_name])

    # An empty field is falsy, in which case only the topic tags are returned.
    if ds['field_tags']:
        tags.extend(
            ddh.taxonomy.get_keywords('field_tags', val['tid'])[0]
            for val in ds['field_tags']['und']
        )

    return tags

In [None]:
def get_item_properties(datasetId, resourceId):
    """Assemble the portal item properties for one resource of a DDH dataset.

    Args:
        datasetId: DDH node id of the dataset.
        resourceId: DDH node id of the resource belonging to that dataset.

    Returns:
        dict: with the keys ``description``, ``title``, ``tags``,
        ``accessInformation``, ``licenseInfo`` and ``access``. The title is
        the resource title when it equals the dataset title (ignoring
        surrounding whitespace), otherwise ``<dataset>__<resource>``.
    """
    ds = ddh.dataset.get(datasetId)
    dj = ddh.dataset.get(resourceId)
    tags = get_tags(datasetId)

    if ds['title'].strip() == dj['title'].strip():
        title = dj['title']
    else:
        title = ds['title'] + '__' + dj['title']

    lic = ddh.taxonomy.get_keywords(
        'field_license_wbddh', ds['field_license_wbddh']['und'][0]['tid'])[0]

    # A dataset without a body has no usable ['und'][0]['value'] path; only
    # those lookup failures mean "no description" (previously a bare except).
    try:
        desc = ds['body']['und'][0]['value']
    except (KeyError, IndexError, TypeError):
        desc = ""

    # Looked up once and reused for both properties that carry it.
    data_class = ddh.taxonomy.get_keywords(
        'field_wbddh_data_class', dj['field_wbddh_data_class']['und'][0]['tid'])[0]

    return {
        "description": desc,
        "title": title,
        "tags": tags,
        "accessInformation": data_class,
        "licenseInfo": lic,
        "access": data_class,
    }

In [None]:
def _discard_failed_publish(shpfile, published_service, title):
    """Delete the uploaded item and any service left behind by a failed publish."""
    if shpfile is not None:
        shpfile.delete()
    if published_service is not None:
        published_service.delete()
        return
    # publish() raised before returning a handle, so look the service up by
    # title instead.
    # NOTE(review): this removes the first title match, which could be an
    # unrelated Feature Service with a similar title -- confirm this is wanted.
    leftovers = gis.content.search('title:{}'.format(title.strip()), item_type='Feature Service')
    if leftovers:
        leftovers[0].delete()


def _publish_existing(item_name):
    """Publish the portal item that already exists under ``item_name``."""
    matches = gis.content.search("{}".format(item_name))
    if not matches:
        print("Item '{}' was reported as existing but could not be found.".format(item_name))
        return None, None
    existing = matches[0]
    published_service = existing.publish()
    published_service.share(org=True)
    return existing.id, published_service.id


def publish_shp(url, meta, item_properties):
    """Upload one DDH resource file to the portal and publish it as a service.

    Relies on the module-level ``gis`` connection created by a login cell.

    Args:
        url: location of the file. A value starting with ``s3`` is rewritten
            to the public (data class '358') or official ('359') DDH bucket
            URL; any other value is handed to the portal unchanged.
        meta: the DDH resource node (dict); its ``field_format`` and
            ``field_wbddh_data_class`` term ids are read.
        item_properties: properties applied to both the uploaded item and the
            published service; ``title`` is also used for clean-up.

    Returns:
        tuple: ``(item_id, service_id)`` on success. ``(None, None)`` when the
        format is not one of the handled term ids or when publishing failed,
        so callers can always unpack the result.
    """
    file_types = {'839': 'PDF', '957': 'Shapefile', '1369': 'GeoJson'}
    buckets = {
        '358': "http://development-data-hub-s3-public.s3.amazonaws.com/ddhfiles/",
        '359': "http://development-data-hub-s3-official.s3.amazonaws.com/ddhfiles/",
    }

    file_type = file_types.get(meta['field_format']['und'][0]['tid'])
    if file_type is None:
        return None, None

    if 'DDH_Datasets' not in [folder['title'] for folder in gis.users.me.folders]:
        gis.content.create_folder(folder='DDH_Datasets')
        print("Created DDH_Datasets folder.")

    # Text after "<scheme>://" up to the next colon: used as the bucket key
    # for s3 URIs and as the item name in the "already exists" message.
    url_parts = url.split(':')
    item_name = url_parts[1][2:] if len(url_parts) > 1 else url

    source = url
    if url.startswith('s3'):
        bucket = buckets.get(meta['field_wbddh_data_class']['und'][0]['tid'])
        if bucket is not None:
            source = bucket + item_name

    # Pre-bound so the handler can tell how far the upload got.
    shpfile = None
    published_service = None
    try:
        shpfile = gis.content.add({'access': 'org', 'type': file_type}, source, folder='DDH_Datasets')
        shpfile.update(item_properties)
        shpfile.share(org=True)
        published_service = shpfile.publish()
        published_service.update(item_properties)
        published_service.share(org=True)
        return shpfile.id, published_service.id
    except Exception as e:
        message = e.args[0] if e.args else ''
        if message == "Job failed.":
            _discard_failed_publish(shpfile, published_service, item_properties['title'])
        elif message == "Item '{}' already exists.\n(Error Code: 409)".format(item_name):
            # NOTE(review): assumes the portal reports the conflict with
            # exactly this text and item name -- verify against a real 409.
            return _publish_existing(item_name)
        else:
            print(message or repr(e))
        return None, None
            

In [None]:
# Publish the eligible resources of every dataset id in `datasets` and record
# the outcome in three parallel lists: data_id (dataset id), data_lis (first
# value returned by publish_shp, or the error message) and fl_list (second
# value returned by publish_shp, or the error message).
# NOTE(review): `datasets` is not defined in any cell shown here; it must be
# bound to an iterable of DDH dataset ids before this cell is run.
data_lis, fl_list, data_id = [], [], []
for k in datasets:
    ds = ddh.dataset.get(k)

    # Only data classes '358'/'359' with at least one resource are handled.
    if (ds['field_wbddh_data_class']['und'][0]['tid'] in ['358', '359']) and (ds['field_resources']):
        for i in ds['field_resources'].values():
            print('Scanning {}'.format(k))
            try:
                for j in i:
                    dj = ddh.dataset.get(j['target_id'])
                    res_type = dj['field_wbddh_resource_type']['und'][0]['tid']
                    if res_type in ['443', '986']:
                        # Prefer the API link; fall back to the uploaded file.
                        if dj['field_link_api']:
                            url = dj['field_link_api']['und'][0]['url']
                        elif dj['field_upload']:
                            url = dj['field_upload']['und'][0]['uri']
                        else:
                            continue
                        url = url.replace(" ", "%20")
                        item_prop = get_item_properties(k, j['target_id'])
                        dat, fl = publish_shp(url, dj, item_prop)
                        data_id.append(k)
                        data_lis.append(dat)
                        fl_list.append(fl)
                    elif res_type == '983':
                        # NOTE(review): fl_list is a positional list of results,
                        # yet it is indexed here by the resource's node id; this
                        # looks wrong (IndexError for most ids) -- confirm where
                        # the service URL should really come from.
                        item_prop = get_item_properties(k, j['target_id'])
                        item_prop['type'] = 'Feature Service'
                        item_prop['url'] = fl_list[int(j['target_id'])]
                        temp_data = gis.content.add(item_prop, fl_list[int(j['target_id'])], folder='DDH_Datasets')
                        temp_data.share(org=True)
                        print(temp_data.id, temp_data.url)
            except Exception as e:
                # One failing resource aborts the rest of this dataset's
                # resources; the message is recorded in place of the ids.
                # Exceptions raised without arguments have an empty `args`.
                err = e.args[0] if e.args else repr(e)
                data_id.append(k)
                data_lis.append(err)
                fl_list.append(err)

## Handling Feature Layer Collection

In [None]:
from arcgis import GIS
from arcgis import geometry
from arcgis import features as fs
from getpass import getpass as pwd
import pandas as pd

In [None]:
# Search the connected portal for items owned by DDHPublisher whose title
# matches the wildcard "*(ESA EO4SD-Urban)", returning at most 50 of them.
lis = gis.content.search(query = "title : *(ESA EO4SD-Urban) AND owner:DDHPublisher", max_items=50)

### Updating TS data

In [None]:
# Load the CSV into `dat`; later cells read its 'nid' and 'feature_id' columns.
# The path is specific to one workstation.
dat = pd.read_csv(r"C:\Users\wb542830\OneDrive - WBG\DEC\DDH\API\RomaniaHub\FL_back_to_ddh.csv")

In [None]:
# Sort the rows by the 'nid' column, modifying `dat` in place.
dat.sort_values(by='nid', inplace=True)

In [None]:
# Exception messages that later cells compare against the 'feature_id'
# column: a row whose feature_id equals one of these is skipped.
# NOTE(review): presumably these were written to the CSV by an earlier
# publishing run in place of real ids -- confirm against how the CSV was built.
err_list = ['cannot unpack non-iterable NoneType object',
 'list index out of range',
 "local variable 'published_service' referenced before assignment",
 'tuple index out of range',
 'list indices must be integers or slices, not str',
"'NoneType' object is not iterable"]

In [None]:
# Print each distinct nid that has more than one row in `dat` and whose first
# feature_id is not one of the recorded error messages.
for i in set(dat.nid.tolist()):
    rows = dat[dat.nid == i]
    if rows['feature_id'].iloc[0] in err_list:
        continue
    if rows.shape[0] > 1:
        print(i)

In [None]:
def update_fl(nid, path=r"C:\Users\wb542830\OneDrive - WBG\DEC\DDH\API\RomaniaHub"):
    """Download a dataset's shapefile resources and repack them into one zip.

    Nothing is done unless the dataset has more than one resource whose format
    term id is '957'. Otherwise every such resource is downloaded into
    ``<path>/<nid>``, each downloaded ``.zip`` is extracted there, and all
    non-zip entries of that directory are written to
    ``<path>/<nid>/updated_<nid>.zip``.

    Args:
        nid: DDH node id of the dataset.
        path: directory under which the per-dataset working directory is
            created; defaults to the location this notebook was written for.

    Returns:
        None
    """
    # `glob` is not imported at the top of the notebook, and a plain
    # `import urllib` does not guarantee that `urllib.request` is loaded.
    import glob
    import urllib.request

    ds = ddh.dataset.get(nid)
    nid = str(nid)
    work_dir = os.path.join(path, nid)

    # Fetch each resource node once and keep those with format term id '957'.
    resources = []
    for ref in ds['field_resources']['und']:
        node = ddh.dataset.get(ref['target_id'])
        if node['field_format']['und'][0]['tid'] in ['957']:
            resources.append((ref['target_id'], node))

    if len(resources) <= 1:
        return

    os.makedirs(work_dir, exist_ok=True)

    # Prefer the uploaded file's URI; fall back to the API link.
    links = []
    for res_id, node in resources:
        try:
            links.append(node['field_upload']['und'][0]['uri'])
        except (KeyError, IndexError, TypeError):
            try:
                links.append(node['field_link_api']['und'][0]['url'])
            except (KeyError, IndexError, TypeError) as e:
                print(res_id, '::', e)

    data_class = ds['field_wbddh_data_class']['und'][0]['tid']
    if data_class in ['358']:
        base_url = "http://development-data-hub-s3-public.s3.amazonaws.com/ddhfiles/"
    elif data_class in ['359']:
        base_url = "http://development-data-hub-s3-official.s3.amazonaws.com/ddhfiles/"
    else:
        base_url = None

    for link in links:
        if link.startswith('s3'):
            if base_url is None:
                # No bucket is known for this data class; skipping avoids the
                # TypeError that concatenating None would raise.
                print(nid, '::', 'no S3 bucket for data class', data_class)
                continue
            url_z = base_url + link.split(':')[1][2:]
        else:
            url_z = link
        urllib.request.urlretrieve(url_z, os.path.join(work_dir, url_z.split('/')[-1]))

    out_zip = os.path.join(work_dir, 'updated_{}.zip'.format(nid))
    pattern_root = glob.escape(work_dir)

    # Extract the downloads from the directory they were saved to (the
    # original searched the current working directory instead).
    for archive in glob.glob(os.path.join(pattern_root, '*.zip')):
        if archive == out_zip:
            # A combined archive left by a previous run is not an input.
            continue
        with zipfile.ZipFile(archive, 'r') as zip_ref:
            zip_ref.extractall(work_dir)

    members = [fn for fn in glob.glob(os.path.join(pattern_root, '*')) if not fn.endswith('.zip')]

    # arcname stores each entry under its bare file name without having to
    # change the process working directory.
    with zipfile.ZipFile(out_zip, 'w') as bundle:
        for member in members:
            bundle.write(member, arcname=os.path.basename(member))

In [None]:
def get_item_properties_temp(datasetId):
    """Assemble the portal item properties for a DDH dataset as a whole.

    Args:
        datasetId: DDH node id of the dataset.

    Returns:
        dict: with the keys ``description``, ``title``, ``tags``,
        ``accessInformation``, ``licenseInfo`` and ``access``, all taken from
        the dataset node itself.
    """
    ds = ddh.dataset.get(datasetId)
    tags = get_tags(datasetId)
    title = ds['title']

    lic = ddh.taxonomy.get_keywords(
        'field_license_wbddh', ds['field_license_wbddh']['und'][0]['tid'])[0]

    # A dataset without a body has no usable ['und'][0]['value'] path; only
    # those lookup failures mean "no description" (previously a bare except).
    try:
        desc = ds['body']['und'][0]['value']
    except (KeyError, IndexError, TypeError):
        desc = ""

    # Looked up once and reused for both properties that carry it.
    data_class = ddh.taxonomy.get_keywords(
        'field_wbddh_data_class', ds['field_wbddh_data_class']['und'][0]['tid'])[0]

    return {
        "description": desc,
        "title": title,
        "tags": tags,
        "accessInformation": data_class,
        "licenseInfo": lic,
        "access": data_class,
    }

In [None]:
def publish_fl(nid, path=r"C:\Users\wb542830\OneDrive - WBG\DEC\DDH\API\RomaniaHub"):
    """Upload ``<path>/<nid>/updated_<nid>.zip`` and publish it as a service.

    Relies on the module-level ``gis`` connection created by a login cell.

    Args:
        nid: DDH node id of the dataset whose combined archive is published.
        path: directory holding the per-dataset working directories; defaults
            to the location this notebook was written for.

    Returns:
        tuple: ``(item_id, service_id)`` on success, ``(0, 0)`` when the
        archive does not exist or uploading/publishing raised.
    """
    nid = str(nid)
    file_path = os.path.join(path, nid, 'updated_{}.zip'.format(nid))

    # Test for the archive itself rather than its directory: a directory left
    # without the combined zip has nothing to upload.
    if not os.path.isfile(file_path):
        return 0, 0

    try:
        shpfile = gis.content.add({'access': 'org', 'type': 'Shapefile'}, file_path, folder='DDH_Datasets')
        shpfile.update(get_item_properties_temp(nid))
        shpfile.share(org=True)
        published_service = shpfile.publish()
        published_service.share(org=True)
        return shpfile.id, published_service.id
    except Exception as e:
        print(nid, '::', e.args)
        return 0, 0

In [None]:
# For each nid past the first 1072 entries of the set-derived list: when it
# has more than one row and its first feature_id is not a recorded error
# message, rebuild its combined archive and publish it. Successful runs are
# recorded in three parallel lists (node id, uploaded item id, service id).
# For a quick check, iterate over a short explicit list instead,
# e.g. [98349, 98480].
nodeid_ , shpid_, flid_ = [], [], []
for i in list(set(dat.nid.tolist()))[1072:]:
    rows = dat[dat.nid == i]
    if rows['feature_id'].iloc[0] in err_list or rows.shape[0] <= 1:
        continue
    update_fl(i)
    shid, fid = publish_fl(i)
    # publish_fl signals failure with (0, 0).
    if fid == 0:
        continue
    nodeid_.append(i)
    shpid_.append(shid)
    flid_.append(fid)

In [None]:
import pandas as pd
# Load Combined_layers_FL.csv (resolved against the current working
# directory) into `dat_f`.
dat_f = pd.read_csv("Combined_layers_FL.csv")

In [None]:
def add_fl_ddh(nid, fl_id):
    """Attach a portal item's URL to a DDH dataset as a new resource.

    A resource is built from ``ddh.dataset.rs_template()`` with the dataset's
    title plus ``'(Feature Service)'``, the dataset's data class, the portal
    item's URL as ``field_link_api`` and resource type 'Related Material',
    then appended to the dataset.

    Args:
        nid: DDH node id of the dataset (converted with ``int``).
        fl_id: id of the portal item whose URL is linked.

    Returns:
        None. The outcome is printed; a missing portal item or a DDH API
        error is reported rather than raised.
    """
    nid = int(nid)
    ds = ddh.dataset.get(nid)

    # content.get yields None when the item cannot be found or accessed.
    item = gis.content.get(fl_id)
    if item is None:
        print('ERROR: no portal item found for id {}'.format(fl_id))
        return

    rs = ddh.dataset.rs_template()
    rs['title'] = ds['title']+'(Feature Service)'
    rs['field_wbddh_data_class'] = ds['field_wbddh_data_class']['und'][0]['tid']
    rs['field_link_api'] = item.url
    ddh.taxonomy.update(rs, {'field_wbddh_resource_type': 'Related Material'})

    # An empty field is falsy and cannot be indexed with ['und'].
    num = len(ds['field_resources']['und']) if ds['field_resources'] else 0

    try:
        up_ds = ddh.dataset.append_resource(nid, rs, num)
        print("Updated unique identifier: {0}".format(up_ds))
    except ddh.dataset.APIError as err:
        print('ERROR: {}'.format(err.response))

### Accessing AGOL

In [None]:
# Connect to the geowb ArcGIS Online organisation as `gis_out`.
# The username and password are blank here; fill them in before running.
gis_out = GIS("https://geowb.maps.arcgis.com/", '', '')

In [None]:
# Fetch the item to edit; the item id is blank here and must be filled in.
shp = gis_out.content.get("")

In [None]:
# Share the fetched item with the whole organisation.
shp.share(org=True)

In [None]:
# Item properties applied to `shp` in the next cell. The description is
# Markdown with CRLF line breaks; "Binary aster" was corrected to
# "Binary raster" in the targets.tif entry.
props = {'description': "## Overview:\r\nGlobal results from [gridfinder](https://github.com/carderne/gridfinder) model, produced by ESMAP based on joint work with Facebook and others. Uses night-time lights, road networks and existing grid network data to predict the location of transmission and distribution lines globally. Validated in several countries with ~70% accuracy at 1 km.\r\n\r\n## More information:\r\nBlog with brief overview: https://blogs.worldbank.org/energy/using-night-lights-map-electrical-grid-infrastructure\r\nFull research paper: https://www.nature.com/articles/s41597-019-0347-4\r\nVisualization: https://gridfinder.org/\r\n\r\n## The following data are included:\r\n*   **grid.gpkg**: Vectorized predicted distribution and transmission line network, with existing OpenStreetMap lines tagged in the 'source' column\r\n*   **targets.tif**: Binary raster showing locations predicted to be connected to distribution grid. \r\n*   **lv.tif**: Raster of predicted low-voltage infrastructure in kilometres per cell.",
 'title': 'Derived map of global electricity transmission and distribution lines__grid.gpkg',
 'tags': ['energy', 'extractives', 'energydata.info'],
 'accessInformation': 'public',
 'licenseInfo': 'creative commons attribution 4.0',
 'access': 'public'}

In [None]:
# Apply the properties defined in `props` to the fetched item.
shp.update(props)