## Update a Hosted Table

## AOIs: Update conservation concern on species lookup tables.
We have a lookup table per taxa (reptiles, amphibians, birds and mammlas) with a column **conservation concern** of each species, and we have a separate lookup table with a column **has_image**. We need both columns to be on the same table

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import arcgis
from arcgis.gis import GIS
import json
import pandas as pd
from arcgis.features import FeatureLayerCollection
from copy import deepcopy
import os

In [None]:
# env_path = "../../.env"
env_path = ".env"
with open(env_path) as f:
   env = {}
   for line in f:
       env_key, _val = line.split("=")
       env_value = _val.split("\n")[0]
       env[env_key] = env_value

In [None]:
aol_password = env['ARCGIS_GRETA_PASS']
aol_username = env['ARCGIS_GRETA_USER']

In [None]:
gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password, profile = "eowilson")

In [None]:
## Tables with conservation concern (cc)
cc_keys = {'amphibians':'eb487fb505e34052b4cb9e02f3f7a22c',
       'reptiles':'38356d976d3f43d7a0d2ab91034b054',
       'mammals':'f6e7514c775442b39274d306b54a5952',
       'birds':'71e61cd2211b4670a28bfb14b3693f66'}
## Tables with has_image (hi)
hi_keys = {'amphibians':'a641a4cd269345dea93b8bcb1cb66676',
       'reptiles':'81c72a2a5ee6413699960b4c4bd9540f',
       'mammals':'84d3c71caf97479d85f620a4ee217d68',
       'birds':'4d8698734b654bb9bb7a61d9af314c76'}

In [None]:
def getHTfromId(item_id):
    item = gis.content.get(item_id)
    flayer = item.tables[0]
    sdf = flayer.query().sdf
    return sdf

In [None]:
## amphibians
cc = getHTfromId(cc_keys['amphibians'])
hi = getHTfromId(hi_keys['amphibians'])

In [None]:
cc.head()

In [None]:
hi.head()

In [None]:
### Add conservation concern to hi table
hi = hi.merge(cc[['SliceNumber','conservation_concern']],how='inner',on='SliceNumber')

In [None]:
### Add new field to Hosted service
## Create fields
def createFieldsToBeAdded(flayer, csv_table):
    flayer_fields = flayer.manager.properties.fields
    template_field = dict(deepcopy(flayer_fields[0]))
    sdf = sdf = flayer.query().sdf
    new_field_names = list(csv_table.columns.difference(sdf.columns))
    
    fields_to_be_added = []
    for new_field_name in new_field_names:
        current_field = deepcopy(template_field)
        dt = csv_table[new_field_name].dtypes
        
        if dt == 'O':
            #put the type to character
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeString'
            current_field['length'] = 45000
        if dt == 'int64':
            #put the type to double
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeDouble'
            #current_field['length'] = 8000      

        current_field['name'] = new_field_name.lower()
        current_field['alias'] = new_field_name
        current_field['nullable'] = True
        current_field['editable'] = True
        fields_to_be_added.append(current_field)
    return fields_to_be_added

In [None]:
item = gis.content.get(hi_keys['amphibians'])
flayer = item.tables[0]
fields_to_be_added = createFieldsToBeAdded(flayer, hi)

In [None]:
fields_to_be_added

In [None]:
flayer.manager.add_to_definition({'fields':fields_to_be_added})

In [None]:
#https://developers.arcgis.com/python/sample-notebooks/updating-features-in-a-feature-layer/
def createFeaturesForUpdate(flayer, csv_table, fields_to_be_added, id_field_in_csv, id_field_in_service):
    fset2 = flayer.query()
    features2 = fset2.features
    features_for_update = []
    for country_id in csv_table[id_field_in_csv]:
        try:
            # get the matching row from csv
            matching_row = csv_table.where(csv_table[id_field_in_csv] == country_id).dropna()

            #print(str(country_id) + " Adding additional attributes for: " + matching_row['iso3'].values[0])

            # get the feature to be updated
            assert  len([f for f in features2 if f.attributes[id_field_in_service] == country_id]),  "id not matched"
            original_feature = [f for f in features2 if f.attributes[id_field_in_service] == country_id][0]
            feature_to_be_updated = deepcopy(original_feature)

            # assign the updated values
            for field in fields_to_be_added:
                feature_to_be_updated.attributes[field['name']] = matching_row[field['name']].values[0]
                #add this to the list of features to be updated
                features_for_update.append(feature_to_be_updated)
    
        except:
            print(f"{country_id} not available in service")
    return features_for_update

In [None]:
features_for_update = createFeaturesForUpdate(flayer = flayer ,
                        csv_table = hi,
                        fields_to_be_added =  fields_to_be_added, 
                        id_field_in_csv = "SliceNumber", 
                        id_field_in_service = "SliceNumber")

In [None]:
flayer.edit_features(updates= features_for_update)

In [None]:
## Tables with conservation concern (cc)
cc_keys = {'birds':'71e61cd2211b4670a28bfb14b3693f66'}
## Tables with has_image (hi)
hi_keys = {'birds':'4d8698734b654bb9bb7a61d9af314c76'}

In [None]:
### reptiles
hi_keys = {'reptiles':'81c72a2a5ee6413699960b4c4bd9540f'}
cc_keys = {'reptiles':'38356d976d3f43d7a0d2ab91034b054b'}

In [None]:
for key in cc_keys:
    ## get df from Hosted tables
    cc = getHTfromId(cc_keys[key])
    hi = getHTfromId(hi_keys[key])
    
    ### bring conservation_concern column to base table
    hi = hi.merge(cc[['SliceNumber','conservation_concern']],how='inner',on='SliceNumber')
    
    ### Create New fields
    item = gis.content.get(hi_keys[key])
    flayer = item.tables[0]
    fields_to_be_added = createFieldsToBeAdded(flayer, hi)
    
    ### Add new fields
    flayer.manager.add_to_definition({'fields':fields_to_be_added})
    
    ### Create features to update
    #### triplicate columns, check what has happened?
    if key == 'birds':
        start = np.arange(0,len(hi), step = 2000)
        for i in start:
            features_for_update = createFeaturesForUpdate(flayer = flayer, 
                                                          csv_table = hi[i:i+2000], 
                                                          fields_to_be_added =  fields_to_be_added, 
                                                          id_field_in_csv = "SliceNumber", 
                                                          id_field_in_service = "SliceNumber")
            flayer.edit_features(updates= features_for_update)
    else: 
        features_for_update = createFeaturesForUpdate(flayer = flayer,
                        csv_table = hi,
                        fields_to_be_added =  fields_to_be_added, 
                        id_field_in_csv = "SliceNumber", 
                        id_field_in_service = "SliceNumber")
    
        ### Update Features
        flayer.edit_features(updates= features_for_update)
    

## Add Conservation Concern field to amphibians table 

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import arcgis
from arcgis.gis import GIS
import json
import pandas as pd
from arcgis.features import FeatureLayerCollection
from copy import deepcopy
import os

In [None]:
# env_path = "../../.env"
env_path = ".env"
with open(env_path) as f:
   env = {}
   for line in f:
       env_key, _val = line.split("=")
       env_value = _val.split("\n")[0]
       env[env_key] = env_value

In [None]:
aol_password = env['ARCGIS_GRETA_PASS']
aol_username = env['ARCGIS_GRETA_USER']

In [None]:
gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password, profile = "eowilson")

In [None]:
## Function to get table from AGOL as sdf
def getHTfromId(item_id):
    item = gis.content.get(item_id)
    flayer = item.tables[0]
    sdf = flayer.query().sdf
    return sdf

In [None]:
## Get sdf of amphibians tables
cc_key = 'eb487fb505e34052b4cb9e02f3f7a22c' # table with conservation concern
hi_key = 'a641a4cd269345dea93b8bcb1cb66676' # table used by FE

cc = getHTfromId(cc_key) 
hi = getHTfromId(hi_key) 


In [None]:
cc.head(5)

In [None]:
hi.head(5)

In [None]:
### Add conservation concern, common name and synonyms to hi table 
hi = hi.merge(cc[['SliceNumber','conservation_concern']],how='inner',on='SliceNumber')
hi.head()

In [None]:
sum(hi['conservation_concern']==0)

In [None]:
### Add new field to Hosted service
## Create fields
def createFieldsToBeAdded(flayer, csv_table):
    flayer_fields = flayer.manager.properties.fields
    template_field = dict(deepcopy(flayer_fields[0]))
    sdf = sdf = flayer.query().sdf
    new_field_names = list(csv_table.columns.difference(sdf.columns))
    
    fields_to_be_added = []
    for new_field_name in new_field_names:
        current_field = deepcopy(template_field)
        dt = csv_table[new_field_name].dtypes
        
        if dt == 'O':
            #put the type to character
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeString'
            current_field['length'] = 45000
        if dt == 'int64':
            #put the type to double
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeDouble'
            #current_field['length'] = 8000      

        current_field['name'] = new_field_name.lower()
        current_field['alias'] = new_field_name
        current_field['nullable'] = True
        current_field['editable'] = True
        fields_to_be_added.append(current_field)
    return fields_to_be_added

In [None]:
# Get table to be updated and compare both to identify fields to be added
item = gis.content.get(hi_key)
flayer = item.tables[0]
fields_to_be_added = createFieldsToBeAdded(flayer, hi)

In [None]:
fields_to_be_added

In [None]:
flayer.manager.add_to_definition({'fields':fields_to_be_added})

In [None]:
#https://developers.arcgis.com/python/sample-notebooks/updating-features-in-a-feature-layer/
def createFeaturesForUpdate(flayer, csv_table, fields_to_be_added, id_field_in_csv, id_field_in_service):
    fset2 = flayer.query()
    features2 = fset2.features
    features_for_update = []
    for country_id in csv_table[id_field_in_csv]:
        try:
            # get the matching row from csv
            matching_row = csv_table.where(csv_table[id_field_in_csv] == country_id).dropna()

            #print(str(country_id) + " Adding additional attributes for: " + matching_row['iso3'].values[0])

            # get the feature to be updated
            assert  len([f for f in features2 if f.attributes[id_field_in_service] == country_id]),  "id not matched"
            original_feature = [f for f in features2 if f.attributes[id_field_in_service] == country_id][0]
            feature_to_be_updated = deepcopy(original_feature)

            # assign the updated values
            for field in fields_to_be_added:
                feature_to_be_updated.attributes[field['name']] = matching_row[field['name']].values[0]
                #add this to the list of features to be updated
                features_for_update.append(feature_to_be_updated)
    
        except:
            print(f"{country_id} not available in service")
    return features_for_update

In [None]:
features_for_update = createFeaturesForUpdate(flayer = flayer ,
                        csv_table = hi,
                        fields_to_be_added =  fields_to_be_added, 
                        id_field_in_csv = "SliceNumber", 
                        id_field_in_service = "SliceNumber")

In [None]:
flayer.edit_features(updates= features_for_update)

## Add common name and synonyms fields
For some reason, when uploading more than one new field at a time, the resulting table in AGOL is incomplete. Therefore, in this case upload the two new fields separately
### Add common name field

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import arcgis
from arcgis.gis import GIS
import json
import pandas as pd
from arcgis.features import FeatureLayerCollection
from copy import deepcopy
import os

#### Prepare table with common name

In [38]:
# Read table with common names shared by Scott (https://eowilson.maps.arcgis.com/home/item.html?id=b09984e429814a0ea7ea5ed44dd3b609)
cn = pd.read_csv('/Users/sofia/Documents/HE_Data/Amphibian_CRF_species_table.csv') 

In [41]:
# Some values need to be modified
cn.loc[cn['Name'] == 'Acris_gryllus', 'common_name'].iloc[0]

'Southern Cricket Frog, Florida  Cricket Frog (<i>A. g. dorsalis</i>), Southern Cricket Frog (<i>A. g. gryllus</i>)'

In [52]:
# Modify values
import re

cn['common_name_array'] = np.nan

for row in range(0,len(cn)):
    if type(cn['common_name'][row]) == str:
        cn['common_name_array'][row] = cn['common_name'][row].split(',')
        l = len(cn['common_name_array'][row])
        if l>1:
            for i in range(0, l):
                a = re.sub(r"\([^()]*\)", "", ((cn['common_name_array'][row])[i]))
                a = a.rstrip().lstrip()
                cn['common_name_array'][row][i] = a

        

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cn['common_name_array'][row] = cn['common_name'][row].split(',')


In [53]:
# It returns a list of strings
(cn.loc[cn['Name'] == 'Acris_gryllus', 'common_name_array'].iloc[0])

['Southern Cricket Frog', 'Florida  Cricket Frog', 'Southern Cricket Frog']

In [247]:
# ## Function that removes parenthesis (and anything inside them), and leading and trailing spaces
# import re
# def make_array(row):
#     row_list = row.split(',')
#     row_array= []
#     for i in row_list: 
#         row_array.append(re.sub(r'\([^()]*\)', '', i).rstrip().lstrip())
#     return row_array

In [248]:
# ## Apply function to all rows in table and create an array of names
# cn['common_name_array']= np.nan
# cn['common_name_array'] = cn['common_name'].apply(lambda row : json.dumps(make_array(row)) if type(row)==str else row)
# cn['synonyms_array']= np.nan
# cn['synonyms_array']= cn['synonyms'].apply(lambda row : json.dumps(make_array(row)) if type(row)==str else row)

When applying this function some characters in the string change

#### Update amphibians table with common names

In [2]:
# Log in AGOL
env_path = ".env"
with open(env_path) as f:
   env = {}
   for line in f:
       env_key, _val = line.split("=")
       env_value = _val.split("\n")[0]
       env[env_key] = env_value
    
aol_password = env['ARCGIS_GRETA_PASS']
aol_username = env['ARCGIS_GRETA_USER']

gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password, profile = "eowilson")

In [None]:
## Function to get table from AGOL as sdf
def getHTfromId(item_id):
    item = gis.content.get(item_id)
    flayer = item.tables[0]
    sdf = flayer.query().sdf
    return sdf

In [None]:
# Call hosted table to update
hi_key = 'a641a4cd269345dea93b8bcb1cb66676' 
hi = getHTfromId(hi_key)

In [None]:
# Merge both tables to create new field in hosted table
hi = hi.merge(cn[['SliceNumber','common_name_array']],how='inner',on='SliceNumber')
hi.tail()

In [None]:
### Add new field to Hosted service
## Create fields
def createFieldsToBeAdded(flayer, csv_table):
    flayer_fields = flayer.manager.properties.fields
    template_field = dict(deepcopy(flayer_fields[0]))
    sdf = sdf = flayer.query().sdf
    new_field_names = list(csv_table.columns.difference(sdf.columns))
    
    fields_to_be_added = []
    for new_field_name in new_field_names:
        current_field = deepcopy(template_field)
        dt = csv_table[new_field_name].dtypes
        
        if dt == 'O':
            #put the type to character
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeString'
            current_field['length'] = 45000
        if dt == 'int64':
            #put the type to double
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeDouble'
            #current_field['length'] = 8000      

        current_field['name'] = new_field_name.lower()
        current_field['alias'] = new_field_name
        current_field['nullable'] = True
        current_field['editable'] = True
        fields_to_be_added.append(current_field)
    return fields_to_be_added

In [None]:
# Get table to be updated and compare both to identify fields to be added
item = gis.content.get(hi_key)
flayer = item.tables[0]
fields_to_be_added = createFieldsToBeAdded(flayer, hi)

In [None]:
# Check fields to be added
fields_to_be_added

In [None]:
flayer.manager.add_to_definition({'fields':fields_to_be_added})

In [None]:
# Function to create features for update
#https://developers.arcgis.com/python/sample-notebooks/updating-features-in-a-feature-layer/
def createFeaturesForUpdate(flayer, csv_table, fields_to_be_added, id_field_in_csv, id_field_in_service):
    fset2 = flayer.query()
    features2 = fset2.features
    features_for_update = []
    for country_id in csv_table[id_field_in_csv]:
        try:
            # get the matching row from csv
            matching_row = csv_table.where(csv_table[id_field_in_csv] == country_id).dropna()

            #print(str(country_id) + " Adding additional attributes for: " + matching_row['iso3'].values[0])

            # get the feature to be updated
            assert  len([f for f in features2 if f.attributes[id_field_in_service] == country_id]),  "id not matched"
            original_feature = [f for f in features2 if f.attributes[id_field_in_service] == country_id][0]
            feature_to_be_updated = deepcopy(original_feature)

            # assign the updated values
            for field in fields_to_be_added:
                feature_to_be_updated.attributes[field['name']] = matching_row[field['name']].values[0]
                #add this to the list of features to be updated
                features_for_update.append(feature_to_be_updated)
    
        except:
            print(f"{country_id} not available in service")
    return features_for_update

In [None]:
# Create features for update
features_for_update = createFeaturesForUpdate(flayer = flayer ,
                        csv_table = hi,
                        fields_to_be_added =  fields_to_be_added, 
                        id_field_in_csv = "SliceNumber", 
                        id_field_in_service = "SliceNumber")

In [None]:
# Update table
flayer.edit_features(updates= features_for_update)

#### Add synonyms_array field

In [None]:
hi_key = 'a641a4cd269345dea93b8bcb1cb66676' # table used by FE
hi = getHTfromId(hi_key)


In [None]:
hi = hi.merge(cn[['SliceNumber','synonyms_array']],how='inner',on='SliceNumber')

In [None]:
### Add new field to Hosted service
## Create fields
def createFieldsToBeAdded(flayer, csv_table):
    flayer_fields = flayer.manager.properties.fields
    template_field = dict(deepcopy(flayer_fields[0]))
    sdf = sdf = flayer.query().sdf
    new_field_names = list(csv_table.columns.difference(sdf.columns))
    
    fields_to_be_added = []
    for new_field_name in new_field_names:
        current_field = deepcopy(template_field)
        dt = csv_table[new_field_name].dtypes
        
        if dt == 'O':
            #put the type to character
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeString'
            current_field['length'] = 45000
        if dt == 'int64':
            #put the type to double
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeDouble'
            #current_field['length'] = 8000      

        current_field['name'] = new_field_name.lower()
        current_field['alias'] = new_field_name
        current_field['nullable'] = True
        current_field['editable'] = True
        fields_to_be_added.append(current_field)
    return fields_to_be_added

In [None]:
# Get table to be updated and compare both to identify fields to be added
item = gis.content.get(hi_key)
flayer = item.tables[0]
fields_to_be_added = createFieldsToBeAdded(flayer, hi)

In [None]:
fields_to_be_added

In [None]:
flayer.manager.add_to_definition({'fields':fields_to_be_added})

In [None]:
#https://developers.arcgis.com/python/sample-notebooks/updating-features-in-a-feature-layer/
def createFeaturesForUpdate(flayer, csv_table, fields_to_be_added, id_field_in_csv, id_field_in_service):
    fset2 = flayer.query()
    features2 = fset2.features
    features_for_update = []
    for country_id in csv_table[id_field_in_csv]:
        try:
            # get the matching row from csv
            matching_row = csv_table.where(csv_table[id_field_in_csv] == country_id).dropna()

            #print(str(country_id) + " Adding additional attributes for: " + matching_row['iso3'].values[0])

            # get the feature to be updated
            assert  len([f for f in features2 if f.attributes[id_field_in_service] == country_id]),  "id not matched"
            original_feature = [f for f in features2 if f.attributes[id_field_in_service] == country_id][0]
            feature_to_be_updated = deepcopy(original_feature)

            # assign the updated values
            for field in fields_to_be_added:
                feature_to_be_updated.attributes[field['name']] = matching_row[field['name']].values[0]
                #add this to the list of features to be updated
                features_for_update.append(feature_to_be_updated)
    
        except:
            print(f"{country_id} not available in service")
    return features_for_update

In [None]:
features_for_update = createFeaturesForUpdate(flayer = flayer ,
                        csv_table = hi,
                        fields_to_be_added =  fields_to_be_added, 
                        id_field_in_csv = "SliceNumber", 
                        id_field_in_service = "SliceNumber")

In [None]:
flayer.edit_features(updates= features_for_update)