# Add new columns with translations to NRC table
With this notebook, we will add the columnz *sentence_es*, *sentence_fr* and *sentence_pt* to the feature layer containing the information for the NRC https://eowilson.maps.arcgis.com/home/item.html?id=509a8f697e074523a0c0b060b9835c2c#overview

## Table of contents
1. [First steps](#first)
    1. [Import packages](#packages)
    2. [Connect to ESRI](#esri)
    3. [Utils](#utils)
2. [Clone service](#clone) 
3. [Import layer as dataframe](#dataframe)
4. [Translate sentences](#translations)
5. [Update hosted layer](#update)

<a id='first'></a>
## First steps

<a id='packages'></a>
### Import packages

In [2]:
import pandas as pd
import numpy as np
import geopandas as gpd
import arcgis
from arcgis.gis import GIS
import json
import pandas as pd
from arcgis.features import FeatureLayerCollection
from copy import deepcopy
import os
from deep_translator import GoogleTranslator

<a id='esri'></a>
### Connect to ESRI

In [3]:
env_path = ".env"
with open(env_path) as f:
   env = {}
   for line in f:
       env_key, _val = line.split("=")
       env_value = _val.split("\n")[0]
       env[env_key] = env_value

In [4]:
aol_password = env['ARCGIS_GRETA_PASS']
aol_username = env['ARCGIS_GRETA_USER']

In [5]:
gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password, profile = "eowilson")

Keyring backend being used (keyring.backends.OS_X.Keyring (priority: 5)) either failed to install or is not recommended by the keyring project (i.e. it is not secure). This means you can not use stored passwords through GIS's persistent profiles. Note that extra system-wide steps must be taken on a Linux machine to use the python keyring module securely. Read more about this at the keyring API doc (http://bit.ly/2EWDP7B) and the ArcGIS API for Python doc (http://bit.ly/2CK2wG8).


<a id='utils'></a>
### Utils

In [6]:
# To convert an existing hosted layer in an spatial dataframe
def getHTfromId(item_id):
    item = gis.content.get(item_id)
    flayer = item.layers[0]
    sdf = flayer.query().sdf
    return sdf

In [7]:
### To create new field(s) in a hosted service
def createFieldsToBeAdded(flayer, csv_table):
    flayer_fields = flayer.manager.properties.fields
    template_field = dict(deepcopy(flayer_fields[0]))
    sdf = sdf = flayer.query().sdf
    new_field_names = list(csv_table.columns.difference(sdf.columns))
    
    fields_to_be_added = []
    for new_field_name in new_field_names:
        current_field = deepcopy(template_field)
        dt = csv_table[new_field_name].dtypes
        
        if dt == 'O':
            #put the type to character
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeString'
            current_field['length'] = 45000
        if dt == 'int64':
            #put the type to double
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeDouble'
            #current_field['length'] = 8000      

        current_field['name'] = new_field_name.lower()
        current_field['alias'] = new_field_name
        current_field['nullable'] = True
        current_field['editable'] = True
        fields_to_be_added.append(current_field)
    return fields_to_be_added

In [8]:
# To identify features to be updated. See https://developers.arcgis.com/python/sample-notebooks/updating-features-in-a-feature-layer/ for reference
def createFeaturesForUpdate(flayer, csv_table, fields_to_be_added, id_field_in_csv, id_field_in_service):
    fset2 = flayer.query()
    features2 = fset2.features
    features_for_update = []
    for country_id in csv_table[id_field_in_csv]:
        try:
            # get the matching row from csv
            matching_row = csv_table.where(csv_table[id_field_in_csv] == country_id).dropna(axis=0, how='all', thresh=None, subset=None, inplace=False)

            #print(str(country_id) + " Adding additional attributes for: " + matching_row['iso3'].values[0])

            # get the feature to be updated
            assert  len([f for f in features2 if f.attributes[id_field_in_service] == country_id]),  "id not matched"
            original_feature = [f for f in features2 if f.attributes[id_field_in_service] == country_id][0]
            feature_to_be_updated = deepcopy(original_feature)

            # assign the updated values
            for field in fields_to_be_added:
                feature_to_be_updated.attributes[field['name']] = matching_row[field['name']].values[0]
                #add this to the list of features to be updated
                features_for_update.append(feature_to_be_updated)
    
        except:
            print(f"{country_id} not available in service")
    return features_for_update

------
<a id='clone'></a>
### Clone service: create a duplicate from the gadm0 layer to test changes
Before updating any layers used in production, it is a good practice to test the notebook using a cloned dataset. In this case, we are going to clone gadm0_precalculated and add a new field called *alternative_names*. Once we are sure it works, we can run the notebook using the ID or the production layer (and delete the cloned dataset)

In [9]:
# nrc = gis.content.get("509a8f697e074523a0c0b060b9835c2c")
# nrc

In [10]:
# cloning_list = [nrc]
# cloning_list

In [11]:
# # Clone items 
# cloned_gadm = gis.content.clone_items(items=cloning_list)

--------
<a id='dataframe'></a>
### Import layer as dataframe

In [24]:
# Bring the desired layer as a dataframe
nrc = getHTfromId('509a8f697e074523a0c0b060b9835c2c') #id of the hosted service (or the cloned service) we want to modify
nrc.head(1)

Unnamed: 0,GID_0,NAME_0,Area_Country,x,y,jpg_url,has_priority,has_raisg,GlobalID,max_highlited_sp,...,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar,filter_similar_mar,ObjectId,SHAPE
0,ABW,Aruba,181.94,-69.97,12.51,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,4.0,...,1648,0,0,1678,0,29970.3,23.4,"{""filter_Area_Country"":[""CUW"",""GRD"",""LVA"",""DMA...",1,"{""x"": -7789024.770805351, ""y"": 1403805.4347732..."


In [25]:
nrc.sentence[0]

'Aruba has high biodiversity rarity of terrestrial land vertebrates at a global scale.  When analysed as single taxons, the rarity of mammals and reptiles is also high.  Most of the country is used for human activities, in its majority by rainfed agriculture. The rarity of marine fish and mammals is also high.'

In [26]:
sum(nrc['sentence'].isna())

0

--------
<a id='translations'></a>
### Translate sentences

In [34]:
# Create new columns
nrc['sentence_es'] = ""
nrc['sentence_fr'] = ""
nrc['sentence_pt'] = ""
nrc.head(1)

Unnamed: 0,GID_0,NAME_0,Area_Country,x,y,jpg_url,has_priority,has_raisg,GlobalID,max_highlited_sp,...,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar,filter_similar_mar,ObjectId,SHAPE,sentence_es,sentence_fr,sentence_pt
0,ABW,Aruba,181.94,-69.97,12.51,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,4.0,...,1678,0,29970.3,23.4,"{""filter_Area_Country"":[""CUW"",""GRD"",""LVA"",""DMA...",1,"{""x"": -7789024.770805351, ""y"": 1403805.4347732...",,,


In [35]:
# Translate sentences to Spanish, French and Portuguese
for i in range(len(nrc)):
    nrc.at[i, 'sentence_es'] = GoogleTranslator(source='en', target='es').translate(nrc.sentence[i])
    nrc.at[i, 'sentence_fr'] = GoogleTranslator(source='en', target='fr').translate(nrc.sentence[i])
    nrc.at[i, 'sentence_pt'] = GoogleTranslator(source='en', target='pt').translate(nrc.sentence[i])

In [36]:
nrc.head(5)

Unnamed: 0,GID_0,NAME_0,Area_Country,x,y,jpg_url,has_priority,has_raisg,GlobalID,max_highlited_sp,...,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar,filter_similar_mar,ObjectId,SHAPE,sentence_es,sentence_fr,sentence_pt
0,ABW,Aruba,181.94,-69.97,12.51,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,4.0,...,1678,0,29970.3,23.4,"{""filter_Area_Country"":[""CUW"",""GRD"",""LVA"",""DMA...",1,"{""x"": -7789024.770805351, ""y"": 1403805.4347732...",Aruba tiene una alta rareza de biodiversidad d...,Aruba a une grande rareté de la biodiversité d...,Aruba tem alta raridade de biodiversidade de v...
1,AFG,Afghanistan,643857.5,66.03,33.83,https://p1.pxfuel.com/preview/967/12/53/afghan...,1.0,0.0,193ba976-0e5a-4cf6-9b09-d00bf83f4557,5.0,...,0,0,,,,2,"{""x"": 7350425.977079854, ""y"": 4005997.95753182...",Afganistán tiene una alta rareza de biodiversi...,L'Afghanistan a une grande rareté de la biodiv...,O Afeganistão tem alta raridade de biodiversid...
2,AGO,Angola,1247422.0,17.58,-12.34,https://live.staticflickr.com/3787/13698381215...,1.0,0.0,174ce788-4f67-4ae0-922f-d2ddac87f8c3,24.0,...,1418,2,495859.8,23.4,"{""filter_Area_Country"":[""NFK"",""BMU"",""CCK"",""VEN...",3,"{""x"": 1956996.648145749, ""y"": -1384427.2472995...",Angola tiene una alta rareza de biodiversidad ...,L'Angola a une grande rareté de la biodiversit...,Angola tem uma elevada raridade de biodiversid...
3,AIA,Anguilla,83.3,-63.05,18.21,https://live.staticflickr.com/8063/8194570372_...,1.0,0.0,9f5f24d8-8b21-49a8-8f55-90b47cf63e7b,2.0,...,1533,0,90157.96,23.4,"{""filter_Area_Country"":[""ERI"",""AZE"",""FIN"",""VGB...",4,"{""x"": -7018693.894515898, ""y"": 2062143.3779198...",Anguila tiene una alta rareza de biodiversidad...,Anguilla a une grande rareté de la biodiversit...,Anguilla tem alta raridade de biodiversidade d...
4,ALA,Åland,1506.26,19.97,60.24,https://p1.pxfuel.com/preview/294/670/561/alan...,1.0,0.0,2b45351b-a335-490e-914e-7748d4f41f66,1.0,...,0,0,,,,5,"{""x"": 2223050.231141673, ""y"": 8453366.18052892...","En Åland, la mayor parte del país se utiliza p...","À Åland, la majeure partie du pays est utilisé...","Em Åland, a maior parte do país é usada para a..."


In [40]:
for i in range(len(nrc)):
    nrc.at[i, 'sentence_es'] = nrc.at[i, 'sentence_es'].replace("terrestres terrestres", "terrestres") 
    nrc.at[i, 'sentence_fr'] = nrc.at[i, 'sentence_fr'].replace("terrestres terrestres", "terrestres") 
    nrc.at[i, 'sentence_pt'] = nrc.at[i, 'sentence_pt'].replace("terrestres terrestres", "terrestres") 

--------
<a id='update'></a>
### Update layer

In [42]:
# Call service to be updated and compare it with the sdf we have modified to identify field(s) to be added
item = gis.content.get('509a8f697e074523a0c0b060b9835c2c') #id of layer to update (original or cloned)
flayer = item.layers[0]
fields_to_be_added = createFieldsToBeAdded(flayer, nrc) 
fields_to_be_added # This is the field(s) that is different between the new df and hosted layer

[{'name': 'sentence_es',
  'type': 'esriFieldTypeString',
  'actualType': 'nvarchar',
  'alias': 'sentence_es',
  'sqlType': 'sqlTypeOther',
  'length': 45000,
  'nullable': True,
  'editable': True,
  'visible': True,
  'domain': None,
  'defaultValue': None},
 {'name': 'sentence_fr',
  'type': 'esriFieldTypeString',
  'actualType': 'nvarchar',
  'alias': 'sentence_fr',
  'sqlType': 'sqlTypeOther',
  'length': 45000,
  'nullable': True,
  'editable': True,
  'visible': True,
  'domain': None,
  'defaultValue': None},
 {'name': 'sentence_pt',
  'type': 'esriFieldTypeString',
  'actualType': 'nvarchar',
  'alias': 'sentence_pt',
  'sqlType': 'sqlTypeOther',
  'length': 45000,
  'nullable': True,
  'editable': True,
  'visible': True,
  'domain': None,
  'defaultValue': None}]

In [43]:
# Add the new column (empty) in the hosted service
flayer.manager.add_to_definition({'fields':fields_to_be_added})

{'success': True}

In [44]:
# Identify differences between the field we just created in the hosted service and the column with values in the modified sdf
features_for_update = createFeaturesForUpdate(flayer = flayer ,
                        csv_table = nrc,
                        fields_to_be_added =  fields_to_be_added, 
                        id_field_in_csv = "GID_0", 
                        id_field_in_service = "GID_0")

In [45]:
# Add the values to the hosted service
flayer.edit_features(updates= features_for_update)

{'addResults': [],
 'updateResults': [{'objectId': 1,
   'uniqueId': 1,
   'globalId': None,
   'success': True},
  {'objectId': 1, 'uniqueId': 1, 'globalId': None, 'success': True},
  {'objectId': 1, 'uniqueId': 1, 'globalId': None, 'success': True},
  {'objectId': 2, 'uniqueId': 2, 'globalId': None, 'success': True},
  {'objectId': 2, 'uniqueId': 2, 'globalId': None, 'success': True},
  {'objectId': 2, 'uniqueId': 2, 'globalId': None, 'success': True},
  {'objectId': 3, 'uniqueId': 3, 'globalId': None, 'success': True},
  {'objectId': 3, 'uniqueId': 3, 'globalId': None, 'success': True},
  {'objectId': 3, 'uniqueId': 3, 'globalId': None, 'success': True},
  {'objectId': 4, 'uniqueId': 4, 'globalId': None, 'success': True},
  {'objectId': 4, 'uniqueId': 4, 'globalId': None, 'success': True},
  {'objectId': 4, 'uniqueId': 4, 'globalId': None, 'success': True},
  {'objectId': 5, 'uniqueId': 5, 'globalId': None, 'success': True},
  {'objectId': 5, 'uniqueId': 5, 'globalId': None, 'succes

---
#### 
---