### National Report Cards: species pictures

### 1. Add 2 new columns to vertebrate table:
1. the species url if it has a picture
2. the highlight order for the species. The cirteria for species selection is: 
    1. less stewardship 
    2. smaller range 

In [None]:
import arcgis
from arcgis.gis import GIS
import pandas as pd

In [None]:
# ### Run this just once
# env_path = "/Users/tamarahuete/opt/anaconda3/envs/arcgis/.env"
# env_path
# with open(env_path) as f:
#     env = {}
#     for line in f:
#         env_key, _val = line.split("=")
#         env_value = _val.split("\n")[0]
#         env[env_key] = env_value
# aol_password = env['aol_key']
# aol_username = env['aol_username']


# # here we are using the eowilson arcgis online, change to the appropriate gis online organisation account.
# gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password, profile = "half_earth_profile")

In [None]:
gis = GIS(profile = "half_earth_profile")

In [None]:
import re
def create_strict_reg_exp(to_search):
    try:
        reg_exp = f"^{to_search}$"
        #logging.info(f"regular expression is: {reg_exp}")
    except:
        print("There was a problem with the string.")
    return reg_exp

In [None]:
def findItemGetID(csvName, gis):
    try:
        searched_item = gis.content.search(csvName, item_type = "Feature Layer")
        for i in searched_item:
            reg_exp = create_strict_reg_exp(csvName)
            if re.search(reg_exp, i.title)!= None:    
                #logging.info(f"{csvName} has the id: {i.id}")
                return i.id
    except:
        print("There was a problem finding the item")

In [None]:
def getFlayerFromID(item_id):
    item = gis.content.get(item_id)
    flayer = item.layers[0]
    return flayer

In [None]:
def getSDFfromFlayer(flayer):
    sdf = pd.DataFrame.spatial.from_layer(flayer)
    return sdf

In [90]:
#### Get the ID of the table. No extension needed. Not working for some reason
sp_country_id  = findItemGetID('Terrestrial_vertebrates_by_country_20200617_stewardship',gis)
#sp_country_id ='159c6daf94f0498883186d052f033759'

In [None]:
spc_flayer =getFlayerFromID(sp_country_id)
spc_flayer

In [None]:
#sp_widget_id  = '4688587cf07748b2964e9618b31b66e5'
spw_flayer =getFlayerFromID(sp_widget_id)
spw_flayer

In [None]:
sdf_country = pd.DataFrame.spatial.from_layer(spc_flayer)
#sdf_widget=pd.DataFrame.spatial.from_layer(spw_flayer)

In [None]:
#### returns a sdf from the query
#### For SQL queries very important. Outer "", inner ''
df = spw_flayer.query(where="url_sp like 'https%'").sdf
#takes 20 min

In [None]:
df.head()
# len(df)
# df.shape

In [None]:
#### keep a subset of the table with just necessary columns
df_simple = df[['species_name','url_sp']]
df_simple.head()

In [None]:
## eliminate duplicated fields
df_unique = df_simple.drop_duplicates(keep='first')

In [None]:
df.shape #(1044875, 13)
df_unique.shape #(11425, 2)

In [None]:
sdf_country.head()

In [None]:
sp_with_urls =pd.merge(left=sdf_country, right = df_unique, left_on='species',right_on = 'species_name')

In [None]:
len(sp_with_urls) ### 75449 

In [None]:
sp_with_urls.head()

In [None]:
df_sorted= sp_with_urls.sort_values(by = ['countryname','speciesgroup','stewardship','AREA_KM2'])
df_sorted.head()

In [None]:
### addd new column with ranking
df_sorted['highlight'] = df_sorted.groupby(['countryname','speciesgroup']).cumcount()+1

In [None]:
df_sorted.head(10)

In [None]:
df_sorted.to_csv('species_table_highlights_for_url.csv')

In [None]:
#### upload table back from csv
df_sorted = pd.read_csv('species_table_highlights_for_url.csv')
df_sorted = df_sorted.drop(columns=['Unnamed: 0'])
df_sorted.head()

In [None]:
#### merge with the original table on arcGIS based on the Object ID. Left merge
df_sorted.sort_values(by = ['ObjectId']).head()

In [None]:
df_sorted_reduced = df_sorted[['ObjectId','url_sp','highlight']]
updated =sdf_country.merge(df_sorted_reduced,on='ObjectId',how='left')

In [None]:
updated.head()

In [None]:
len(updated)==len(sdf_country)

In [None]:
### keep a local copy
updated.to_csv('species_table_highlights_for_url_all_ObjectId.csv')

### 2. Add new columns to vertebrate table in ArcGIS
We have a table with the species highlight ranking and urls for pictures. 
Only missing attaching these fields to the original table in ArcGIS
Follow steps in [this notebook](https://github.com/Vizzuality/he-scratchfolder/blob/master/addFieldsToFeatureService.ipynb) and instructions on [this wiki](https://github.com/Vizzuality/sci-team-wiki/wiki/ESRI#adding-new-fields-to-the-service-notebook-with-example)

In [None]:
import arcgis
from arcgis.gis import GIS, GroupManager
from arcgis.features import FeatureLayerCollection
from arcgis.features import FeatureLayer
from copy import deepcopy
from arcgis import geometry
import re
from pprint import pprint
import pandas as pd

In [None]:
gis = GIS(profile = "half_earth_profile")

In [None]:
def createFieldsToBeAdded(flayer, csv_table):
    flayer_fields = flayer.manager.properties.fields
    template_field = dict(deepcopy(flayer_fields[0]))
    sdf = getSDFfromFlayer(flayer)
    new_field_names = list(csv_table.columns.difference(sdf.columns))
    
    fields_to_be_added = []
    for new_field_name in new_field_names:
        current_field = deepcopy(template_field)
        dt = csv_table[new_field_name].dtypes
        
        if dt == 'O':
            #put the type to character
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeString'
            #current_field['length'] = 8000
        if dt == 'float64':
            #put the type to double
            current_field['sqlType'] = 'sqlTypeOther'
            current_field['type'] = 'esriFieldTypeDouble'
            #current_field['length'] = 8000      

        current_field['name'] = new_field_name.lower()
        current_field['alias'] = new_field_name
        current_field['nullable'] = True
        current_field['editable'] = True
        fields_to_be_added.append(current_field)
    return fields_to_be_added

In [None]:
def createFeaturesForUpdate(flayer, csv_table, fields_to_be_added, id_field_in_csv, id_field_in_service):
    fset2 = flayer.query()
    features2 = fset2.features
    features_for_update = []
    for country_id in csv_table[id_field_in_csv]:
        try:
            # get the matching row from csv
            matching_row = csv_table.where(csv_table[id_field_in_csv] == country_id).dropna()

            #print(str(country_id) + " Adding additional attributes for: " + matching_row['iso3'].values[0])

            # get the feature to be updated
            assert  len([f for f in features2 if f.attributes[id_field_in_service] == country_id]),  "id not matched"
            original_feature = [f for f in features2 if f.attributes[id_field_in_service] == country_id][0]
            feature_to_be_updated = deepcopy(original_feature)

            # assign the updated values
            for field in fields_to_be_added:
                feature_to_be_updated.attributes[field['name']] = matching_row[field['name']].values[0]
                #add this to the list of features to be updated
                features_for_update.append(feature_to_be_updated)
            #print(str(country_id) + " Done additional attributes for: " + matching_row['countryname'].values[0])
    
        except:
            print(f"{country_id} not available in service")
    return features_for_update

In [None]:
item_id  = findItemGetID('Terrestrial_vertebrates_by_country_20200617_stewardship',gis)

In [None]:
flayer = getFlayerFromID(item_id)

In [None]:
flayer

In [None]:
csv_table = df_sorted[['ObjectId', 'url_sp','highlight']]

In [None]:
csv_table.head()

In [None]:
fields_to_be_added = createFieldsToBeAdded(flayer, csv_table)

In [None]:
fields_to_be_added

In [None]:
flayer.manager.add_to_definition({'fields':fields_to_be_added})

In [None]:
### started at 12:30, updating 162710 features, ended 16:00!!! (and failed: Parameters not valid for edit_features, because fields to be added were empty [])
### start at 16:10 - 20:00 
### 8:20 - 11:30
features_for_update = createFeaturesForUpdate(flayer, csv_table, fields_to_be_added, id_field_in_csv = "ObjectId", id_field_in_service = "ObjectId")

In [None]:
len(features_for_update) #150898?

In [None]:
150898 / 8000

In [None]:
len(features_for_update[0:200])
#0:1000
#1001-3000

In [None]:
csv_table.loc[15,:]

In [81]:
import numpy as np
i =68
start =int(round(np.linspace(start=0, stop=len(features_for_update), num=70)[i],0))
end=int(round(np.linspace(start=0, stop=len(features_for_update), num=70)[i+1],0))
print(f'{start}+{end}')
print(end-start)

148711+150898
2187


In [84]:
for i in range(48,68):
    start =int(round(np.linspace(start=0, stop=len(features_for_update), num=70)[i],0))
    end=int(round(np.linspace(start=0, stop=len(features_for_update), num=70)[i+1],0))
    flayer.edit_features(updates= features_for_update[start:end])
    print(f'finished {i} last feature = {end}')

finished 48 last feature = 107159
finished 49 last feature = 109346
finished 50 last feature = 111533
finished 51 last feature = 113720
finished 52 last feature = 115907
finished 53 last feature = 118094
finished 54 last feature = 120281
finished 55 last feature = 122468
finished 56 last feature = 124655
finished 57 last feature = 126842
finished 58 last feature = 129029
finished 59 last feature = 131216
finished 60 last feature = 133403
finished 61 last feature = 135590
finished 62 last feature = 137776
finished 63 last feature = 139963
finished 64 last feature = 142150
finished 65 last feature = 144337
finished 66 last feature = 146524
finished 67 last feature = 148711


In [None]:
flayer.edit_features(updates= features_for_update) ### API cannot handle 160k requests. I need to slip them up in blocks of 2000

In [87]:
for i in range(68,69):
    start =int(round(np.linspace(start=0, stop=len(features_for_update), num=70)[i],0))
    end=int(round(np.linspace(start=0, stop=len(features_for_update), num=70)[i+1],0))
    flayer.edit_features(updates= features_for_update[start:end])
    print(f'finished {i} last feature = {end}')

finished 68 last feature = 150898


In [None]:
### test fields to be deleted from API (I have not tested it)
json = {
    "fields" : [
    {
      "name" : "POP90_SQMI"
    }
  ]
}

result = fl.manager.delete_from_definition(json)
print(result)