In [1]:
from arcgis.gis import GIS
from arcgis.features import FeatureLayerCollection
from arcgis.features import FeatureLayer
import json
import pprint

In [2]:
gis = GIS(profile = "eowilson")

In [3]:
import re
def create_strict_reg_exp(to_search):
    """Build a regular expression that matches ``to_search`` and nothing else.

    The text is passed through ``re.escape`` so that characters such as ``.``
    or ``(`` are matched literally instead of being read as regex syntax, and
    the pattern is anchored at both ends.

    Parameters
    ----------
    to_search : str
        Literal text that must make up the whole matched string.

    Returns
    -------
    str
        A pattern of the form ``^<escaped text>$``.
    """
    # str() keeps the permissive behaviour of the original f-string, which
    # accepted any object that can be rendered as text.
    return f"^{re.escape(str(to_search))}$"

In [4]:
def findItemGetID(csvName, gis, item_type = "Feature Layer"):
    """Return the id of the first search hit whose title is exactly ``csvName``.

    Parameters
    ----------
    csvName : str
        Title to look for; it is also used as the search query.
    gis : object
        Connection whose ``content.search(query, item_type=...)`` returns
        items exposing ``title`` and ``id``.
    item_type : str, optional
        Item type forwarded to the search. Defaults to ``"Feature Layer"``.

    Returns
    -------
    The ``id`` of the first matching item, or ``None`` when no title matches
    or the search fails (a message is printed in the latter case).
    """
    try:
        # The pattern depends only on csvName, so build it once instead of
        # once per search hit.
        reg_exp = create_strict_reg_exp(csvName)
        for i in gis.content.search(csvName, item_type = item_type):
            if re.search(reg_exp, i.title) is not None:
                return i.id
    except Exception as err:
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        print(f"There was a problem finding the item: {err}")
    return None

In [5]:
def getFlayerFromID(item_id):
    """Return the first layer of the item with id ``item_id``.

    The lookup goes through the notebook-level ``gis`` connection, not a
    parameter.

    Parameters
    ----------
    item_id : str
        Id of the item, e.g. the value returned by ``findItemGetID``.

    Returns
    -------
    The first entry of the item's ``layers``.

    Raises
    ------
    ValueError
        If the lookup yields ``None`` (for example because ``findItemGetID``
        found nothing and returned ``None``).
    """
    item = gis.content.get(item_id)
    if item is None:
        # Fail with an explicit message instead of an attribute error on None.
        raise ValueError(f"No item found for id {item_id!r}")
    return item.layers[0]

In [6]:
import pandas as pd
def getSDFfromFlayer(flayer):
    """Load ``flayer`` into a DataFrame via ``pd.DataFrame.spatial.from_layer``.

    NOTE(review): the ``spatial`` accessor is not part of pandas itself;
    presumably it is registered by the arcgis imports at the top of the
    notebook -- confirm.
    """
    return pd.DataFrame.spatial.from_layer(flayer)

# Get the item and get an SDF

In [7]:
item_id = findItemGetID("gadm_centroid", gis)

In [8]:
flayer = getFlayerFromID(item_id)

In [9]:
sdf = getSDFfromFlayer(flayer)

In [10]:
sdf.shape

(252, 50)

In [None]:
#sdf.head()

# Get nearest countries
The sdf only has the centroids of the countries. We can calculate the pairwise distances between all the points (a 252 x 252 matrix, one row and one column per country in the sdf) and then keep the closest countries for each one (the code below keeps the 10 nearest plus the country itself). Check [this resource](https://kanoki.org/2019/12/27/how-to-calculate-distance-in-python-and-pandas-using-scipy-spatial-and-distance-functions/) to calculate distance. 

In [11]:
from math import radians
import pandas as pd
import numpy as np
from sklearn.neighbors import DistanceMetric

In [12]:
df_coord = pd.DataFrame(data = sdf['GID_0'])

In [13]:
# Collect the x / y coordinate of every row's SHAPE geometry, in row order.
shape_xy = [(row['SHAPE'].x, row['SHAPE'].y) for _, row in sdf.iterrows()]
x_list = [x_value for x_value, _ in shape_xy]
y_list = [y_value for _, y_value in shape_xy]

In [14]:
df_coord['x'] = x_list
df_coord['y'] = y_list

In [15]:
df_coord['lat'] = np.radians(df_coord['y'])
df_coord['lon'] = np.radians(df_coord['x'])

In [16]:
dist = DistanceMetric.get_metric('haversine')

In [17]:
# Pairwise haversine distances between all centroids, labelled by country code
# on both axes. The inputs are the lat/lon columns already converted to radians.
# NOTE(review): 6373 is presumably the Earth radius in km used to scale the
# metric's output to kilometres -- confirm the intended value.
country_codes = df_coord.GID_0.unique()
coords_rad = df_coord[['lat','lon']].to_numpy()
dist_df = pd.DataFrame(dist.pairwise(coords_rad)*6373, columns=country_codes, index=country_codes)

In [18]:
dist_df.shape

(252, 252)

Next, we sort the table by each country's column; position 0 is always the country itself (distance 0). We take rows 0:11 (the country itself plus its 10 nearest neighbours) and store the row names in a dictionary, which gives us the codes of the neighbouring countries.

# For every country (column), sort the rows by distance to it and keep the
# labels of the first 11 rows, stored as a JSON-encoded list.
# The frame is re-sorted cumulatively (each sort starts from the previous
# ordering), which is kept on purpose so tie ordering is unchanged.
dist_df_sort = dist_df
neighbour_dict = dict.fromkeys(dist_df.columns.values)
for country_code in neighbour_dict:
    dist_df_sort = dist_df_sort.sort_values(by = [country_code])
    closest_rows = dist_df_sort[country_code].iloc[0:11]
    neighbour_dict[country_code] = json.dumps(closest_rows.index.values.tolist())

In [20]:
#neighbour_dict

# Get shared stewardship countries

## Get iso code from country name

In [None]:
#sp_table = pd.read_csv("/Users/gretacvega/Documents/GitHub/he-scratchfolder/data/Terrestrial_vertebrates_by_country_20200617.csv")

In [None]:
#code_name = sp_table[["countryname", "iso3"]].drop_duplicates()

In [None]:
#code_name.shape

In [None]:
#table_item_id = findItemGetID("stewardship_matrix", gis)
#item = gis.content.get(table_item_id)
#tlayer = item.tables[0]
#tlayer
#tsdf = pd.DataFrame.spatial.from_layer(tlayer)

```{R}
library(reshape2)
library(dplyr)
# Input records; the code below only uses the `species` and `iso3` columns.
d = read.csv("NRC_species_data_20200817.csv")
head(d)
# Pivot to wide form: one row per iso3, one column per species; each cell is
# the number of input rows for that pair (fun.aggregate = length).
d_wide =d %>% 
  select(species, iso3) %>% 
  dcast(iso3~species, fun.aggregate = length)

# Drop the iso3 column to get a numeric country x species matrix and use the
# iso3 codes as its row names.
clean_mat = as.matrix(d_wide[,-1])
dimnames(clean_mat)[[1]] = as.character(d_wide$iso3)
# df1.mt is species x country, so t(df1.mt) %*% df1.mt is country x country:
# entry (i, j) sums, over species, the product of the two countries' counts,
# i.e. the number of shared species when every count is 0 or 1.
df1.mt <- as.matrix(t(clean_mat))
df1.adj <- t(df1.mt) %*% df1.mt
# Zero the diagonal (each country paired with itself).
diag(df1.adj)=0

dim(df1.adj)
write.csv(df1.adj, "country_stewardship_matrix_20200817.csv")
```

In [21]:
# Load the country-by-country stewardship matrix; the file name matches the
# CSV written by the R chunk in this notebook.
# NOTE(review): this is an absolute, machine-specific path; the cell will not
# run on another machine without editing it.
tsdf = pd.read_csv("/Users/gretacvega/Documents/GitHub/he-scratchfolder/data/country_stewardship_matrix_20200817.csv")

In [22]:
tsdf.shape

(255, 256)

In [23]:
#tsdf.head()

In [24]:
#tsdf_code = pd.merge(left = code_name, right = tsdf, left_on = "countryname", right_on = "Country_index" ).drop(['Country_index'], axis=1)

In [25]:
tsdf.columns.values

array(['iso3', 'ABW', 'AFG', 'AGO', 'AIA', 'ALA', 'ALB', 'AND', 'ARE',
       'ARG', 'ARM', 'ASM', 'ATA', 'ATF', 'ATG', 'AUS', 'AUT', 'AZE',
       'BDI', 'BEL', 'BEN', 'BES', 'BFA', 'BGD', 'BGR', 'BHR', 'BHS',
       'BIH', 'BLM', 'BLR', 'BLZ', 'BMU', 'BOL', 'BRA', 'BRB', 'BRN',
       'BTN', 'BVT', 'BWA', 'CAF', 'CAN', 'CCK', 'CHE', 'CHL', 'CHN',
       'CIV', 'CMR', 'COD', 'COG', 'COK', 'COL', 'COM', 'CPV', 'CRI',
       'CUB', 'CUW', 'CXR', 'CYM', 'CYP', 'CZE', 'DEU', 'DJI', 'DMA',
       'DNK', 'DOM', 'DZA', 'ECU', 'EGY', 'ERI', 'ESH', 'ESP', 'EST',
       'ETH', 'FIN', 'FJI', 'FLK', 'FRA', 'FRO', 'FSM', 'GAB', 'GBR',
       'GEO', 'GGY', 'GHA', 'GIB', 'GIN', 'GLP', 'GMB', 'GNB', 'GNQ',
       'GRC', 'GRD', 'GRL', 'GTM', 'GUF', 'GUM', 'GUY', 'HKG', 'HMD',
       'HND', 'HRV', 'HTI', 'HUN', 'IDN', 'IMN', 'IND', 'IOT', 'IRL',
       'IRN', 'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JEY', 'JOR', 'JPN',
       'KAZ', 'KEN', 'KGZ', 'KHM', 'KIR', 'KNA', 'KOR', 'KWT', 'LAO',
       'LBN', 'LBR'

In [26]:
tsdf.columns.values[1:256]

array(['ABW', 'AFG', 'AGO', 'AIA', 'ALA', 'ALB', 'AND', 'ARE', 'ARG',
       'ARM', 'ASM', 'ATA', 'ATF', 'ATG', 'AUS', 'AUT', 'AZE', 'BDI',
       'BEL', 'BEN', 'BES', 'BFA', 'BGD', 'BGR', 'BHR', 'BHS', 'BIH',
       'BLM', 'BLR', 'BLZ', 'BMU', 'BOL', 'BRA', 'BRB', 'BRN', 'BTN',
       'BVT', 'BWA', 'CAF', 'CAN', 'CCK', 'CHE', 'CHL', 'CHN', 'CIV',
       'CMR', 'COD', 'COG', 'COK', 'COL', 'COM', 'CPV', 'CRI', 'CUB',
       'CUW', 'CXR', 'CYM', 'CYP', 'CZE', 'DEU', 'DJI', 'DMA', 'DNK',
       'DOM', 'DZA', 'ECU', 'EGY', 'ERI', 'ESH', 'ESP', 'EST', 'ETH',
       'FIN', 'FJI', 'FLK', 'FRA', 'FRO', 'FSM', 'GAB', 'GBR', 'GEO',
       'GGY', 'GHA', 'GIB', 'GIN', 'GLP', 'GMB', 'GNB', 'GNQ', 'GRC',
       'GRD', 'GRL', 'GTM', 'GUF', 'GUM', 'GUY', 'HKG', 'HMD', 'HND',
       'HRV', 'HTI', 'HUN', 'IDN', 'IMN', 'IND', 'IOT', 'IRL', 'IRN',
       'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JEY', 'JOR', 'JPN', 'KAZ',
       'KEN', 'KGZ', 'KHM', 'KIR', 'KNA', 'KOR', 'KWT', 'LAO', 'LBN',
       'LBR', 'LBY',

In [27]:
# Keep every column except the first one (the 'iso3' label column). The slice
# is open-ended so it does not depend on the table having exactly 256 columns.
tsdf_mat = tsdf[tsdf.columns.values[1:]]

In [28]:
# Label the rows with the iso3 codes so sorted rows can be read back by code.
tsdf_mat  = tsdf_mat.set_index(tsdf['iso3'].values) 

In [29]:
# Overwrite the column labels with the iso3 values taken from the rows.
# NOTE(review): this assumes the columns are in the same order as the rows
# (true for a symmetric matrix written row/column-aligned) -- confirm.
tsdf_mat.columns = tsdf['iso3'].values

In [30]:
tsdf_mat.shape

(255, 255)

In [31]:
# For every country (column), sort the rows by descending value in that column
# and keep the labels of the first 11 rows, stored as a JSON-encoded list.
# The frame is re-sorted cumulatively (each sort starts from the previous
# ordering); that is kept on purpose because it decides how ties are ordered.
tsdf_sort = tsdf_mat
steward_dict = dict.fromkeys(tsdf_mat.columns.values)
for iso_code in steward_dict:
    tsdf_sort = tsdf_sort.sort_values(by = [iso_code], ascending=False)
    top_rows = tsdf_sort[iso_code].iloc[0:11]
    steward_dict[iso_code] = json.dumps(top_rows.index.values.tolist())

In [32]:
#steward_dict

In [33]:
steward_df = pd.DataFrame(steward_dict.items(), columns = ["GID_0","filter_steward"])

In [34]:
steward_df.shape

(255, 2)

In [35]:
#steward_dict

In [36]:
neigh_df = pd.DataFrame(neighbour_dict.items(), columns = ["GID_0","filter_neigh"])

In [37]:
neigh_df.shape

(252, 2)

In [38]:
# Country codes that have a stewardship entry but no neighbour entry.
codes_with_neighbours = list(neigh_df['GID_0'])
codes_with_stewardship = list(steward_df['GID_0'])
list(set(codes_with_stewardship).difference(codes_with_neighbours))

['IOT', 'UMI', 'XCA']

In [39]:
# Right join on the country code: keep every stewardship row and attach the
# neighbour list where one exists.
pd_dict = neigh_df.merge(steward_df, on="GID_0", how="right")

In [40]:
pd_dict.shape

(255, 3)

In [41]:
#pd_dict[pd.isnull(pd_dict)]
pd_dict[pd_dict['filter_steward'].isnull()]

Unnamed: 0,GID_0,filter_neigh,filter_steward


# Get below and above countries for each field in challenges

In [42]:
fields = ['GID_0', 'NAME_0', 'Area', 'GNI_PPP', 'Population2016', 'prop_protected', 'prop_hm_very_high', 'protection_needed', 'total_endemic', 'N_SPECIES', 'SPI', "continent"]

In [43]:
sdf.columns

Index(['OBJECTID_1', 'GID_0', 'NAME_0', 'jpg_url', 'OBJECTID', 'GID', 'Area',
       'GNI_PPP', 'Protected', 'HM_0', 'HM_low', 'HM_moderate', 'HM_high',
       'Population2016', 'max_amph', 'max_bird', 'max_mamm', 'max_rept',
       'max_cact', 'max_coni', 'max_all', 'sentence', 'gadm_prop_COUNT',
       'amphibians', 'birds', 'mammals', 'nspecies', 'reptiles',
       'total_endemic', 'endemic_amphibians', 'endemic_birds',
       'endemic_mammals', 'endemic_reptiles', 'spi_mean', 'HM_very_high',
       'prop_hm_very_high', 'GlobalID', 'continent', 'has_priority',
       'has_raisg', 'AREA_KM2', 'N_SPECIES', 'SPI', 'prop_protected',
       'protection_needed', 'iso2', 'prop_hm_0', 'filter_similar',
       'max_highlited_sp', 'SHAPE'],
      dtype='object')

In [None]:
#fields = ['GID_0', 'NAME_0', 'Area', 'GNI_PPP', 'Population', 'prop_prote', 'prop_hm_ve', 'protection', 'total_ende', 'N_SPECIES', 'SPI', "continent"]

In [44]:
df = sdf[fields]

In [45]:
#filter_fields = ['Area', 'GNI_PPP', 'Population', 'prop_prote', 'prop_hm_ve', 'protection', 'total_ende', 'N_SPECIES', 'SPI']

In [46]:
filter_fields = ['Area','GNI_PPP','Population2016', 'prop_protected', 'prop_hm_very_high', 'protection_needed', 'total_endemic','N_SPECIES', 'SPI']

In [47]:
# Rank the countries on each indicator and, for every country, store the codes
# of the countries in a fixed-size window around it in that ranking
# (nber_index below, the country itself, nber_index above) as a JSON list.
#
# The window is slid inwards at both ends of the ranking so that it always
# holds `window_size` codes and always contains the country it belongs to.
# The previous bounds produced 10 codes instead of 11 at the edges and never
# included the last-ranked row, not even in its own list.
#
# NOTE(review): sort_values puts missing values last by default, so countries
# without a value for an indicator are windowed among the top-ranked rows.
df_sort = df
nber_index = 5
window_size = 2 * nber_index + 1
n_rows = len(df_sort.index)

new_fields = []
for field in filter_fields:
    df_sort = df_sort.sort_values(by = [field]).reset_index(drop=True)
    ranked_codes = df_sort.GID_0
    collapse_list = []
    for position in range(n_rows):
        # Clamp the window start to [0, n_rows - window_size]; the inner max
        # covers tables shorter than one window.
        start = min(max(position - nber_index, 0), max(n_rows - window_size, 0))
        val_list = ranked_codes.iloc[start:start + window_size].values.tolist()
        collapse_list.append(json.dumps(val_list))

    filter_field = f"filter_{field}"
    new_fields.append(filter_field)
    df_sort[filter_field] = collapse_list

# Get countries from same continent

In [48]:
df_sort.continent.unique()

array(['Oceania', 'Europe', 'Africa', 'Asia', 'Antarctica',
       'North America', 'South America'], dtype=object)

In [49]:
continent_dict = {}

In [50]:
# Map every continent name to the list of country codes found on it.
continent_dict.update({
    continent_name: df_sort.loc[df_sort['continent'] == continent_name].GID_0.tolist()
    for continent_name in df_sort.continent.unique()
})

## Add column `filter_continent` to `df_sort`

In [51]:
same_continent_dict = dict.fromkeys(df_sort.GID_0.unique())

In [52]:
# For each country, look up its continent and store that continent's full
# country list (the country itself included) as a JSON-encoded list.
for country_code in same_continent_dict:
    on_this_country = df_sort['GID_0'] == country_code
    home_continent = df_sort.loc[on_this_country, "continent"].to_list()[0]
    same_continent_dict[country_code] = json.dumps(continent_dict[home_continent])

In [53]:
continent_df = pd.DataFrame(same_continent_dict.items(), columns = ["GID_0","filter_continent"])

In [54]:
continent_df.head()

Unnamed: 0,GID_0,filter_continent
0,CCK,"[""CCK"", ""COK"", ""KIR"", ""SLB"", ""WLF"", ""FSM"", ""MH..."
1,GGY,"[""GGY"", ""FRO"", ""SMR"", ""MDA"", ""MNE"", ""XKO"", ""BI..."
2,COM,"[""COM"", ""ATF"", ""STP"", ""MRT"", ""SOM"", ""ESH"", ""LB..."
3,MAC,"[""MAC"", ""YEM"", ""SYR"", ""BHR"", ""XPI"", ""MDV"", ""XS..."
4,COK,"[""CCK"", ""COK"", ""KIR"", ""SLB"", ""WLF"", ""FSM"", ""MH..."


In [55]:
continent_df.shape

(252, 2)

In [56]:
# Left join on the country code: one row per country in continent_df, with the
# neighbour and stewardship filters attached where available.
# NOTE(review): pd_dict is both an input and the output of this cell, so
# running the cell twice merges the result into itself.
pd_dict = continent_df.merge(pd_dict, on="GID_0", how="left")

In [57]:
pd_dict.shape

(252, 4)

In [58]:
pd_dict.head()

Unnamed: 0,GID_0,filter_continent,filter_neigh,filter_steward
0,CCK,"[""CCK"", ""COK"", ""KIR"", ""SLB"", ""WLF"", ""FSM"", ""MH...","[""CCK"", ""CXR"", ""SGP"", ""IDN"", ""MYS"", ""VNM"", ""BR...","[""AUS"", ""PNG"", ""CXR"", ""VUT"", ""IDN"", ""SLB"", ""NC..."
1,GGY,"[""GGY"", ""FRO"", ""SMR"", ""MDA"", ""MNE"", ""XKO"", ""BI...","[""GGY"", ""JEY"", ""GBR"", ""FRA"", ""BEL"", ""IMN"", ""IR...","[""FRA"", ""DEU"", ""JEY"", ""GBR"", ""NLD"", ""RUS"", ""DN..."
2,COM,"[""COM"", ""ATF"", ""STP"", ""MRT"", ""SOM"", ""ESH"", ""LB...","[""COM"", ""MYT"", ""MOZ"", ""MDG"", ""MWI"", ""TZA"", ""KE...","[""MDG"", ""MYT"", ""TZA"", ""MOZ"", ""KEN"", ""SYC"", ""SO..."
3,MAC,"[""MAC"", ""YEM"", ""SYR"", ""BHR"", ""XPI"", ""MDV"", ""XS...","[""MAC"", ""HKG"", ""XPI"", ""TWN"", ""PHL"", ""LAO"", ""XS...","[""CHN"", ""HKG"", ""VNM"", ""LAO"", ""TWN"", ""MMR"", ""TH..."
4,COK,"[""CCK"", ""COK"", ""KIR"", ""SLB"", ""WLF"", ""FSM"", ""MH...","[""COK"", ""NIU"", ""PYF"", ""ASM"", ""WSM"", ""TON"", ""TK...","[""FJI"", ""PYF"", ""ASM"", ""KIR"", ""WSM"", ""TON"", ""NI..."


In [59]:
pd_dict[pd_dict['filter_continent'].isnull()]

Unnamed: 0,GID_0,filter_continent,filter_neigh,filter_steward


In [60]:
pd_dict[pd_dict['filter_neigh'].isnull()]

Unnamed: 0,GID_0,filter_continent,filter_neigh,filter_steward


# Create the filter_similar field

In [61]:
new_fields

['filter_Area',
 'filter_GNI_PPP',
 'filter_Population2016',
 'filter_prop_protected',
 'filter_prop_hm_very_high',
 'filter_protection_needed',
 'filter_total_endemic',
 'filter_N_SPECIES',
 'filter_SPI']

In [62]:
# Add the three filters that come from pd_dict. Each name is appended only if
# it is missing, so re-running this cell does not create duplicate entries.
for extra_field in ("filter_neigh", "filter_steward", "filter_continent"):
    if extra_field not in new_fields:
        new_fields.append(extra_field)
new_fields

['filter_Area',
 'filter_GNI_PPP',
 'filter_Population2016',
 'filter_prop_protected',
 'filter_prop_hm_very_high',
 'filter_protection_needed',
 'filter_total_endemic',
 'filter_N_SPECIES',
 'filter_SPI',
 'filter_neigh',
 'filter_steward',
 'filter_continent']

In [63]:
df_sort.shape

(252, 21)

In [64]:
# Left join on the country code: keep every row of df_sort and attach the
# continent / neighbour / stewardship filters from pd_dict.
df_filter = df_sort.merge(pd_dict, on="GID_0", how="left")

In [65]:
df_filter.shape

(252, 24)

In [66]:
def _decode_filter_value(value):
    """Return the object to embed in the combined JSON for one filter value.

    - a string holding a JSON list (what the filter columns store) is decoded
      back into a list, so it is nested as a real list instead of a quoted string;
    - a float NaN (a filter missing after the merges) becomes the string "NaN";
    - anything else is passed through unchanged.
    """
    if isinstance(value, str) and value.lstrip().startswith("["):
        try:
            return json.loads(value)
        except ValueError:
            # Not valid JSON after all: keep the text as it is.
            return value
    if isinstance(value, float) and value != value:  # NaN is the only float != itself
        return "NaN"
    return value


# Build one JSON object per country that maps every filter name to its list of
# country codes. Decoding and re-encoding replaces the earlier approach of
# un-nesting the JSON with global string replacements; for list and NaN values
# it produces the same text.
similar_list = []
for _, filter_row in df_filter.iterrows():
    filter_dict = {
        filter_name: _decode_filter_value(stored_value)
        for filter_name, stored_value in filter_row[new_fields].to_dict().items()
    }
    similar_list.append(json.dumps(filter_dict))
df_filter['filter_similar'] = similar_list

In [67]:
df_upload = df_filter[["GID_0", "filter_similar"]]

In [68]:
df_upload.shape

(252, 2)

# Update service: create field to be added and add features for update



In [None]:
from copy import deepcopy
def createFieldsToBeAdded(flayer, csv_table):
    """Build field definitions for the table columns the layer does not have yet.

    The layer's first field definition is copied as a template for every new
    field. Columns of dtype ``'O'`` become string fields of length 8000 and
    ``'float64'`` columns become double fields; any other dtype keeps the
    template's type settings.

    Parameters
    ----------
    flayer : layer object exposing ``manager.properties.fields``.
    csv_table : pandas.DataFrame
        Table whose columns are compared with the layer's columns.

    Returns
    -------
    list of dict
        One definition per new column, with a lower-cased ``name`` and the
        original column name as ``alias``.
    """
    template_field = dict(deepcopy(flayer.manager.properties.fields[0]))
    existing_columns = getSDFfromFlayer(flayer).columns

    fields_to_be_added = []
    for column_name in list(csv_table.columns.difference(existing_columns)):
        field_def = deepcopy(template_field)
        column_dtype = csv_table[column_name].dtypes

        if column_dtype == 'O':
            # text column
            field_def.update({
                'sqlType': 'sqlTypeOther',
                'type': 'esriFieldTypeString',
                'length': 8000,
            })
        elif column_dtype == 'float64':
            # floating point column
            field_def.update({
                'sqlType': 'sqlTypeOther',
                'type': 'esriFieldTypeDouble',
            })

        field_def.update({
            'name': column_name.lower(),
            'alias': column_name,
            'nullable': True,
            'editable': True,
        })
        fields_to_be_added.append(field_def)
    return fields_to_be_added

In [73]:
from copy import deepcopy
def createFeaturesForUpdate(flayer, csv_table, fields_to_be_added, id_field_in_csv, id_field_in_service):
    """Build updated copies of the layer's features from the rows of ``csv_table``.

    For every id in ``csv_table[id_field_in_csv]`` the feature with the same
    value in ``id_field_in_service`` is deep-copied, the attributes named by
    ``fields_to_be_added`` are overwritten with the values of the matching
    table row, and the copy is collected exactly once.

    Parameters
    ----------
    flayer : layer object whose ``query().features`` yields features with an
        ``attributes`` mapping.
    csv_table : pandas.DataFrame
        Source of the new attribute values.
    fields_to_be_added : iterable
        Field definitions; only ``field['name']`` is read, both as attribute
        name and as table column name.
    id_field_in_csv : str
        Column of ``csv_table`` holding the ids.
    id_field_in_service : str
        Attribute of the features holding the ids.

    Returns
    -------
    list
        The updated feature copies; the queried features are left untouched.
        Ids that cannot be processed are reported with ``print`` and skipped.
    """
    # Index the features by id once instead of scanning the whole list twice
    # per id. setdefault keeps the first feature when an id occurs more than
    # once, as the earlier `[...][0]` lookup did.
    features_by_id = {}
    for feature in flayer.query().features:
        features_by_id.setdefault(feature.attributes.get(id_field_in_service), feature)

    features_for_update = []
    for country_id in csv_table[id_field_in_csv]:
        original_feature = features_by_id.get(country_id)
        if original_feature is None:
            print(f"{country_id} not available in service")
            continue

        # where() blanks out every non-matching row and dropna() removes them;
        # dropna() also removes the matching row if any of its cells is missing.
        matching_row = csv_table.where(csv_table[id_field_in_csv] == country_id).dropna()
        if matching_row.empty:
            print(f"{country_id} skipped: its row in the table has missing values")
            continue

        feature_to_be_updated = deepcopy(original_feature)
        try:
            for field in fields_to_be_added:
                feature_to_be_updated.attributes[field['name']] = matching_row[field['name']].values[0]
        except KeyError as err:
            print(f"{country_id} skipped: column {err} is not in the table")
            continue

        # Append once per feature; appending inside the field loop duplicated
        # the feature once per field.
        features_for_update.append(feature_to_be_updated)
    return features_for_update

In [None]:
fields_to_be_added = createFieldsToBeAdded(flayer, df_upload)

In [None]:
fields_to_be_added

In [None]:
flayer.manager.add_to_definition({'fields':fields_to_be_added})

In [None]:
features_for_update = createFeaturesForUpdate(flayer, df_upload, fields_to_be_added, "GID_0", "GID_0")

In [None]:
flayer.edit_features(updates = features_for_update)

# Update service: replace values in existing field

In [69]:
def getFieldsToBeUpdated(flayer, csv_table, id_field = "GID_0"):
    """Return the layer's field definitions whose values ``csv_table`` can replace.

    A field is selected when its name is exactly the name of a table column
    that also exists in the layer's data, the id column excluded. The match is
    exact: a substring test would, for example, also select ``birds`` for a
    table column called ``endemic_birds``.

    Parameters
    ----------
    flayer : layer object exposing ``manager.properties.fields``.
    csv_table : pandas.DataFrame
        Table holding the replacement values.
    id_field : str, optional
        Join column, never reported as a field to change. Defaults to "GID_0".

    Returns
    -------
    list
        The matching entries of ``flayer.manager.properties.fields``, in the
        layer's field order.
    """
    flayer_fields = flayer.manager.properties.fields
    # NOTE(review): the layer is loaded only to read its column names.
    sdf = getSDFfromFlayer(flayer)
    shared_columns = csv_table.columns[csv_table.columns.isin(sdf.columns)].difference([id_field])
    field_names = set(shared_columns)
    return [field for field in flayer_fields if field.name in field_names]

In [70]:
fields_to_be_changed = getFieldsToBeUpdated(flayer, csv_table = df_upload, id_field = "GID_0")

In [71]:
fields_to_be_changed

[{
   "name": "filter_similar",
   "type": "esriFieldTypeString",
   "alias": "filter_similar",
   "sqlType": "sqlTypeOther",
   "length": 0,
   "nullable": true,
   "editable": true,
   "visible": true,
   "domain": null,
   "defaultValue": null
 }]

In [74]:
features_for_update = createFeaturesForUpdate(flayer = flayer, csv_table = df_upload, fields_to_be_added = fields_to_be_changed, id_field_in_csv = "GID_0", id_field_in_service = "GID_0")

In [78]:
for f in features_for_update:
    print(f)

{"geometry": {"x": 96.82865905716984, "y": -12.184194048816266}, "attributes": {"OBJECTID_1": 41, "GID_0": "CCK", "NAME_0": "Cocos Islands", "jpg_url": "https://upload.wikimedia.org/wikipedia/commons/e/e6/Cocos_%28Keeling%29_Islands_2017_%2834%29.jpg", "OBJECTID": 41, "GID": "CCK", "Area": 5, "GNI_PPP": null, "Protected": 0, "HM_0": 5, "HM_low": 0, "HM_moderate": 0, "HM_high": 0, "Population2016": 549, "max_amph": 0, "max_bird": 1, "max_mamm": 0, "max_rept": 1, "max_cact": 0, "max_coni": 0, "max_all": 1, "sentence": "In Cocos Islands less than a quarter of the country is used for human activities, in its majority by urban use. ", "gadm_prop_COUNT": 7, "amphibians": 0, "birds": 16, "mammals": 0, "nspecies": 20, "reptiles": 4, "total_endemic": 0, "endemic_amphibians": 0, "endemic_birds": 0, "endemic_mammals": 0, "endemic_reptiles": 0, "spi_mean": 41.0028174603175, "HM_very_high": 0, "prop_hm_very_high": 0, "GlobalID": "66165570-9dda-43b4-8f3e-ed0f8c2bd38f", "continent": "Oceania", "has_p

In [79]:
flayer.edit_features(updates = features_for_update)

{'addResults': [],
 'updateResults': [{'objectId': 41,
   'uniqueId': 41,
   'globalId': '66165570-9dda-43b4-8f3e-ed0f8c2bd38f',
   'success': True},
  {'objectId': 82,
   'uniqueId': 82,
   'globalId': '789f0fb5-368e-4dab-a624-6acd83157d17',
   'success': True},
  {'objectId': 51,
   'uniqueId': 51,
   'globalId': '7b10595e-8e8f-483a-b9e2-fcbc823a72d4',
   'success': True},
  {'objectId': 134,
   'uniqueId': 134,
   'globalId': '78bf02bd-9891-435d-9ec9-5c6036bbd49a',
   'success': True},
  {'objectId': 49,
   'uniqueId': 49,
   'globalId': 'e412b775-d873-4c7d-b18a-b22fa400b895',
   'success': True},
  {'objectId': 120,
   'uniqueId': 120,
   'globalId': '66b06090-9d4a-4a8e-8715-68fc12b14f30',
   'success': True},
  {'objectId': 246,
   'uniqueId': 246,
   'globalId': 'b38ca183-713a-4e60-ab57-d12e2ff837d2',
   'success': True},
  {'objectId': 196,
   'uniqueId': 196,
   'globalId': '27f5781c-8c56-4532-8d54-baee09c7254b',
   'success': True},
  {'objectId': 212,
   'uniqueId': 212,
   '

# update the `filter_similar` field
- Get the service data
- Get the field and from json format make it a table `filter_df`
- Recalculate the similar values for very high human encroachment
- Get the info of countries from the same continent
- Replace information in service for `filter_similar`

In [None]:
sdf.head()

In [None]:
sdf.columns

In [None]:
sdf_filter = sdf[["GID_0","filter_similar", 'prop_hm_very_high', 'continent']]

In [None]:
sdf_filter.head()

In [None]:
d = json.loads(sdf_filter.filter_similar[0])

In [None]:
# Names of the filters stored in the first row's filter_similar JSON object.
filter_list = list(d)

In [None]:
filter_list

In [None]:
sdf_filter = sdf_filter.reindex(sdf_filter.columns.tolist() + filter_list, axis=1) 

In [None]:
# Spread each row's filter_similar JSON object over one column per filter.
#
# Values are written with .at on the frame itself: the earlier chained form
# `sdf_filter[key][index] = ...` assigns through an intermediate Series and is
# not guaranteed to reach the frame. A column must have object dtype to hold a
# list, so columns are converted (or created) before the first write.
for index, raw_filter in sdf_filter['filter_similar'].items():
    try:
        similar_dict = json.loads(raw_filter)
    except (TypeError, ValueError):
        # Missing or unparsable filter_similar value for this row.
        print(f"{sdf_filter['GID_0'][index]} not available in service")
        continue
    for key, value in similar_dict.items():
        if key not in sdf_filter.columns:
            sdf_filter[key] = None
        if sdf_filter[key].dtype != object:
            sdf_filter[key] = sdf_filter[key].astype(object)
        sdf_filter.at[index, key] = value

In [None]:
sdf_filter.head()

In [None]:
filter_fields = ['prop_hm_very_high'] 

In [None]:
# Rank the countries on each indicator in filter_fields and, for every country,
# store the codes of the countries in a fixed-size window around it in that
# ranking (nber_index below, the country itself, nber_index above) as a JSON list.
#
# The window is slid inwards at both ends of the ranking so that it always
# holds `window_size` codes and always contains the country it belongs to.
# The previous bounds produced 10 codes instead of 11 at the edges and never
# included the last-ranked row, not even in its own list.
df_sort = sdf_filter
nber_index = 5
window_size = 2 * nber_index + 1
n_rows = len(df_sort.index)

new_fields = []
for field in filter_fields:
    df_sort = df_sort.sort_values(by = [field]).reset_index(drop=True)
    ranked_codes = df_sort.GID_0
    collapse_list = []
    for position in range(n_rows):
        # Clamp the window start to [0, n_rows - window_size]; the inner max
        # covers tables shorter than one window.
        start = min(max(position - nber_index, 0), max(n_rows - window_size, 0))
        val_list = ranked_codes.iloc[start:start + window_size].values.tolist()
        collapse_list.append(json.dumps(val_list))

    filter_field = f"filter_{field}"
    new_fields.append(filter_field)
    df_sort[filter_field] = collapse_list

In [None]:
df_sort.head()

In [None]:
#df_sort.filter_prop_hm_very_high['GID_0' == "BEL"]
df_sort.loc[df_sort['GID_0'] == "BEL"].filter_prop_hm_very_high

In [None]:
sdf_filter.loc[sdf_filter['GID_0'] == "BEL"].filter_prop_hm_very_high

## Create dictionary with the countries  OLD


In [None]:
df_sort.loc[df_sort['continent'] == "Australia"]

In [None]:
df_sort.loc[df_sort['continent'] == "Australia", "continent"] = "Oceania"

In [None]:
continent_dict = {}

In [None]:
df_sort.continent.unique()

In [None]:
# Map every continent name to the codes of the countries on it.
# Both the continent names and the membership test use df_sort: the
# "Australia" -> "Oceania" relabel was applied to df_sort only, so looking the
# members up in sdf_filter would drop any relabelled country from the lists.
for continent in df_sort.continent.unique():
    countries = df_sort.loc[df_sort['continent'] == continent].GID_0
    continent_dict[continent] = countries.tolist()


In [None]:
print(continent_dict)

## add column `filter_continent` to `df_sort`
use continent_dict

## redo `filter_similar`
```
similar_list = []
for index, i in df_filter.iterrows():
    filter_dict = i[new_fields].to_dict()
    
    similar_list.append(json.dumps(filter_dict).replace('"[', '[').replace(']"', ']').replace('\\', ''))
    #similar_list.append(json.loads(json.dumps(filter_dict)))    
df_filter['filter_similar'] = similar_list
```



In [None]:
#df_sort.head()

In [None]:
df_sort = df_sort.reindex(df_sort.columns.tolist() + ['filter_continent'], axis=1) 

In [None]:
# Give every country the list of countries on its continent.
# The column is assigned on df_sort itself: the rows yielded by iterrows() are
# copies, so setting an attribute on them never changed the frame and the
# column stayed empty.
df_sort['filter_continent'] = [
    continent_dict[continent_name] for continent_name in df_sort['continent']
]

## Why don't some countries have the filter?