# Parameter tuning for Random Forest (RF) classification

### Classification with settings from parameter testing for RF algorithm. 
### Using Google Earth Engine Python API and NICFI Normalized Analytic Basemap from December 2022

Author: Finn Geiger\
Date: April 6th 2023\
Contact:
- https://github.com/finn-geiger
- https://www.linkedin.com/in/finn-geiger-b1329a20b/

### 1 Import and setup
#### 1.1 Importing the required libraries and packages

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import geemap
import ee
import os
import time
import pandas as pd
from tabulate import tabulate
#%pip install tabulate


The following classes and landcover IDs will be used:

In [2]:
# lookup table of the landcover classes; the IDs run from 1 to 7 in the order of the class names
info = {
    'Class name': [
        'Informal',
        'Formal',
        'Industrial',
        'Roads',
        'Vacant land',
        'Vegetation',
        'Water-bodies',
    ],
    'landcover ID': list(range(1, 8)),
}

# rendering the table with the dictionary keys as column headers
print(tabulate(info, headers='keys', tablefmt='fancy_grid'))

╒══════════════╤════════════════╕
│ Class name   │   landcover ID │
╞══════════════╪════════════════╡
│ Informal     │              1 │
├──────────────┼────────────────┤
│ Formal       │              2 │
├──────────────┼────────────────┤
│ Industrial   │              3 │
├──────────────┼────────────────┤
│ Roads        │              4 │
├──────────────┼────────────────┤
│ Vacant land  │              5 │
├──────────────┼────────────────┤
│ Vegetation   │              6 │
├──────────────┼────────────────┤
│ Water-bodies │              7 │
╘══════════════╧════════════════╛


##### When first using the GEE Python API, the user must authenticate and initialize the environment using the following two lines of code:

In [3]:
#ee.Authenticate() 
#ee.Initialize()

In [4]:
# creating the interactive geemap map widget (no arguments, i.e. default settings)
Map = geemap.Map()

# loading the interactive map: the bare expression at the end of the cell
# makes the notebook render the widget as the cell output
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

#### 1.2 Importing the datasets from GEE assets and data catalog and clipping the basemap to the AOI

In [5]:
# NICFI basemap collection for Africa
nicfi = ee.ImageCollection('projects/planet-nicfi/assets/basemaps/africa')

# restricting the collection to the December 2022 date range and taking the first image
december_2022 = ee.Filter.date('2022-12-01', '2023-01-01')
basemap_2022_12 = nicfi.filter(december_2022).first()

# rendering settings for the R, G and B bands of the scene
vis_params = dict(bands=["R", "G", "B"], min=64, max=5454, gamma=1.8)

# centring the map on the basemap (zoom level 4) and adding it as a layer
Map.centerObject(basemap_2022_12, 4)
Map.addLayer(basemap_2022_12, vis_params, '2022-12 mosaic')

In [6]:
# loading the area of interest (AOI) for Windhoek from the GEE assets
aoi_windhoek = ee.FeatureCollection('users/s85315/masterthesis/Study_Area_Windhoek')

# the AOI is drawn in blue
vis_params_aoi = dict(color='blue')

# adding the AOI to the map and zooming to it (zoom level 12)
Map.addLayer(aoi_windhoek, vis_params_aoi, 'AOI')
Map.centerObject(aoi_windhoek, 12)

In [7]:
# restricting the December 2022 basemap to the AOI
basescene = basemap_2022_12.clipToCollection(aoi_windhoek)

# showing the clipped scene with the same rendering settings as the full basemap
Map.addLayer(basescene, vis_params, name='clipped')

In [8]:
# loading the training sample points of each landcover class from the GEE assets
TS_Informal_Points = ee.FeatureCollection(
    'users/s85315/masterthesis/TrainingSamples/TS_Informal_RPoints')
TS_Formal_Points = ee.FeatureCollection(
    'users/s85315/masterthesis/TrainingSamples/TS_Formal_RPoints')
TS_Industrial_Points = ee.FeatureCollection(
    'users/s85315/masterthesis/TrainingSamples/TS_Industrial_RPoints')
TS_Roads_Points = ee.FeatureCollection(
    'users/s85315/masterthesis/TrainingSamples/TS_Roads_RPoints')
TS_VacantLand_Points = ee.FeatureCollection(
    'users/s85315/masterthesis/TrainingSamples/TS_VacantLand_RPoints')
TS_Vegetation_Points = ee.FeatureCollection(
    'users/s85315/masterthesis/TrainingSamples/TS_Vegetation_RPoints')
TS_Waterbodies_Points = ee.FeatureCollection(
    'users/s85315/masterthesis/TrainingSamples/TS_Waterbodies_RPoints')

# optional: uncomment to show the samples of a class on the map (hidden by default)
# Map.addLayer(TS_Informal_Points, {'color': 'c43c39'}, 'Informal Training Data', False)
# Map.addLayer(TS_Formal_Points, {'color': 'e5b636'}, 'Formal Training Data', False)
# Map.addLayer(TS_Industrial_Points, {'color': '2f2f2f'}, 'Industrial Training Data', False)
# Map.addLayer(TS_Roads_Points, {'color': 'aaaaaa'}, 'Roads Training Data', False)
# Map.addLayer(TS_VacantLand_Points, {'color': 'b08e7a'}, 'Vacant land Training Data', False)
# Map.addLayer(TS_Vegetation_Points, {'color': '85b66f'}, 'Vegetation Training Data', False)
# Map.addLayer(TS_Waterbodies_Points, {'color': 'a5bfdd'}, 'Waterbodies Training Data', False)

# merging the per-class collections one after another into a single FeatureCollection
training_samples = TS_Informal_Points
for class_points in (
    TS_Formal_Points,
    TS_Industrial_Points,
    TS_Roads_Points,
    TS_VacantLand_Points,
    TS_Vegetation_Points,
    TS_Waterbodies_Points,
):
    training_samples = training_samples.merge(class_points)

### 2 Classification with RF

#### 2.1 Applying training samples on the base scene

In [9]:
# sampling the basescene at the training samples; the 'landcover' property
# is requested so that every sample keeps its class label
training = basescene.sampleRegions(
    collection=training_samples,
    properties=['landcover'],
    scale=4.77,
)

#### 2.2 Configuration, creation and training of the RF classifier

In [10]:
# creating variable for parameter settings
# Mtry is left at its default
# Ntree values between 50 and 550 are tested with a step size of 50;
# RF_param holds the Ntree value used for the current run
RF_param = 550

classifier_params = {
    'numberOfTrees': RF_param,  # Ntree; the number of decision trees to create
    'variablesPerSplit': None,  # Mtry; the number of variables per split. If unspecified, uses the square root of the number of variables
    'minLeafPopulation': 1,  # only create nodes whose training set contains at least this many points
    'bagFraction': 0.5,  # the fraction of input to bag per tree
    'maxNodes': None,  # the maximum number of leaf nodes in each tree; no limit if unspecified
    'seed': 0,  # the randomization seed
}

# creating the RF classifier and training it on the sampled band values,
# using the 'landcover' property as the class label
classifier = ee.Classifier.smileRandomForest(**classifier_params).train(
    features=training,
    classProperty='landcover',
    inputProperties=basescene.bandNames(),
)

#### 2.3 Classifying the basescene and visualizing the product

In [11]:
# visualization parameters: one colour per landcover ID, ordered from ID 1 to ID 7
palette = [
    'c43c39',
    'e5b636',
    '2f2f2f',
    'aaaaaa',
    'b08e7a',
    '85b66f',
    'a5bfdd',
]
vis_params_classified = dict(min=1, max=7, palette=palette)

# applying the trained classifier to the basescene
classified_basescene = basescene.classify(classifier)

# adding the classification result to the map
Map.addLayer(classified_basescene, vis_params_classified, 'classified basescene')


#### 2.4 Exporting the results

##### 2.4.1 Exporting to Google Drive

In [12]:
# converting the FeatureCollection to Geometry for export
aoi_geom = aoi_windhoek.geometry()

# exporting to Google drive with GEE API
# NOTE(review): the Export.image.toDrive documentation notes that folder names containing
# separators are treated as literal names, not as nested paths - confirm that the output
# ends up in the intended Drive folder.
export = ee.batch.Export.image.toDrive(
    image=classified_basescene,
    description='classified_basescene_RF',  # TODO: change name here
    folder='masterthesis/classification_results',  # TODO: change name here
    scale=4.77,
    region=aoi_geom,
)

# starting the process
export.start()

# tracking the process
while export.active():
    print('Polling for task (id: {}).'.format(export.id))
    time.sleep(5)

# the loop also ends when the task failed or was cancelled,
# so the final state is reported instead of silently assuming success
final_status = export.status()
print('Task {} finished with state: {}'.format(export.id, final_status.get('state')))
if final_status.get('error_message'):
    print('Error message: {}'.format(final_status['error_message']))

Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id: Q62ULKVGQVCLYR2N6X4WGM4Q).
Polling for task (id

##### 2.4.2 Exporting to Asset

In [13]:
# exporting to Google Asset
# The classified image holds categorical landcover IDs. The default pyramiding policy
# ('mean') would average IDs of different classes in the reduced-resolution levels of
# the asset, so 'mode' is used for all bands instead.
export = ee.batch.Export.image.toAsset(
    image=classified_basescene,
    description='Export classified map',
    assetId='users/s85315/masterthesis/classification_results/classified_basescene_RF',  # TODO: change name here
    pyramidingPolicy={'.default': 'mode'},
    scale=4.77,
    region=aoi_geom,
)

# starting the process
export.start()

# tracking the process
while export.active():
    print('Polling for task (id: {}).'.format(export.id))
    time.sleep(5)

# the loop also ends when the task failed or was cancelled,
# so the final state is reported instead of silently assuming success
final_status = export.status()
print('Task {} finished with state: {}'.format(export.id, final_status.get('state')))
if final_status.get('error_message'):
    print('Error message: {}'.format(final_status['error_message']))

Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id: BIRKY6CYMRRBMXBJ7SUIC3YW).
Polling for task (id

##### Resources for code snippets

https://colab.research.google.com/github/csaybar/EEwPython/blob/dev/10_Export.ipynb \
https://worldbank.github.io/OpenNightLights/tutorials/mod6_6_RF_classifier.html \
https://towardsdatascience.com/how-to-easily-create-tables-in-python-2eaea447d8fd \
https://developers.google.com/earth-engine/apidocs/ee-classifier-smilerandomforest