# Parameter tuning for Classification and Regression Tree (CART) classification

### Testing described parameter settings for CART algorithm using Google Earth Engine Python API and NICFI Normalized Analytic Basemap from December 2022

Author: Finn Geiger\
Date: March 30th 2023\
Contact:
- https://github.com/finn-geiger
- https://www.linkedin.com/in/finn-geiger-b1329a20b/

### 1 Import and setup
#### 1.1 Importing the required libraries and packages

In [17]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import geemap
import ee
import os
import time
from tabulate import tabulate
#%pip install tabulate


The following classes and landcover IDs will be used:

In [21]:
# Legend of the landcover classes. A class's landcover ID is its position in
# this list; position 0 is labelled 'not used'.
class_names = [
    'not used',
    'Informal',
    'Formal',
    'Industrial',
    'Roads',
    'Vacant land',
    'Vegetation',
    'Water-bodies',
]
info = {
    'Class name': class_names,
    'landcover ID': list(range(len(class_names))),
}

# Print the legend as a grid table, using the dictionary keys as column headers.
print(tabulate(info, headers='keys', tablefmt='fancy_grid'))

╒══════════════╤════════════════╕
│ Class name   │   landcover ID │
╞══════════════╪════════════════╡
│ not used     │              0 │
├──────────────┼────────────────┤
│ Informal     │              1 │
├──────────────┼────────────────┤
│ Formal       │              2 │
├──────────────┼────────────────┤
│ Industrial   │              3 │
├──────────────┼────────────────┤
│ Roads        │              4 │
├──────────────┼────────────────┤
│ Vacant land  │              5 │
├──────────────┼────────────────┤
│ Vegetation   │              6 │
├──────────────┼────────────────┤
│ Water-bodies │              7 │
╘══════════════╧════════════════╛


##### When first using the GEE Python API, the user must authenticate and initialize the environment with the following two lines of code:

In [2]:
# First-time setup of the GEE Python API (disabled by default).
# Uncomment both lines and run this cell once to authenticate and initialize.
#ee.Authenticate()
#ee.Initialize()

In [3]:
# Create the interactive geemap map widget; the following cells add their
# layers to this object.
Map = geemap.Map()

# A bare expression on the last line of the cell makes the notebook render the widget.
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

#### 1.2 Importing the datasets from GEE assets and data catalog and clipping the basemap to the AOI

In [4]:
# NICFI basemap collection for Africa (the base scene source)
nicfi = ee.ImageCollection('projects/planet-nicfi/assets/basemaps/africa')

# Keep only the basemaps matching the date filter 2022-12-01 .. 2023-01-01
# and take the first image of the filtered result
december_2022 = ee.Filter.date('2022-12-01', '2023-01-01')
basemap_2022_12 = nicfi.filter(december_2022).first()

# Rendering settings for the R/G/B bands, reused for the clipped scene later on
vis_params = {
    "bands": ["R", "G", "B"],
    "min": 64,
    "max": 5454,
    "gamma": 1.8,
}

# Centre the map on the basemap (zoom level 4) and show it as a layer
Map.centerObject(basemap_2022_12, 4)
Map.addLayer(basemap_2022_12, vis_params, '2022-12 mosaic')

In [5]:
# Load the study area (AOI) from the GEE assets and define how it is drawn
aoi_windhoek = ee.FeatureCollection('users/s85315/masterthesis/Study_Area_Windhoek')
vis_params_aoi = dict(color='blue')

# Show the AOI on the map, then zoom onto it (zoom level 12)
Map.addLayer(aoi_windhoek, vis_params_aoi, 'AOI')
Map.centerObject(aoi_windhoek, 12)

In [6]:
# Clip the December 2022 basemap to the AOI collection; the result (`basescene`)
# is the image that is sampled and classified in the following sections.
basescene = basemap_2022_12.clipToCollection(aoi_windhoek)
# Show the clipped scene with the same RGB rendering settings as the full basemap.
Map.addLayer(basescene, vis_params, 'clipped')

In [7]:
# Load the training points from the GEE assets, one FeatureCollection per
# landcover class
TS_Informal_Points = ee.FeatureCollection('users/s85315/masterthesis/TrainingSamples/TS_Informal_RPoints')
TS_Formal_Points = ee.FeatureCollection('users/s85315/masterthesis/TrainingSamples/TS_Formal_RPoints')
TS_Industrial_Points = ee.FeatureCollection('users/s85315/masterthesis/TrainingSamples/TS_Industrial_RPoints')
TS_Roads_Points = ee.FeatureCollection('users/s85315/masterthesis/TrainingSamples/TS_Roads_RPoints')
TS_VacantLand_Points = ee.FeatureCollection('users/s85315/masterthesis/TrainingSamples/TS_VacantLand_RPoints')
TS_Vegetation_Points = ee.FeatureCollection('users/s85315/masterthesis/TrainingSamples/TS_Vegetation_RPoints')
TS_Waterbodies_Points = ee.FeatureCollection('users/s85315/masterthesis/TrainingSamples/TS_Waterbodies_RPoints')

# Optional: show the samples of each class on the map (disabled; uncomment to use)
# Map.addLayer(TS_Informal_Points, {'color': 'c43c39'}, 'Informal Training Data', False)
# Map.addLayer(TS_Formal_Points, {'color': 'e5b636'}, 'Formal Training Data', False)
# Map.addLayer(TS_Industrial_Points, {'color': '2f2f2f'}, 'Industrial Training Data', False)
# Map.addLayer(TS_Roads_Points, {'color': 'aaaaaa'}, 'Roads Training Data', False)
# Map.addLayer(TS_VacantLand_Points, {'color': 'b08e7a'}, 'Vacant land Training Data', False)
# Map.addLayer(TS_Vegetation_Points, {'color': '85b66f'}, 'Vegetation Training Data', False)
# Map.addLayer(TS_Waterbodies_Points, {'color': 'a5bfdd'}, 'Waterbodies Training Data', False)

# Combine the per-class collections into a single FeatureCollection
# (merged in the same order as they were loaded above)
training_samples = (
    TS_Informal_Points
    .merge(TS_Formal_Points)
    .merge(TS_Industrial_Points)
    .merge(TS_Roads_Points)
    .merge(TS_VacantLand_Points)
    .merge(TS_Vegetation_Points)
    .merge(TS_Waterbodies_Points)
)

### 2 Classification with CART
#### 2.1 Applying training samples on the base scene

In [8]:
# Sample the clipped scene at the training points. Only the 'landcover'
# property of the points is carried over to the sampled features; the sampling
# scale (4.77) is the same value used for the validation sampling.
training = basescene.sampleRegions(
    collection=training_samples,
    properties=['landcover'],
    scale=4.77,
)

#### 2.2 Configuration and creation of the empty CART classifier

In [9]:
# CART parameter settings under test.
# NOTE: CART builds a single decision tree, so `maxNodes` is NOT a number of
# trees (that would be a Random Forest setting); it limits the size of the tree.
classifier_params = {
    'maxNodes': 10,          # maximum number of leaf nodes of the tree
    'minLeafPopulation': 1,  # only create nodes whose training set has at least this many points
}

# Create the CART classifier with these settings and train it on the sampled
# training features: 'landcover' is the class label and all bands of the
# clipped scene are used as input properties.
classifier = ee.Classifier.smileCart(**classifier_params).train(
    features=training,
    classProperty='landcover',
    inputProperties=basescene.bandNames(),
)

#### 2.3 Classifying the basescene and visualizing the product

In [10]:
# Apply the trained classifier to the clipped scene
classified_basescene = basescene.classify(classifier)

# One colour per landcover ID 1..7, in ID order
palette = [
    'c43c39',  # 1 Informal
    'e5b636',  # 2 Formal
    '2f2f2f',  # 3 Industrial
    'aaaaaa',  # 4 Roads
    'b08e7a',  # 5 Vacant land
    '85b66f',  # 6 Vegetation
    'a5bfdd',  # 7 Water-bodies
]

# Stretch the palette over the class IDs 1..7 (ID 0 gets no colour of its own)
vis_params_classified = {'min': 1, 'max': len(palette), 'palette': palette}

# Show the classification result on the map
Map.addLayer(classified_basescene, vis_params_classified, 'classified basescene')


#### 2.4 Exporting the results

##### 2.4.1 Exporting to Google Drive

In [11]:
# Convert the AOI FeatureCollection into a Geometry; it is passed as the
# 'region' argument of the (currently disabled) export calls.
aoi_geom = aoi_windhoek.geometry()

# Export to Google Drive with the GEE API.
# The export is disabled; uncomment the lines below to run it.
# export = ee.batch.Export.image.toDrive(**{
#     'image': classified_basescene,
#     'description': 'classified_map', # TODO: change name here
#     'folder': 'data', # TODO: change name here
#     'scale': 4.77,
#     'region': aoi_geom
# })

# # starting the process
# export.start()

# # tracking the process: poll every 5 seconds while the task is active
# while export.active():
#   print('Polling for task (id: {}).'.format(export.id))
#   time.sleep(5)

##### 2.4.2 Exporting to Asset

In [12]:
# Export to a GEE asset.
# The export is disabled; uncomment the lines below to run it. It relies on
# `aoi_geom` from the Google Drive export cell.
# NOTE(review): 'scale' is 100 here but 4.77 in the Drive export and in the
# sampling cells -- confirm that the coarser export scale is intended.
# export = ee.batch.Export.image.toAsset(**{
#   'image': classified_basescene,
#   'description': 'Export classified map',
#   'assetId': 'users/s85315/masterthesis/Testing/export_basescene', # TODO: change name here
#   'scale': 100,
#   'region': aoi_geom
# })

# # starting the process
# export.start()

# # tracking the process: poll every 5 seconds while the task is active
# while export.active():
#   print('Polling for task (id: {}).'.format(export.id))
#   time.sleep(5)

### 3 Accuracy assessment

#### 3.1 Importing validation samples from GEE Assets

In [13]:
# Load the validation points from the GEE assets, one FeatureCollection per
# landcover class
VS_Informal_Points = ee.FeatureCollection('users/s85315/masterthesis/ValidationSamples/VS_Informal_RPoints')
VS_Formal_Points = ee.FeatureCollection('users/s85315/masterthesis/ValidationSamples/VS_Formal_RPoints')
VS_Industrial_Points = ee.FeatureCollection('users/s85315/masterthesis/ValidationSamples/VS_Industrial_RPoints')
VS_Roads_Points = ee.FeatureCollection('users/s85315/masterthesis/ValidationSamples/VS_Roads_RPoints')
VS_VacantLand_Points = ee.FeatureCollection('users/s85315/masterthesis/ValidationSamples/VS_VacantLand_RPoints')
VS_Vegetation_Points = ee.FeatureCollection('users/s85315/masterthesis/ValidationSamples/VS_Vegetation_RPoints')
VS_Waterbodies_Points = ee.FeatureCollection('users/s85315/masterthesis/ValidationSamples/VS_Waterbodies_RPoints')

# Combine the per-class collections into a single FeatureCollection
# (merged in the same order as they were loaded above)
validation_samples = (
    VS_Informal_Points
    .merge(VS_Formal_Points)
    .merge(VS_Industrial_Points)
    .merge(VS_Roads_Points)
    .merge(VS_VacantLand_Points)
    .merge(VS_Vegetation_Points)
    .merge(VS_Waterbodies_Points)
)


#### 3.2 Applying the validation samples to the classified basescene

In [14]:
# Sample the classified map at the validation points. The reference label
# ('landcover') of every point is kept, so each sampled feature carries both
# the reference class and the predicted class.
validation = classified_basescene.sampleRegions(
    collection=validation_samples,
    properties=['landcover'],
    scale=4.77,
    tileScale=16,
)

#### 3.3 Generating the error matrix and printing the accuracy statistics

In [15]:
# Error matrix of reference labels ('landcover') against predicted labels
# ('classification') for the validation samples
basescene_error_matrix = validation.errorMatrix('landcover', 'classification')

# Statistics to report, paired with their printed label. The entries are
# server-side objects; each one is fetched with getInfo() right before printing.
accuracy_report = [
    ('Confusion Matrix', basescene_error_matrix),
    ('Overall Accuracy', basescene_error_matrix.accuracy()),
    ('Producers Accuracy', basescene_error_matrix.producersAccuracy()),
    ('Consumers Accuracy', basescene_error_matrix.consumersAccuracy()),
    ('Kappa', basescene_error_matrix.kappa()),
]

# printing statistics
for label, statistic in accuracy_report:
    print(label, statistic.getInfo())

Confusion Matrix [[0, 0, 0, 0, 0, 0, 0, 0], [0, 15, 6, 2, 0, 7, 0, 0], [0, 2, 22, 0, 0, 6, 0, 0], [0, 2, 18, 3, 2, 5, 0, 0], [0, 0, 0, 0, 15, 15, 0, 0], [0, 2, 1, 1, 0, 23, 3, 0], [0, 0, 0, 0, 0, 1, 24, 5], [0, 0, 0, 0, 0, 0, 3, 27]]
Overall Accuracy 0.6142857142857143
Producers Accuracy [[0], [0.5], [0.7333333333333333], [0.1], [0.5], [0.7666666666666667], [0.8], [0.9]]
Consumers Accuracy [[0, 0.7142857142857143, 0.46808510638297873, 0.5, 0.8823529411764706, 0.40350877192982454, 0.8, 0.84375]]
Kappa 0.55


##### 3.3.1 Visualizing the error matrix

In [45]:
# Fetch the error matrix as a nested Python list (one inner list per row)
error_matrix = basescene_error_matrix.getInfo()

# Class names in landcover-ID order; used as column headers and as row labels
header_error_matrix = [
    'not used',
    'Informal',
    'Formal',
    'Industrial',
    'Roads',
    'Vacant land',
    'Vegetation',
    'Water-bodies',
]

# Print the matrix as a grid table labelled with the class names on both axes
print(
    tabulate(
        error_matrix,
        headers=header_error_matrix,
        tablefmt='fancy_grid',
        showindex=header_error_matrix,
    )
)

shape: 8
╒══════════════╤════════════╤════════════╤══════════╤══════════════╤═════════╤═══════════════╤══════════════╤════════════════╕
│              │   not used │   Informal │   Formal │   Industrial │   Roads │   Vacant land │   Vegetation │   Water-bodies │
╞══════════════╪════════════╪════════════╪══════════╪══════════════╪═════════╪═══════════════╪══════════════╪════════════════╡
│ not used     │          0 │          0 │        0 │            0 │       0 │             0 │            0 │              0 │
├──────────────┼────────────┼────────────┼──────────┼──────────────┼─────────┼───────────────┼──────────────┼────────────────┤
│ Informal     │          0 │         15 │        6 │            2 │       0 │             7 │            0 │              0 │
├──────────────┼────────────┼────────────┼──────────┼──────────────┼─────────┼───────────────┼──────────────┼────────────────┤
│ Formal       │          0 │          2 │       22 │            0 │       0 │             6 │        

##### 3.3.2 Producer's and consumer's accuracy

In [51]:
# Fetch the per-class accuracies as Python lists.
# The producer's accuracy comes back as one single-value row per class, the
# consumer's accuracy as a single row holding one value per class.
producers = basescene_error_matrix.producersAccuracy().getInfo()
consumers = basescene_error_matrix.consumersAccuracy().getInfo()

# Table labels (fixes the misspelled "Consumers's Accuracy" label)
header_producers = ['Class name', "Producer's Accuracy"]
header_consumers = ["Consumer's Accuracy"]

# Producer's accuracy: one table row per class, labelled with the class name.
# `header_error_matrix` is the class-name list defined in the error-matrix cell.
print(tabulate(producers, headers=header_producers, tablefmt='fancy_grid', showindex=header_error_matrix))
# Consumer's accuracy: a single labelled row with one column per class.
print(tabulate(consumers, headers=header_error_matrix, tablefmt='fancy_grid', showindex=header_consumers))


╒══════════════╤═══════════════════════╕
│ Class name   │   Producer's Accuracy │
╞══════════════╪═══════════════════════╡
│ not used     │              0        │
├──────────────┼───────────────────────┤
│ Informal     │              0.5      │
├──────────────┼───────────────────────┤
│ Formal       │              0.733333 │
├──────────────┼───────────────────────┤
│ Industrial   │              0.1      │
├──────────────┼───────────────────────┤
│ Roads        │              0.5      │
├──────────────┼───────────────────────┤
│ Vacant land  │              0.766667 │
├──────────────┼───────────────────────┤
│ Vegetation   │              0.8      │
├──────────────┼───────────────────────┤
│ Water-bodies │              0.9      │
╘══════════════╧═══════════════════════╛
╒══════════════════════╤════════════╤════════════╤══════════╤══════════════╤══════════╤═══════════════╤══════════════╤════════════════╕
│                      │   not used │   Informal │   Formal │   Industrial │    Roads

##### 3.3.3 Overall Accuracy and Kappa Coefficient

In [65]:
# defining the variables
overall_accuracy = basescene_error_matrix.accuracy().getInfo()
overall_print = str(round(overall_accuracy * 100, 2))
kappa = basescene_error_matrix.kappa().getInfo()

# printing out vaLues
print("\033[1m" + "Overall Accuracy " + overall_print + " %" + "\033[0m")
print("\033[1m" + "Kappa coefficent " + str(kappa) + "\033[0m")

[1mOverall Accuracy 61.43 %[0m
[1mKappa coefficent 0.55[0m


##### Resources for code snippets

https://colab.research.google.com/github/csaybar/EEwPython/blob/dev/10_Export.ipynb \
https://worldbank.github.io/OpenNightLights/tutorials/mod6_6_RF_classifier.html \
https://towardsdatascience.com/how-to-easily-create-tables-in-python-2eaea447d8fd