# Accuracy Assessment of Supervised Classification using Random Forest (RF)

### Using Google Earth Engine Python API and NICFI Normalized Analytic Basemap from December 2022

Author: Finn Geiger\
Date: April 7th 2023\
Contact:
- https://github.com/finn-geiger
- https://www.linkedin.com/in/finn-geiger-b1329a20b/

### 1 Import and setup
#### 1.1 Importing the required libraries and packages

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import geemap
import ee
import os
import time
import pandas as pd
from tabulate import tabulate
#%pip install tabulate


The following classes and landcover IDs will be used:

In [2]:
# Landcover classes in ascending ID order; the IDs are the consecutive integers 1-7
landcover_classes = [
    'Informal',
    'Formal',
    'Industrial',
    'Roads',
    'Vacant land',
    'Vegetation',
    'Water-bodies',
]

# Column-oriented lookup table: one list per table column
info = {
    'Class name': landcover_classes,
    'landcover ID': list(range(1, len(landcover_classes) + 1)),
}

# Render the lookup table with the dictionary keys as column headers
print(tabulate(info, headers='keys', tablefmt='fancy_grid'))

╒══════════════╤════════════════╕
│ Class name   │   landcover ID │
╞══════════════╪════════════════╡
│ Informal     │              1 │
├──────────────┼────────────────┤
│ Formal       │              2 │
├──────────────┼────────────────┤
│ Industrial   │              3 │
├──────────────┼────────────────┤
│ Roads        │              4 │
├──────────────┼────────────────┤
│ Vacant land  │              5 │
├──────────────┼────────────────┤
│ Vegetation   │              6 │
├──────────────┼────────────────┤
│ Water-bodies │              7 │
╘══════════════╧════════════════╛


##### When first using the GEE Python API, the user must authenticate and initialize the environment by uncommenting and running the following two lines of code:

In [3]:
#ee.Authenticate() 
#ee.Initialize()

In [4]:
# Create the interactive geemap map widget; the cells below add their layers to it
Map = geemap.Map()

# A bare expression on the last line of the cell renders the widget in the notebook
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

#### 1.2 Importing the datasets from GEE assets and data catalog and clipping the basemap to the AOI

In [5]:
# NICFI basemap collection for Africa
nicfi = ee.ImageCollection('projects/planet-nicfi/assets/basemaps/africa')

# Restrict the collection to the 2022-12-01 .. 2023-01-01 date range and
# take the first image of the filtered result as the December 2022 basemap
december_2022 = ee.Filter.date('2022-12-01', '2023-01-01')
basemap_2022_12 = nicfi.filter(december_2022).first()

# RGB visualization parameters, reused for every basemap layer
vis_params = {
    "bands": ["R", "G", "B"],
    "min": 64,
    "max": 5454,
    "gamma": 1.8,
}

# Center the map on the basemap (zoom level 4) and add it as a layer
Map.centerObject(basemap_2022_12, 4)
Map.addLayer(basemap_2022_12, vis_params, '2022-12 mosaic')

In [6]:
# Study-area outline (AOI) loaded from the GEE assets
aoi_windhoek = ee.FeatureCollection('users/s85315/masterthesis/Study_Area_Windhoek')

# The AOI is drawn in blue
vis_params_aoi = {'color': 'blue'}

# Add the AOI as its own layer, then zoom the map to it (zoom level 12)
Map.addLayer(aoi_windhoek, vis_params_aoi, 'AOI')
Map.centerObject(aoi_windhoek, 12)

In [7]:
# Clip the December 2022 basemap to the AOI feature collection so that only
# the study area remains in the base scene
basescene = basemap_2022_12.clipToCollection(aoi_windhoek)
# Add the clipped scene as a separate layer, reusing the RGB visualization parameters
Map.addLayer(basescene, vis_params, 'clipped')

#### 1.3 Importing the classified basescene with RF

In [9]:
# Random Forest classification result, stored as a GEE asset
classified_basescene = ee.Image(
    'users/s85315/masterthesis/classification_results/classified_basescene_RF'
)

# One colour per landcover ID, listed in ascending ID order (1-7)
palette = [
    'c43c39',  # 1 Informal
    'e5b636',  # 2 Formal
    '2f2f2f',  # 3 Industrial
    'aaaaaa',  # 4 Roads
    'b08e7a',  # 5 Vacant land
    '85b66f',  # 6 Vegetation
    'a5bfdd',  # 7 Water-bodies
]

# Stretch the palette over the class value range 1..7
vis_params_classified = dict(min=1, max=7, palette=palette)

Map.addLayer(classified_basescene, vis_params_classified, 'classified basescene')


### 2 Accuracy assessment

#### 2.1 Importing validation samples from GEE Assets

In [10]:
# Load the merged validation samples for 2022 from the GEE assets.
# The 'landcover' property of these features is used as the reference label
# when the samples are applied to the classified map in the following cells.
validation_samples = ee.FeatureCollection('users/s85315/masterthesis/ValidationSamples/VS_classification_all_classes_2022')

#### 2.2 Applying the validation samples to the basescene

In [11]:
# Sample the classified map at every validation feature. Each output feature
# keeps the reference 'landcover' property and gains the band value of the
# classified image at that location.
validation = classified_basescene.sampleRegions(
    collection=validation_samples,
    properties=['landcover'],  # reference label copied from the samples
    scale=4.77,                # sampling scale passed to Earth Engine
    tileScale=16,
)

#### 2.3 Generating the error matrix and printing information

In [12]:
# Build the error matrix from the reference label ('landcover') and the
# predicted label ('classification') of the sampled validation features
basescene_error_matrix = validation.errorMatrix('landcover', 'classification')

# Label / Earth Engine object pairs, in the order in which they are printed
accuracy_report = (
    ('Confusion Matrix', basescene_error_matrix),
    ('Overall Accuracy', basescene_error_matrix.accuracy()),
    ('Producers Accuracy', basescene_error_matrix.producersAccuracy()),
    ('Consumers Accuracy', basescene_error_matrix.consumersAccuracy()),
    ('Kappa', basescene_error_matrix.kappa()),
)

# getInfo() fetches each server-side value to the client before printing
for report_label, report_value in accuracy_report:
    print(report_label, report_value.getInfo())

Confusion Matrix [[0, 0, 0, 0, 0, 0, 0, 0], [0, 29, 8, 1, 8, 2, 0, 0], [0, 4, 30, 3, 2, 0, 0, 0], [0, 4, 8, 19, 3, 0, 0, 0], [0, 2, 2, 3, 40, 1, 0, 0], [0, 3, 0, 1, 2, 105, 1, 0], [0, 0, 0, 0, 0, 0, 33, 5], [0, 0, 0, 0, 0, 0, 1, 30]]
Overall Accuracy 0.8171428571428572
Producers Accuracy [[0], [0.6041666666666666], [0.7692307692307693], [0.5588235294117647], [0.8333333333333334], [0.9375], [0.868421052631579], [0.967741935483871]]
Consumers Accuracy [[0, 0.6904761904761905, 0.625, 0.7037037037037037, 0.7272727272727273, 0.9722222222222222, 0.9428571428571428, 0.8571428571428571]]
Kappa 0.7772097510517888


##### 2.3.1 Visualizing the error matrix

In [13]:
# Fetch the error matrix from Earth Engine as a nested list
error_matrix = basescene_error_matrix.getInfo()

# Class labels in ascending landcover ID order (1-7); 'Total' labels the
# margin row and the margin column
class_labels = ['Informal', 'Formal', 'Industrial', 'Roads', 'Vacant land', 'Vegetation', 'Water-bodies']
header_error_matrix = class_labels + ['Total']

# The matrix returned by GEE starts at class value 0, which is not a landcover
# class here, so the first row and the first column are dropped before the
# remaining 7 x 7 block is labelled with the class names.
df_error_matrix = (
    pd.DataFrame(error_matrix)
    .iloc[1:, 1:]
    .set_axis(class_labels, axis=0)
    .set_axis(class_labels, axis=1)
)

# Margin totals: first the column sums as an extra row, then the row sums
# (which include the new 'Total' row, giving the grand total) as an extra column
column_total = df_error_matrix.sum().rename('Total')
df_error_matrix.loc['Total'] = column_total
df_error_matrix['Total'] = df_error_matrix.sum(axis=1)

# The index name becomes the header of the first CSV column
df_error_matrix.index.name = 'Names'

print(tabulate(df_error_matrix, headers=header_error_matrix, tablefmt='fancy_grid', showindex=header_error_matrix))

# Create the output folder first: to_csv does not create missing directories
error_matrix_csv = "./accuracies/basescene_2022/RF_Error_Matrix.csv"
os.makedirs(os.path.dirname(error_matrix_csv), exist_ok=True)
df_error_matrix.to_csv(error_matrix_csv, sep=';', index=True)

╒══════════════╤════════════╤══════════╤══════════════╤═════════╤═══════════════╤══════════════╤════════════════╤═════════╕
│              │   Informal │   Formal │   Industrial │   Roads │   Vacant land │   Vegetation │   Water-bodies │   Total │
╞══════════════╪════════════╪══════════╪══════════════╪═════════╪═══════════════╪══════════════╪════════════════╪═════════╡
│ Informal     │         29 │        8 │            1 │       8 │             2 │            0 │              0 │      48 │
├──────────────┼────────────┼──────────┼──────────────┼─────────┼───────────────┼──────────────┼────────────────┼─────────┤
│ Formal       │          4 │       30 │            3 │       2 │             0 │            0 │              0 │      39 │
├──────────────┼────────────┼──────────┼──────────────┼─────────┼───────────────┼──────────────┼────────────────┼─────────┤
│ Industrial   │          4 │        8 │           19 │       3 │             0 │            0 │              0 │      34 │
├───────

##### 2.3.2 Producer's and consumer's accuracy

In [14]:
# Producer's accuracy per class value, fetched from Earth Engine as a list
# of single-element rows
producers = basescene_error_matrix.producersAccuracy().getInfo()

# Class labels in ascending landcover ID order (1-7)
class_names = ['Informal', 'Formal', 'Industrial', 'Roads', 'Vacant land', 'Vegetation', 'Water-bodies']

# Skip the first entry (class value 0 is not a landcover class here), convert
# the fractions to percentages with two decimals and attach the class labels
df_producers = (
    pd.DataFrame(producers, columns=['Producer Accuracy'])
    .iloc[1:]
    .assign(**{
        'Producer Accuracy': lambda frame: frame['Producer Accuracy'].multiply(100).round(2),
        'Class name': class_names,
    })
)

print(tabulate(df_producers, headers=["Producer's Accuracy [%]", "Class name"], tablefmt='fancy_grid',  showindex=False))

# Create the output folder first: to_csv does not create missing directories
producers_csv = "./accuracies/basescene_2022/RF_Producers_Accuracy.csv"
os.makedirs(os.path.dirname(producers_csv), exist_ok=True)
df_producers.to_csv(producers_csv, sep=';', index=False)

╒═══════════════════════════╤══════════════╕
│   Producer's Accuracy [%] │ Class name   │
╞═══════════════════════════╪══════════════╡
│                     60.42 │ Informal     │
├───────────────────────────┼──────────────┤
│                     76.92 │ Formal       │
├───────────────────────────┼──────────────┤
│                     55.88 │ Industrial   │
├───────────────────────────┼──────────────┤
│                     83.33 │ Roads        │
├───────────────────────────┼──────────────┤
│                     93.75 │ Vacant land  │
├───────────────────────────┼──────────────┤
│                     86.84 │ Vegetation   │
├───────────────────────────┼──────────────┤
│                     96.77 │ Water-bodies │
╘═══════════════════════════╧══════════════╛


In [15]:
# Consumer's accuracy per class value, fetched from Earth Engine as a single
# row with one entry per class value
consumers = basescene_error_matrix.consumersAccuracy().getInfo()

# Drop the first column (class value 0 is not a landcover class here) and
# label the remaining columns with the class names defined in the previous cell
df_consumers = (
    pd.DataFrame(consumers)
    .iloc[:, 1:]
    .set_axis(class_names, axis=1)
)

# Reshape from wide (one column per class) to long (one row per class), put
# the accuracy column first and convert the fractions to percentages
df_consumers_long = (
    df_consumers
    .melt(var_name='Class name', value_name="Consumer Accuracy")
    .loc[:, ['Consumer Accuracy', 'Class name']]
    .assign(**{
        'Consumer Accuracy': lambda frame: frame['Consumer Accuracy'].multiply(100).round(2),
    })
)

print(tabulate(df_consumers_long, headers=["Consumers's Accuracy [%]", "Class name"], tablefmt='fancy_grid',  showindex=False))

# Create the output folder first: to_csv does not create missing directories
consumers_csv = "./accuracies/basescene_2022/RF_Consumers_Accuracy.csv"
os.makedirs(os.path.dirname(consumers_csv), exist_ok=True)
df_consumers_long.to_csv(consumers_csv, sep=';', index=False)

╒════════════════════════════╤══════════════╕
│   Consumers's Accuracy [%] │ Class name   │
╞════════════════════════════╪══════════════╡
│                      69.05 │ Informal     │
├────────────────────────────┼──────────────┤
│                      62.5  │ Formal       │
├────────────────────────────┼──────────────┤
│                      70.37 │ Industrial   │
├────────────────────────────┼──────────────┤
│                      72.73 │ Roads        │
├────────────────────────────┼──────────────┤
│                      97.22 │ Vacant land  │
├────────────────────────────┼──────────────┤
│                      94.29 │ Vegetation   │
├────────────────────────────┼──────────────┤
│                      85.71 │ Water-bodies │
╘════════════════════════════╧══════════════╛


##### 2.3.3 Overall Accuracy and Kappa Coefficient

In [16]:
# Fetch the two summary statistics of the error matrix from Earth Engine
overall_accuracy = basescene_error_matrix.accuracy().getInfo()
kappa = basescene_error_matrix.kappa().getInfo()

# Overall accuracy as a percentage rounded to two decimals, as text for printing
overall_print = str(round(overall_accuracy * 100, 2))

# Collect both statistics in a single-row frame (previously this frame was
# created empty and never filled)
df_overall_kappa = pd.DataFrame({
    'Overall Accuracy [%]': [round(overall_accuracy * 100, 2)],
    'Kappa': [round(kappa, 2)],
})

# Print the values; the ANSI codes \033[1m and \033[0m switch bold text on and off
print(f"\033[1mOverall Accuracy {overall_print} %\033[0m")
print(f"\033[1mKappa coefficent {round(kappa, 2)}\033[0m")

[1mOverall Accuracy 81.71 %[0m
[1mKappa coefficent 0.78[0m


##### Resources for code snippets

https://colab.research.google.com/github/csaybar/EEwPython/blob/dev/10_Export.ipynb \
https://worldbank.github.io/OpenNightLights/tutorials/mod6_6_RF_classifier.html \
https://towardsdatascience.com/how-to-easily-create-tables-in-python-2eaea447d8fd \
https://developers.google.com/earth-engine/apidocs/ee-classifier-smilecart