![Digital Earth Pacific](../dep.png)

# Point-based Land Cover Classification Model v.1

Train a model on point data and use it to produce raster land cover classification predictions.

In [None]:
import os
import sys
# Put the parent directory first on the module search path.
# NOTE(review): presumably so the local `depal` and `model` modules imported
# below resolve from the repository root — confirm.
sys.path.insert(0, '..')

import pandas as pd
import geopandas as gpd
import numpy as np
import rioxarray
import xarray as xr
from ipyleaflet import Map, LayersControl, basemaps

import depal as dep
from model import add_image_values, get_model_prediction, get_overlay

import warnings
# Silence every warning for the rest of the session. Note that this also hides
# deprecation notices and scikit-learn warnings raised by later cells.
warnings.filterwarnings('ignore')

In [None]:
# Start the depal session; the matching dep.cleanup() call is the last cell.
# NOTE(review): what init() actually sets up is not visible here — confirm in depal.
dep.init()

#### Load and Summarise Training Data

In [None]:
# Training data from Nick
# Point features read from a GeoJSON file next to this notebook; later cells
# read the LULC_class column of this frame.
training_data = gpd.read_file("datapoints_010423.geojson")
#training_data

In [None]:
# Reference only: number of training points per class.
# A degree of class imbalance is normal, but rare classes need more points
# (alternatively, some of the water classes could be merged).

# See, for example, how poorly ocean is discriminated in the results below.
summary = training_data["LULC_class"].value_counts()
pd.DataFrame(summary.items())

#### Classes Cleanup and Alignment

In [None]:
# Remove non-land classes (LULC_class) from the training points.
remove_list = ['Shallow_ocean', 'Coral_reef', 'Seagrass', 'Deep_ocean']
# One vectorised mask instead of re-filtering (and copying) the frame once per
# class; rows whose class is not in remove_list are kept, exactly as before.
training_data = training_data[~training_data["LULC_class"].isin(remove_list)]

# Re-display the per-class counts after the cleanup.
summary = training_data.LULC_class.value_counts()
pd.DataFrame(summary.items())

#### Load Sentinel 2 Data (Annual Landcover Mosaic)

In [None]:
# Area of interest: the admin boundary looked up with
# ("Tonga", "Island Group", "Tongatapu").
aoi = dep.get_country_admin_boundary("Tonga", "Island Group", "Tongatapu")
# 2023 land cover mosaic for the AOI, requested with resolution=10 and coastal
# clipping switched off.
# NOTE(review): resolution is presumably in metres — confirm against depal.
training_image = dep.get_landcover_mosaic(aoi, year="2023", resolution=10, coastal_clip=False)

In [None]:
# Combine the training points with the mosaic (helper from the local `model` module).
# The result is read below for its LULC_code column and the band columns
# blue, green, red, rededge and nir.
# NOTE(review): presumably the band values are sampled at each point location — confirm.
d = add_image_values(training_data, training_image)

#### Training the Model using a Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict

# A fixed seed makes the forest deterministic, so the cross-validation score,
# the confusion matrix and the final raster are reproducible between runs.
model = RandomForestClassifier(max_depth=4, class_weight="balanced", random_state=42)
#model = GradientBoostingClassifier()
#model = HistGradientBoostingClassifier() #For coastal clipped mosaic

# Convert the dataframe data to numpy arrays:
# y holds the class code labels, X one column per spectral band.
y = d.LULC_code.to_numpy()
bands = ["blue", "green", "red", "rededge", "nir"]
X = d.loc[:, bands].to_numpy()

# A coarse way to assess model fit: this returns one score per fold of a
# 10-fold cross validation. The score comes from the estimator's default
# scorer, which for classifiers is mean accuracy (higher is better), not a loss.
# Alternatively you could split into train and test (and validation) sets,
# but for this few points cross validation may be the best approach.
cv = cross_val_score(model, X, y, cv=10)
cv.mean()

#### Classes Color Map

In [None]:
# Class lookup table keyed by LULC_code: class name and display colour.
# Other cells use it to translate between codes and names.
spc_lookup = pd.DataFrame(
    data=[
        [1, 'Forest_land', '#064a00'],
        [2, 'Cropland', '#b67e00'],
        [3, 'Grassland', '#d7ffa0'],
        [4, 'Wetland', '#73ffd2'],
        [5, 'Settlements', '#bd0007'],
        [6, 'Bare_land', '#919191'],
        [7, 'Surface_water', '#71a8ff'],
        [8, 'Shallow_ocean', '#4a8ffc'],
        [9, 'Seagrass', '#fc4aea'],
        [10, 'Coral_reef', '#8f0e82'],
        [11, 'Deep_ocean', '#00299f'],
        [12, 'Shrubs', '#759f00'],
        [13, 'African_tulip', '#1e121a'],
    ],
    columns=['LULC_code', 'LULC_class', 'color'],
).set_index('LULC_code')


# Colour ramp (used further down): a plain list in which position k holds the
# colour of class code k. Positions without a class (such as 0) stay white.
x = [
    spc_lookup.loc[code, 'color'] if code in spc_lookup.index else '#ffffff'
    for code in range(max(spc_lookup.index) + 1)
]

#x

#### Accuracy Assessment

In [None]:
# Cross-validated predictions (10 folds) for every training point, used to
# check how well the model did.
cv_predictions = cross_val_predict(model, X, y, cv=10)

# Compare predictions against the labels. Codes are translated to class names
# through the lookup table so the table's rows and columns are readable.
y_class = spc_lookup.loc[y.astype('int'), 'LULC_class'].values
pred_y_class = spc_lookup.loc[cv_predictions.astype('int'), 'LULC_class'].values
pd.crosstab(y_class, pred_y_class)

In [None]:
# Fit the model on the full data set
# (the cross-validation cells above do not leave `model` fitted; this call does).
s2_model = model.fit(X, y)

#### Raster predictions
Predict classes for the entire image dataset, not just the training points.

In [None]:
# Run the fitted model over the whole mosaic ...
s2_predictions = get_model_prediction(s2_model, training_image)
# ... then clip the result to the AOI, reprojected to the mosaic's CRS first.
s2_predictions = s2_predictions.rio.clip(aoi.to_crs(training_image.rio.crs))

# Save the image to disk as a COG, replacing any existing file
s2_predictions.rio.to_raster('pred_s2_mosaic_v1.tif', driver='COG', overwrite=True)

In [None]:
# Plot output as image
%matplotlib inline

from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

plt.figure(figsize=(19.2, 14.3))
cmap = ListedColormap(x)
plot = plt.imshow(s2_predictions, cmap=cmap)

# add legend
legend_patches = [ mpatches.Patch(color=spc_lookup.loc[i].color, label=spc_lookup.loc[i].LULC_class) 
                  for i in spc_lookup.index 
                   if i in np.unique(s2_predictions)
                 ]
plt.legend(handles=legend_patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0. )
plt.grid(False)
plt.show()

#### Interactive Landcover Output Display

In [None]:
# Build a map overlay from the predictions, coloured with the class colormap.
overlay = get_overlay(s2_predictions, cmap)
# Placeholder start view only: fit_bounds() below moves the map to the overlay.
center = [20,-180]
zoom = 18
m = Map(basemap=basemaps.Esri.WorldImagery, center=center, zoom=zoom, interpolation="nearest", scroll_wheel_zoom=True)
control = LayersControl(position='topright')
# Use Map.add for the control as well as the overlay; add_control is the
# deprecated spelling of the same operation.
m.add(control)
m.add(overlay)
m.layout.width = '80%'
m.layout.height = '500px'
m.fit_bounds(overlay.bounds)
m

In [None]:
# End the depal session started by dep.init() in the first cells.
# NOTE(review): what cleanup() tears down is not visible here — confirm in depal.
dep.cleanup()