# Lab 2, Question 6

(6) Exercise: using an SVM model and the Landcare NZ 2024 landcover database, produce a landcover map of Great Barrier (Aotea) Island for 2025 (based off the Austral summer of 24/25).

Your map should be presented at a publication quality level with all the usual map components (scale, legend, north arrow, data attribution).

You will need to provide performance statistics of the model within your figure.

*   Here you can access the landcover database: https://lris.scinfo.org.nz/layer/104400-lcdb-v50-land-cover-database-version-50-mainland-new-zealand/. You will need to explore for yourself how to extract this data and then upload it to colab, then how to plug it into the SVM algorithim. I have provided some starter code below.

An intial workflow to get the data into the state you need it in to then use it as training data might look like:
- Download the ZIP manually from their browser, having set your area of interest and used the 'Export' tool top right.
- Upload it to Colab.
- Unzip it and load with GeoPandas.
(25 pts)


_______________________________________________________________________________


### Setup

In [2]:
if 'google.colab' in str(get_ipython()):
    from google.colab import userdata
    EE_PROJECT_ID = userdata.get('EE_PROJECT_ID') 
else:
    from dotenv import load_dotenv
    import os
    load_dotenv()  # take environment variables
    EE_PROJECT_ID = os.getenv('EE_PROJECT_ID')

# Set up GEE API
import ee
ee.Authenticate()
ee.Initialize(project=EE_PROJECT_ID) #<- Remember to change this to your own project's name!

In [3]:
import tempfile
import urllib.request

from IPython.display import Image

import geemap
import geopandas as gpd
import pandas as pd
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from sklearn.metrics import confusion_matrix, classification_report

# A. Read Data

### A.1. Load LCDB v5.0 - Land Cover Database version 5.0, Mainland, New Zealand for 2024

In [39]:
# # Code to get you started
# import zipfile
# import geopandas as gpd

# # Upload the ZIP manually using the Colab UI
# from google.colab import files
# uploaded = files.upload()  # <- Expects a ZIP

# Unzip
# with zipfile.ZipFile("LCDB_v5.zip", 'r') as zip_ref: #<- Check file names
#     zip_ref.extractall("lcdb")

# Read shapefile
gdf = gpd.read_file("../dataset/lris-lcdb-v50-land-cover-great-barrier-SHP/lcdb-v50-land-cover-database-version-50-mainland-new-zealand.shp")

In [41]:
df_2018 = gdf[['Name_2018', 'Class_2018', 'geometry', 'LCDB_UID']]

# # original class of 2018
# classes_2018 = [0, 1, 2, 5, 6, 10, 12, 14, 16, 15, 20, 21, 22, 30, 33, 40, 41, 43, 44, 45, 46, 47, 50, 51, 52, 54, 55, 56, 58, 80, 81, 70, 64, 68, 69, 71]
# class_mapped_2018 = list(range(1, 37)) # Creates a list [1, 2, 3, ..., 36], to map from original

classes_2018 = df_2018['Class_2018'].unique().tolist()
classes_2018.sort()  # Sort the unique classes
class_mapped_2018 = list(range(1, len(classes_2018) + 1))  # Creates a list [1, 2, ..., len(classes_2018)]

class_property = 'class_mapped_2018'
df_2018[class_property] = df_2018['Class_2018'].map(dict(zip(classes_2018, class_mapped_2018)))
df_2018

Unnamed: 0,Name_2018,Class_2018,geometry,LCDB_UID,class_mapped_2018
0,Indigenous Forest,69,"POLYGON ((1820024.501 6001139.3, 1820024.88 60...",lcdb1000168553,20
1,Sand or Gravel,10,"POLYGON ((1820044.213 6001276.335, 1820054.749...",lcdb1000009545,5
2,Indigenous Forest,69,"POLYGON ((1818865.334 6000862.483, 1818892.667...",lcdb1000168544,20
3,Sand or Gravel,10,"POLYGON ((1818859.62 6001650.91, 1818879.751 6...",lcdb1000009547,5
4,Indigenous Forest,69,"POLYGON ((1819208.841 5997374.363, 1819193.973...",lcdb1000168423,20
...,...,...,...,...,...
600,Broadleaved Indigenous Hardwoods,54,"POLYGON ((1815221.771 5992743.223, 1815178.566...",lcdb1000122604,18
601,Broadleaved Indigenous Hardwoods,54,"POLYGON ((1815222.725 5993078.074, 1815241.205...",lcdb1000122593,18
602,Broadleaved Indigenous Hardwoods,54,"POLYGON ((1815479.996 5993366.704, 1815438.725...",lcdb1000122624,18
603,High Producing Exotic Grassland,40,"POLYGON ((1815617.885 5995904.409, 1815612.303...",lcdb1000452551,12


In [6]:
seed = 42  # Random seed for reproducibility

# # Coordinates for Mount Hobson
# point = ee.Geometry.Point([175.3785, -36.1830])
aoi = ee.Geometry.Rectangle([175.27, -36.01, 175.57, -36.36])

# bands = ['B2', 'B3', 'B4', 'B8'] # Sentinel-2 bands: Blue, Green, Red, NIR
bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12']

lcdb = geemap.geopandas_to_ee(df_2018[[class_property, 'geometry']])

# rasterizes the data
lcdb_image = lcdb.reduceToImage(
    properties=[class_property],
    reducer=ee.Reducer.first()
).rename(class_property)

def mask_s2_clouds(image):
    """Masks clouds in your S2 images via QA60 band"""
    qa = image.select('QA60')
    cloudBitMask = 1 << 10
    cirrusBitMask = 1 << 11
    mask = qa.bitwiseAnd(cloudBitMask).eq(0).And(
    qa.bitwiseAnd(cirrusBitMask).eq(0))
    return image.updateMask(mask).divide(10000).select(bands)

s2 = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
           .filterBounds(aoi)
           .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)))

s2_2018 = (s2
           .filterDate('2018-01-01', '2018-12-31')
           .map(mask_s2_clouds)
           .median()
           .clip(aoi))

# `lcdb` only covers land. Need to mark unlabled areas in `s2_2018`.
s2_2018_lc = s2_2018.updateMask(lcdb_image)
training_image = s2_2018_lc.addBands(lcdb_image)

# Combine the bands of interest with the land cover data
# The `class_property` band will be used as our label
training_image = s2_2018_lc.addBands(lcdb_image)

### Visualize

In [7]:
# # Add the layers to the map.
# # point = ee.Geometry.Point([175.3785, -36.1830])
# viz_params = {
#     'bands': ['B4', 'B3', 'B2'],  # RGB bands
#     'min': 0,
#     'max': 0.3,
#     'gamma': 1.4  # Adjust gamma for better visualization
# }

# Map = geemap.Map(center=[-36.1830, 175.3785], zoom=11)
# Map.addLayer(aoi, {}, 'AOI')

# Map.add_layer(
#     s2_2018, viz_params, 'S2 2018 Median RGB'
# )
# Map.add_layer(
#     s2_2018_lc, viz_params, 'S2 2018 (Land Cover)'
# )
# Map.add_layer(
#     lcdb_image.randomVisualizer(), {}, 'LCDB v5.0 (2018)'
# )
# Map

### Training Data 

In [8]:
# Sample the image to create training data
# This will create a FeatureCollection where each feature has the pixel values 
# for the bands and the corresponding land cover class.
training_data = training_image.select(bands + [class_property]).stratifiedSample(
    classBand=class_property,
    region=aoi,
    scale=10,  # The spatial resolution to sample at
    numPoints=150,  # The number of data points for each class
    seed=seed,  # Random seed for reproducibility
    geometries=True  # Include the geometry of each sampled pixel
)

In [9]:
# first_10 = training_data.limit(10).getInfo()

# print('\nFirst 10 training samples:')
# for i, feature in enumerate(first_10['features']):
#     props = feature['properties']
#     print(f"Sample {i+1}: Class={props[class_property]}, B2={props['B2']}, B3={props['B3']}, B4={props['B4']}, B8={props['B8']}")

# # Count number of samples per class (server-side)
# class_counts = training_data.reduceColumns(
#     reducer=ee.Reducer.frequencyHistogram(),
#     selectors=[class_property])

# print('\nClass distribution in sample:')
# print(class_counts.getInfo())

In [10]:
# Add random column
training_data = training_data.randomColumn('random')

# Split
train_set = training_data.filter(ee.Filter.lt('random', 0.7))
valid_set = training_data.filter(ee.Filter.And(ee.Filter.gte('random', 0.7), ee.Filter.lt('random', 0.9)))
test_set = training_data.filter(ee.Filter.gte('random', 0.9))

In [32]:
classifier = ee.Classifier.smileRandomForest(numberOfTrees=10).train(
    features=train_set,
    classProperty=class_property,
    inputProperties=bands)

In [30]:
display('Results of trained classifier', classifier.explain())

'Results of trained classifier'

In [33]:
# Get a confusion matrix and overall accuracy for the training sample.
train_accuracy = classifier.confusionMatrix()
display('Training overall accuracy', train_accuracy.accuracy())

'Training overall accuracy'

In [34]:
# Classify validation and test sets
val_classified = valid_set.classify(classifier)
test_classified = test_set.classify(classifier)

# Function to export data for confusion matrix
def fc_to_lists(fc, classProp, predProp):
    values = fc.aggregate_array(classProp).getInfo()
    preds = fc.aggregate_array(predProp).getInfo()
    return values, preds

# Get predicted vs actual from validation set
y_true, y_pred = fc_to_lists(val_classified, class_property, 'classification')


In [35]:
# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=list(range(1, 37)))
report = classification_report(y_true, y_pred, labels=list(range(1, 37)), target_names=[str(l) for l in classes_2018])

# Pretty-print
print("Confusion Matrix:")
print(pd.DataFrame(cm, index=[f"Actual {l}" for l in classes_2018],
                       columns=[f"Pred {l}" for l in classes_2018]))
print("\nClassification Report:")
print(report)

Confusion Matrix:
           Pred 0  Pred 1  Pred 2  Pred 5  Pred 6  Pred 10  Pred 12  Pred 14  \
Actual 0        0       0       0       0       0        0        0        0   
Actual 1        0      14       1       0       2        1        2        0   
Actual 2        0       0      22       0       0        1        0        0   
Actual 5        0       0       0       0       0        0        0        0   
Actual 6        0       0       0       0      26        0        0        0   
Actual 10       0       2       0       0       4        7        2        0   
Actual 12       0       0       0       0       0        0        6        0   
Actual 14       0       0       0       0       0        0        0        0   
Actual 16       0       2       0       0       2        1        0        0   
Actual 15       0       0       0       0       0        0        0        0   
Actual 20       0       0       0       0       0        0        0        0   
Actual 21       0     

In [36]:

# Evaluate test performance
test_matrix = test_classified.errorMatrix(class_property, 'classification')
# print("Confusion Matrix:")
# print(pd.DataFrame(test_matrix.getInfo(), index=[f"Actual {l}" for l in classes_2018],
#                        columns=[f"Pred {l}" for l in classes_2018]))
# print("\nClassification Report:")
# print(report)

# print('Error matrix:')
# print(test_matrix.getInfo())

# Compute accuracy metrics from the error matrix.
print('Overall accuracy:', test_matrix.accuracy().getInfo())
# print('Consumer\'s accuracy:')
# print(test_matrix.consumersAccuracy().getInfo())
# print('Producer\'s accuracy:')
# print(test_matrix.producersAccuracy().getInfo())
print('Kappa:', test_matrix.kappa().getInfo())

Overall accuracy: 0.5391849529780565
Kappa: 0.5156182212581345


In [37]:
# Austral summer of 24/25
s2_2025 = (s2
           .filterDate('2024-09-01', '2025-07-31')
           .map(mask_s2_clouds)
           .median()
           .clip(aoi))

s2_2025_classified = s2_2025.updateMask(lcdb_image).classify(classifier)

s2_2018_classified = s2_2018_lc.classify(classifier)

In [38]:
# Add the layers to the map.
# point = ee.Geometry.Point([175.3785, -36.1830])
viz_params = {
    'bands': ['B4', 'B3', 'B2'],  # RGB bands
    'min': 0,
    'max': 0.3,
    'gamma': 1.4  # Adjust gamma for better visualization
}

# Classified image viz, fresh set up again to make sure we have exactly what we want displayed.
classified_map = geemap.Map()
classified_map.centerObject(aoi, 11)
classified_map.addLayer(s2_2025, viz_params, '2025 RGB')
classified_map.addLayer(s2_2025_classified.randomVisualizer(), {}, '2025 Classified')
classified_map.addLayer(s2_2018_lc, viz_params, '2018 RGB')
classified_map.addLayer(s2_2018_classified.randomVisualizer(), {}, '2018 Classified')
classified_map.addLayer(lcdb_image.randomVisualizer(), {} ,'LCDB v5.0')
classified_map

Map(center=[-36.184963233625986, 175.41999999999973], controls=(WidgetControl(options=['position', 'transparen…