# Question 5

(5) Exercise: modify the code so that the random forest classifier trained on 2020 data is applied to Sentinel-2 imagery from years other than 2020. Produce a publication-quality figure that presents the following:
*   Landcover maps of Wellington for 2018, 2022 and 2024
*   The Test accuracy averages of the RF classifier for 2020.
*   Include a sentence in your figure caption that explains why you cannot state the accuracy of the classifier for years other than 2020.

(15 pts)
_______________________________________________________________________________


### Setup

In [None]:
# Resolve the Earth Engine project id from wherever this notebook is running:
# on Colab it comes from the user's stored secrets, otherwise from the
# environment after python-dotenv has loaded its variables.
_in_colab = 'google.colab' in str(get_ipython())
if not _in_colab:
    import os
    from dotenv import load_dotenv

    load_dotenv()  # make the dotenv-managed variables visible to os.environ
    EE_PROJECT_ID = os.environ.get('EE_PROJECT_ID')
else:
    from google.colab import userdata

    EE_PROJECT_ID = userdata.get('EE_PROJECT_ID')

# Authenticate, then open an Earth Engine session under that project.
# NOTE: EE_PROJECT_ID must be set to your own project's name.
import ee

ee.Authenticate()
ee.Initialize(project=EE_PROJECT_ID)

In [None]:
import tempfile
import urllib.request

from IPython.display import Image

import geemap
import geopandas as gpd
import pandas as pd
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from sklearn.metrics import confusion_matrix, classification_report

# A. The model

### A.1. Preparation using 2020 data

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd

# Area of interest: a lon/lat box over Wellington, NZ, grown by 1 km (1000 m)
# and then reduced back to its bounding rectangle.
_WELLINGTON_BOX = [174.6, -41.4, 174.9, -41.2]
aoi = ee.Geometry.Rectangle(_WELLINGTON_BOX).buffer(1000).bounds()

# Set up your filter Sentinel-2 imagery
# A function that masks clouds in your S2 images via QA band
def mask_s2_clouds(image):
    """Mask cloudy pixels of a Sentinel-2 image and return its scaled bands.

    A pixel is kept only when both the cloud bit (bit 10) and the cirrus bit
    (bit 11) of the 'QA60' band are zero. The masked image is divided by
    10000 and reduced to the bands B2, B3, B4 and B8.

    Args:
        image: image exposing a 'QA60' band plus B2, B3, B4 and B8
            (presumably an ee.Image from a Sentinel-2 collection).

    Returns:
        The masked, rescaled four-band image.
    """
    qa_band = image.select('QA60')

    # Bit flags inside QA60.
    cloud_flag = 1 << 10
    cirrus_flag = 1 << 11

    no_cloud = qa_band.bitwiseAnd(cloud_flag).eq(0)
    no_cirrus = qa_band.bitwiseAnd(cirrus_flag).eq(0)
    clear_sky = no_cloud.And(no_cirrus)

    masked = image.updateMask(clear_sky)
    return masked.divide(10000).select(['B2', 'B3', 'B4', 'B8'])

# Sentinel-2 surface-reflectance scenes that touch the AOI, restricted to
# scenes whose CLOUDY_PIXEL_PERCENTAGE property is below 10.
s2 = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
      .filterBounds(aoi)
      .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)))

# 2020 training composite: per-pixel median of the cloud-masked scenes,
# clipped to the AOI.
# filterDate() treats its end date as exclusive, so the window has to run to
# 1 January 2021 for scenes acquired on 31 December 2020 to be included.
s2_2020 = (s2.filterDate('2020-01-01', '2021-01-01')
           .map(mask_s2_clouds)
           .median()
           .clip(aoi))

# WorldCover codes used as labels: 10, 20, ..., 100 (the end value is included).
valid_classes = ee.List.sequence(10, 100, 10)

# Consecutive ids 1..10 that replace those codes, so the classifier works with
# ten classes instead of a sparse range of values running up to 100.
remap_to = ee.List.sequence(1, 10)

# ESA WorldCover 2020 'Map' band, cut to the AOI.
_worldcover_2020 = ee.Image('ESA/WorldCover/v100/2020')
landcover = _worldcover_2020.select('Map').clip(aoi)

# The same image, masked down to pixels whose code is one of valid_classes.
_valid_flag = landcover.remap(valid_classes, ee.List.repeat(1, 10))
landcover_masked = landcover.updateMask(_valid_flag)

# Label band: codes translated to the consecutive ids (10 -> 1, ..., 100 -> 10).
_label_band = landcover.remap(valid_classes, remap_to)
landcover_remapped = _label_band.rename('Map_remapped')

# Stack the label band onto the 2020 Sentinel-2 composite.
training_data = s2_2020.addBands(landcover_remapped)

# Predictor bands fed to the classifier.
bands = ['B2', 'B3', 'B4', 'B8']

# Draw labelled pixels (predictors + 'Map_remapped') from the stacked image,
# keeping each sampled point's geometry. The fixed seed keeps the draw
# repeatable.
_sampling_options = {
    'region': aoi,
    'scale': 10,
    'numPixels': 5000,
    'seed': 2,
    'geometries': True,
}
_labelled_image = training_data.select([*bands, 'Map_remapped'])
sample = _labelled_image.sample(**_sampling_options)



### A.2 Train the model

In [None]:
# Attach a 'random' property to every sampled point; the split below keys on it.
sample = sample.randomColumn('random')

# Partition on that value:
#   random < 0.7         -> train
#   0.7 <= random < 0.9  -> valid
#   random >= 0.9        -> test
_under_70 = ee.Filter.lt('random', 0.7)
_from_70 = ee.Filter.gte('random', 0.7)
_under_90 = ee.Filter.lt('random', 0.9)
_from_90 = ee.Filter.gte('random', 0.9)

train = sample.filter(_under_70)
valid = sample.filter(ee.Filter.And(_from_70, _under_90))
test = sample.filter(_from_90)

# Fit a 100-tree random forest on the training points, predicting the
# remapped land-cover id from the four predictor bands.
_forest = ee.Classifier.smileRandomForest(numberOfTrees=100)
classifier = _forest.train(
    features=train,
    classProperty='Map_remapped',
    inputProperties=bands,
)

# B. Classify

### B.1. Classify images from 2018, 2022, and 2024

In [None]:
# Years to classify, plus the per-year accumulators filled by the loop below.
years = [2018, 2022, 2024]
years_images, years_classified, years_urls = [], [], []

# Class label -> hex colour, listed in the order of the remapped ids 1..10.
# 'Mangroves' ('00cf75') is intentionally left out so the list lines up with
# the ten remapped classes.
_ESA_LEGEND = [
    ('Tree cover', '006400'),
    ('Shrubland', 'ffbb22'),
    ('Grassland', 'ffff4c'),
    ('Cropland', 'f096ff'),
    ('Built-up', 'fa0000'),
    ('Bare / sparse vegetation', 'b4b4b4'),
    ('Snow and ice', 'f0f0f0'),
    ('Permanent water bodies', '0064c8'),
    ('Herbaceous wetland', '0096a0'),
    ('Moss and lichen', 'fae6a0'),
]
ESA_classes = [label for label, _ in _ESA_LEGEND]
ESA_palette = [hex_code for _, hex_code in _ESA_LEGEND]

# Colour stretch for a class-id image: ids 1..10 spread over the palette.
vis_params = dict(min=1, max=10, palette=ESA_palette)

# Thumbnail request: PNG over the AOI with a dimension of 512.
thumb_params = dict(dimensions=512, region=aoi, format='png')

# For every target year: build a cloud-masked median composite, classify it
# with the model trained on 2020 data, colour the result and request a
# thumbnail URL for it.
for year in years:
    # filterDate() treats its end date as exclusive, so the window runs to
    # 1 January of the following year; ending at '<year>-12-31' would drop
    # scenes acquired on 31 December.
    image = (s2.filterDate(f'{year}-01-01', f'{year + 1}-01-01')
             .map(mask_s2_clouds)
             .median()
             .clip(aoi))
    years_images.append(image)

    # Predicted class id (1..10) per pixel.
    classified = image.select(bands).classify(classifier)

    # Render the ids through the ESA palette; the coloured image is what gets
    # stored and turned into a thumbnail.
    classified_rgb = classified.visualize(**vis_params)
    years_classified.append(classified_rgb)

    years_urls.append(classified_rgb.getThumbURL(thumb_params))

### B.2. Show the images and map

In [None]:
# Quick look at the first classified thumbnail (the first entry of `years`).
_first_thumbnail = Image(url=years_urls[0])
display(_first_thumbnail)

In [None]:
# Styling for single-band class maps: ids 1..10 spread over the ESA palette.
vis_classified_params = {
    'min': 1,
    'max': 10,
    'palette': ESA_palette
}

# Styling for a true-colour view of the Sentinel-2 composites.
# Band names belong under 'bands'; 'palette' expects colours and only applies
# to single-band images, so listing B4/B3/B2 there was invalid. The composites
# are divided by 10000 in mask_s2_clouds, hence a 0-0.3 stretch rather than
# the 1-10 range used for class ids.
vis_rgb_params = {
    'bands': ['B4', 'B3', 'B2'],
    'min': 0,
    'max': 0.3,
}

# Bounding box of the AOI, taken from the first coordinate ring of its
# geometry; used as the plotting extent of the classified thumbnails.
coords = aoi.coordinates().getInfo()[0]
lons = [pt[0] for pt in coords]
lats = [pt[1] for pt in coords]
xmin, xmax, ymin, ymax = min(lons), max(lons), min(lats), max(lats)

# C. Validation

In [None]:
# Class map of the 2020 composite the model was trained on.
classified = s2_2020.select(bands).classify(classifier)

# Score the classifier on the held-out `test` split: the exercise asks for the
# *test* accuracy, and `test` was otherwise never used. The `valid` split stays
# available for model tuning. Each point gains a 'predicted' property next to
# its 'Map_remapped' label.
validated = test.classify(classifier, 'predicted')

# Function to export data for confusion matrix
# fc = featurecollection
def fc_to_lists(fc, classProp, predProp):
    """Pull two property columns out of a feature collection.

    Each column is obtained with ``fc.aggregate_array(<property>).getInfo()``,
    first for ``classProp`` and then for ``predProp``.

    Args:
        fc: collection exposing ``aggregate_array`` (presumably an
            ee.FeatureCollection).
        classProp: name of the property holding the true class.
        predProp: name of the property holding the predicted class.

    Returns:
        Tuple ``(values, preds)`` with the two fetched columns.

    NOTE(review): the two lists only line up element by element if every
    feature carries both properties - confirm for the collection passed in.
    """
    fetched = tuple(
        fc.aggregate_array(prop_name).getInfo()
        for prop_name in (classProp, predProp)
    )
    return fetched

# True and predicted class ids of the evaluated points.
y_true, y_pred = fc_to_lists(validated, 'Map_remapped', 'predicted')

# Original WorldCover code behind each remapped id (1 -> 10, ..., 10 -> 100),
# fetched in one round trip instead of one getInfo() call per class.
label_names = valid_classes.getInfo()
label_map = dict(enumerate(label_names, start=1))

# Score only the classes that actually occur among the labels or predictions.
# Every id passed via `labels` takes part in the macro average, so listing
# classes that are absent from the data adds zero-valued components and drags
# the macro average down.
present_ids = sorted(set(y_true) | set(y_pred))
report = classification_report(
    y_true,
    y_pred,
    labels=present_ids,
    target_names=[str(label_map.get(class_id, class_id)) for class_id in present_ids],
    output_dict=True,
)

print("\nClassification Report:")
print(report)

# D. Figure

In [None]:
# !pip install rasterio

### D.1. Prepare all images

In [None]:
import rasterio
import zipfile
import requests
import io
# Download link for the 2020 WorldCover raster over the AOI at 10 m resolution.
download_url = landcover.getDownloadURL({
    'scale': 10,
    'region': aoi
})

# Fetch each year's classified thumbnail and decode it into an image array.
# The PNG is decoded straight from memory: writing it to a NamedTemporaryFile
# and reopening that file by name while it is still open is platform-dependent
# and fails on Windows.
classified_imgs = []
for year, url in zip(years, years_urls):
    with urllib.request.urlopen(url) as response:
        png_bytes = response.read()
    img = mpimg.imread(io.BytesIO(png_bytes), format='png')
    classified_imgs.append(img)

# Base image: download the WorldCover raster (a zip archive holding a GeoTIFF).
# The body is read in full right away, so streaming mode brings nothing.
res = requests.get(download_url)
# Fail on an HTTP error here, with the real status, instead of later with an
# unrelated "not a zip file" error.
res.raise_for_status()

# Pull the first GeoTIFF out of the archive, entirely in memory.
with zipfile.ZipFile(io.BytesIO(res.content)) as zip_file:
    file_list = [f for f in zip_file.namelist() if f.endswith('.tif')]
    if not file_list:
        raise ValueError("No GeoTIFF file found in the downloaded zip archive.")
    geotiff_data = zip_file.read(file_list[0])

# Read band 1 and the raster bounds, ordered [left, right, bottom, top] for
# use as the imshow extent.
with rasterio.open(io.BytesIO(geotiff_data)) as src:
    raster_array = src.read(1)
    extent = [src.bounds.left, src.bounds.right, src.bounds.bottom, src.bounds.top]

### D.2. Prepare Validation data

In [None]:
# Metrics shown as table columns, and report entries shown as table rows.
cols = ['precision', 'recall', 'f1-score']
rows = ['accuracy', 'macro avg', 'weighted avg']

# Keep only the summary entries of the report, then flip the frame so each
# summary entry becomes a row and each metric a column.
df_report = pd.DataFrame(report).loc[:, rows]
df_plot = df_report.T.loc[:, cols]
df_plot

In [None]:

import matplotlib.colors as mcolors

# Figure grid (three rows, two panels per row):
#   A: first year     D: second year
#   B: third year     E: WorldCover base map
#   C: accuracy table F: legend
layout = [['A', 'A', 'D', 'D'],
          ['B', 'B', 'E', 'E'],
          ['C', 'C', 'F', 'F']]

fig, axes = plt.subplot_mosaic(layout, figsize=(12, 12), height_ratios=[1, 1, 1])

class_axe = [axes['A'], axes['D'], axes['B']]
based_axe = axes['E']
ledgend_axe = axes['F']
report_axe = axes['C']

# Discrete colormap built from the ESA palette (used for the base map).
colors = [f'#{v}' for v in ESA_palette]
cmap = mcolors.ListedColormap(colors)

# Classified thumbnails are already coloured RGB(A) arrays, and imshow ignores
# cmap/vmin/vmax for such input, so none are passed.
for axe, year, thumbnail in zip(class_axe, years, classified_imgs):
    axe.set_title(f'Classified image of {year}')
    axe.imshow(thumbnail, extent=[xmin, xmax, ymin, ymax])
    axe.axis('off')  # Hide the axes

# Legend: one colour swatch with its class name per row, stacked from the top
# of the panel downwards.
swatch_step = 0.14
for slot, (hex_code, label) in enumerate(zip(ESA_palette, ESA_classes)):
    swatch_y = (10 - slot) * swatch_step
    swatch = patches.Rectangle(
        (0.1, swatch_y), 0.07, 0.1,
        facecolor=f'#{hex_code}', edgecolor='none',
    )
    ledgend_axe.add_patch(swatch)
    # Class name, vertically centred on its swatch.
    ledgend_axe.text(0.2, swatch_y + 0.05, label, va='center', ha='left', fontsize=9)

# Axis limits that hold all swatches; no ticks or frame.
ledgend_axe.set_xlim(0, 1)
ledgend_axe.set_ylim(0, 1.5)
ledgend_axe.axis('off')
ledgend_axe.set_title('Classes Colors')

# Base map: raw WorldCover codes (10..100) drawn with the same colormap.
based_axe.imshow(raster_array, extent=extent, cmap=cmap, vmin=10, vmax=100)
based_axe.set_title('ESA WorldCover 10m v100 (2020)')
based_axe.axis('off')  # Hide the axes

# Report panel: explanatory note plus a table of the accuracy averages.
report_axe.axis('off')  # Hide the axes
report_axe.set_title('The accuracy averages of the classifier.')
explain = '''
Must use 2020 dataset, same year with the training data.
Otherwise the results is not reflect the ground truth which lead to inaccuracy.
For example, an area in 2020 is forest, but in 2022 is grassland.
'''
report_axe.text(0.5, 0.5, explain, ha='center', va='center', fontsize=10)

# Format numeric cells to four decimals. Series.map is applied column by
# column because DataFrame.applymap is deprecated in recent pandas releases.
# Non-numeric cells pass through unchanged, so re-running the cell is harmless.
df_plot = df_plot.apply(
    lambda column: column.map(
        lambda x: f'{x:.4f}' if isinstance(x, (int, float)) else x
    )
)

# Create the table
table = report_axe.table(
    cellText=df_plot.values,
    colLabels=df_plot.columns.str.capitalize(),
    rowLabels=df_plot.index.str.capitalize(),
    loc='best',
    cellLoc='center'
)

table.scale(0.8, 0.8)  # Adjust table size

# Highlight the header row (row 0) and the row-label column (column -1).
for (i, j), cell in table.get_celld().items():
    if i == 0 or j == -1:
        cell.set_text_props(weight='bold', color='w')
        cell.set_facecolor('teal')  # Header color

# Figure caption, centred horizontally and placed just below the bottom edge
# of the figure.
caption = '''
Figure x: Wellington Image Classification in year 2018, 2022, and 2024
Using a Random Forest model training on 2020 Sentinel-2 data and ESA WorldCover (2020).
'''
_caption_style = {'wrap': True, 'horizontalalignment': 'center', 'fontsize': 12}
plt.figtext(0.5, -0.03, caption, **_caption_style)

# Tidy the panel spacing, then render.
plt.tight_layout()
plt.show()