<a href="https://colab.research.google.com/github/ck1972/Geospatial-Deep-Learning/blob/main/Mod2_Lab2b_Modeling_AGBD_ANN_Mafungautsi_GitHub.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Lab 2b. Introduction to Shallow Neural Networks for Biomass Modeling: A Simple ANN Approach**
## **Setup**
### Install libraries

In [None]:
# Install some packages
!pip install rasterio
!pip install earthpy

In [None]:
# Import libraries and modules
import earthpy.plot as ep
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Mount Google Drive
# The sample CSV and the predictor raster used below are addressed through
# paths under /content/drive, so this cell must run before any data is loaded.
# Relies on the google.colab package, i.e. the notebook is meant to run in Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Load and prepare data
### Define variables and data paths

In [None]:
# Define predictor and target variables, and the data paths
# FEATURES: the 12 predictor column names selected from the sample CSV.
# The map-prediction cell feeds raster bands 1..12 to the model in band order,
# so the raster's band order presumably matches this list — TODO confirm.
FEATURES = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B11', 'B12', 'NDVI', 'SAVI', 'RESI']
# LABEL is kept as a one-element list so it can be concatenated with FEATURES
# when selecting columns from the sample table.
LABEL = ['agbd']
# Both inputs live under the Google Drive mount point (/content/drive).
SAMPLE_PATH = '/content/drive/My Drive/Maf_Datasets/TA_AGDD_GEDI_L4A_2022.csv' # With filtered agbd
IMAGE_PATH = '/content/drive/My Drive/Maf_Datasets/S2_predictors_2022.tif'

### Load and visualize Sentinel-2 image

In [None]:
# Open the predictor raster and keep its basic metadata for later cells
image = rasterio.open(IMAGE_PATH)
bandNum, crs, transform = image.count, image.crs, image.transform
height, width = image.height, image.width
shape = (height, width)

# Stack raster bands 5, 6 and 4 (in that order) into one 3-band array for display
image_vis = np.stack([image.read(band) for band in (5, 6, 4)])

# Show the 3-band composite with a contrast stretch
plot_size = (8, 8)
ep.plot_rgb(image_vis, figsize=plot_size, stretch=True)

##  Load sample data

In [None]:
# Load the sample table, keeping only the predictor columns followed by the target
wanted_columns = FEATURES + LABEL
samples = pd.read_csv(SAMPLE_PATH).loc[:, wanted_columns]
samples

## Split and prepare data

In [None]:
# Split into train and test (80 % / 20 %).
# random_state is fixed so the split, and therefore every metric and plot
# derived from it, is the same on each run of the notebook.
train, test = train_test_split(samples, test_size=0.2, shuffle=True, random_state=42)

# Get variables input and output
train_input = train[FEATURES].to_numpy()
test_input = test[FEATURES].to_numpy()
# The target is divided by 90 before training; predictions are multiplied by
# the same factor later to return to the original AGBD scale.
train_output = train[LABEL].to_numpy().astype(float) / 90
test_output = test[LABEL].to_numpy().astype(float) / 90

# Show the data shape
print(f'Train features: {train_input.shape}\nTest features: {test_input.shape}\nTrain label: {train_output.shape}\nTest label: {test_output.shape}')

## Build and train a shallow ANN model
### ANN model architecture

In [None]:
# Define the shallow neural network model
# One hidden layer with 8 neurons, using ReLU activation and Adam optimizer.
# The predictors are min-max scaled inside the pipeline before they reach the
# network: MLPs are sensitive to feature scale, and keeping the scaler in the
# pipeline means the same transform (fitted on the training data only) is
# applied automatically wherever model.predict is called.
model = make_pipeline(
    MinMaxScaler(),
    MLPRegressor(hidden_layer_sizes=(8,),
                 activation='relu',
                 solver='adam',
                 learning_rate_init=0.01,
                 max_iter=500,
                 random_state=42),
)

### Train the model

In [None]:
# Fit the model on the training predictors; the (n, 1) label column is
# flattened so the regressor is trained on a single 1-D target
train_target = train_output.reshape(-1)
model.fit(train_input, train_target)

### Predict on test data

In [None]:
# Predict on test data
predictions = model.predict(test_input)

# Rescale back to original AGBD scale (inverse of the division by 90 applied
# to the targets before training).
# test_output is a column vector of shape (n, 1) while predictions is 1-D (n,);
# flattening it gives both arrays the same shape, so element-wise arithmetic
# between them (e.g. residuals) cannot silently broadcast to an (n, n) matrix.
true_agbd = test_output.ravel() * 90
pred_agbd = predictions * 90

### Evaluate model performance

In [None]:
# Error metrics on the original AGBD scale
mse = mean_squared_error(true_agbd, pred_agbd)
rmse = np.sqrt(mse)
r2 = r2_score(true_agbd, pred_agbd)

print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")

# Scatter of observed against predicted values with a dashed 1:1 reference line
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(true_agbd, pred_agbd, alpha=0.5, edgecolor='k')
one_to_one = [true_agbd.min(), true_agbd.max()]
ax.plot(one_to_one, one_to_one, 'r--')
ax.set_xlabel('True AGBD')
ax.set_ylabel('Predicted AGBD')
ax.set_title('True vs Predicted AGBD (ANN - scikit-learn)')
ax.grid(True)
plt.show()

## Predict AGBD map

In [None]:
# Read and reshape image input.
# One raster band is read per entry in FEATURES (rasterio bands are 1-indexed),
# so the band count follows the feature list instead of a hard-coded 12.
n_features = len(FEATURES)
image_input = np.stack([image.read(band) for band in range(1, n_features + 1)])  # Shape: (n_features, height, width)
image_input_flat = image_input.reshape(n_features, -1).T  # Shape: (height*width, n_features)

# Handle NaNs.
# Remember which pixels have a missing/non-finite predictor before filling
# them: the model needs finite inputs, but a prediction made from filled-in
# values is not a real estimate and must not end up in the map.
invalid_pixels = ~np.isfinite(image_input_flat).all(axis=1)
image_input_flat = np.nan_to_num(image_input_flat, nan=0.0)  # Replace NaNs with 0

# Predict and rescale
prediction_flat = model.predict(image_input_flat) * 90  # Back to AGBD
prediction_flat[invalid_pixels] = np.nan  # No estimate where predictors were missing
prediction_map = prediction_flat.reshape(image.height, image.width)  # Reshape to image

# Visualize
ep.plot_bands(prediction_map, cmap='YlGn', figsize=(10, 10), title='Predicted AGBD Map')


## Save AGBD map

In [None]:
# Destination of the exported map on Google Drive
output_path = '/content/drive/MyDrive/Maf_Datasets/ANN_agbd_2022.tif'

# Describe a single-band float32 GeoTIFF with the same size as prediction_map,
# reusing the CRS and affine transform of the source image
geotiff_profile = {
    'driver': 'GTiff',
    'height': prediction_map.shape[0],
    'width': prediction_map.shape[1],
    'count': 1,
    'dtype': 'float32',
    'crs': image.crs,
    'transform': image.transform,
}

# Write prediction_map as band 1; the context manager closes the file
with rasterio.open(output_path, 'w', **geotiff_profile) as dst:
    dst.write(prediction_map.astype('float32'), 1)

print(f"Predicted AGBD map saved to: {output_path}")

## Explainable ML

In [None]:
# Import the shap library
import shap

# Select a background dataset (a small sample from training data).
# The sample size is capped at the number of training rows, because drawing
# more rows than exist without replacement raises a ValueError, and the draw
# uses a seeded generator so the SHAP values are reproducible between runs.
n_background = min(1000, train_input.shape[0])
background_rng = np.random.default_rng(42)
background_rows = background_rng.choice(train_input.shape[0], size=n_background, replace=False)
background = train_input[background_rows]

# KernelExplainer is model-agnostic, works for any black-box model
explainer = shap.KernelExplainer(model.predict, background)

# Explain predictions on a subset of test data
shap_values = explainer.shap_values(test_input[:1000])  # Reduce size if slow

### Plot global SHAP values

In [None]:
# Global SHAP summary, drawn for the same test rows that were explained
explained_inputs = test_input[:1000]
shap.summary_plot(shap_values, explained_inputs, feature_names=FEATURES)

### Plot local SHAP values

In [None]:
# Get SHAP values for a single test input (e.g., index 100)
index = 100

# shap_values only covers the subset of test rows that was explained, so the
# index must fall inside that subset. A negative index is rejected as well:
# it would count from the end of each array and could pair a test row with
# the SHAP values of a different row.
if not 0 <= index < len(shap_values):
    raise IndexError(
        f"index {index} is outside the {len(shap_values)} explained test samples"
    )
single_input = test_input[index]
single_shap_values = shap_values[index]

# Create a SHAP Explanation object
explanation = shap.Explanation(
    values=single_shap_values,
    base_values=explainer.expected_value,
    data=single_input,
    feature_names=FEATURES
)

# Generate waterfall plot
shap.plots.waterfall(explanation)