<a href="https://colab.research.google.com/github/ck1972/Geospatial-ML-Python/blob/main/Lab_10a_Final_Project_Template_Aboveground_Biomass_Estimation_Using_Machine_Learning_in_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Lab 10a. Final Project Template: Aboveground Biomass Estimation Using Machine Learning in Python**

In [None]:
# ========================
# FINAL PROJECT TEMPLATE
# Title: Estimating Aboveground Biomass Density (AGBD) Using ML
# ========================

# STEP 1: Set up the environment
!pip install shap
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import shap
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Optional: Mount Google Drive if data is stored there
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# STEP 2: Load and explore your dataset
# Replace with your file path
data_path = '/content/drive/MyDrive/AGBD/final_dataset.csv'
df = pd.read_csv(data_path)

# Preview the data.
# A notebook cell renders only the value of its final bare expression, so a
# plain `df.head()` in the middle of the cell would be evaluated and discarded.
# display() (provided by the IPython/Colab kernel) renders each table explicitly.
display(df.head())      # first rows
df.info()               # column names, dtypes and non-null counts (printed)
display(df.describe())  # summary statistics of the numeric columns

In [None]:
# STEP 3: Define features and target
# Predictor columns (example names - edit to match your dataset)
features = [
    'NDVI',
    'SLAVI',
    'CCCI',
    'Elevation',
    'LandCover',
]
# Response column to be estimated
target = 'AGBD'

# Feature table and target series, both taken from the loaded DataFrame
X, y = df[features], df[target]

In [None]:
# STEP 4: Split the data
# 30% of the rows are held out for testing; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# STEP 5: Train a Random Forest Regression model
# fit() returns the fitted estimator, so construction and training are chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Save the model together with the feature list it was trained on
joblib.dump(
    {'model': model, 'features': features},
    '/content/drive/MyDrive/AGBD/rf_model_agbd.pkl',
)

In [None]:
# STEP 6: Evaluate the model
# Predict on the held-out test rows
y_pred = model.predict(X_test)

# Coefficient of determination and root-mean-squared error on the test set
r2 = r2_score(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))

print(f'R² Score: {r2:.3f}')
print(f'RMSE: {rmse:.2f}')

In [None]:
# STEP 7: Visualize predictions
# Scatter of observed vs. predicted values drawn on an explicit Axes object
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred, alpha=0.6)
ax.set_xlabel("Actual AGBD")
ax.set_ylabel("Predicted AGBD")
ax.set_title("Actual vs Predicted AGBD")
ax.grid(True)

# Dashed red 1:1 reference line spanning the observed range; points on this
# line are predictions equal to the actual value.
agbd_range = [y_test.min(), y_test.max()]
ax.plot(agbd_range, agbd_range, 'r--')
plt.show()


In [None]:
# STEP 8: Explain the model with SHAP
# Build a tree explainer for the trained forest and compute SHAP values for
# the test rows
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# Summary plot (bar type)
shap.summary_plot(
    shap_values,
    features=X_test,
    plot_type="bar",
)


In [None]:
# STEP 9: Document and Export
# One-row summary table: test metrics plus the model configuration
summary = {
    'R² Score': r2,
    'RMSE': rmse,
    'Features Used': features,
    'Model Type': 'Random Forest Regressor',
}
summary_df = pd.DataFrame([summary])
summary_df.to_csv(
    '/content/drive/MyDrive/AGBD/model_summary.csv',
    index=False,
)

# Optional: Save predictions
# assign() returns a new frame (X_test itself is left untouched) with the
# observed and predicted values appended as the last two columns.
results_df = X_test.assign(Actual_AGBD=y_test, Predicted_AGBD=y_pred)
results_df.to_csv(
    '/content/drive/MyDrive/AGBD/predictions.csv',
    index=False,
)
