In [None]:
# Import Libraries & Authenticate GEE
import ee
import geemap
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# Cloud project that the Earth Engine session is initialised against.
GEE_PROJECT = 'nitk25'

# Authenticate first, then initialise the client library for that project.
ee.Authenticate()
ee.Initialize(project=GEE_PROJECT)


In [None]:
# Study area: a circular buffer drawn around a single reference point.

# Example location (can be changed)
CENTER_COORDS = [74.85, 12.95]  # Karnataka region
BUFFER_DISTANCE = 20000  # 20 km buffer

Map = geemap.Map()

point = ee.Geometry.Point(CENTER_COORDS)
basin = point.buffer(BUFFER_DISTANCE)

# Centre the map on the buffer at zoom level 9 and draw it as a layer.
Map.centerObject(basin, 9)
Map.addLayer(basin, {}, "Watershed")

# Last expression of the cell: renders the interactive map.
Map


In [None]:
# Rainfall Data (CHIRPS): daily precipitation summed over calendar year 2020.
#
# filterDate() treats the end date as exclusive, so the window must end on
# 2021-01-01 for the 31 December 2020 image to be included in the annual sum.
rain = (
    ee.ImageCollection("UCSB-CHG/CHIRPS/DAILY")
    .filterDate("2020-01-01", "2021-01-01")
    .filterBounds(basin)
    .sum()
    .rename("Rainfall")
)

# The min/max values are a display stretch only; the band values are unchanged.
Map.addLayer(rain, {"min": 0, "max": 2000}, "Rainfall")


In [None]:
# Terrain inputs: the SRTM elevation image and the slope derived from it.
srtm = ee.Image("USGS/SRTMGL1_003")
dem = srtm.rename("Elevation")
slope = ee.Terrain.slope(dem).rename("Slope")

# Slope is drawn with a 0-60 display stretch.
Map.addLayer(slope, {"min": 0, "max": 60}, "Slope")


In [None]:
# Land use / land cover: ESA WorldCover 2020 image, band renamed to "LULC".
LULC_ASSET_ID = "ESA/WorldCover/v100/2020"
lulc = ee.Image(LULC_ASSET_ID).rename("LULC")

# Empty visualisation parameters: the layer is drawn with default rendering.
Map.addLayer(lulc, {}, "LULC")


In [None]:
# Simple runoff assumption (same logic as runoff project):
# runoff is taken as a fixed fraction of the rainfall total.
RUNOFF_COEFFICIENT = 0.3

runoff = rain.multiply(RUNOFF_COEFFICIENT).rename("Runoff")

Map.addLayer(runoff, {"min": 0, "max": 600}, "Runoff")


In [None]:
# Stack all variables into one multi-band image.
# Resulting band order: Rainfall, Runoff, Slope, LULC.
stack = rain
for extra_band in (runoff, slope, lulc):
    stack = stack.addBands(extra_band)

In [None]:
# Sample the stacked image inside the basin and load the result into pandas.
sampling_options = {
    "region": basin,
    "scale": 1000,
    "numPixels": 1500,
    "geometries": False,  # attribute values only, no per-sample geometry
}
samples = stack.sample(**sampling_options)

# Convert the sampled feature collection into a DataFrame.
df = geemap.ee_to_df(samples)

# Preview the first rows.
df.head()


In [None]:
# Preview the last five rows of the sampled table.
df.tail(5)

In [None]:
# Define Sediment Load (Target Variable) as a scaled product of runoff and slope.
# NOTE(review): this target is an exact function of the Runoff and Slope
# columns, so it is a synthetic quantity rather than an observed measurement.
SEDIMENT_COEFFICIENT = 0.05

df["Sediment_Load"] = df["Runoff"].mul(SEDIMENT_COEFFICIENT).mul(df["Slope"])

df.head()


In [None]:
# Preview the last five rows of the table.
df.tail(5)

In [None]:
# Prepare ML Dataset: feature matrix, target vector and a hold-out split.
FEATURE_COLUMNS = ["Rainfall", "Runoff", "Slope", "LULC"]
TARGET_COLUMN = "Sediment_Load"

X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# 20% of the rows are held out for testing; the fixed random_state keeps the
# split the same on every run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# Train Random Forest Model with 100 trees and a fixed seed.
rf_settings = {"n_estimators": 100, "random_state": 42}
model = RandomForestRegressor(**rf_settings)

# Fit on the training split; as the last expression, the call's result is
# what the cell displays.
model.fit(X_train, y_train)


In [None]:
# Prediction & Evaluation on the held-out test split.
y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# The label used to be mojibake ("RÂ²"). The superscript two is written as an
# escape so the label stays correct whatever encoding the file is saved in.
print("R\u00b2 Score:", r2)
print("MAE:", mae)


In [None]:
# Sample-wise Result Checking: test features next to actual and predicted values.
# assign() works on a copy, so X_test itself is left untouched.
comparison = X_test.assign(
    Actual_Sediment=y_test.values,
    Predicted_Sediment=y_pred,
)

comparison.head(10)


In [None]:
# Error Calculation: signed residual (actual minus predicted) and its magnitude.
residual = comparison["Actual_Sediment"].sub(comparison["Predicted_Sediment"])

comparison["Error"] = residual
comparison["Absolute_Error"] = residual.abs()

comparison.head()

In [None]:
# Scatter Plot (Actual vs Predicted), drawn through the explicit Axes interface.
fig, ax = plt.subplots()
ax.scatter(comparison["Actual_Sediment"], comparison["Predicted_Sediment"])
ax.set(
    xlabel="Actual Sediment Load",
    ylabel="Predicted Sediment Load",
    title="Actual vs Predicted Sediment Load",
)
plt.show()


In [None]:
# Feature Importance: pair each input column with the fitted model's importance score.
feature_names = X.columns
feature_scores = model.feature_importances_

importance = pd.DataFrame({"Feature": feature_names, "Importance": feature_scores})

# Display with the most important feature first; `importance` itself is left unsorted.
importance.sort_values("Importance", ascending=False)


In [None]:
# Clear the cell output when running inside Google Colab.
# google.colab exists only on Colab runtimes, so the import is guarded; on any
# other kernel this cell does nothing instead of failing with ImportError.
try:
    from google.colab import output
except ImportError:
    output = None  # not running in Colab

if output is not None:
    output.clear()