<a href="https://colab.research.google.com/github/jaalvalcan/GEE_index_sets/blob/main/Biomass%2Bembeded%20dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import ee
import geemap
import xarray as xr
import xee  # CRITICAL: Registers the 'ee' engine with xarray
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Earth Engine session setup: run the authentication flow, then initialise
# the client against the Cloud project used for this notebook.
EE_PROJECT = 'ee-jaalvalcan'

ee.Authenticate()
ee.Initialize(project=EE_PROJECT)


In [None]:
# 3. DATA LOADING
# Fixed bounding box used as the region of interest. In Colab, keep a static
# ROI like this one or draw one with the map tool instead.
roi = ee.Geometry.BBox(-62.0, -4.0, -61.0, -3.0)

# Predictors: annual satellite embeddings, restricted to the date window and
# the ROI, then averaged per band into a single image.
emb_collection = ee.ImageCollection("GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL")
emb = emb_collection.filterDate('2019', '2021').filterBounds(roi).mean()

# Target: the 'MU' band of the GEDI biomass product.
biomass = ee.Image("LARSE/GEDI/GEDI04_B_002").select('MU')

# One image holding every predictor band plus the target band.
stack = emb.addBands(biomass)

# 4. CONVERT TO XARRAY
# Open the stacked image through the 'ee' xarray backend (registered by the
# `xee` import) on a lon/lat grid clipped to the ROI.
open_kwargs = dict(
    engine='ee',
    crs='EPSG:4326',
    scale=0.01,  # Degrees per pixel; roughly 1.1km
    geometry=roi,
)
ds = xr.open_dataset(stack, **open_kwargs)

# Remove the 'time' dimension and its coordinate so only the spatial
# dimensions remain.
ds = ds.squeeze('time')
ds = ds.drop_vars('time')
# NOTE(review): the `* 1` presumably forces a fresh arithmetic result
# (e.g. to materialise values or drop attributes) - confirm intent.
ds = ds * 1

# Flatten to one row per pixel and keep only rows where every band is present.
df = ds.to_dataframe()
df = df.dropna()

# 5. MACHINE LEARNING PREPARATION
# Predictors (X) and Target (y)
X = df.drop('MU', axis=1)
y = df['MU'].values.reshape(-1, 1)  # Force 2D for scaler

# Split BEFORE scaling. Fitting the scalers on the full dataset would let the
# mean/variance of the test rows leak into the preprocessing and bias any
# held-out evaluation. Same test_size/random_state as before, so the rows
# assigned to each split are unchanged.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scalers on the training rows only, then apply them everywhere.
scaler_x = StandardScaler()
scaler_y = StandardScaler()

X_train = scaler_x.fit_transform(X_train_raw)
y_train = scaler_y.fit_transform(y_train_raw)
X_test = scaler_x.transform(X_test_raw)
y_test = scaler_y.transform(y_test_raw)

# Full-dataset scaled arrays, retained for code that still uses these names.
# They are produced with the train-fitted scalers.
X_scaled = scaler_x.transform(X)
y_scaled = scaler_y.transform(y)

# 6. NEURAL NETWORK MODEL
# Fully connected regressor: three ReLU hidden layers (64 -> 32 -> 16) and a
# single linear output unit for the scaled target.
model = Sequential([
    Dense(64, activation='relu', input_dim=X_train.shape[1]),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])
# validation_split holds out the last 20% of the training rows for per-epoch
# validation; verbose=0 suppresses the per-epoch log.
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

# Evaluate on the held-out test split, which is otherwise never used, so the
# notebook reports a generalisation metric. evaluate() returns the loss
# followed by the compiled metrics, i.e. [mse, mae], in scaled units.
test_mse, test_mae = model.evaluate(X_test, y_test, verbose=0)
# StandardScaler is linear, so an absolute error in scaled units converts back
# to the target's original units by multiplying with the fitted scale.
test_mae_original = test_mae * scaler_y.scale_[0]
print(
    f"Test MSE (scaled): {test_mse:.4f} | "
    f"Test MAE (scaled): {test_mae:.4f} | "
    f"Test MAE (original units): {test_mae_original:.4f}"
)

# 7. PREDICTION & MAPPING
# Generate predictions for every pixel that has a complete predictor vector.
# Missing values are NOT filled with 0: a zero vector is not a real
# observation, and predicting on it would paint model output over no-data
# pixels. Those pixels keep NaN so they render as gaps in the map.
df_full = ds.to_dataframe()
predictors_full = df_full.drop('MU', axis=1)
has_all_predictors = predictors_full.notna().all(axis=1)

X_full_scaled = scaler_x.transform(predictors_full[has_all_predictors])

# Predict and inverse scale. The model output has shape (n, 1); flatten it so
# it is assigned as a plain 1-D column.
predictions = model.predict(X_full_scaled)
df_full['biomass_pred'] = np.nan
df_full.loc[has_all_predictors, 'biomass_pred'] = (
    scaler_y.inverse_transform(predictions).ravel()
)

# Convert back to Xarray for plotting; the frame's index levels become the
# grid dimensions again, sorted so the axes are monotonic.
res = df_full.to_xarray().sortby(['lat', 'lon'])
res.biomass_pred.plot(robust=True, cmap='viridis')