# Optimized SAE + DNN Pipeline for AGB Estimation
This notebook implements an optimized Sparse Autoencoder (SAE) + DNN workflow with the following improvements:
- Dynamic layer sizing
- EarlyStopping for AE and DNN
- Validation splits
- L1 regularization tuning
- Dropout for DNN
- Easily tunable latent dimension

In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l1


# -------------------------
# Autoencoder Class
# -------------------------
class AutoencoderFeatureExtractor:
    """Symmetric dense autoencoder whose bottleneck serves as a feature extractor.

    The network is input -> hidden_1 -> hidden_2 -> latent -> hidden_2 ->
    hidden_1 -> input. The latent layer carries an L1 activity penalty, and a
    second model (``self.encoder``) shares the encoder weights so the latent
    activations can be read out after training.

    Args:
        input_dim: Number of input features.
        latent_dim: Width of the bottleneck layer.
        l1_reg: Coefficient of the L1 activity penalty on the bottleneck.
        hidden_1: Width of the outer hidden layers; when falsy (None or 0)
            it defaults to ``max(128, input_dim // 2)``.
        hidden_2: Width of the inner hidden layers; when falsy (None or 0)
            it defaults to ``max(64, input_dim // 4)``.
    """

    def __init__(self, input_dim, latent_dim=64, l1_reg=1e-5, hidden_1=None, hidden_2=None):
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.l1_reg = l1_reg
        # Falsy widths fall back to sizes derived from the input width.
        self.hidden_1 = hidden_1 or max(128, input_dim // 2)
        self.hidden_2 = hidden_2 or max(64, input_dim // 4)
        # build_model() assigns the encoder as a side effect.
        self.encoder = None
        self.autoencoder = self.build_model()

    def build_model(self):
        """Build and compile the autoencoder; also sets ``self.encoder``.

        Returns:
            The compiled autoencoder (Adam, learning rate 0.001, MSE loss).
        """
        inputs = Input(shape=(self.input_dim,))

        # Encoder: two ReLU layers followed by the L1-penalised bottleneck.
        encoded = inputs
        for width in (self.hidden_1, self.hidden_2):
            encoded = Dense(width, activation='relu')(encoded)
        code = Dense(
            self.latent_dim,
            activation='relu',
            activity_regularizer=l1(self.l1_reg),
        )(encoded)

        # Decoder: mirror of the encoder with a linear reconstruction layer.
        decoded = code
        for width in (self.hidden_2, self.hidden_1):
            decoded = Dense(width, activation='relu')(decoded)
        reconstruction = Dense(self.input_dim, activation='linear')(decoded)

        full_model = Model(inputs=inputs, outputs=reconstruction)
        # The encoder shares layers (and therefore weights) with full_model.
        self.encoder = Model(inputs=inputs, outputs=code)

        full_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
        return full_model

    def train(self, X, epochs=200, batch_size=32, validation_split=0.1):
        """Fit the autoencoder to reconstruct ``X`` from itself.

        Training stops early once ``val_loss`` has not improved for 10
        epochs, and the best weights seen are restored.

        Args:
            X: Feature matrix used as both input and reconstruction target.
            epochs: Maximum number of training epochs.
            batch_size: Mini-batch size.
            validation_split: Fraction of ``X`` that Keras holds out for
                validation.
        """
        stopper = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        fit_options = {
            'epochs': epochs,
            'batch_size': batch_size,
            'validation_split': validation_split,
            'shuffle': True,
            'callbacks': [stopper],
            'verbose': 1,
        }
        self.autoencoder.fit(X, X, **fit_options)

    def extract_features(self, X):
        """Return the bottleneck activations of the encoder for ``X``."""
        latent = self.encoder.predict(X)
        return latent


# -------------------------
# DNN Builder and Trainer
# -------------------------
def build_dnn(input_dim, hidden_1=128, hidden_2=64, dropout_rate=0.2, lr=0.001):
    """Build and compile a two-hidden-layer regression DNN.

    The input shape is declared with an explicit ``Input`` layer rather than
    the ``input_dim=`` keyword on the first ``Dense`` layer, matching how the
    autoencoder in this file declares its input.

    Args:
        input_dim: Number of input features.
        hidden_1: Width of the first hidden layer.
        hidden_2: Width of the second hidden layer.
        dropout_rate: Dropout rate applied after each hidden layer.
        lr: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model with a single linear output unit and
        MSE loss.
    """
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(hidden_1, activation='relu'),
        Dropout(dropout_rate),
        Dense(hidden_2, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='linear'),
    ])
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse')
    return model


def train_dnn(model, X_train, y_train, X_val, y_val, epochs=200, batch_size=32):
    """Fit ``model`` with early stopping on an explicit validation set.

    Training halts once ``val_loss`` has not improved for 10 epochs, and the
    best weights seen are restored.

    Args:
        model: Compiled Keras model to train (fitted in place).
        X_train: Training features.
        y_train: Training targets.
        X_val: Validation features.
        y_val: Validation targets.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        The same ``model`` object after fitting.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    fit_options = dict(
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[stopper],
        verbose=1,
    )
    model.fit(X_train, y_train, **fit_options)
    return model


# -------------------------
# Pipeline Function
# -------------------------
def run_sae_dnn_pipeline(data_path, target_col='Target', latent_dim=64):
    """Train the SAE + DNN pipeline on a CSV file and report test metrics.

    Steps: load the CSV, drop the optional origin/status columns, hold out
    10% of the rows as a test set, min-max scale the features, train the
    autoencoder on the training rows, encode both sets, train the DNN on the
    latent training features (with a further 10% held out for validation),
    and score the DNN on the test set.

    Args:
        data_path: Path of the CSV file, passed to ``pandas.read_csv``.
        target_col: Name of the regression target column.
        latent_dim: Width of the autoencoder bottleneck.

    Returns:
        Tuple ``(r2, mae, mse)`` computed on the held-out test rows.

    Raises:
        KeyError: If ``target_col`` is not a column of the loaded data.
    """
    # Load data
    df = pd.read_csv(data_path)
    # These bookkeeping columns are optional; drop whichever are present.
    df = df.drop(columns=['origin', 'Origin', 'status', 'Status'], errors='ignore')
    if target_col not in df.columns:
        raise KeyError(f"Target column {target_col!r} not found in {data_path!r}")
    X = df.drop(columns=[target_col]).values
    y = df[target_col].values

    # Split BEFORE scaling so the scaler never sees the test rows; fitting it
    # on the full dataset would leak test-set min/max into training features.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

    # Scale features using statistics from the training rows only
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train_raw)
    X_test_scaled = scaler.transform(X_test_raw)

    # Train Autoencoder
    ae = AutoencoderFeatureExtractor(input_dim=X_train_scaled.shape[1], latent_dim=latent_dim)
    ae.train(X_train_scaled)

    # Extract latent features
    X_train_latent = ae.extract_features(X_train_scaled)
    X_test_latent = ae.extract_features(X_test_scaled)

    # Split latent features for DNN validation
    X_train_final, X_val, y_train_final, y_val = train_test_split(X_train_latent, y_train, test_size=0.1, random_state=42)

    # Train DNN
    dnn = build_dnn(input_dim=X_train_final.shape[1])
    dnn = train_dnn(dnn, X_train_final, y_train_final, X_val, y_val)

    # Evaluate; predict() returns a column vector, so flatten to match y_test
    y_pred = dnn.predict(X_test_latent).flatten()
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)

    print(f"R²: {r2:.4f}, MAE: {mae:.2f}, MSE: {mse:.2f}")
    return r2, mae, mse


# Example usage:
# run_sae_dnn_pipeline('/content/4_tvol_lidarLiveQMinMax.csv', latent_dim=128)


In [None]:

# Run the full pipeline on the LiDAR Q dataset with a 128-unit latent layer.
lidar_q_csv = '/content/4_tvol_lidarLiveQMinMax.csv'
result = run_sae_dnn_pipeline(lidar_q_csv, latent_dim=128)
# result holds the returned (r2, mae, mse) tuple.
print("Final Results:", result)
