<a href="https://colab.research.google.com/github/hentzrafael/tcc2-carbon-estimator/blob/main/carbon_unet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Carbon estimation with Deep Learning


## Dependencies

In [1]:
pip install rasterio mlflow tensorboard segmentation_models_pytorch

Collecting rasterio
  Downloading rasterio-1.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting mlflow
  Downloading mlflow-3.6.0-py3-none-any.whl.metadata (31 kB)
Collecting segmentation_models_pytorch
  Downloading segmentation_models_pytorch-0.5.0-py3-none-any.whl.metadata (17 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting mlflow-skinny==3.6.0 (from mlflow)
  Downloading mlflow_skinny-3.6.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.6.0 (from mlflow)
  Downloading mlflow_tracing-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting docker<8,>=4.0.0 (

## Constants

In [2]:
input_file =  '/content/drive/MyDrive/TCC/data/reprojected.tif'
target_file = '/content/drive/MyDrive/TCC/data/ceda2022Amazon.tif'
patch_size = 1024
stride = 128
batch_size = 16
epochs = 50
SELECTED_MODEL = 'segformer'

In [3]:
import torch
torch.cuda.empty_cache()
import mlflow
import mlflow.pytorch
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
import torchvision.utils as vutils
import os
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset
import rasterio
from rasterio.windows import Window
import argparse
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import io
from PIL import Image
from tqdm import tqdm
import segmentation_models_pytorch as smp

## Dataset

In [4]:
class GeoTiffPatchDatasetShuffled(Dataset):
    _cached_inputs = None
    _cached_targets = None
    _cached_meta = None

    def __init__(self, input_path, target_path, patch_size=256, stride=256,
                 split="train", seed=42, transform=None):
        self.patch_size = patch_size
        self.stride = stride
        self.transform = transform
        self.split = split
        self.seed = seed

        # Load or reuse
        if GeoTiffPatchDatasetShuffled._cached_inputs is None:
            with rasterio.open(input_path) as src_in:
                GeoTiffPatchDatasetShuffled._cached_inputs = src_in.read().astype(np.float32)
                GeoTiffPatchDatasetShuffled._cached_meta = (src_in.width, src_in.height, src_in.meta.copy())

            with rasterio.open(target_path) as src_tgt:
                GeoTiffPatchDatasetShuffled._cached_targets = src_tgt.read(1).astype(np.float32)

        self.input_img = GeoTiffPatchDatasetShuffled._cached_inputs
        self.target_img = GeoTiffPatchDatasetShuffled._cached_targets
        self.width, self.height, self.input_meta = GeoTiffPatchDatasetShuffled._cached_meta

        # Precompute only patch coordinates
        coords = [
            (left, top)
            for top in range(0, self.height - patch_size + 1, stride)
            for left in range(0, self.width - patch_size + 1, stride)
        ]

        # Shuffle and split
        rng = np.random.default_rng(seed)
        rng.shuffle(coords)
        n_total = len(coords)
        n_train = int(0.7 * n_total)
        n_val = int(0.15 * n_total)
        if split == "train":
            self.coords = coords[:n_train]
        elif split == "val":
            self.coords = coords[n_train:n_train + n_val]
        elif split == "test":
            self.coords = coords[n_train + n_val:]
        else:
            self.coords = coords

    def __len__(self):
        return len(self.coords)

    def __getitem__(self, idx):
        left, top = self.coords[idx]
        ps = self.patch_size
        input_patch = self.input_img[:, top:top+ps, left:left+ps]
        target_patch = self.target_img[top:top+ps, left:left+ps]

        is_valid = (~np.isnan(target_patch)) & (target_patch > 0)
        mask = is_valid.astype(np.float32)

        # # 2. Impute Invalid Values: Replace NaNs and all non-positive values with 0.
        # #    This makes the data safe for log1p.
        target_patch[~is_valid] = 0.0
        # # Optional: Use np.nan_to_num(target_patch, nan=0.0) just to be absolutely sure.

        # # 3. Apply the Safe Log Transformation
        # # target_patch now contains only values >= 0, making log1p safe.
        # target_patch = np.log1p(target_patch)

        # Normalize inline
        input_patch = input_patch.copy()

        min_val = -3.0 # Assumindo que -3 √© o pior outlier
        max_val = 1.0

        input_patch[3:] = (input_patch[3:] - min_val) / (max_val - min_val)
        input_patch = np.clip(input_patch, 0.0, 1.0)


        # # Mask
        # mask = np.ones_like(target_patch, dtype=np.float32)
        # target_patch = np.where(mask, target_patch, 0.0)


        input_tensor = torch.from_numpy(input_patch)
        target_tensor = torch.from_numpy(target_patch).float().unsqueeze(0)
        mask_tensor = torch.from_numpy(mask).unsqueeze(0)

        if self.transform:
            input_tensor, target_tensor, mask_tensor = self.transform(
                input_tensor, target_tensor, mask_tensor
            )

        return input_tensor, target_tensor, mask_tensor, torch.tensor((left, top))


## Utils Functions

In [5]:
# -----------------------
# Loss and training functions
# -----------------------
def masked_mse_loss(pred, target, mask):
    # 1. Calcule a diferen√ßa (residual)
    diff = (pred - target)

    # 2. **CRITICAL: Eleve a diferen√ßa ao quadrado** (Erro Quadr√°tico)
    squared_error = diff * diff

    # 3. Aplique a m√°scara ao erro quadr√°tico
    masked_squared_error = squared_error * mask

    # 4. Garanta a divis√£o segura (para evitar NaN se o mask.sum() for 0)
    denominator = torch.sum(mask)

    # 5. Retorne a M√©dia do Erro Quadr√°tico
    # Usamos torch.clamp para garantir que o denominador seja sempre >= 1e-8,
    # prevenindo a divis√£o por zero.
    return torch.sum(masked_squared_error) / torch.clamp(denominator, min=1e-8)


def masked_smooth_l1_loss(pred, target, mask):
    criterion = nn.SmoothL1Loss(reduction='none')
    diff = criterion(pred, target)
    return torch.sum(diff * mask) / torch.sum(mask)

import torch
import torch.nn as nn
import torch.nn.functional as F

def combined_structural_loss(pred, target, mask, lambda_grad=0.005):
    """
    Calcula uma Loss Combinada: Smooth L1 Loss (Conte√∫do) + Gradient Loss (Estrutura).

    Par√¢metros:
        pred (Tensor): Predi√ß√£o do modelo (carbono).
        target (Tensor): Alvo de carbono (Ground Truth).
        mask (Tensor): M√°scara de pixels v√°lidos (1) e inv√°lidos (0).
        lambda_grad (float): Peso dado √† Gradient Loss. Comece com 0.1 ou 0.05.

    Retorna:
        Tensor: O valor total da loss combinada.
    """

    # --- 1. LOSS DE CONTE√öDO (Smooth L1) ---
    content_criterion = nn.SmoothL1Loss(reduction='none')
    content_loss_per_pixel = content_criterion(pred, target)

    sum_content_loss = torch.sum(content_loss_per_pixel * mask)
    denominator = torch.sum(mask)

    # Garantia de divis√£o segura para evitar NaN
    content_loss = sum_content_loss / torch.clamp(denominator, min=1e-8)

    # ----------------------------------------------------
    # --- 2. LOSS ESTRUTURAL (Gradient Loss - MSE sobre a Magnitude do Gradiente) ---

    # Define os kernels Sobel (usados para encontrar bordas/gradientes)
    sobel_x = torch.tensor([[[[-1., 0., 1.], [-2., 0., 2.], [-1., 0., 1.]]]],
                           dtype=torch.float32,
                           device=pred.device)
    sobel_y = torch.tensor([[[[-1., -2., -1.], [0., 0., 0.], [1., 2., 1.]]]],
                           dtype=torch.float32,
                           device=pred.device)

    # Calcula o gradiente em X e Y para a predi√ß√£o e o alvo
    grad_x_pred = F.conv2d(pred, sobel_x, padding=1)
    grad_y_pred = F.conv2d(pred, sobel_y, padding=1)

    grad_x_target = F.conv2d(target, sobel_x, padding=1)
    grad_y_target = F.conv2d(target, sobel_y, padding=1)

    # Calcula a magnitude do gradiente (aproxima√ß√£o da intensidade da borda)
    grad_pred_magnitude = torch.sqrt(grad_x_pred**2 + grad_y_pred**2)
    grad_target_magnitude = torch.sqrt(grad_x_target**2 + grad_y_target**2)

    # Calcula o Erro Quadr√°tico M√©dio (MSE) entre as magnitudes dos gradientes
    grad_diff = (grad_pred_magnitude - grad_target_magnitude)
    grad_mse_per_pixel = grad_diff**2

    # Aplica a m√°scara (o MSE entre gradientes deve ser minimizado apenas onde h√° dados v√°lidos)
    sum_grad_loss = torch.sum(grad_mse_per_pixel * mask)
    gradient_loss = sum_grad_loss / torch.clamp(denominator, min=1e-8)

    # --- 3. COMBINA√á√ÉO ---
    total_loss = content_loss + lambda_grad * gradient_loss

    return total_loss

def train_one_epoch(model, loader, optimizer, device):
    model.train()
    total_loss = 0.0
    for inputs, targets, masks, _ in tqdm(loader):
        inputs, targets, masks = inputs.to(device), targets.to(device), masks.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = masked_smooth_l1_loss(outputs, targets, masks)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * inputs.size(0)
    return total_loss / len(loader.dataset)


def validate_one_epoch(model, loader, device):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for inputs, targets, masks, _ in tqdm(loader):
            inputs, targets, masks = inputs.to(device), targets.to(device), masks.to(device)
            outputs = model(inputs)
            loss = masked_smooth_l1_loss(outputs, targets, masks)
            total_loss += loss.item() * inputs.size(0)
    return total_loss / len(loader.dataset)



def figure_to_array(fig):
    """Convert a matplotlib figure to a NumPy RGB array."""
    buf = io.BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
    buf.seek(0)
    img = Image.open(buf).convert("RGB")
    return np.array(img)

def visualize_sample(inputs, targets, preds, step, max_samples=3):
    """Save sample patches (input RGB, target, prediction)"""
    os.makedirs("samples", exist_ok=True)
    samples_logged = 0

    for i in range(min(max_samples, inputs.size(0))):
        inp = inputs[i, :3].cpu().numpy().transpose(1, 2, 0)  # RGB bands
        inp = (inp - inp.min()) / (inp.max() - inp.min() + 1e-8)
        tgt = targets[i, 0].cpu().numpy()
        pred = preds[i, 0].cpu().numpy()

        fig, axs = plt.subplots(1, 3, figsize=(9, 3))
        axs[0].imshow(inp)
        axs[0].set_title("Input RGB")
        axs[1].imshow(tgt, cmap="viridis")
        axs[1].set_title("Target")
        axs[2].imshow(pred, cmap="viridis")
        axs[2].set_title("Prediction")
        for ax in axs: ax.axis("off")

        fig.tight_layout()
        path = f"samples/sample_{step}_idx{i}.png"
        plt.savefig(path)
        plt.close(fig)

        img_array = figure_to_array(fig)
        # Log image to TensorBoard
        writer.add_image(f"Samples/Epoch_{step}_Sample_{i}", img_array.transpose(2, 0, 1), global_step=step)

        # Also store the sample in MLflow
        mlflow.log_image(Image.fromarray(img_array), f"samples/Epoch_{step}.png")
        samples_logged += 1

    print(f"Logged {samples_logged} sample images for epoch {step}")

def log_alignment_check(dataset, writer=None, step=0, mlflow_log=True):
    """
    Logs an overlay of input RGB and target mask to TensorBoard and MLflow.
    Helps visually check if patches are spatially aligned.
    """
    idx = np.random.randint(0, len(dataset))
    input_patch, target_patch, _, __ = dataset[idx]

    # Convert tensors if necessary
    if torch.is_tensor(input_patch):
        input_patch = input_patch.cpu().numpy()
    if torch.is_tensor(target_patch):
        target_patch = target_patch.cpu().numpy()

    # Prepare RGB (only first 3 bands)
    rgb = np.clip(input_patch[:3].transpose(1, 2, 0), 0, 1)

    # Handle single-channel target (1, H, W) or (H, W)
    if target_patch.ndim == 3 and target_patch.shape[0] == 1:
        target_patch = target_patch[0]
    elif target_patch.ndim == 3:
        target_patch = target_patch[0]

    # Normalize target for visualization
    tgt_norm = (target_patch - np.nanmin(target_patch)) / (np.nanmax(target_patch) - np.nanmin(target_patch) + 1e-8)

    # Create figure
    fig, ax = plt.subplots(1, 3, figsize=(12, 4))
    ax[0].imshow(rgb)
    ax[0].set_title("Input RGB")
    ax[1].imshow(tgt_norm, cmap="viridis")
    ax[1].set_title("Target")
    ax[2].imshow(rgb)
    ax[2].imshow(tgt_norm, cmap="viridis", alpha=0.4)
    ax[2].set_title("Overlay")
    for a in ax:
        a.axis("off")
    plt.tight_layout()

    # Log to TensorBoard
    if writer:
        writer.add_figure("Alignment_Check", fig, global_step=step)

    # Log to MLflow
    if mlflow_log:
        import mlflow
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
        buf.seek(0)
        pil_img = Image.open(buf)
        mlflow.log_image(pil_img, artifact_file="alignment_check.png")

    plt.close(fig)




# -----------------------
# Main
# -----------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")



Using device: cuda


## Model definitions

In [6]:
MODELS = {
    'unetplus': smp.UnetPlusPlus(
      encoder_name="resnet152",        # backbone
      encoder_weights="imagenet",            # set to "imagenet" if you want pretrained weights
      in_channels=5,                   # your dataset has 5 input channels
      classes=1,
    ),
    'dpt': smp.DPT(
        encoder_name='tu-maxvit_large_tf_512.in21k_ft_in1k',
        in_channels=5,
        classes=1,
    ),
    'segformer': smp.Segformer(
        encoder_name="mit_b5",
        encoder_weights="imagenet",
        in_channels=5,
        classes=1,
    ),
    'unet': smp.Unet(
      encoder_name="resnet152",        # backbone
      encoder_weights="imagenet",            # set to "imagenet" if you want pretrained weights
      in_channels=5,                   # your dataset has 5 input channels
      classes=1,
    ),
}
model = MODELS[SELECTED_MODEL] # Change this to control which model to use
model = model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/850M [00:00<?, ?B/s]



config.json:   0%|          | 0.00/135 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

## Dataset loading in memory

In [7]:
# Dataset and loader
train_dataset = GeoTiffPatchDatasetShuffled(input_file, target_file, split='train')
val_dataset = GeoTiffPatchDatasetShuffled(input_file, target_file, split='val')
test_dataset = GeoTiffPatchDatasetShuffled(input_file, target_file, split='test')

## Experiment setup

In [8]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)


# --- Setup experiment tracking ---
experiment_name = "Carbon_Tracking"
mlflow.set_experiment(experiment_name)

run_name = f"carbon_run_{SELECTED_MODEL}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
log_dir = os.path.join("/content/drive/MyDrive/TCC/runs", run_name)
writer = SummaryWriter(log_dir=log_dir)

  return FileStore(store_uri, store_uri)
2025/11/15 03:21:42 INFO mlflow.tracking.fluent: Experiment with name 'Carbon_Tracking' does not exist. Creating a new experiment.


In [None]:
with mlflow.start_run(run_name=run_name):
    mlflow.log_param("patch_size", patch_size)
    mlflow.log_param("stride", stride)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("learning_rate", 1e-4)
    log_alignment_check(train_dataset, writer, step=0, mlflow_log=True)
    # mlflow.log_param("model", "UNet_5_to_1")


    best_val_loss = float("inf")

    mlflow.log_param("model", SELECTED_MODEL)

    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}/{epochs}")
        train_loss = train_one_epoch(model, train_loader, optimizer, device)
        print(f"Train Loss: {train_loss:.6f}")

        val_loss = validate_one_epoch(model, val_loader, device)
        print(f"Validation Loss: {val_loss:.6f}")

        writer.add_scalar("Loss/train", train_loss, epoch)
        writer.add_scalar("Loss/val", val_loss, epoch)
        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("val_loss", val_loss, step=epoch)

        if epoch % 5 == 0 or epoch == epochs:  # every 5 epochs or last
            model.eval()
            with torch.no_grad():
                val_inputs, val_targets, val_masks, _ = next(iter(val_loader))
                val_inputs, val_targets = val_inputs.to(device), val_targets.to(device)
                preds = model(val_inputs)
                visualize_sample(val_inputs, val_targets, preds, epoch)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_path = f"/content/drive/MyDrive/TCC/{SELECTED_MODEL}.pt"
            torch.save(model.state_dict(), best_model_path)
            example = torch.randn(1, 5, patch_size, patch_size).cpu().numpy()
            mlflow.pytorch.log_model(model, name="model",input_example=example)
            mlflow.log_artifact(best_model_path)
            print(f"‚úÖ Saved new best model: val_loss={best_val_loss:.6f}")

    mlflow.log_metric("best_val_loss", best_val_loss)

    print("Training complete.")
    print(f"Best validation loss: {best_val_loss:.6f}")

writer.close()

Epoch 1/50


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 560/560 [03:34<00:00,  2.61it/s]


Train Loss: 221.090565


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 120/120 [00:16<00:00,  7.21it/s]


Validation Loss: 216.618419


  "inputs": [
    [
      [
        [
          .... Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: Input type (double) and bias type (float) should be the same


‚úÖ Saved new best model: val_loss=216.618419
Epoch 2/50


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 560/560 [03:29<00:00,  2.68it/s]


Train Loss: 208.063626


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 120/120 [00:15<00:00,  7.87it/s]


Validation Loss: 202.643257


  "inputs": [
    [
      [
        [
          .... Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: Input type (double) and bias type (float) should be the same


‚úÖ Saved new best model: val_loss=202.643257
Epoch 3/50


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 560/560 [03:28<00:00,  2.68it/s]


Train Loss: 192.837313


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 120/120 [00:15<00:00,  7.84it/s]


Validation Loss: 189.601349


  "inputs": [
    [
      [
        [
          .... Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: Input type (double) and bias type (float) should be the same


‚úÖ Saved new best model: val_loss=189.601349
Epoch 4/50


  1%|          | 5/560 [00:02<03:49,  2.42it/s]

In [None]:
import torch
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from torch.utils.tensorboard import SummaryWriter
import mlflow

def evaluate_model(model, test_loader, device, writer=None, step=None):
    """
    Evaluate a trained model on the test dataset, logging metrics to TensorBoard and MLflow.
    """
    model.eval()
    preds, targets, masks = [], [], []

    with torch.no_grad():
        for batch in test_loader:
            inputs = batch[0].to(device, non_blocking=True)
            y_true = batch[1].to(device, non_blocking=True)
            mask_batch = batch[2].to(device, non_blocking=True)

            outputs = model(inputs)
            preds.append(outputs.detach().cpu().numpy())
            targets.append(y_true.detach().cpu().numpy())
            masks.append(mask_batch.detach().cpu().numpy())

    # Stack all predictions and targets
    preds = np.concatenate(preds, axis=0).flatten()
    targets = np.concatenate(targets, axis=0).flatten()
    masks = np.concatenate(masks, axis=0).flatten()

    valid_indices = masks == 1
    preds_valid = preds[valid_indices]
    targets_valid = targets[valid_indices]

    # Compute metrics
    # targets_valid = np.expm1(targets_valid)
    # preds_valid = np.expm1(preds_valid)

    mse = mean_squared_error(targets_valid, preds_valid)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(targets_valid, preds_valid)
    r2 = r2_score(targets_valid, preds_valid)

    # Log metrics
    print(f"\n--- Test Metrics ---")
    print(f"RMSE: {rmse:.6f}")
    print(f"MAE : {mae:.6f}")
    print(f"R¬≤  : {r2:.6f}")

    # ‚úÖ TensorBoard logging
    if writer is not None:
        writer.add_scalar('Test/RMSE', rmse, step or 0)
        writer.add_scalar('Test/MAE', mae, step or 0)
        writer.add_scalar('Test/R2', r2, step or 0)
        writer.flush()

    # ‚úÖ MLflow logging
    mlflow.log_metric("test_rmse", rmse)
    mlflow.log_metric("test_mae", mae)
    mlflow.log_metric("test_r2", r2)

    return rmse, mae, r2

rmse, mae, r2 = evaluate_model(model, test_loader, device, writer)
print("Evaluation completed.")


In [None]:
mlflow.end_run()

In [None]:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params}")

Trainable parameters: 67163153


In [None]:
def predict_geotiff(model, input_file, target_file, output_file, patch_size, stride, batch_size, device, run_name="EvaluationRun"):
    # --- Start MLflow run ---
    with mlflow.start_run(run_name=run_name):
        mlflow.log_param("patch_size", patch_size)
        mlflow.log_param("stride", stride)
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("model_checkpoint", os.path.basename(output_file))

        # Create dataset for inference (no train/val split needed)
        inference_dataset = GeoTiffPatchDatasetShuffled(
            input_file, target_file,
            patch_size=patch_size, stride=stride, split='all' # Use split='all' for inference
        )
        inference_loader = DataLoader(inference_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)


        # Prepare output array
        output_arr = np.zeros((inference_dataset.height, inference_dataset.width), dtype=np.float32)
        count_arr = np.zeros((inference_dataset.height, inference_dataset.width), dtype=np.int32) # To handle overlaps

        model.eval()
        mae_list, rmse_list = [], []

        with torch.no_grad():
            for inputs, targets, masks, coords in tqdm(inference_loader, desc="Predicting patches"):
                inputs, targets = inputs.to(device), targets.to(device)
                preds = model(inputs)

                outputs_np = preds.squeeze(1).cpu().numpy()
                targets_np = targets.squeeze(1).cpu().numpy()

                # Compute metrics for this batch
                # Apply mask before computing metrics
                masked_outputs = outputs_np * masks.squeeze(1).cpu().numpy()
                masked_targets = targets_np * masks.squeeze(1).cpu().numpy()

                mae = np.sum(np.abs(masked_outputs - masked_targets)) / np.sum(masks.cpu().numpy())
                rmse = np.sqrt(np.sum((masked_outputs - masked_targets) ** 2) / np.sum(masks.cpu().numpy()))

                mae_list.append(mae)
                rmse_list.append(rmse)


                # Merge patch predictions into the mosaic
                for i in range(outputs_np.shape[0]):
                    left, top = coords[i][0].item(), coords[i][1].item()
                    output_arr[top : top + patch_size, left : left + patch_size] += outputs_np[i]
                    count_arr[top : top + patch_size, left : left + patch_size] += 1

        # Final averaged map
        output_arr /= np.maximum(count_arr, 1)

        # --- Compute global metrics ---
        mae_global = float(np.mean(mae_list))
        rmse_global = float(np.mean(rmse_list))
        mlflow.log_metric("MAE", mae_global)
        mlflow.log_metric("RMSE", rmse_global)

        print(f"‚úÖ MAE: {mae_global:.4f}, RMSE: {rmse_global:.4f}")

        # --- Save predicted GeoTIFF ---
        output_meta = inference_dataset.input_meta.copy()
        output_meta.update({'count': 1, 'dtype': 'float32'})

        with rasterio.open(output_file, 'w', **output_meta) as dst:
            dst.write(output_arr, 1)

        print(f"Prediction saved to {output_file}")
        mlflow.log_artifact(output_file)

        # --- Visualization ---
        fig, axes = plt.subplots(1, 3, figsize=(15, 5))
        sample_idx = 0
        rgb = inputs[sample_idx, :3].cpu().numpy().transpose(1, 2, 0)
        tgt = targets[sample_idx, 0].cpu().numpy()
        pred = preds[sample_idx, 0].cpu().numpy()

        axes[0].imshow(rgb)
        axes[0].set_title("Input RGB")
        axes[1].imshow(tgt, cmap='viridis')
        axes[1].set_title("Target")
        axes[2].imshow(pred, cmap='viridis')
        axes[2].set_title("Prediction")

        plt.tight_layout()
        fig_path = "evaluation_sample.png"
        plt.savefig(fig_path, dpi=300)
        mlflow.log_artifact(fig_path)
        plt.close(fig)

        print("üßæ Evaluation artifacts logged to MLflow.")

In [None]:
import os
import numpy as np
import rasterio
from rasterio.windows import Window
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import segmentation_models_pytorch as smp

# Main prediction function
# def predict_geotiff(model, input_file, target_file, output_file, patch_size, stride, batch_size, device, name):
#     # Create dataset for inference (no train/val split needed)
#     inference_dataset = GeoTiffPatchDatasetShuffled(input_file, target_file, patch_size=patch_size, stride=stride, split='all')
#     inference_loader = DataLoader(inference_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

#     # Prepare output array
#     output_arr = np.zeros((inference_dataset.height, inference_dataset.width), dtype=np.float32)
#     count_arr = np.zeros((inference_dataset.height, inference_dataset.width), dtype=np.int32) # To handle overlaps

#     model.eval()
#     with torch.no_grad():
#         for inputs, targets, masks, coords in tqdm(inference_loader, desc="Predicting patches"):
#             inputs = inputs.to(device)
#             outputs = model(inputs)

#             # Move predictions to CPU and convert to numpy
#             outputs_np = outputs.squeeze(1).cpu().numpy() # Remove channel dimension

#             # Place predictions into the output array
#             for i in range(outputs_np.shape[0]):
#                 left, top = coords[0][i], coords[1][i] # Access coordinates correctly
#                 output_arr[top : top + patch_size, left : left + patch_size] += outputs_np[i]
#                 count_arr[top : top + patch_size, left : left + patch_size] += 1

#     # Average overlapping predictions
#     output_arr /= np.maximum(count_arr, 1) # Avoid division by zero

#     # Save the output GeoTIFF
#     output_meta = inference_dataset.input_meta.copy()
#     output_meta.update({
#         'count': 1,  # Single band output
#         'dtype': 'float32'
#     })

#     with rasterio.open(output_file, 'w', **output_meta) as dst:
#         dst.write(output_arr, 1)

#     print(f"Prediction saved to {output_file}")


# --- Main Execution ---
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Load the trained model
    # model = smp.DeepLabV3Plus(
    #   encoder_name="resnet101",        # backbone
    #   encoder_weights="imagenet",            # set to "imagenet" if you want pretrained weights
    #   in_channels=5,                   # your dataset has 5 input channels
    #   classes=1,                       # single regression/segmentation output
    # )
    # model = model.to(device)
    model.load_state_dict(torch.load(f'/content/drive/MyDrive/TCC/{SELECTED_MODEL}.pt'))
    print("Model loaded successfully.")

    input_file =  '/content/drive/MyDrive/TCC/data/input2021.tif'

    # Define output file path
    output_file = f'/content/drive/MyDrive/TCC/data/amazonPrediction2021{SELECTED_MODEL}.tif'

    # Run prediction
    predict_geotiff(model, input_file, target_file, output_file, patch_size, stride, batch_size, device,'Biomass_Prediction_Evaluation')

Using device: cuda
Model loaded successfully.


Predicting patches:   3%|‚ñé         | 174/6243 [02:33<1:25:26,  1.18it/s]

In [None]:
!cp -r ./mlruns/* /content/drive/MyDrive/TCC/mlruns/segformer