# Model Training: Landcover.ai Dataset

Train semantic segmentation models on the Landcover.ai v1 dataset.

**Dataset**: Landcover.ai v1
- 41 high-resolution GeoTIFFs tiled to 512Ã—512
- 5 classes: Background, Building, Woodland, Water, Road
- Train/Val/Test splits provided

**Models**: DeepLab, ResNet-UNet, SegFormer

**Prerequisites**:
- Run `01_preprocess_landcover.ipynb` first to generate tiles and class weights

## 1. Setup and Configuration

In [1]:
import sys
import os
import json
import numpy as np
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image
import torchvision.transforms.v2 as v2
from torchvision import tv_tensors

# ============================================================
# CONFIGURATION - Edit these paths and hyperparameters
# ============================================================

# Paths
DATA_ROOT = Path('../data/landcover.ai.v1')
WEIGHTS_DIR = Path('../weights')
PLOTS_DIR = Path('../plots/landcover')
EXPERIMENTS_DIR = Path('./experiments')

# Ensure directories exist
WEIGHTS_DIR.mkdir(parents=True, exist_ok=True)
PLOTS_DIR.mkdir(parents=True, exist_ok=True)
EXPERIMENTS_DIR.mkdir(parents=True, exist_ok=True)

# Training hyperparameters
BATCH_SIZE = 16
NUM_EPOCHS = 50
INIT_LR = 5e-5
WEIGHT_DECAY = 0.01
NUM_WORKERS = 0  # Set >0 for multiprocessing (may cause issues on Windows)

# Model selection: 'deeplab', 'resnet_unet', 'segformer'
MODEL_NAME = 'deeplab'
EXPERIMENT_NAME = f'{MODEL_NAME}_landcover_v1'

# Class definitions
CLASS_NAMES = ['background', 'building', 'woodland', 'water', 'road']
NUM_CLASSES = len(CLASS_NAMES)

# Device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"Device: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
print(f"\nUsing device: {device}")
print(f"\nData root: {DATA_ROOT.absolute()}")
print(f"Model: {MODEL_NAME}")
print(f"Experiment: {EXPERIMENT_NAME}")

PyTorch: 2.10.0+cu126
CUDA available: True
Device: NVIDIA GeForce RTX 4060
Memory: 8.0 GB

Using device: cuda

Data root: c:\vscode workspace\ml-mangrove\DroneClassification\human_infra\03_model_training\..\data\landcover.ai.v1
Model: deeplab
Experiment: deeplab_landcover_v1


## 2. Data Augmentation

In [2]:
class Rotate90Only(v2.Transform):
    """Random 90-degree rotations (0, 90, 180, 270 degrees)."""
    
    def __init__(self):
        super().__init__()

    def _transform_image(self, img: torch.Tensor, k):
        if k == 0:
            return img
        hdim, wdim = -2, -1
        if k == 1:   # 90 degrees
            return img.transpose(hdim, wdim).flip(wdim)
        elif k == 2: # 180 degrees
            return img.flip(hdim).flip(wdim)
        elif k == 3: # 270 degrees
            return img.transpose(hdim, wdim).flip(hdim)
        return img

    def forward(self, img: torch.Tensor, mask=None):
        k = random.randint(0, 3)
        img = self._transform_image(img, k)
        if mask is not None:
            mask = self._transform_image(mask, k)
            return img, mask
        return img


# Training augmentation pipeline
train_augmentation = v2.Compose([
    v2.ToImage(),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomVerticalFlip(p=0.5),
    v2.RandomResizedCrop(size=512, scale=(0.8, 2.0), ratio=(0.9, 1.1)),
    Rotate90Only(),
])

print("Augmentation pipeline:")
print("  - Random horizontal flip (p=0.5)")
print("  - Random vertical flip (p=0.5)")
print("  - Random resized crop (scale 0.8-2.0)")
print("  - Random 90-degree rotation")

Augmentation pipeline:
  - Random horizontal flip (p=0.5)
  - Random vertical flip (p=0.5)
  - Random resized crop (scale 0.8-2.0)
  - Random 90-degree rotation


## 3. Dataset Class

In [3]:
class LandcoverAIDataset(Dataset):
    """
    Landcover.ai dataset loader for 512x512 tiles.
    
    Args:
        root_dir: Path to dataset root containing 'output' folder
        split_file: Path to text file listing tile names
        augment: Whether to apply augmentation (True for training)
    """
    
    # ImageNet normalization
    MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    STD = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    
    def __init__(self, root_dir, split_file, augment=False):
        self.output_dir = Path(root_dir) / 'output'
        self.augment = augment
        
        # Load tile names from split file
        with open(split_file, 'r') as f:
            self.tile_names = [line.strip() for line in f if line.strip()]
        
        print(f"Loaded {len(self.tile_names)} tiles from {Path(split_file).name}")
    
    def __len__(self):
        return len(self.tile_names)
    
    def __getitem__(self, idx):
        tile_name = self.tile_names[idx]
        image_path = self.output_dir / f"{tile_name}.jpg"
        mask_path = self.output_dir / f"{tile_name}_m.png"
        
        # Load image
        image = np.array(Image.open(image_path))
        if image.ndim == 3 and image.shape[2] == 4:
            image = image[:, :, :3]  # Drop alpha channel
        
        # Load mask
        mask = np.array(Image.open(mask_path))
        if mask.ndim == 3:
            mask = mask[:, :, 0]
        mask = mask.astype(np.uint8)
        
        # Convert to PIL for transforms
        image_pil = Image.fromarray(image)
        mask_pil = Image.fromarray(mask)
        
        # Apply augmentation or just convert to tensor
        if self.augment:
            image_t, mask_t = train_augmentation(image_pil, mask_pil)
        else:
            image_t = v2.ToImage()(image_pil)
            mask_t = v2.ToImage()(mask_pil)
        
        # Normalize image
        image_t = image_t.float()
        if image_t.max() > 1.5:
            image_t = image_t / 255.0
        image_t = (image_t - self.MEAN) / self.STD
        
        # Process mask
        mask_t = mask_t.long()
        if mask_t.dim() == 3 and mask_t.size(0) == 1:
            mask_t = mask_t.squeeze(0)
        
        return image_t, mask_t


print("LandcoverAIDataset class defined")

LandcoverAIDataset class defined


## 4. Load Datasets

In [4]:
print("=== Loading Datasets ===")
print()

# Verify paths exist
print(f"Dataset root: {DATA_ROOT}")
print(f"  output/ exists: {(DATA_ROOT / 'output').exists()}")
print(f"  train.txt exists: {(DATA_ROOT / 'train.txt').exists()}")
print(f"  val.txt exists: {(DATA_ROOT / 'val.txt').exists()}")
print(f"  test.txt exists: {(DATA_ROOT / 'test.txt').exists()}")
print()

if not (DATA_ROOT / 'output').exists():
    print("ERROR: Output directory not found!")
    print("Please run the preprocessing notebook first to generate tiles.")
else:
    # Load datasets
    train_dataset = LandcoverAIDataset(DATA_ROOT, DATA_ROOT / 'train.txt', augment=True)
    val_dataset = LandcoverAIDataset(DATA_ROOT, DATA_ROOT / 'val.txt', augment=False)
    test_dataset = LandcoverAIDataset(DATA_ROOT, DATA_ROOT / 'test.txt', augment=False)
    
    print(f"\nDataset sizes:")
    print(f"  Train: {len(train_dataset):,} tiles (with augmentation)")
    print(f"  Val:   {len(val_dataset):,} tiles")
    print(f"  Test:  {len(test_dataset):,} tiles")
    
    # Verify sample
    img, mask = train_dataset[0]
    print(f"\nSample shapes: Image {img.shape}, Mask {mask.shape}")

=== Loading Datasets ===

Dataset root: ..\data\landcover.ai.v1
  output/ exists: True
  train.txt exists: True
  val.txt exists: True
  test.txt exists: True

Loaded 7470 tiles from train.txt
Loaded 1602 tiles from val.txt
Loaded 1602 tiles from test.txt

Dataset sizes:
  Train: 7,470 tiles (with augmentation)
  Val:   1,602 tiles
  Test:  1,602 tiles

Sample shapes: Image torch.Size([3, 512, 512]), Mask torch.Size([512, 512])


## 5. Create DataLoaders

In [5]:
print("=== Creating DataLoaders ===")
print()

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

print(f"Batch size: {BATCH_SIZE}")
print(f"Num workers: {NUM_WORKERS}")
print(f"\nBatches per epoch:")
print(f"  Train: {len(train_loader):,}")
print(f"  Val:   {len(val_loader):,}")
print(f"  Test:  {len(test_loader):,}")

=== Creating DataLoaders ===

Batch size: 16
Num workers: 0

Batches per epoch:
  Train: 467
  Val:   101
  Test:  101


## 6. Verify Data

In [6]:
print("=== Verifying Data ===")
print()

batch = next(iter(train_loader))
x, y = batch

print(f"Image batch: {x.shape}, dtype={x.dtype}")
print(f"Mask batch:  {y.shape}, dtype={y.dtype}")
print(f"\nImage range: [{x.min():.3f}, {x.max():.3f}]")
print(f"Mask values: {sorted(torch.unique(y).tolist())}")

# Check for NaN/Inf
has_nan = torch.isnan(x).any()
has_inf = torch.isinf(x).any()

if not has_nan and not has_inf:
    print("\nData quality check passed")
else:
    print(f"\nWARNING: NaN={has_nan}, Inf={has_inf}")

=== Verifying Data ===

Image batch: torch.Size([16, 3, 512, 512]), dtype=torch.float32
Mask batch:  torch.Size([16, 512, 512]), dtype=torch.int64

Image range: [-2.118, 1.872]
Mask values: [0, 1, 2, 3, 4]

Data quality check passed


## 7. Load Class Weights

In [7]:
print("=== Loading Class Weights ===")
print()

weights_file = DATA_ROOT / 'class_weights.json'

if weights_file.exists():
    with open(weights_file) as f:
        weights_dict = json.load(f)
    
    class_frequencies = torch.tensor(weights_dict['class_frequencies'])
    class_weights = torch.tensor(weights_dict['weights_inverse_sqrt']).to(device)
    
    print(f"Loaded from: {weights_file.name}")
    print(f"\nClass frequencies:")
    for i, name in enumerate(CLASS_NAMES):
        print(f"  {name:12s}: {class_frequencies[i]:.4f}")
    print(f"\nClass weights (inverse sqrt):")
    for i, name in enumerate(CLASS_NAMES):
        print(f"  {name:12s}: {class_weights[i]:.4f}")
else:
    # Fallback: use hardcoded weights from paper
    print("class_weights.json not found, using default values")
    class_dist = torch.tensor([0.579, 0.015, 0.331, 0.065, 0.02])
    class_weights = (1.0 / torch.sqrt(class_dist)).to(device)
    
    print(f"\nDefault class weights: {class_weights.tolist()}")

=== Loading Class Weights ===

Loaded from: class_weights.json

Class frequencies:
  background  : 0.5729
  building    : 0.0087
  woodland    : 0.3298
  water       : 0.0718
  road        : 0.0167

Class weights (inverse sqrt):
  background  : 0.2618
  building    : 2.1208
  woodland    : 0.3450
  water       : 0.7393
  road        : 1.5330


## 8. Import Models and Training Utilities

In [9]:
# Add project root to path
sys.path.insert(0, '../../')

from models import DeepLab, ResNet_UNet, SegFormer, JaccardLoss, DiceLoss
from training_utils import TrainingSession

print("Imported:")
print("  Models: DeepLab, ResNet_UNet, SegFormer")
print("  Losses: JaccardLoss, DiceLoss")
print("  Training: TrainingSession")

Imported:
  Models: DeepLab, ResNet_UNet, SegFormer
  Losses: JaccardLoss, DiceLoss
  Training: TrainingSession


## 9. Initialize Model

In [10]:
print(f"=== Initializing Model: {MODEL_NAME} ===")
print()

if MODEL_NAME == 'deeplab':
    model = DeepLab(
        num_classes=NUM_CLASSES,
        input_image_size=512,
        backbone='resnet50',
        output_stride=4
    ).to(device)
elif MODEL_NAME == 'resnet_unet':
    model = ResNet_UNet(
        num_classes=NUM_CLASSES
    ).to(device)
elif MODEL_NAME == 'segformer':
    model = SegFormer(
        num_classes=NUM_CLASSES
    ).to(device)
else:
    raise ValueError(f"Unknown model: {MODEL_NAME}")

num_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Model: {MODEL_NAME}")
print(f"Total parameters: {num_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Device: {device}")

=== Initializing Model: deeplab ===

Model: deeplab
Total parameters: 41,999,962
Trainable parameters: 41,999,962
Device: cuda


## 10. Setup Loss Function

In [11]:
print("=== Setting Up Loss Function ===")
print()

# JaccardLoss combines CE + IoU + Boundary loss
loss_fn = JaccardLoss(
    num_classes=NUM_CLASSES,
    weight=class_weights,
    alpha=0.3,          # Weight for IoU component
    boundary_weight=0.3  # Weight for boundary loss
)

print("Loss: JaccardLoss (CE + IoU + Boundary)")
print(f"  alpha (IoU weight): 0.3")
print(f"  boundary_weight: 0.3")
print(f"  class_weights: {[f'{w:.2f}' for w in class_weights.tolist()]}")

=== Setting Up Loss Function ===

Loss: JaccardLoss (CE + IoU + Boundary)
  alpha (IoU weight): 0.3
  boundary_weight: 0.3
  class_weights: ['0.26', '2.12', '0.35', '0.74', '1.53']


## 11. Setup Optimizer and Scheduler

In [12]:
print("=== Setting Up Optimizer ===")
print()

steps_per_epoch = len(train_loader)
num_training_steps = NUM_EPOCHS * steps_per_epoch

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=INIT_LR,
    weight_decay=WEIGHT_DECAY,
    betas=(0.9, 0.999)
)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=num_training_steps,
    eta_min=0
)

print(f"Optimizer: AdamW")
print(f"  Initial LR: {INIT_LR}")
print(f"  Weight decay: {WEIGHT_DECAY}")
print(f"\nScheduler: CosineAnnealingLR")
print(f"  Total steps: {num_training_steps:,}")
print(f"  Steps per epoch: {steps_per_epoch}")

=== Setting Up Optimizer ===

Optimizer: AdamW
  Initial LR: 5e-05
  Weight decay: 0.01

Scheduler: CosineAnnealingLR
  Total steps: 23,350
  Steps per epoch: 467


## 12. Create Training Session

In [14]:
print("=== Creating Training Session ===")
print()

trainer = TrainingSession(
    model=model,
    trainLoader=train_loader,
    testLoader=val_loader,
    lossFunc=loss_fn,
    init_lr=INIT_LR,
    num_epochs=NUM_EPOCHS,
    experiment_name=EXPERIMENT_NAME,
    optimizer=optimizer,
    class_names=CLASS_NAMES,
    scheduler=scheduler,
)

print(f"Experiment: {EXPERIMENT_NAME}")
print(f"Epochs: {NUM_EPOCHS}")
print(f"Mixed precision: enabled")

=== Creating Training Session ===

Using CUDA device.
Experiment: deeplab_landcover_v1
Epochs: 50
Mixed precision: enabled


## 13. Train Model

In [15]:
print("=== Starting Training ===")
print()
print(f"Model: {MODEL_NAME}")
print(f"Dataset: Landcover.ai ({len(train_dataset):,} training tiles)")
print(f"Epochs: {NUM_EPOCHS}")
print(f"Batch size: {BATCH_SIZE}")
print()

# Start training
trainer.learn()

2026-02-05 17:20:43,527 - INFO - Starting training: 50 epochs
2026-02-05 17:20:43,528 - INFO - Model parameters: 41,999,962


=== Starting Training ===

Model: deeplab
Dataset: Landcover.ai (7,470 training tiles)
Epochs: 50
Batch size: 16



Epoch 1/50:   0%|          | 2/467 [00:50<3:15:42, 25.25s/it, loss=0.8162, lr=0.000050]


KeyboardInterrupt: 

## 14. Evaluate on Test Set

In [16]:
print("=== Evaluating on Test Set ===")
print()

test_metrics = trainer.evaluate(test_loader)

print(f"\nTest Set Results:")
print(f"  Pixel Accuracy: {test_metrics['Pixel_Accuracy']:.4f}")
print(f"  Mean IoU: {test_metrics['IoU']:.4f}")

# Plot per-class IoU
trainer.plot_metrics("Class IoU", metrics_wanted=["class_ious"])

=== Evaluating on Test Set ===



TypeError: unsupported operand type(s) for +=: 'NoneType' and 'NoneType'

## 15. Save Final Model

In [None]:
print("=== Saving Model ===")
print()

# Save to weights directory
model_path = WEIGHTS_DIR / f'{EXPERIMENT_NAME}_final.pth'
torch.save(model.state_dict(), model_path)
print(f"Saved: {model_path}")

# Also save training config
config = {
    'model_name': MODEL_NAME,
    'num_classes': NUM_CLASSES,
    'class_names': CLASS_NAMES,
    'batch_size': BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'init_lr': INIT_LR,
    'test_pixel_accuracy': test_metrics['Pixel_Accuracy'],
    'test_miou': test_metrics['IoU']
}

config_path = WEIGHTS_DIR / f'{EXPERIMENT_NAME}_config.json'
with open(config_path, 'w') as f:
    json.dump(config, f, indent=2)
print(f"Saved: {config_path}")

## 16. Visualize Predictions

In [None]:
def denormalize(img):
    """Reverse ImageNet normalization."""
    mean = torch.tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)
    return torch.clamp(img * std + mean, 0, 1)


# Class colors for visualization
CLASS_COLORS = {
    0: [0.8, 0.8, 0.8],  # Background - gray
    1: [1.0, 0.0, 0.0],  # Building - red
    2: [0.0, 0.5, 0.0],  # Woodland - dark green
    3: [0.0, 0.0, 1.0],  # Water - blue
    4: [1.0, 1.0, 0.0],  # Road - yellow
}


def visualize_predictions(model, dataset, indices, save_path=None):
    """Visualize model predictions on sample images."""
    model.eval()
    
    fig, axes = plt.subplots(len(indices), 4, figsize=(16, 4*len(indices)))
    fig.suptitle('Landcover.ai Predictions', fontsize=14, fontweight='bold')
    
    with torch.no_grad():
        for row, idx in enumerate(indices):
            img, mask = dataset[idx]
            
            # Get prediction
            pred = model(img.unsqueeze(0).to(device))
            pred_mask = torch.argmax(pred, dim=1).squeeze().cpu().numpy()
            
            # Prepare for display
            img_np = denormalize(img).numpy().transpose(1, 2, 0)
            mask_np = mask.numpy()
            
            # Image
            axes[row, 0].imshow(img_np)
            axes[row, 0].set_title('Image')
            axes[row, 0].axis('off')
            
            # Ground truth
            axes[row, 1].imshow(mask_np, cmap='tab10', vmin=0, vmax=5)
            axes[row, 1].set_title('Ground Truth')
            axes[row, 1].axis('off')
            
            # Prediction
            axes[row, 2].imshow(pred_mask, cmap='tab10', vmin=0, vmax=5)
            axes[row, 2].set_title('Prediction')
            axes[row, 2].axis('off')
            
            # Overlay
            pred_rgb = np.zeros((*pred_mask.shape, 3))
            for class_id, color in CLASS_COLORS.items():
                pred_rgb[pred_mask == class_id] = color
            overlay = 0.6 * img_np + 0.4 * pred_rgb
            axes[row, 3].imshow(np.clip(overlay, 0, 1))
            axes[row, 3].set_title('Overlay')
            axes[row, 3].axis('off')
    
    plt.tight_layout()
    
    if save_path:
        plt.savefig(save_path, dpi=100, bbox_inches='tight')
        print(f"Saved: {save_path}")
    
    plt.show()


# Visualize on test set
test_indices = np.random.choice(len(test_dataset), 4, replace=False).tolist()
print(f"Visualizing test samples: {test_indices}")

visualize_predictions(
    model,
    test_dataset,
    test_indices,
    save_path=PLOTS_DIR / f'{EXPERIMENT_NAME}_predictions.png'
)

## 17. Summary

In [None]:
print("=" * 60)
print("Training Complete")
print("=" * 60)
print()
print(f"Model: {MODEL_NAME}")
print(f"Dataset: Landcover.ai v1")
print(f"Epochs: {NUM_EPOCHS}")
print()
print(f"Test Results:")
print(f"  Pixel Accuracy: {test_metrics['Pixel_Accuracy']:.4f}")
print(f"  Mean IoU: {test_metrics['IoU']:.4f}")
print()
print(f"Saved Files:")
print(f"  Model: {WEIGHTS_DIR / f'{EXPERIMENT_NAME}_final.pth'}")
print(f"  Config: {WEIGHTS_DIR / f'{EXPERIMENT_NAME}_config.json'}")
print(f"  Plots: {PLOTS_DIR}")