 # Landing Strip Detection Training Pipeline



 This notebook implements a training pipeline for detecting landing strips using satellite imagery. The pipeline includes:



 - Loading input landing strip data.

 - Creating input areas around the landing strips.

 - Downloading Sentinel-2 imagery from Google Earth Engine.

 - Preparing a dataset for training.

 - Loading the Geo Foundation Model (GFM) for transfer learning.

 - Setting up a training loop with Weights & Biases (wandb) logging.



 **Note**: Ensure that you have authenticated with Google Earth Engine (GEE) using `ee.Authenticate()` and have initialized it with `ee.Initialize()`. Also, make sure the `secret_runway_detection` package (which provides `train_utils.py`) is importable; the setup cell below adds the parent directory to `sys.path` for this purpose.

# *TODO*
* The maximum value of the model outputs can be rather small (in one case, 0.6244). This leads the binary search to set the threshold lower, predicting all zeroes.
* The fraction of label values exactly equal to 1 is 0.2678 **(!!!)**:
  - `(buffered_labels.float() == 1).float()` → `tensor([0., 0., 0.,  ..., 0., 0., 0.])`
  - `(buffered_labels.float() == 1).float().mean()` → `tensor(0.2678)`

 ## 1. Setup and Imports

In [7]:
import sys
import os
import random
import wandb
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import timm  # PyTorch Image Models library
import logging
from pathlib import Path
import re

# Detect Google Colab by attempting the `google.colab` import. When it
# succeeds: mount Google Drive, copy the project's 'colab-stuff' folder to
# Colab local storage, chdir into /content/colab-stuff/notebooks and log in to
# wandb with the API key stored in Colab user data.
# NOTE(review): the broad `except Exception` also catches failures of the
# mount / chdir / login steps on Colab itself; in that case USING_COLAB ends up
# False even though the import succeeded — confirm this fallback is intended.
try:
    from google.colab import drive, userdata
    drive.mount('/content/drive')
    # Copy the 'Secret Runway Detection Challenge' folder to Colab local storage
    !cp -r '/content/drive/MyDrive/Secret Runway Detection Challenge/colab-stuff/' '/content/'
    # Change the current working directory to the notebooks folder in local storage
    os.chdir('/content/colab-stuff/notebooks')
    wandb.login(key=userdata.get('WANDB_API_KEY'))
    USING_COLAB = True
except Exception as e:
    # Not on Colab (or Colab setup failed): report the reason and run locally.
    print(e)
    USING_COLAB = False

# Add the parent of the working directory to sys.path so that the
# `secret_runway_detection` package can be imported.
sys.path.append(os.path.abspath('..'))

# Import the model factory, dataset classes and RANDOM_SEED from the
# secret_runway_detection package (model, dataset and train_utils modules).
from secret_runway_detection.model import (
    get_multiscale_segmentation_model
)
from secret_runway_detection.dataset import LandingStripDataset, SegmentationTransform
from secret_runway_detection.train_utils import (
    RANDOM_SEED
)

# Local runs only: make the GFM checkout importable and pull in its model builder.
if not USING_COLAB:
    sys.path.append(os.path.abspath('../GFM'))
    from GFM.models import build_model

No module named 'google.colab'




 ## 2. Configuration and Initialization

In [2]:
# %%
import pandas as pd

# Debug switch. When True the notebook runs on the CPU, and the train and
# validation sets are cut down to 10 samples each further below.
DEBUG = True

# Select the compute device: CPU in debug mode or when CUDA is unavailable,
# otherwise the GPU.
if DEBUG or not torch.cuda.is_available():
    device = torch.device('cpu')
else:
    device = torch.device('cuda')
print(f"Using device: {device}")

# Seed every random number generator in use (Python, NumPy, PyTorch) so that
# repeated runs draw the same random numbers.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Logging: INFO for the root logger and for the train_utils logger
# (switch the latter to logging.DEBUG for more detail).
logging.basicConfig(level=logging.INFO)
logging.getLogger('secret_runway_detection.train_utils').setLevel(logging.INFO)

Using device: cpu


In [3]:
# Hyperparameters and run settings; the whole dict is attached to the wandb run.
config = {
    'training_dataset': 'cross',          # selects ../training_data/training_data_<name>
    'train_percentage': 0.8,              # share of strips (and of 'possibly_empty' files) used for training
    'num_epochs': 2 if DEBUG else 50,     # short run while debugging
    'batch_size': 32 if USING_COLAB else 4,
    'lr_head': 0.001,                     # learning rate for the newly added layers
    'lr_backbone': 0.000001,              # learning rate for the backbone parameters
    'lr_step_size': 10,                   # StepLR: scheduler steps between decays
    'lr_gamma': 0.3,                      # StepLR: multiplicative decay factor
    'early_stopping_patience': 3,         # epochs without validation improvement before stopping
}

# Initialize wandb. Debug runs stay local: 'offline' is the documented mode
# name (the former 'dryrun' value is only a legacy alias for it).
wandb.init(project='secret-runway-detection',
           mode='offline' if DEBUG else 'online',
           dir='..',
           tags=[config['training_dataset'],
                 'colab' if USING_COLAB else 'local'],
           job_type='train',
           config=config,
           )

# Fall back to a timestamp-based name when wandb did not assign one.
if not wandb.run.name:
    wandb.run.name = f"Run from {pd.Timestamp.now()}"

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


 ## 5. Load Data into Dataset

In [4]:
# Directory holding the training samples of the configured dataset variant.
train_dir = Path(
    f'../training_data/training_data_{config["training_dataset"]}')

if USING_COLAB:
    # On Colab the data is shipped as "<train_dir>.zip"; unpack it into
    # ../training_data so that train_dir exists afterwards.
    import zipfile
    with zipfile.ZipFile(f"{train_dir}.zip", 'r') as zip_ref:
        zip_ref.extractall("../training_data")

images_dir = train_dir / 'images'
labels_dir = train_dir / 'labels'

# Get all filenames in the images directory.
# os.listdir() returns entries in arbitrary order, so sort them: otherwise the
# seeded shuffles below would start from a machine-dependent order and the
# train/validation split would not be reproducible.
all_filenames = sorted(os.listdir(images_dir))

# Group the files: strip number -> files showing that strip, plus a separate
# list for the 'possibly_empty' files, which carry no strip number.
strip_to_files = {}
possibly_empty_files = []

# Regular expression pattern to match filenames with strip numbers
pattern = re.compile(r'^area_\d+_of_strip_(\d+)\.npy$')

for filename in all_filenames:
    if 'possibly_empty' in filename:
        possibly_empty_files.append(filename)
        continue

    match = pattern.match(filename)
    if match:
        strip_number = int(match.group(1))
        strip_to_files.setdefault(strip_number, []).append(filename)
    else:
        # Neither a strip file nor a 'possibly_empty' file: report and skip it.
        print(f"Filename does not match expected pattern: {filename}")

# Split by strip number rather than by file, so that all areas cut from the
# same strip end up on the same side of the train/validation split.
strip_numbers = list(strip_to_files.keys())

random.seed(RANDOM_SEED)  # Ensure reproducibility
random.shuffle(strip_numbers)

num_strips = len(strip_numbers)
split_index = int(num_strips * config['train_percentage'])

train_strip_numbers = strip_numbers[:split_index]
val_strip_numbers = strip_numbers[split_index:]

# Record the split in the wandb run configuration.
wandb.config.update({
    'num_strips': num_strips,
    'train_strip_numbers': train_strip_numbers,
    'val_strip_numbers': val_strip_numbers,
})

# Collect the files belonging to each side of the strip split.
train_files = [name
               for strip_num in train_strip_numbers
               for name in strip_to_files[strip_num]]
val_files = [name
             for strip_num in val_strip_numbers
             for name in strip_to_files[strip_num]]

# The 'possibly_empty' files are shuffled and split file-by-file with the same
# train percentage.
random.shuffle(possibly_empty_files)

num_possibly_empty = len(possibly_empty_files)
split_index_empty = int(num_possibly_empty * config['train_percentage'])

train_possibly_empty_files = possibly_empty_files[:split_index_empty]
val_possibly_empty_files = possibly_empty_files[split_index_empty:]

train_files.extend(train_possibly_empty_files)
val_files.extend(val_possibly_empty_files)

# Output some information
print(f"Total files: {len(all_filenames)}")
print(f"Total strips: {len(strip_numbers)}")
print(f"Training files: {len(train_files)}")
print(f"Testing files: {len(val_files)}")

# Define your transform if you have one; otherwise, set to None
segmentation_transform = None  # Replace with your actual transform if any

# Training dataset
train_dataset = LandingStripDataset(
    images_dir=images_dir,
    labels_dir=labels_dir,
    file_list=train_files,
    transform=segmentation_transform
)

# Validation dataset
val_dataset = LandingStripDataset(
    images_dir=images_dir,
    labels_dir=labels_dir,
    file_list=val_files,
    transform=segmentation_transform
)

if DEBUG:
    # Debug runs only use the first 10 samples of each dataset.
    train_dataset.samples = train_dataset.samples[:10]
    val_dataset.samples = val_dataset.samples[:10]

# Only the training loader is shuffled; validation keeps a fixed order.
train_dataloader = DataLoader(
    train_dataset, batch_size=config['batch_size'], shuffle=True)
val_dataloader = DataLoader(
    val_dataset, batch_size=config['batch_size'], shuffle=False)

Total files: 2130
Total strips: 113
Training files: 1700
Testing files: 430


 ## 6. Load the Geo Foundation Model (GFM)

In [5]:
! pip install yacs


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [6]:
from secret_runway_detection.model import get_upernet_segmentation_model, get_simple_segmentation_model


# Checkpoint file handed to the model factory (path relative to the working directory).
backbone_model_path = '../simmim_pretrain/gfm.pth'
# Build the segmentation model from that checkpoint.
# NOTE(review): the output recorded for this cell is a RuntimeError raised while
# loading the state_dict into a SwinTransformer (size mismatches for
# relative_position_bias_table and downsample weights), so `model` may not be
# defined when the notebook is run as-is — confirm the backbone configuration.
model = get_simple_segmentation_model(backbone_model_path)
# Alternative model factory, currently disabled:
# model = get_upernet_segmentation_model(backbone_model_path)

  checkpoint = torch.load(model_path, map_location=device)


Sample keys from filtered_state_dict:
mask_token
patch_embed.proj.weight
patch_embed.proj.bias
patch_embed.norm.weight
patch_embed.norm.bias


RuntimeError: Error(s) in loading state_dict for SwinTransformer:
	size mismatch for layers.0.blocks.0.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 4]) from checkpoint, the shape in current model is torch.Size([169, 4]).
	size mismatch for layers.0.blocks.1.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 4]) from checkpoint, the shape in current model is torch.Size([169, 4]).
	size mismatch for layers.1.downsample.norm.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for layers.1.downsample.norm.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for layers.1.downsample.reduction.weight: copying a param with shape torch.Size([512, 1024]) from checkpoint, the shape in current model is torch.Size([256, 512]).
	size mismatch for layers.1.blocks.0.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 8]) from checkpoint, the shape in current model is torch.Size([169, 8]).
	size mismatch for layers.1.blocks.1.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 8]) from checkpoint, the shape in current model is torch.Size([169, 8]).
	size mismatch for layers.2.downsample.norm.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1024]).
	size mismatch for layers.2.downsample.norm.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1024]).
	size mismatch for layers.2.downsample.reduction.weight: copying a param with shape torch.Size([1024, 2048]) from checkpoint, the shape in current model is torch.Size([512, 1024]).
	size mismatch for layers.2.blocks.0.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.1.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.2.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.3.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.4.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.5.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.6.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.7.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.8.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.9.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.10.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.11.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.12.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.13.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.14.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.15.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.16.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.2.blocks.17.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 16]) from checkpoint, the shape in current model is torch.Size([169, 16]).
	size mismatch for layers.3.blocks.0.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 32]) from checkpoint, the shape in current model is torch.Size([169, 32]).
	size mismatch for layers.3.blocks.1.attn.relative_position_bias_table: copying a param with shape torch.Size([121, 32]) from checkpoint, the shape in current model is torch.Size([169, 32]).

In [None]:
# List the model's direct submodules together with their class names.
for child_name, child in model.named_children():
    print(f"{child_name}: {type(child).__name__}")

 ## 7. Define Loss Function and Optimizer

In [8]:
# Report every backbone parameter that is frozen (requires_grad is False);
# trainable parameters are skipped.
for name, param in model.backbone.named_parameters():
    if param.requires_grad:
        continue
    print(f"{name}: requires_grad={param.requires_grad}")

In [None]:
# Partition the model parameters by name: anything whose name contains
# 'backbone' goes into the backbone group, everything else counts as new.
backbone_params = [p for n, p in model.named_parameters() if 'backbone' in n]
new_params = [p for n, p in model.named_parameters() if 'backbone' not in n]

# These counts are numbers of parameter tensors, not of scalar weights.
print(f"Backbone parameters: {len(backbone_params)}")
print(f"New parameters: {len(new_params)}")
print(f"Total parameters: {len(list(model.parameters()))}")

# Adam with one parameter group per learning rate: the backbone group comes
# first with the (lower) backbone rate, the new layers follow with the head rate.
optimizer = optim.Adam([
    dict(params=backbone_params, lr=config['lr_backbone']),
    dict(params=new_params, lr=config['lr_head']),
])


In [10]:
# Define the loss function and the learning-rate scheduler
# (the optimizer itself is created in the previous cell).
# BCEWithLogitsLoss applies the sigmoid internally, so it expects raw logits.
criterion = nn.BCEWithLogitsLoss()

# Step scheduler: multiplies the learning rate of every parameter group by
# config['lr_gamma'] once every config['lr_step_size'] scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer, step_size=config['lr_step_size'], gamma=config['lr_gamma'])

In [None]:
# Make sure a 'checkpoints' directory exists in the parent of the working directory
os.makedirs('../checkpoints', exist_ok=True)

# Checkpoint file inside that directory, named after the current wandb run.
# NOTE(review): the run name is used verbatim as a file name; the fallback name
# assigned after wandb.init contains a timestamp, which may include characters
# that are not valid in file names on every platform — confirm.
model_save_path = f'../checkpoints/{wandb.run.name}.pth'

# Display the run name as the cell output
wandb.run.name

### Accuracy method

In [12]:
import numpy as np  # Ensure numpy is imported

def compute_validation_accuracy(model, val_dataloader, device):
    """
    Compute the pixel-wise accuracy over the validation set for several
    probability thresholds and return the best one.

    The model outputs are treated as logits: a sigmoid turns them into
    probabilities, which are binarised at 11 evenly spaced thresholds
    (0.0, 0.1, ..., 1.0) and compared element-wise with the labels. A
    prediction only counts as correct when it equals the label value exactly,
    so label values other than 0 and 1 never match.

    Note: the model is switched to eval mode and left in that mode.

    Args:
        model: The trained model; called as ``model(inputs)``.
        val_dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.
        device: The device (CPU or GPU) to perform the forward pass on.

    Returns:
        best_accuracy (float): The highest accuracy achieved across thresholds.
        best_threshold (float): The lowest threshold reaching that accuracy.
        Both are 0.0 when the dataloader yields no pixels at all.
    """
    model.eval()
    thresholds = np.linspace(0.0, 1.0, 11)  # Thresholds from 0.0 to 1.0, inclusive
    # One running count of correct pixels per threshold, addressed by position
    # rather than by the float threshold value.
    correct_per_threshold = np.zeros(len(thresholds), dtype=np.int64)
    total_pixels = 0

    with torch.no_grad():
        for inputs, labels in val_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass; drop the channel dimension (dim 1) when it has size 1
            outputs = model(inputs).squeeze(1)

            # Logits -> probabilities, then flatten on the CPU for numpy
            probs_np = torch.sigmoid(outputs).cpu().numpy().flatten()
            labels_np = labels.cpu().numpy().flatten()

            total_pixels += labels_np.size

            # Evaluate this batch at every threshold
            for idx, threshold in enumerate(thresholds):
                preds = (probs_np >= threshold).astype(np.uint8)
                correct_per_threshold[idx] += (preds == labels_np).sum()

    # Nothing to evaluate: avoid dividing by zero and report the neutral result.
    if total_pixels == 0:
        return 0.0, 0.0

    accuracies = correct_per_threshold / total_pixels
    # argmax returns the first maximum, i.e. the lowest threshold among ties.
    best_idx = int(np.argmax(accuracies))

    return float(accuracies[best_idx]), float(thresholds[best_idx])


 ## 8. Training Loop with wandb Logging

In [13]:
# Shorthand handle on the model's backbone, used by the inspection cells that follow.
bb = model.backbone

In [None]:
# Display the backbone's `feature_channels` attribute as the cell output.
bb.feature_channels

In [37]:
# Pull a single (inputs, labels) batch from the training loader for inspection.
# NOTE(review): the recorded execution counts of this and the next cell (37, 41)
# are out of sequence with the rest of the notebook — re-check on a fresh kernel.
img, lab = next(iter(train_dataloader))

In [41]:
# Run the backbone alone on the sampled batch; `img` is passed as loaded,
# without an explicit move to `device`.
features = bb(img)

In [None]:
# Show the shape of every feature map returned by the backbone.
print("Actual feature map shapes:")
for idx, feature_map in enumerate(features):
    print(f"Feature {idx}: shape {feature_map.shape}")

In [None]:
# Early-stopping state
best_val_loss = float('inf')                  # lowest validation loss seen so far
patience = config['early_stopping_patience']  # epochs without improvement tolerated
counter = 0                                   # consecutive epochs without improvement

# Before the training loop, let wandb watch the model
wandb.watch(model, criterion=criterion, log="all", log_freq=10)

log_interval = 10                          # batches between training-loss logs
num_train_batches = len(train_dataloader)

for epoch in range(config['num_epochs']):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0       # loss summed since the last log entry
    batches_since_log = 0    # number of batches contained in running_loss
    for i, (inputs, labels) in enumerate(train_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass; drop the channel dimension (dim 1) when it has size 1
        outputs = model(inputs)
        outputs = outputs.squeeze(1)

        # Compute loss
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Statistics
        running_loss += loss.item()
        batches_since_log += 1

        # Log every `log_interval` batches and on the last batch of the epoch.
        # `i` is zero-based, so the last batch is i + 1 == num_train_batches.
        # Dividing by the number of batches actually accumulated keeps the
        # average correct for the final, possibly shorter, window.
        is_last_batch = (i + 1) == num_train_batches
        if (i + 1) % log_interval == 0 or is_last_batch:
            avg_loss = running_loss / batches_since_log
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] Training Loss: {avg_loss:.4f}")

            # Log metrics to wandb. param_groups[0] is the first optimizer
            # group, so 'learning_rate' tracks that group's rate only.
            wandb.log({
                'epoch': epoch + 1,
                'batch': i + 1,
                'training_loss': avg_loss,
                'learning_rate': optimizer.param_groups[0]['lr']
            })

            running_loss = 0.0
            batches_since_log = 0

    # ---- Validation phase ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(inputs)
            outputs = outputs.squeeze(1)

            # Compute loss
            loss = criterion(outputs, labels)

            # Accumulate validation loss
            val_loss += loss.item()

    # Mean of the per-batch validation losses
    avg_val_loss = val_loss / len(val_dataloader)
    print(f"Epoch {epoch + 1} Validation Loss: {avg_val_loss:.4f}")

    # Compute validation accuracy and best threshold
    best_accuracy, best_threshold = compute_validation_accuracy(model, val_dataloader, device)
    print(f"Best Validation Accuracy: {best_accuracy:.4f} at Threshold: {best_threshold:.2f}")

    # Log validation loss and accuracy to wandb
    wandb.log({
        'epoch': epoch + 1,
        'validation_loss': avg_val_loss,
        'validation_accuracy': best_accuracy,
        'best_threshold': best_threshold
    })

    # ---- Early stopping ----
    # A checkpoint is written only when the validation loss improves, so the
    # file at model_save_path always holds the best weights seen so far.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        counter = 0
        # Save the model
        torch.save(model.state_dict(), model_save_path)
        print(f"Validation loss decreased to {avg_val_loss:.4f}, saving model to {model_save_path}")
    else:
        counter += 1
        print(f"No improvement in validation loss for {counter} epoch(s).")
        if counter >= patience:
            print("Early stopping triggered.")
            # Record the epoch at which training stopped early
            wandb.log({'early_stopping_epoch': epoch + 1})
            break

    # Step the scheduler once per completed epoch
    scheduler.step()

print("Training complete.")


## 8.1. Find accuracy-optimizing thresholds

 ## 9. Save the Trained Model

In [None]:
# Display the name of the current wandb run as the cell output.
wandb.run.name

In [None]:
# Create a W&B Artifact named 'model' of type 'model'
artifact = wandb.Artifact('model', type='model')

# Add the checkpoint file written during training to the artifact.
# The file only exists if the training loop saved at least one checkpoint.
artifact.add_file(model_save_path)

# Log the artifact to W&B as part of the current run
wandb.log_artifact(artifact)

In [None]:
# Mark the wandb run as finished.
wandb.finish()

In [16]:
# Colab only: unassign the runtime once everything above has completed
# (presumably to release the Colab machine — confirm against the Colab docs).
if USING_COLAB:
    from google.colab import runtime
    runtime.unassign()