 # Landing Strip Detection Training Pipeline



 This notebook implements a training pipeline for detecting landing strips using satellite imagery. The pipeline includes:



 - Loading input landing strip data.

 - Creating input areas around the landing strips.

 - Downloading Sentinel-2 imagery from Google Earth Engine.

 - Preparing a dataset for training.

 - Loading the Geo Foundation Model (GFM) for transfer learning.

 - Setting up a training loop with Weights & Biases (wandb) logging.



 **Note**: Ensure that you have authenticated with Google Earth Engine (GEE) using `ee.Authenticate()` and have initialized it with `ee.Initialize()`. Also, make sure `train_utils.py` is in your working directory or Python path.

# *TODO*
* Max value of model outputs can be rather small (in one case, 0.6244). This leads to binary search setting threshold lower, predicting all zeroes
* (buffered_labels.float() == 1).float()
tensor([0., 0., 0.,  ..., 0., 0., 0.])
(buffered_labels.float() == 1).float().mean()
tensor(0.2678) **(!!!)**

 ## 1. Setup and Imports

In [None]:
import sys
import os
import random
import wandb
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import timm  # PyTorch Image Models library
import logging
from pathlib import Path
import re

# Function to check if running in Colab
def is_colab():
    try:
        import google.colab
        print("Using Colab")
        return True
    except ImportError:
        print("Not using Colab")
        return False

USING_COLAB = is_colab()


if USING_COLAB:
    # For running on colab
    os.chdir('/content/secret-runway-detection/notebooks')
    ! pip install -q yacs
    from google.colab import drive, userdata
    drive.mount('/content/drive')
    wandb.login(key=userdata.get('WANDB_API_KEY'))

if USING_COLAB and not os.path.exists('/content/secret-runway-detection'):
    print("Cloning the secret-runway-detection repository...")
    !git clone https://github.com/emilschmitz/secret-runway-detection.git /content/secret-runway-detection

sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../GFM'))

# Import functions and constants from train_utils
from secret_runway_detection.model import (
    get_model,
)
from secret_runway_detection.dataset import LandingStripDataset, SegmentationTransform
from secret_runway_detection.train_utils import (
    RANDOM_SEED,
)
from secret_runway_detection.eval_utils import (
    compute_validation_accuracy,
    compute_baseline_accuracy,
)

from GFM.models import build_model

Not using Colab
/home/emil/Desktop/secret-runway-detection/notebooks


 ## 2. Configuration and Initialization

In [None]:
# %%
# Debug flag: Set to True to run on CPU, False to use GPU if available
# With DEBUG == True, test and train sets are reduced to 10 samples each
import pandas as pd


DEBUG = True

# Device configuration
device = torch.device('cpu') if DEBUG else torch.device(
    'cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set random seeds for reproducibility
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

# Set up logging
logging.basicConfig(level=logging.INFO)
# logging.getLogger('secret_runway_detection.train_utils').setLevel(logging.DEBUG)
logging.getLogger('secret_runway_detection.train_utils').setLevel(logging.INFO)

Using device: cpu


In [None]:
config = {
    'training_dataset': 'point',
    'resolution': 192,
    'train_percentage': 0.8,
    'model_type': 'simple',
    'num_epochs': 50 if not DEBUG else 2,
    'batch_size': 32 if USING_COLAB else 4,
    'lr_head': 1e-3,
    'lr_backbone': 1e-6,
    'lr_step_size': 10,
    'lr_gamma': 0.3,
    'early_stopping_patience': 3,
}


# Initialize wandb
wandb.init(project='secret-runway-detection',
           mode='online' if not DEBUG else 'dryrun',
           dir='..',
           tags=[
                config['training_dataset'],
                'colab' if USING_COLAB else 'local',
                'PRETRAINED',
           ],
           job_type='train',
           config=config,
           )

BACKBONE_CFG_PATH = '../configs/gfm_config.yaml'

# Log the config.yaml file as an artifact
artifact = wandb.Artifact('gfm_config', type='backbone_config')
artifact.add_file(BACKBONE_CFG_PATH)

if not wandb.run.name:
    wandb.run.name = f"Run from {pd.Timestamp.now()}"

print(wandb.run.name)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


 ## 5. Load Data into Dataset

In [4]:
root_train_dir = Path(f'../training_data_{config["resolution"]}')
child_train_dir = Path(f'training_data_{config["training_dataset"]}')
train_dir = root_train_dir / child_train_dir

if USING_COLAB:
    os.makedirs(train_dir, exist_ok=True)
    print(f"Created directories: {train_dir} (including parents if they didn't exist)")

    import zipfile

    # Define the path to the training data zip on Google Drive
    drive_train_zip = Path(f'/content/drive/MyDrive/Secret Runway Detection Challenge/training_data_{config["resolution"]}/training_data_{config["training_dataset"]}.zip')

    # Define the destination path where the zip will be copied
    dest_train_zip = Path(f'{train_dir}.zip')

    # Copy the zip file from Drive to the destination
    print(f"Copying training data from {drive_train_zip} to {dest_train_zip}...")
    !cp "{drive_train_zip}" "{dest_train_zip}"
    print(os.listdir(train_dir))

    # Unzip the training data
    with zipfile.ZipFile(dest_train_zip, 'r') as zip_ref:
        zip_ref.extractall(train_dir)
    
    print("Training data setup completed.")

In [5]:

images_dir = train_dir / 'images'
labels_dir = train_dir / 'labels'

# Get all filenames in the images directory
all_filenames = os.listdir(images_dir)

# Initialize dictionaries and lists
strip_to_files = {}        # For files with strip numbers
possibly_empty_files = []  # For 'possibly_empty' files

# Regular expression pattern to match filenames with strip numbers
pattern = re.compile(r'^area_\d+_of_strip_(\d+)\.npy$')

# Process filenames
for filename in all_filenames:
    if 'possibly_empty' in filename:
        # This is a 'possibly_empty' file
        possibly_empty_files.append(filename)
    else:
        # Try to match the pattern to extract strip number
        match = pattern.match(filename)
        if match:
            strip_number = int(match.group(1))
            # Add filename to the list for this strip number
            strip_to_files.setdefault(strip_number, []).append(filename)
        else:
            print(f"Filename does not match expected pattern: {filename}")

# List of all unique strip numbers
strip_numbers = list(strip_to_files.keys())

# Shuffle strip numbers for random splitting
random.seed(RANDOM_SEED)  # Ensure reproducibility
random.shuffle(strip_numbers)

# Calculate split index for strips
num_strips = len(strip_numbers)
split_index = int(num_strips * config['train_percentage'])

# Split strip numbers into train and test sets
train_strip_numbers = strip_numbers[:split_index]
val_strip_numbers = strip_numbers[split_index:]

wandb.config.update({
    'num_strips': num_strips,
    'train_strip_numbers': train_strip_numbers,
    'val_strip_numbers': val_strip_numbers,
})

# Collect filenames for train and test sets based on strip numbers
train_files = []
for strip_num in train_strip_numbers:
    train_files.extend(strip_to_files[strip_num])

val_files = []
for strip_num in val_strip_numbers:
    val_files.extend(strip_to_files[strip_num])

# Now handle the 'possibly_empty' files
# Shuffle the possibly_empty files
random.shuffle(possibly_empty_files)

# Calculate split index for possibly_empty files
num_possibly_empty = len(possibly_empty_files)
split_index_empty = int(num_possibly_empty * config['train_percentage'])

# Split possibly_empty files into train and test sets
train_possibly_empty_files = possibly_empty_files[:split_index_empty]
val_possibly_empty_files = possibly_empty_files[split_index_empty:]

# Add the possibly_empty files to the train and test file lists
train_files.extend(train_possibly_empty_files)
val_files.extend(val_possibly_empty_files)

# Output some information
print(f"Total files: {len(all_filenames)}")
print(f"Total strips: {len(strip_numbers)}")
print(f"Training files: {len(train_files)}")
print(f"Testing files: {len(val_files)}")

# Define your transform if you have one; otherwise, set to None
segmentation_transform = None  # Replace with your actual transform if any

# Create train dataset
train_dataset = LandingStripDataset(
    images_dir=images_dir,
    labels_dir=labels_dir,
    file_list=train_files,
    transform=segmentation_transform
)

# Create test dataset
val_dataset = LandingStripDataset(
    images_dir=images_dir,
    labels_dir=labels_dir,
    file_list=val_files,
    transform=segmentation_transform
)

if DEBUG:
    train_dataset.samples = train_dataset.samples[:10]
    val_dataset.samples = val_dataset.samples[:10]

train_dataloader = DataLoader(
    train_dataset, batch_size=config['batch_size'], shuffle=True)
val_dataloader = DataLoader(
    val_dataset, batch_size=config['batch_size'], shuffle=False)

Total files: 1637
Total strips: 113
Training files: 1305
Testing files: 332


 ## 6. Load the Geo Foundation Model (GFM)

In [6]:
! pip install yacs


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [None]:
from secret_runway_detection.model import get_model

if USING_COLAB:
    ! cp -r '/content/drive/MyDrive/Secret Runway Detection Challenge/simmim_pretrain' '../simmim_pretrain'    

backbone_weights_path = '../simmim_pretrain/gfm.pth'

model = get_model(config['model_type'], BACKBONE_CFG_PATH, backbone_weights_path, output_size=config['resolution']).to(device)


=> merge config from ../configs/gfm_config.yaml


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]



Loading pretrained weights...

Missing keys when loading pretrained weights:
  - head.weight
  - head.bias

Unexpected keys when loading pretrained weights:
  - mask_token

Pretrained weights loaded with some missing/unexpected keys.


In [8]:
for name, module in model.named_children():
    print(f"{name}: {module.__class__.__name__}")

backbone: SwinTransformer
segmentation_head: SimpleSegmentationHead


 ## 7. Define Loss Function and Optimizer

In [9]:
for name, param in model.backbone.named_parameters():
    if not param.requires_grad:
        print(f"{name}: requires_grad={param.requires_grad}")

In [10]:
# Separate parameters for different learning rates
backbone_params = []
new_params = []

for name, param in model.named_parameters():
    if 'backbone' in name:
        backbone_params.append(param)
    else:
        new_params.append(param)

print(f"Backbone parameters: {len(backbone_params)}")
print(f"New parameters: {len(new_params)}")
print(f"Total parameters: {len(list(model.parameters()))}")

# Define optimizer with differential learning rates
optimizer = optim.Adam([
    {'params': backbone_params, 'lr': config['lr_backbone']},  # Lower learning rate for pretrained layers
    {'params': new_params, 'lr': config['lr_head']}        # Higher learning rate for new layers
])


Backbone parameters: 329
New parameters: 52
Total parameters: 381


In [11]:
backbone_param_count = sum(p.numel() for p in backbone_params)
new_param_count = sum(p.numel() for p in new_params)
total_param_count = backbone_param_count + new_param_count

wandb.config.update({
    'backbone_param_count': backbone_param_count,
    'new_param_count': new_param_count,
    'total_param_count': total_param_count,
})

print(
    f"Backbone parameters: {backbone_param_count}\n"
    f"New parameters: {new_param_count}\n"
    f"Total parameters: {total_param_count}"
)

Backbone parameters: 87750176
New parameters: 10315177
Total parameters: 98065353


In [12]:
# Define loss function and optimizer
# Suitable for binary classification with logits
criterion = nn.BCEWithLogitsLoss()

# Optionally, define a learning rate scheduler
scheduler = optim.lr_scheduler.StepLR(
    optimizer, step_size=config['lr_step_size'], gamma=config['lr_gamma'])

In [13]:
# Create a 'checkpoints' directory within the current directory
os.makedirs('../checkpoints', exist_ok=True)

# Define the model save path within the 'checkpoints' directory
model_save_path = f'../checkpoints/{wandb.run.name}.pth'

wandb.run.name

'Run from 2024-11-09 09:31:13.330082'

### Baseline acc

In [14]:
# Assuming you have a DataLoader named val_dataloader
baseline_acc = compute_baseline_accuracy(val_dataloader)
print(f"Baseline Accuracy (all zeros): {baseline_acc:.4f}")

# Optionally, log to wandb
wandb.log({'baseline_accuracy': baseline_acc})


Baseline Accuracy (all zeros): 0.9960


 ## 8. Training Loop with wandb Logging

In [15]:
# Initialize early stopping variables before the training loop
best_val_loss = float('inf')
# Number of epochs to wait before stopping
patience = config['early_stopping_patience']
counter = 0   # Counter for early stopping

# Before the training loop, watch the model
wandb.watch(model, criterion=criterion, log="all", log_freq=10)

for epoch in range(config['num_epochs']):
    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        outputs = outputs.squeeze(1)  # Adjust dimensions if necessary

        # Compute loss
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()

         # Monitor gradients

        optimizer.step()

        # Statistics
        running_loss += loss.item()

        # Log every 10 batches or last batch
        if (i + 1) % 10 == 0 or i == len(train_dataloader):
            avg_loss = running_loss / 10
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] Training Loss: {avg_loss:.4f}")

            # Log metrics to wandb
            wandb.log({
                'epoch': epoch + 1,
                'batch': i + 1,
                'training_loss': avg_loss,
                'learning_rate': optimizer.param_groups[0]['lr']
            })

            running_loss = 0.0

    # Validation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(inputs)
            outputs = outputs.squeeze(1)  # Adjust dimensions if necessary

            # Compute loss
            loss = criterion(outputs, labels)

            # Accumulate validation loss
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_dataloader)
    print(f"Epoch {epoch + 1} Validation Loss: {avg_val_loss:.4f}")

    # Compute validation accuracy and best threshold
    best_accuracy, best_threshold = compute_validation_accuracy(model, val_dataloader, device)
    print(f"Best Validation Accuracy: {best_accuracy:.4f} at Threshold: {best_threshold:.2f}")

    # Log validation loss and accuracy to wandb
    wandb.log({
        'epoch': epoch + 1,
        'validation_loss': avg_val_loss,
        'validation_accuracy': best_accuracy,
        'best_threshold': best_threshold
    })

    # Early Stopping Check
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        counter = 0
        # Save the model
        torch.save(model.state_dict(), model_save_path)
        print(f"Validation loss decreased to {avg_val_loss:.4f}, saving model to {model_save_path}")
    else:
        counter += 1
        print(f"No improvement in validation loss for {counter} epoch(s).")
        if counter >= patience:
            print("Early stopping triggered.")
            # Optionally log early stopping to wandb
            wandb.log({'early_stopping_epoch': epoch + 1})
            break

    # Step the scheduler
    scheduler.step()

print("Training complete.")


Epoch 1 Validation Loss: 0.6934
Best Validation Accuracy: 0.9960 at Threshold: 0.60
Validation loss decreased to 0.6934, saving model to ../checkpoints/Run from 2024-11-09 09:31:13.330082.pth
Epoch 2 Validation Loss: 0.7138
Best Validation Accuracy: 0.9960 at Threshold: 1.00
No improvement in validation loss for 1 epoch(s).
Training complete.


In [None]:
# Log accuracy improvement over baseline
wandb.log({'accuracy_improvement': best_accuracy - baseline_acc})

 ## 9. Save the Trained Model

In [16]:
wandb.run.name

'Run from 2024-11-09 09:31:13.330082'

In [17]:
# Create a W&B Artifact for the model
artifact = wandb.Artifact('model', type='model')

# Add the saved model file to the artifact
artifact.add_file(model_save_path)

# Log the artifact to W&B
wandb.log_artifact(artifact)

<Artifact model>

In [18]:
wandb.finish()

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
baseline_accuracy,▁
best_threshold,▁█
epoch,▁█
validation_accuracy,▁▁
validation_loss,▁█

0,1
baseline_accuracy,0.99603
best_threshold,1.0
epoch,2.0
validation_accuracy,0.99603
validation_loss,0.71379


In [19]:
if USING_COLAB:
    from google.colab import runtime
    runtime.unassign()