<a href="https://www.kaggle.com/code/sahilpawar9192/building-detections?scriptVersionId=259804225" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# --- Building Dataset Path Verification ---
import os
import glob

# Candidate dataset root; the checks below confirm whether it is usable.
BASE_PATH = '/kaggle/input/massachusetts-buildings-dataset/massachusetts-buildings-dataset'

print(f"--- Searching for training data in: {BASE_PATH} ---")

try:
    # Satellite tiles are expected under 'sat' and their masks under 'map'.
    train_sat_path = os.path.join(BASE_PATH, 'train', 'sat')
    train_map_path = os.path.join(BASE_PATH, 'train', 'map')

    train_images = glob.glob(os.path.join(train_sat_path, '*.tif'))
    train_masks = glob.glob(os.path.join(train_map_path, '*.tif'))

    if not (train_images and train_masks):
        # glob returns an empty list for a missing folder, so list the base
        # path to show what is really there. os.listdir raises
        # FileNotFoundError when the base path itself does not exist.
        print("\n❌ Could not find training files. Let's explore the directory.")
        print("Contents of base path:")
        for item in os.listdir(BASE_PATH):
            print(f" -> {item}")
    else:
        print(f"✅ Success! Found {len(train_images)} training images and {len(train_masks)} masks.")
        print(f"   -> Image path looks correct: {train_sat_path}")
        print(f"   -> Mask path looks correct: {train_map_path}")

except FileNotFoundError:
    print(f"\n❌ FATAL ERROR: The base path '{BASE_PATH}' is incorrect.")
    print("Please find the correct path in the right-hand 'Data' panel and update the BASE_PATH variable.")

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import mobilenet_v2
from PIL import Image
import numpy as np
import glob
import os
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [None]:
# --- Diagnostic Script for Massachusetts Buildings Dataset ---
import os
import glob

# Dataset root being checked by this diagnostic.
BASE_PATH = '/kaggle/input/d/balraj98/massachusetts-buildings-dataset/png'

print(f"--- Running Diagnostics on Base Path: {BASE_PATH} ---")

# Sub-folders the training code reads from.
train_img_dir = os.path.join(BASE_PATH, 'train')
train_mask_dir = os.path.join(BASE_PATH, 'train_labels')

print(f"\nAttempting to read images from: {train_img_dir}")
print(f"Attempting to read masks from: {train_mask_dir}")

try:
    # Collect every .png file found directly inside each folder.
    train_images = glob.glob(os.path.join(train_img_dir, '*.png'))
    train_masks = glob.glob(os.path.join(train_mask_dir, '*.png'))

    print("-" * 20)
    print(f"Found {len(train_images)} training images.")
    print(f"Found {len(train_masks)} training masks.")
    print("-" * 20)

    if train_images and train_masks:
        print("\n✅ SUCCESS: Files were found! The paths are correct.")
    else:
        print("\n❌ CONCLUSION: No files found. This means the path is incorrect.")
        print(f"Let's check the actual contents of '{BASE_PATH}':")

        # Show what the base folder really contains; os.listdir raises
        # FileNotFoundError when the folder does not exist.
        contents = os.listdir(BASE_PATH)
        for item in contents:
            print(f" -> {item}")
        print("\nPlease compare the names above ('train', 'train_labels') with what the script is using.")

except FileNotFoundError:
    print(f"\n❌ FATAL ERROR: The base path '{BASE_PATH}' does not exist.")
    print("Please check the 'Data' panel on the right to confirm the dataset's main folder name.")

In [None]:
# --- Configuration with FINAL CORRECTED PATH ---
class Config:
    """Static settings for the single-stage training run in this notebook."""

    # Use the GPU when one is visible to PyTorch, otherwise the CPU.
    if torch.cuda.is_available():
        DEVICE = "cuda"
    else:
        DEVICE = "cpu"

    # Dataset root confirmed by the diagnostic cell.
    BASE_PATH = '/kaggle/input/d/balraj98/massachusetts-buildings-dataset/png'

    # Size every image and mask is resized to before batching.
    IMG_HEIGHT, IMG_WIDTH = 256, 256

    # Optimisation settings.
    BATCH_SIZE = 16
    EPOCHS = 40
    LEARNING_RATE = 1e-4

    # Destination of the best checkpoint.
    MODEL_SAVE_PATH = "/kaggle/working/building_segmentation_model.pt"

In [None]:
# --- CORRECTED Dataset Class ---
import cv2 # Make sure cv2 is imported

class BuildingDataset(Dataset):
    """Paired image / binary-mask dataset read from two folders of PNG files.

    Images and masks are paired by position after sorting the ``*.png``
    paths of each folder, so both folders must hold the same number of
    files.

    Args:
        image_dir: Folder containing the image PNGs.
        mask_dir: Folder containing the mask PNGs.
        transform: Optional callable invoked as
            ``transform(image=..., mask=...)`` and returning a mapping with
            ``"image"`` and ``"mask"`` entries.

    Raises:
        ValueError: If the two folders contain different numbers of PNGs.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_paths = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        self.mask_paths = sorted(glob.glob(os.path.join(mask_dir, '*.png')))
        # Pairing is purely positional, so a count mismatch would silently
        # attach masks to the wrong images (or fail with IndexError later).
        if len(self.image_paths) != len(self.mask_paths):
            raise ValueError(
                f"Found {len(self.image_paths)} images in '{image_dir}' but "
                f"{len(self.mask_paths)} masks in '{mask_dir}'."
            )
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx``.

        The mask is binarised (any non-zero pixel becomes 1.0), stored as
        float32 and returned as a tensor with a leading channel axis. The
        image is returned as produced by ``transform``; without a transform
        it is the raw RGB numpy array.

        Raises:
            FileNotFoundError: If the mask file cannot be read.
        """
        img_path = self.image_paths[idx]
        mask_path = self.mask_paths[idx]

        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))

        # cv2.imread reports an unreadable file by returning None.
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask file: {mask_path}")
        # Any pixel value above 0 becomes 255, then scale to 0.0 / 1.0.
        _, mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY)
        mask = mask.astype(np.float32) / 255.0

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented["image"]
            mask = augmented["mask"]

        # Without a tensor-producing transform the mask is still a numpy
        # array, which has no ``unsqueeze``; convert it first.
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(mask)
        return image, mask.unsqueeze(0)

In [None]:
# --- Reusing our proven MobileNetV2-U-Net Architecture ---
class MobileNetV2_UNet(nn.Module):
    """U-Net-style decoder on top of torchvision's pretrained MobileNetV2.

    The encoder is the ``features`` stack of ``mobilenet_v2``; four slices of
    it provide skip connections and a fifth slice acts as the bottleneck.
    The network outputs one channel of raw logits.
    """

    def __init__(self):
        super().__init__()

        # Encoder slices (these share their modules with ``self.encoder``).
        self.encoder = mobilenet_v2(weights='IMAGENET1K_V1').features
        self.skip1 = self.encoder[:2]
        self.skip2 = self.encoder[2:4]
        self.skip3 = self.encoder[4:7]
        self.skip4 = self.encoder[7:14]
        self.bridge = self.encoder[14:18]

        # Decoder: each stage upsamples by 2, is concatenated with the
        # matching skip tensor, and is fused by a 3x3 convolution + ReLU.
        self.up1 = nn.ConvTranspose2d(320, 96, kernel_size=2, stride=2)
        self.conv1 = nn.Sequential(
            nn.Conv2d(192, 96, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )
        self.up2 = nn.ConvTranspose2d(96, 32, kernel_size=2, stride=2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )
        self.up3 = nn.ConvTranspose2d(32, 24, kernel_size=2, stride=2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(48, 24, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )
        self.up4 = nn.ConvTranspose2d(24, 16, kernel_size=2, stride=2)
        self.conv4 = nn.Sequential(
            nn.Conv2d(32, 16, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )

        # Output head: one more x2 upsample, then a 1x1 conv to one channel.
        self.final_up = nn.ConvTranspose2d(16, 16, kernel_size=2, stride=2)
        self.final_conv = nn.Conv2d(16, 1, kernel_size=1)

    def forward(self, x):
        """Return single-channel logits for the image batch ``x``."""
        # Encoder pass, keeping every intermediate tensor for the skips.
        feat1 = self.skip1(x)
        feat2 = self.skip2(feat1)
        feat3 = self.skip3(feat2)
        feat4 = self.skip4(feat3)
        out = self.bridge(feat4)

        # Decoder pass, deepest skip first.
        out = self.conv1(torch.cat([self.up1(out), feat4], 1))
        out = self.conv2(torch.cat([self.up2(out), feat3], 1))
        out = self.conv3(torch.cat([self.up3(out), feat2], 1))
        out = self.conv4(torch.cat([self.up4(out), feat1], 1))

        return self.final_conv(self.final_up(out))

# --- Advanced Loss Function for Best Accuracy ---
class DiceBCELoss(nn.Module):
    """Binary cross-entropy plus soft Dice loss, computed from raw logits.

    The constructor arguments ``weight`` and ``size_average`` are accepted
    for call compatibility only; they are not used.
    """

    def __init__(self, weight=None, size_average=True):
        super().__init__()

    def forward(self, inputs, targets, smooth=1):
        """Return ``BCE + (1 - Dice)`` over all elements of the batch.

        Args:
            inputs: Raw model outputs (logits), any shape.
            targets: Float ground truth with the same number of elements.
            smooth: Constant added to the Dice numerator and denominator so
                the ratio stays defined when both tensors are all zero.

        Returns:
            Scalar tensor holding the combined loss.
        """
        # reshape (not view) so non-contiguous tensors are accepted too.
        logits = inputs.reshape(-1)
        targets = targets.reshape(-1)

        # BCE straight from the logits: applying sigmoid first saturates to
        # exactly 0/1 for large |logit|, which clamps the loss and zeroes
        # the gradient.
        bce = nn.functional.binary_cross_entropy_with_logits(
            logits, targets, reduction='mean'
        )

        probs = torch.sigmoid(logits)
        intersection = (probs * targets).sum()
        dice_loss = 1 - (2. * intersection + smooth) / (probs.sum() + targets.sum() + smooth)
        return bce + dice_loss

In [None]:
# --- Training and Validation Functions ---
def train_fn(loader, model, optimizer, loss_fn):
    """Run one optimisation pass over ``loader``.

    Every batch is moved to ``Config.DEVICE``, passed through ``model``,
    scored with ``loss_fn`` and used for a single optimizer step. The batch
    loss is shown on the tqdm progress bar. Nothing is returned.
    """
    progress = tqdm(loader, leave=True)
    for batch, labels in progress:
        batch = batch.to(Config.DEVICE)
        labels = labels.to(Config.DEVICE)

        batch_loss = loss_fn(model(batch), labels)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        progress.set_postfix(loss=batch_loss.item())

def eval_fn(loader, model, device):
    """Return the mean per-batch Dice score of ``model`` over ``loader``.

    Predictions are ``sigmoid(model(x)) > 0.5``. Dice is computed over all
    elements of each batch and the batch scores are then averaged.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        model: Module producing logits shaped like ``targets``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Average Dice score, or 0.0 when ``loader`` yields no batches.
    """
    # Remember the mode so evaluation does not silently switch a model that
    # was in eval mode back to training mode.
    was_training = model.training
    model.eval()

    total = 0.0
    num_batches = 0
    try:
        with torch.no_grad():
            for x, y in loader:
                x, y = x.to(device), y.to(device)
                preds = (torch.sigmoid(model(x)) > 0.5).float()
                batch_dice = (2 * (preds * y).sum()) / ((preds + y).sum() + 1e-8)
                # Accumulate a Python float so no device tensor is kept.
                total += batch_dice.item()
                num_batches += 1
    finally:
        model.train(was_training)

    # An empty loader has no score; avoid dividing by zero.
    return total / num_batches if num_batches else 0.0

In [None]:
# --- Main Execution ---
print(f"--- Using device: {Config.DEVICE} ---")

# Dataset folders under the verified base path.
train_img_dir = os.path.join(Config.BASE_PATH, 'train')
train_mask_dir = os.path.join(Config.BASE_PATH, 'train_labels')
val_img_dir = os.path.join(Config.BASE_PATH, 'val')
val_mask_dir = os.path.join(Config.BASE_PATH, 'val_labels')

# Training pipeline: resize, random geometric/colour augmentation,
# normalisation with mean 0 / std 1 / max_pixel_value 255, tensor conversion.
train_transform = A.Compose([
    A.Resize(height=Config.IMG_HEIGHT, width=Config.IMG_WIDTH),
    A.Rotate(limit=35),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.1),
    A.ColorJitter(p=0.2),
    A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
    ToTensorV2(),
])
# Validation pipeline: same resize/normalisation, no random augmentation.
val_transform = A.Compose([
    A.Resize(height=Config.IMG_HEIGHT, width=Config.IMG_WIDTH),
    A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
    ToTensorV2(),
])

# Datasets and loaders.
train_dataset = BuildingDataset(train_img_dir, train_mask_dir, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True, num_workers=2)
val_dataset = BuildingDataset(val_img_dir, val_mask_dir, transform=val_transform)
val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False, num_workers=2)

# Model, loss, optimizer and scheduler. The scheduler runs in 'max' mode
# because it is stepped with the validation Dice score. The `verbose` flag is
# deprecated since PyTorch 2.2, so learning-rate changes are logged below.
model = MobileNetV2_UNet().to(Config.DEVICE)
loss_fn = DiceBCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=Config.LEARNING_RATE)
scheduler = ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.1)

# --- Start Training ---
print("--- Starting Building Segmentation Model Training ---")
best_dice_score = -1.0
for epoch in range(Config.EPOCHS):
    print(f"Epoch {epoch+1}/{Config.EPOCHS}")
    train_fn(train_loader, model, optimizer, loss_fn)

    # Validation metric as a plain float (eval_fn may return a 0-dim tensor).
    dice_score = float(eval_fn(val_loader, model, Config.DEVICE))
    print(f"Validation Dice Score: {dice_score:.4f}")

    # Step the scheduler and report any learning-rate change ourselves.
    lr_before = optimizer.param_groups[0]['lr']
    scheduler.step(dice_score)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after != lr_before:
        print(f"Learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")

    # Keep only the best checkpoint seen so far.
    if dice_score > best_dice_score:
        best_dice_score = dice_score
        torch.save(model.state_dict(), Config.MODEL_SAVE_PATH)
        print(f"✅ New best model saved with Dice Score: {best_dice_score:.4f}")

print("\n--- Training Complete ---")
print(f"🏆 Final best model saved to {Config.MODEL_SAVE_PATH} with a Dice Score of {best_dice_score:.4f}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# --- Diagnostic Script to Visualize a Validation Batch ---

# BuildingDataset, Config, A, ToTensorV2 and DataLoader must already be
# defined by an earlier cell.

print("--- Preparing to visualize one batch of data ---")

# Validation transform: resize and normalise only, no random augmentation.
val_transform = A.Compose([
    A.Resize(height=Config.IMG_HEIGHT, width=Config.IMG_WIDTH),
    A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
    ToTensorV2(),
])

# NOTE: this rebinds the global val_dataset / val_loader to a shuffled loader
# with batch size 4.
val_img_dir = os.path.join(Config.BASE_PATH, 'val')
val_mask_dir = os.path.join(Config.BASE_PATH, 'val_labels')
val_dataset = BuildingDataset(val_img_dir, val_mask_dir, transform=val_transform)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=True) # Get a small batch of 4

# --- Fetch and Plot One Batch ---
print("Fetching one batch from the DataLoader...")
try:
    images, masks = next(iter(val_loader))

    # Size the grid from the batch actually returned: a dataset with fewer
    # than 4 samples yields a smaller batch. squeeze=False keeps `axes`
    # two-dimensional even for a single row.
    num_samples = images.shape[0]
    fig, axes = plt.subplots(num_samples, 2, figsize=(10, 5 * num_samples), squeeze=False)
    fig.suptitle("Data Batch Visualization (Image vs. Mask)", fontsize=16)

    for i in range(num_samples):
        # Channels-last layout for matplotlib, rescaled by 255 to undo the
        # max_pixel_value=255 normalisation above.
        image = images[i].permute(1, 2, 0).numpy()
        image = (image * 255.0).astype(np.uint8)

        # Drop the singleton channel axis of the mask.
        mask = masks[i].squeeze().numpy()

        axes[i, 0].imshow(image)
        axes[i, 0].set_title(f"Image {i+1}")
        axes[i, 0].axis("off")

        axes[i, 1].imshow(mask, cmap='gray')
        axes[i, 1].set_title(f"Mask {i+1}")
        axes[i, 1].axis("off")

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

except Exception as e:
    print(f"\n❌ An error occurred while trying to fetch a batch: {e}")
    print("This likely confirms a problem in the BuildingDataset class.")

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import mobilenet_v2
from PIL import Image
import numpy as np
import glob
import os
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import ReduceLROnPlateau
import cv2

# --- Configuration ---
class Config:
    """Static settings for the two-stage MobileNetV2 U-Net training run."""

    # Use the GPU when one is visible to PyTorch, otherwise the CPU.
    if torch.cuda.is_available():
        DEVICE = "cuda"
    else:
        DEVICE = "cpu"

    BASE_PATH = '/kaggle/input/d/balraj98/massachusetts-buildings-dataset/png'

    # Size every image and mask is resized to before batching.
    IMG_HEIGHT, IMG_WIDTH = 256, 256
    BATCH_SIZE = 16

    # --- TWO-STAGE TRAINING PARAMS ---
    # Stage 1 trains the decoder only; stage 2 fine-tunes the whole model.
    STAGE_1_EPOCHS, STAGE_1_LR = 15, 1e-3
    STAGE_2_EPOCHS, STAGE_2_LR = 25, 1e-5

    # Destination of the best checkpoint.
    MODEL_SAVE_PATH = "/kaggle/working/building_segmentation_model_final.pt"

# --- Corrected Dataset Class ---
class BuildingDataset(Dataset):
    """Paired image / binary-mask dataset read from two folders of PNG files.

    Images and masks are paired by position after sorting the ``*.png``
    paths of each folder, so both folders must hold the same number of
    files.

    Args:
        image_dir: Folder containing the image PNGs.
        mask_dir: Folder containing the mask PNGs.
        transform: Optional callable invoked as
            ``transform(image=..., mask=...)`` and returning a mapping with
            ``"image"`` and ``"mask"`` entries.

    Raises:
        ValueError: If the two folders contain different numbers of PNGs.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_paths = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        self.mask_paths = sorted(glob.glob(os.path.join(mask_dir, '*.png')))
        # Pairing is purely positional, so a count mismatch would silently
        # attach masks to the wrong images (or fail with IndexError later).
        if len(self.image_paths) != len(self.mask_paths):
            raise ValueError(
                f"Found {len(self.image_paths)} images in '{image_dir}' but "
                f"{len(self.mask_paths)} masks in '{mask_dir}'."
            )
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx``.

        The mask is binarised (any non-zero pixel becomes 1.0), stored as
        float32 and returned as a tensor with a leading channel axis. The
        image is returned as produced by ``transform``; without a transform
        it is the raw RGB numpy array.

        Raises:
            FileNotFoundError: If the mask file cannot be read.
        """
        img_path, mask_path = self.image_paths[idx], self.mask_paths[idx]

        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))

        # cv2.imread reports an unreadable file by returning None.
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask file: {mask_path}")
        _, mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY)
        mask = mask.astype(np.float32) / 255.0

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented["image"], augmented["mask"]

        # Without a tensor-producing transform the mask is still a numpy
        # array, which has no ``unsqueeze``; convert it first.
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(mask)
        return image, mask.unsqueeze(0)

# --- Model Architecture (Unchanged) ---
class MobileNetV2_UNet(nn.Module):
    """U-Net-style decoder on top of torchvision's pretrained MobileNetV2.

    ``self.encoder`` is the backbone's ``features`` stack; the ``skip*`` and
    ``bridge`` attributes are slices of it that share the same modules. The
    network outputs one channel of raw logits.
    """

    def __init__(self):
        super().__init__()

        # Encoder and its slices.
        self.encoder = mobilenet_v2(weights='IMAGENET1K_V1').features
        self.skip1 = self.encoder[:2]
        self.skip2 = self.encoder[2:4]
        self.skip3 = self.encoder[4:7]
        self.skip4 = self.encoder[7:14]
        self.bridge = self.encoder[14:18]

        # Decoder stage 1: 320 -> 96 channels, fused with skip4.
        self.up1 = nn.ConvTranspose2d(320, 96, kernel_size=2, stride=2)
        self.conv1 = nn.Sequential(
            nn.Conv2d(192, 96, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )
        # Decoder stage 2: 96 -> 32 channels, fused with skip3.
        self.up2 = nn.ConvTranspose2d(96, 32, kernel_size=2, stride=2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )
        # Decoder stage 3: 32 -> 24 channels, fused with skip2.
        self.up3 = nn.ConvTranspose2d(32, 24, kernel_size=2, stride=2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(48, 24, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )
        # Decoder stage 4: 24 -> 16 channels, fused with skip1.
        self.up4 = nn.ConvTranspose2d(24, 16, kernel_size=2, stride=2)
        self.conv4 = nn.Sequential(
            nn.Conv2d(32, 16, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )

        # Output head.
        self.final_up = nn.ConvTranspose2d(16, 16, kernel_size=2, stride=2)
        self.final_conv = nn.Conv2d(16, 1, kernel_size=1)

    def forward(self, x):
        """Return single-channel logits for the image batch ``x``."""
        skips = []
        out = x
        for stage in (self.skip1, self.skip2, self.skip3, self.skip4):
            out = stage(out)
            skips.append(out)
        out = self.bridge(out)

        decoder = (
            (self.up1, self.conv1),
            (self.up2, self.conv2),
            (self.up3, self.conv3),
            (self.up4, self.conv4),
        )
        # Walk the skips from deepest to shallowest.
        for (upsample, fuse), skip in zip(decoder, reversed(skips)):
            out = fuse(torch.cat([upsample(out), skip], 1))

        out = self.final_up(out)
        return self.final_conv(out)

# --- Loss Function and Training/Eval Functions (Unchanged) ---
class DiceBCELoss(nn.Module):
    """Binary cross-entropy plus soft Dice loss, computed from raw logits."""

    def __init__(self):
        super().__init__()

    def forward(self, inputs, targets, smooth=1):
        """Return ``BCE + (1 - Dice)`` over all elements of the batch.

        Args:
            inputs: Raw model outputs (logits), any shape.
            targets: Float ground truth with the same number of elements.
            smooth: Constant added to the Dice numerator and denominator so
                the ratio stays defined when both tensors are all zero.

        Returns:
            Scalar tensor holding the combined loss.
        """
        # reshape (not view) so non-contiguous tensors are accepted too.
        logits, targets = inputs.reshape(-1), targets.reshape(-1)

        # BCE straight from the logits: applying sigmoid first saturates to
        # exactly 0/1 for large |logit|, which clamps the loss and zeroes
        # the gradient.
        bce = nn.functional.binary_cross_entropy_with_logits(
            logits, targets, reduction='mean'
        )

        probs = torch.sigmoid(logits)
        intersection = (probs * targets).sum()
        dice_loss = 1 - (2. * intersection + smooth) / (probs.sum() + targets.sum() + smooth)
        return bce + dice_loss

def train_fn(loader, model, optimizer, loss_fn):
    """Train ``model`` for one pass over ``loader``.

    Batches are moved to ``Config.DEVICE``; each one produces a single
    optimizer step, and its loss is displayed on the tqdm progress bar.
    Nothing is returned.
    """
    progress = tqdm(loader, leave=True)
    for batch, labels in progress:
        batch = batch.to(Config.DEVICE)
        labels = labels.to(Config.DEVICE)

        outputs = model(batch)
        batch_loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        progress.set_postfix(loss=batch_loss.item())

def eval_fn(loader, model, device):
    """Return the mean per-batch Dice score of ``model`` over ``loader``.

    Predictions are ``sigmoid(model(x)) > 0.5``. Dice is computed over all
    elements of each batch and the batch scores are then averaged.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        model: Module producing logits shaped like ``targets``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Average Dice score, or 0.0 when ``loader`` yields no batches.
    """
    # Remember the mode so evaluation does not silently switch a model that
    # was in eval mode back to training mode.
    was_training = model.training
    model.eval()

    total = 0.0
    num_batches = 0
    try:
        with torch.no_grad():
            for x, y in loader:
                x, y = x.to(device), y.to(device)
                preds = (torch.sigmoid(model(x)) > 0.5).float()
                batch_dice = (2 * (preds * y).sum()) / ((preds + y).sum() + 1e-8)
                # Accumulate a Python float so no device tensor is kept.
                total += batch_dice.item()
                num_batches += 1
    finally:
        model.train(was_training)

    # An empty loader has no score; avoid dividing by zero.
    return total / num_batches if num_batches else 0.0

# --- Main Execution ---
print(f"--- Using device: {Config.DEVICE} ---")

# Dataset folders.
train_img_dir = os.path.join(Config.BASE_PATH, 'train')
train_mask_dir = os.path.join(Config.BASE_PATH, 'train_labels')
val_img_dir = os.path.join(Config.BASE_PATH, 'val')
val_mask_dir = os.path.join(Config.BASE_PATH, 'val_labels')

# Augmentation pipelines (validation has no random augmentation).
train_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Rotate(limit=35),
    A.HorizontalFlip(),
    A.VerticalFlip(),
    A.Normalize(mean=0, std=1),
    ToTensorV2(),
])
val_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Normalize(mean=0, std=1),
    ToTensorV2(),
])

# Datasets and loaders.
train_dataset = BuildingDataset(train_img_dir, train_mask_dir, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True, num_workers=2)
val_dataset = BuildingDataset(val_img_dir, val_mask_dir, transform=val_transform)
val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False, num_workers=2)

model = MobileNetV2_UNet().to(Config.DEVICE)
loss_fn = DiceBCELoss()

# --- STAGE 1: TRAIN DECODER ---
print("--- Stage 1: Training the Decoder ---")
# Freeze the encoder so only the decoder receives gradient updates.
for param in model.encoder.parameters():
    param.requires_grad = False
optimizer = torch.optim.Adam(model.parameters(), lr=Config.STAGE_1_LR)
for epoch in range(Config.STAGE_1_EPOCHS):
    print(f"Stage 1 - Epoch {epoch+1}/{Config.STAGE_1_EPOCHS}")
    train_fn(train_loader, model, optimizer, loss_fn)

# --- STAGE 2: FINE-TUNE ENTIRE MODEL ---
print("\n--- Stage 2: Fine-Tuning the Entire Model ---")
# Unfreeze the encoder and restart the optimizer at the fine-tuning rate.
for param in model.encoder.parameters():
    param.requires_grad = True
optimizer = torch.optim.Adam(model.parameters(), lr=Config.STAGE_2_LR)
# 'max' mode: the scheduler is stepped with the validation Dice score. The
# `verbose` flag is deprecated since PyTorch 2.2, so learning-rate changes
# are logged explicitly below.
scheduler = ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.1)
best_dice_score = -1.0
for epoch in range(Config.STAGE_2_EPOCHS):
    print(f"Stage 2 - Epoch {epoch+1}/{Config.STAGE_2_EPOCHS}")
    train_fn(train_loader, model, optimizer, loss_fn)

    # Validation metric as a plain float (eval_fn may return a 0-dim tensor).
    dice_score = float(eval_fn(val_loader, model, Config.DEVICE))
    print(f"Validation Dice Score: {dice_score:.4f}")

    lr_before = optimizer.param_groups[0]['lr']
    scheduler.step(dice_score)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after != lr_before:
        print(f"Learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")

    # Keep only the best checkpoint seen so far.
    if dice_score > best_dice_score:
        best_dice_score = dice_score
        torch.save(model.state_dict(), Config.MODEL_SAVE_PATH)
        print(f"✅ New best model saved with Dice Score: {best_dice_score:.4f}")

print("\n--- Training Complete ---")
print(f"🏆 Final best model saved to {Config.MODEL_SAVE_PATH} with a Dice Score of {best_dice_score:.4f}")

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet34
from PIL import Image
import numpy as np
import glob
import os
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import ReduceLROnPlateau
import cv2

# --- Configuration ---
class Config:
    """Static settings for the two-stage ResNet34 U-Net training run."""

    # Use the GPU when one is visible to PyTorch, otherwise the CPU.
    if torch.cuda.is_available():
        DEVICE = "cuda"
    else:
        DEVICE = "cpu"

    BASE_PATH = '/kaggle/input/d/balraj98/massachusetts-buildings-dataset/png'

    # Size every image and mask is resized to before batching.
    IMG_HEIGHT = 256
    IMG_WIDTH = 256
    BATCH_SIZE = 16

    # Stage 1 (decoder only). Fewer epochs needed for the powerful ResNet.
    STAGE_1_EPOCHS = 10
    STAGE_1_LR = 1e-3

    # Stage 2 (whole model).
    STAGE_2_EPOCHS = 30
    STAGE_2_LR = 1e-5

    # Destination of the best checkpoint.
    MODEL_SAVE_PATH = "/kaggle/working/resnet34_unet_buildings.pt"

# --- Corrected Dataset Class (Unchanged) ---
class BuildingDataset(Dataset):
    """Paired image / binary-mask dataset read from two folders of PNG files.

    Images and masks are paired by position after sorting the ``*.png``
    paths of each folder, so both folders must hold the same number of
    files.

    Args:
        image_dir: Folder containing the image PNGs.
        mask_dir: Folder containing the mask PNGs.
        transform: Optional callable invoked as
            ``transform(image=..., mask=...)`` and returning a mapping with
            ``"image"`` and ``"mask"`` entries.

    Raises:
        ValueError: If the two folders contain different numbers of PNGs.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_paths = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        self.mask_paths = sorted(glob.glob(os.path.join(mask_dir, '*.png')))
        # Pairing is purely positional, so a count mismatch would silently
        # attach masks to the wrong images (or fail with IndexError later).
        if len(self.image_paths) != len(self.mask_paths):
            raise ValueError(
                f"Found {len(self.image_paths)} images in '{image_dir}' but "
                f"{len(self.mask_paths)} masks in '{mask_dir}'."
            )
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx``.

        The mask is binarised (any non-zero pixel becomes 1.0), stored as
        float32 and returned as a tensor with a leading channel axis. The
        image is returned as produced by ``transform``; without a transform
        it is the raw RGB numpy array.

        Raises:
            FileNotFoundError: If the mask file cannot be read.
        """
        img_path, mask_path = self.image_paths[idx], self.mask_paths[idx]

        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))

        # cv2.imread reports an unreadable file by returning None.
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask file: {mask_path}")
        _, mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY)
        mask = mask.astype(np.float32) / 255.0

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented["image"], augmented["mask"]

        # Without a tensor-producing transform the mask is still a numpy
        # array, which has no ``unsqueeze``; convert it first.
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(mask)
        return image, mask.unsqueeze(0)

# --- NEW: ResNet34 U-Net Architecture ---
class ResNet_UNet(nn.Module):
    """U-Net-style decoder on top of torchvision's pretrained ResNet-34.

    The backbone's child modules are kept in the plain list
    ``self.base_layers``; slices of that list form the five encoder stages.
    The network outputs ``n_classes`` channels of raw logits.

    Args:
        n_classes: Number of output channels of the final 1x1 convolution.
    """

    def __init__(self, n_classes=1):
        super().__init__()

        # Encoder
        backbone = resnet34(weights='IMAGENET1K_V1')
        self.base_layers = list(backbone.children())
        children = self.base_layers
        self.layer0 = nn.Sequential(*children[:3])   # size=(N, 64, x/2, y/2)
        self.layer1 = nn.Sequential(*children[3:5])  # size=(N, 64, x/4, y/4)
        self.layer2 = children[5]                    # size=(N, 128, x/8, y/8)
        self.layer3 = children[6]                    # size=(N, 256, x/16, y/16)
        self.layer4 = children[7]                    # size=(N, 512, x/32, y/32)

        # Decoder: each stage upsamples by 2, is concatenated with the
        # matching encoder output, and is fused by a 3x3 convolution + ReLU.
        self.up1 = nn.ConvTranspose2d(512, 256, 2, 2)
        self.conv1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
        )
        self.up2 = nn.ConvTranspose2d(256, 128, 2, 2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
        )
        self.up3 = nn.ConvTranspose2d(128, 64, 2, 2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
        )
        self.up4 = nn.ConvTranspose2d(64, 64, 2, 2)
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
        )

        # Output head.
        self.final_up = nn.ConvTranspose2d(64, 32, 2, 2)
        self.final_conv = nn.Conv2d(32, n_classes, 1)

    def forward(self, x):
        """Return ``n_classes``-channel logits for the image batch ``x``."""
        # Encoder pass, keeping every stage output for the skip connections.
        enc0 = self.layer0(x)
        enc1 = self.layer1(enc0)
        enc2 = self.layer2(enc1)
        enc3 = self.layer3(enc2)
        out = self.layer4(enc3)

        # Decoder pass, deepest skip first.
        stages = (
            (self.up1, self.conv1, enc3),
            (self.up2, self.conv2, enc2),
            (self.up3, self.conv3, enc1),
            (self.up4, self.conv4, enc0),
        )
        for upsample, fuse, skip in stages:
            out = fuse(torch.cat([upsample(out), skip], dim=1))

        return self.final_conv(self.final_up(out))

# --- Loss Function and Training/Eval Functions (Unchanged) ---
class DiceBCELoss(nn.Module):
    """Binary cross-entropy plus soft Dice loss, computed from raw logits."""

    def __init__(self):
        super().__init__()

    def forward(self, i, t, s=1):
        """Return ``BCE + (1 - Dice)`` over all elements of the batch.

        Args:
            i: Raw model outputs (logits), any shape.
            t: Float ground truth with the same number of elements.
            s: Smoothing constant added to the Dice numerator and
                denominator so the ratio stays defined when both tensors
                are all zero.

        Returns:
            Scalar tensor holding the combined loss.
        """
        # reshape (not view) so non-contiguous tensors are accepted too.
        logits, t = i.reshape(-1), t.reshape(-1)

        # BCE straight from the logits: applying sigmoid first saturates to
        # exactly 0/1 for large |logit|, which clamps the loss and zeroes
        # the gradient.
        bce = nn.functional.binary_cross_entropy_with_logits(logits, t, reduction='mean')

        probs = torch.sigmoid(logits)
        intersection = (probs * t).sum()
        dice_loss = 1 - (2. * intersection + s) / (probs.sum() + t.sum() + s)
        return bce + dice_loss
def train_fn(ldr, mdl, opt, l_fn):
    """Train ``mdl`` for one pass over ``ldr``.

    Batches are moved to ``Config.DEVICE``; each one produces a single step
    of ``opt`` on the loss returned by ``l_fn``, which is also displayed on
    the tqdm progress bar. Nothing is returned.
    """
    progress = tqdm(ldr, leave=True)
    for inputs, labels in progress:
        inputs = inputs.to(Config.DEVICE)
        labels = labels.to(Config.DEVICE)

        batch_loss = l_fn(mdl(inputs), labels)

        opt.zero_grad()
        batch_loss.backward()
        opt.step()

        progress.set_postfix(loss=batch_loss.item())
def eval_fn(ldr, mdl, dev):
    """Return the mean per-batch Dice score of ``mdl`` over ``ldr``.

    Predictions are ``sigmoid(mdl(x)) > 0.5``. Dice is computed over all
    elements of each batch and the batch scores are then averaged.

    Args:
        ldr: Iterable yielding ``(inputs, targets)`` tensor pairs.
        mdl: Module producing logits shaped like ``targets``.
        dev: Device the batches are moved to before the forward pass.

    Returns:
        float: Average Dice score, or 0.0 when ``ldr`` yields no batches.
    """
    # Remember the mode so evaluation does not silently switch a model that
    # was in eval mode back to training mode.
    was_training = mdl.training
    mdl.eval()

    total = 0.0
    num_batches = 0
    try:
        with torch.no_grad():
            for x, y in ldr:
                x, y = x.to(dev), y.to(dev)
                p = (torch.sigmoid(mdl(x)) > 0.5).float()
                batch_dice = (2 * (p * y).sum()) / ((p + y).sum() + 1e-8)
                # Accumulate a Python float so no device tensor is kept.
                total += batch_dice.item()
                num_batches += 1
    finally:
        mdl.train(was_training)

    # An empty loader has no score; avoid dividing by zero.
    return total / num_batches if num_batches else 0.0

# --- Main Execution ---
print(f"--- Using device: {Config.DEVICE} ---")

# Dataset folders.
train_img_dir = os.path.join(Config.BASE_PATH, 'train')
train_mask_dir = os.path.join(Config.BASE_PATH, 'train_labels')
val_img_dir = os.path.join(Config.BASE_PATH, 'val')
val_mask_dir = os.path.join(Config.BASE_PATH, 'val_labels')

# Augmentation pipelines (validation has no random augmentation).
train_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Rotate(35),
    A.HorizontalFlip(),
    A.VerticalFlip(p=0.1),
    A.Normalize(0, 1),
    ToTensorV2(),
])
val_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Normalize(0, 1),
    ToTensorV2(),
])

# Datasets and loaders.
train_dataset = BuildingDataset(train_img_dir, train_mask_dir, transform=train_transform)
val_dataset = BuildingDataset(val_img_dir, val_mask_dir, transform=val_transform)
train_loader = DataLoader(train_dataset, Config.BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, Config.BATCH_SIZE, shuffle=False, num_workers=2)

model = ResNet_UNet().to(Config.DEVICE)
loss_fn = DiceBCELoss()

# --- STAGE 1: TRAIN DECODER ---
print("--- Stage 1: Training the Decoder ---")
# model.base_layers is a list of modules, not parameters. Assigning
# `requires_grad` on a module only creates an unused attribute, so the flag
# must be set on each module's parameters to actually freeze the encoder.
for layer in model.base_layers:
    for param in layer.parameters():
        param.requires_grad = False
optimizer = torch.optim.Adam(model.parameters(), lr=Config.STAGE_1_LR)
for epoch in range(Config.STAGE_1_EPOCHS):
    print(f"S1 - E{epoch+1}")
    train_fn(train_loader, model, optimizer, loss_fn)

# --- STAGE 2: FINE-TUNE ENTIRE MODEL ---
print("\n--- Stage 2: Fine-Tuning the Entire Model ---")
# Unfreeze the encoder and restart the optimizer at the fine-tuning rate.
for layer in model.base_layers:
    for param in layer.parameters():
        param.requires_grad = True
optimizer = torch.optim.Adam(model.parameters(), lr=Config.STAGE_2_LR)
# 'max' mode: the scheduler is stepped with the validation Dice score. The
# `verbose` flag is deprecated since PyTorch 2.2, so learning-rate changes
# are logged explicitly below.
scheduler = ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.1)
best_dice_score = -1.0
for epoch in range(Config.STAGE_2_EPOCHS):
    print(f"S2 - E{epoch+1}")
    train_fn(train_loader, model, optimizer, loss_fn)

    # Validation metric as a plain float (eval_fn may return a 0-dim tensor).
    dice_score = float(eval_fn(val_loader, model, Config.DEVICE))
    print(f"Val Dice Score: {dice_score:.4f}")

    lr_before = optimizer.param_groups[0]['lr']
    scheduler.step(dice_score)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after != lr_before:
        print(f"Learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")

    # Keep only the best checkpoint seen so far.
    if dice_score > best_dice_score:
        best_dice_score = dice_score
        torch.save(model.state_dict(), Config.MODEL_SAVE_PATH)
        print(f"✅ New best model saved: {best_dice_score:.4f}")

print(f"\n🏆 Final best model saved: {best_dice_score:.4f}")

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import glob
import os
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import ReduceLROnPlateau
import cv2

# --- Configuration ---
class Config:
    """Static settings for the HRNet training run."""

    # Use the GPU when one is visible to PyTorch, otherwise the CPU.
    if torch.cuda.is_available():
        DEVICE = "cuda"
    else:
        DEVICE = "cpu"

    BASE_PATH = '/kaggle/input/d/balraj98/massachusetts-buildings-dataset/png'

    # Size every image and mask is resized to before batching.
    IMG_HEIGHT = 256
    IMG_WIDTH = 256

    # HRNet uses more memory, so we reduce the batch size
    BATCH_SIZE = 8
    EPOCHS = 40
    LEARNING_RATE = 1e-4

    # Destination of the best checkpoint.
    MODEL_SAVE_PATH = "/kaggle/working/hrnet_buildings_final.pt"

# --- Dataset Class (Unchanged) ---
class BuildingDataset(Dataset):
    """Paired image / binary-mask dataset read from two folders of PNG files.

    Images and masks are paired by position after sorting the ``*.png``
    paths of each folder, so both folders must hold the same number of
    files.

    Args:
        image_dir: Folder containing the image PNGs.
        mask_dir: Folder containing the mask PNGs.
        transform: Optional callable invoked as
            ``transform(image=..., mask=...)`` and returning a mapping with
            ``"image"`` and ``"mask"`` entries.

    Raises:
        ValueError: If the two folders contain different numbers of PNGs.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_paths = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        self.mask_paths = sorted(glob.glob(os.path.join(mask_dir, '*.png')))
        # Pairing is purely positional, so a count mismatch would silently
        # attach masks to the wrong images (or fail with IndexError later).
        if len(self.image_paths) != len(self.mask_paths):
            raise ValueError(
                f"Found {len(self.image_paths)} images in '{image_dir}' but "
                f"{len(self.mask_paths)} masks in '{mask_dir}'."
            )
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx``.

        The mask is binarised (any non-zero pixel becomes 1.0), stored as
        float32 and returned as a tensor with a leading channel axis. The
        image is returned as produced by ``transform``; without a transform
        it is the raw RGB numpy array.

        Raises:
            FileNotFoundError: If the mask file cannot be read.
        """
        img_path, mask_path = self.image_paths[idx], self.mask_paths[idx]

        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))

        # cv2.imread reports an unreadable file by returning None.
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask file: {mask_path}")
        _, mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY)
        mask = mask.astype(np.float32) / 255.0

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented["image"], augmented["mask"]

        # Without a tensor-producing transform the mask is still a numpy
        # array, which has no ``unsqueeze``; convert it first.
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(mask)
        return image, mask.unsqueeze(0)

# --- NEW: HRNet Architecture ---
class BasicBlock(nn.Module):
    """Residual block of two 3x3 conv + batch-norm layers with a shortcut.

    Args:
        inplanes: Number of input channels.
        planes: Number of output channels.
        stride: Stride of the first convolution and of the projection
            shortcut. The second convolution always uses stride 1 so the
            main path and the shortcut produce the same spatial size.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        # Stride 1 here: striding again would shrink the main path by
        # stride**2 while the shortcut shrinks by stride, so the residual
        # addition would fail for any stride != 1.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # A 1x1 projection is needed whenever the identity would not match
        # the main path in resolution or channel count.
        if stride != 1 or inplanes != planes:
            self.downsample = nn.Sequential(
                nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes),
            )
        else:
            self.downsample = None

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        identity = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return self.relu(out)

class HRNet(nn.Module):
    """Small two-branch, HRNet-style segmentation network.

    A stem of two stride-2 convolutions feeds a stack of four residual
    blocks. The result is split into a 32-channel branch and a 64-channel
    branch that is downsampled once more. The second branch is projected,
    upsampled and added to the first, and the sum is resized to the input
    resolution before two 1x1 convolutions produce the logits.

    Args:
        n_classes: Number of output channels.
    """

    def __init__(self, n_classes=1):
        super().__init__()
        # Stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # Stage 1
        self.layer1 = self._make_layer(BasicBlock, 64, 64, 4)

        # Transition 1: the first branch keeps the resolution, the second
        # halves it with a stride-2 convolution.
        self.transition1_1 = nn.Sequential(nn.Conv2d(64, 32, 3, 1, 1, bias=False), nn.BatchNorm2d(32), nn.ReLU(True))
        self.transition1_2 = nn.Sequential(nn.Conv2d(64, 64, 3, 2, 1, bias=False), nn.BatchNorm2d(64), nn.ReLU(True))

        # Stage 2
        self.stage2_1 = nn.Sequential(BasicBlock(32, 32), BasicBlock(32, 32))
        self.stage2_2 = nn.Sequential(BasicBlock(64, 64), BasicBlock(64, 64))

        # Fusion 1: project the second branch to 32 channels and upsample x2.
        self.fuse1_1 = nn.Sequential(nn.Conv2d(64, 32, 1, 1, 0, bias=False), nn.BatchNorm2d(32), nn.ReLU(True), nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))

        # Final Layers
        self.final_conv = nn.Conv2d(32, 32, kernel_size=1)
        self.out = nn.Conv2d(32, n_classes, kernel_size=1)

    def _make_layer(self, block, inplanes, planes, blocks, stride=1):
        """Stack ``blocks`` instances of ``block``; only the first gets ``stride``."""
        layers = [block(inplanes, planes, stride)]
        for _ in range(1, blocks):
            layers.append(block(planes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return logits with the same height and width as ``x``."""
        input_size = tuple(x.shape[-2:])

        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.layer1(x)

        x1 = self.transition1_1(x)
        x2 = self.transition1_2(x)

        x1 = self.stage2_1(x1)
        x2 = self.stage2_2(x2)

        x2_fused = self.fuse1_1(x2)
        # The fixed x2 upsample overshoots when the first branch has an odd
        # size; resize to the first branch so the addition always works.
        if x2_fused.shape[-2:] != x1.shape[-2:]:
            x2_fused = nn.functional.interpolate(x2_fused, size=tuple(x1.shape[-2:]), mode='bilinear', align_corners=True)
        x = x1 + x2_fused

        # Resize to the exact input size rather than by a fixed factor of 4,
        # so inputs whose sides are not multiples of 4 still get logits of
        # matching size. Multiples of 4 resolve to the same output size.
        x = nn.functional.interpolate(x, size=input_size, mode='bilinear', align_corners=True)
        x = self.final_conv(x)
        return self.out(x)

# --- Loss Function and Training/Eval Functions (Unchanged) ---
class DiceBCELoss(nn.Module):
    """Binary cross-entropy plus soft Dice loss, computed from raw logits."""

    def __init__(self):
        super().__init__()

    def forward(self, i, t, s=1):
        """Return ``BCE + (1 - Dice)`` over all elements of the batch.

        Args:
            i: Raw model outputs (logits), any shape.
            t: Float ground truth with the same number of elements.
            s: Smoothing constant added to the Dice numerator and
                denominator so the ratio stays defined when both tensors
                are all zero.

        Returns:
            Scalar tensor holding the combined loss.
        """
        # reshape (not view) so non-contiguous tensors are accepted too.
        logits, t = i.reshape(-1), t.reshape(-1)

        # BCE straight from the logits: applying sigmoid first saturates to
        # exactly 0/1 for large |logit|, which clamps the loss and zeroes
        # the gradient.
        bce = nn.functional.binary_cross_entropy_with_logits(logits, t, reduction='mean')

        probs = torch.sigmoid(logits)
        intersection = (probs * t).sum()
        dice_loss = 1 - (2. * intersection + s) / (probs.sum() + t.sum() + s)
        return bce + dice_loss
def train_fn(ldr, mdl, opt, l_fn):
    """Train ``mdl`` for one pass over ``ldr``.

    Batches are moved to ``Config.DEVICE``; each one produces a single step
    of ``opt`` on the loss returned by ``l_fn``, which is also displayed on
    the tqdm progress bar. Nothing is returned.
    """
    progress = tqdm(ldr, leave=True)
    for inputs, labels in progress:
        inputs = inputs.to(Config.DEVICE)
        labels = labels.to(Config.DEVICE)

        outputs = mdl(inputs)
        batch_loss = l_fn(outputs, labels)

        opt.zero_grad()
        batch_loss.backward()
        opt.step()

        progress.set_postfix(loss=batch_loss.item())
def eval_fn(ldr, mdl, dev):
    """Return the mean per-batch Dice score of ``mdl`` over ``ldr``.

    Predictions are sigmoid outputs thresholded at 0.5. The model is put in
    eval mode for the pass and switched back to train mode before returning.
    """
    mdl.eval()
    running_dice = 0
    with torch.no_grad():
        for images, masks in ldr:
            images = images.to(dev)
            masks = masks.to(dev)
            hard_pred = (torch.sigmoid(mdl(images)) > 0.5).float()
            overlap = (hard_pred * masks).sum()
            total = (hard_pred + masks).sum()
            # 1e-8 keeps the ratio finite when both prediction and mask are empty.
            running_dice += (2 * overlap) / (total + 1e-8)
    mdl.train()
    return running_dice / len(ldr)

# --- Main Execution ---
print(f"--- Using device: {Config.DEVICE} ---")

# Image and label folders for the training and validation splits.
train_img_dir = os.path.join(Config.BASE_PATH, 'train')
train_mask_dir = os.path.join(Config.BASE_PATH, 'train_labels')
val_img_dir = os.path.join(Config.BASE_PATH, 'val')
val_mask_dir = os.path.join(Config.BASE_PATH, 'val_labels')

# Training samples are resized, randomly rotated/flipped, scaled and converted
# to tensors; validation samples are only resized, scaled and converted.
train_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Rotate(35),
    A.HorizontalFlip(),
    A.VerticalFlip(p=0.1),
    A.Normalize(0, 1),
    ToTensorV2(),
])
val_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Normalize(0, 1),
    ToTensorV2(),
])

train_dataset = BuildingDataset(train_img_dir, train_mask_dir, transform=train_transform)
val_dataset = BuildingDataset(val_img_dir, val_mask_dir, transform=val_transform)
train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False, num_workers=2)

model = HRNet().to(Config.DEVICE)
loss_fn = DiceBCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=Config.LEARNING_RATE)
# 'max' mode: the learning rate is reduced when the validation Dice stops rising.
scheduler = ReduceLROnPlateau(optimizer, mode='max', patience=3, factor=0.1, verbose=True)

best_dice_score = -1.0
for epoch in range(Config.EPOCHS):
    print(f"Epoch {epoch+1}/{Config.EPOCHS}")
    train_fn(train_loader, model, optimizer, loss_fn)
    dice_score = eval_fn(val_loader, model, Config.DEVICE)
    print(f"Val Dice Score: {dice_score:.4f}")
    scheduler.step(dice_score)
    # Checkpoint only when the validation score improves.
    if dice_score > best_dice_score:
        best_dice_score = dice_score
        torch.save(model.state_dict(), Config.MODEL_SAVE_PATH)
        print(f"✅ New best model saved: {best_dice_score:.4f}")

print(f"\n🏆 Final best model saved: {best_dice_score:.4f}")

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet34
from PIL import Image
import numpy as np
import glob
import os
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split

# --- Configuration ---
class Config:
    """Settings for the two-stage ResNet34 U-Net run on the Inria dataset."""

    # Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    BASE_PATH = '/kaggle/input/inria-aerial-image-labeling-dataset/AerialImageDataset'
    # Every sample is resized to this square resolution.
    IMG_HEIGHT = IMG_WIDTH = 256
    BATCH_SIZE = 16
    # Stage 1 (decoder training) schedule.
    STAGE_1_EPOCHS = 10
    STAGE_1_LR = 1e-3
    # Stage 2 (whole-model fine-tuning) schedule.
    STAGE_2_EPOCHS = 30
    STAGE_2_LR = 1e-5
    MODEL_SAVE_PATH = "/kaggle/working/inria_resnet34_unet_final.pt"

# --- Dataset Class for Inria ---
class InriaDataset(Dataset):
    """Image/mask pairs read from two parallel lists of file paths.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths; entry ``k`` is used as the
            mask for ``image_paths[k]``.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``"image"`` and ``"mask"`` entries.
    """

    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx``; the mask gets a leading axis."""
        # Context managers release the file handles as soon as pixels are copied.
        with Image.open(self.image_paths[idx]) as img:
            image = np.array(img.convert("RGB"))
        with Image.open(self.mask_paths[idx]) as msk:
            mask = np.array(msk.convert("L"))
        # Any non-zero mask pixel counts as foreground.
        mask = (mask > 0).astype(np.float32)
        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented["image"], augmented["mask"]
        # Without a tensor-producing transform the mask is still a NumPy array,
        # which has no .unsqueeze(); convert it so the call below always works.
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(np.ascontiguousarray(mask))
        return image, mask.unsqueeze(0)

# --- ResNet34 U-Net Architecture ---
class ResNet_UNet(nn.Module):
    """U-Net style segmentation network on top of a pretrained ResNet34.

    The encoder reuses the ResNet34 children; the decoder upsamples with
    transposed convolutions and concatenates the matching encoder feature map
    before each 3x3 convolution.
    """

    def __init__(self, n_classes=1):
        super().__init__()
        backbone = resnet34(weights='IMAGENET1K_V1')
        # Plain Python list of the backbone's child modules; the training
        # script iterates over it.
        self.base_layers = list(backbone.children())

        # Encoder stages, sliced out of the backbone children by position.
        self.layer0 = nn.Sequential(*self.base_layers[:3])
        self.layer1 = nn.Sequential(*self.base_layers[3:5])
        self.layer2 = self.base_layers[5]
        self.layer3 = self.base_layers[6]
        self.layer4 = self.base_layers[7]

        # Decoder: each step doubles the resolution, then fuses the skip input.
        self.up1 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.conv1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up3 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up4 = nn.ConvTranspose2d(64, 64, kernel_size=2, stride=2)
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.final_up = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
        self.final_conv = nn.Conv2d(32, n_classes, kernel_size=1)

    def forward(self, x):
        """Return the raw (pre-sigmoid) output of ``final_conv`` for ``x``."""
        skip0 = self.layer0(x)
        skip1 = self.layer1(skip0)
        skip2 = self.layer2(skip1)
        skip3 = self.layer3(skip2)
        features = self.layer4(skip3)

        # Walk back up, pairing every upsampling step with its skip connection.
        decoder_steps = (
            (self.up1, self.conv1, skip3),
            (self.up2, self.conv2, skip2),
            (self.up3, self.conv3, skip1),
            (self.up4, self.conv4, skip0),
        )
        for upsample, fuse, skip in decoder_steps:
            features = fuse(torch.cat([upsample(features), skip], 1))

        return self.final_conv(self.final_up(features))

# --- Loss, Train, Eval Functions ---
class DiceBCELoss(nn.Module):
    """Binary cross-entropy plus soft Dice loss, both computed from raw logits."""

    def __init__(self):
        super().__init__()

    def forward(self, i, t, s=1):
        """Return ``BCE + (1 - Dice)`` over all elements of the batch.

        Args:
            i: Raw (pre-sigmoid) model outputs.
            t: Float targets with the same number of elements as ``i``.
            s: Smoothing constant added to the Dice numerator and denominator.
        """
        # reshape() also accepts non-contiguous tensors, where view(-1) raises.
        logits = i.reshape(-1)
        targets = t.reshape(-1)
        probs = torch.sigmoid(logits)

        intersection = (probs * targets).sum()
        dice_loss = 1 - (2.0 * intersection + s) / (probs.sum() + targets.sum() + s)

        # The logits form avoids log(0) when the sigmoid saturates, so confident
        # wrong predictions keep an exact loss value and a usable gradient.
        bce = nn.functional.binary_cross_entropy_with_logits(
            logits, targets, reduction='mean'
        )
        return bce + dice_loss
def train_fn(ldr, mdl, opt, l_fn):
    """Run one pass over ``ldr``, taking an optimizer step on every batch.

    Batches are moved to ``Config.DEVICE`` and the current batch loss is shown
    on the tqdm progress bar. Nothing is returned.
    """
    progress = tqdm(ldr, leave=True)
    for inputs, labels in progress:
        inputs = inputs.to(Config.DEVICE)
        labels = labels.to(Config.DEVICE)
        batch_loss = l_fn(mdl(inputs), labels)
        opt.zero_grad()
        batch_loss.backward()
        opt.step()
        progress.set_postfix(loss=batch_loss.item())
def eval_fn(ldr, mdl, dev):
    """Return the mean per-batch Dice score of ``mdl`` over ``ldr``.

    Predictions are sigmoid outputs thresholded at 0.5. The model is put in
    eval mode for the pass and switched back to train mode before returning.
    """
    mdl.eval()
    running_dice = 0
    with torch.no_grad():
        for images, masks in ldr:
            images = images.to(dev)
            masks = masks.to(dev)
            hard_pred = (torch.sigmoid(mdl(images)) > 0.5).float()
            overlap = (hard_pred * masks).sum()
            total = (hard_pred + masks).sum()
            # 1e-8 keeps the ratio finite when both prediction and mask are empty.
            running_dice += (2 * overlap) / (total + 1e-8)
    mdl.train()
    return running_dice / len(ldr)

# --- Main Execution ---
print(f"--- Using device: {Config.DEVICE} ---")

# --- Hold out part of the training tiles for validation ---
train_img_dir = os.path.join(Config.BASE_PATH, 'train/images')
train_mask_dir = os.path.join(Config.BASE_PATH, 'train/gt')
# Both listings are sorted so that entry k of one list is paired with entry k
# of the other (assumes images and masks share file names — verify on disk).
all_train_images = sorted(glob.glob(os.path.join(train_img_dir, '*.tif')))
all_train_masks = sorted(glob.glob(os.path.join(train_mask_dir, '*.tif')))

# 80/20 split with a fixed seed; splitting both lists in one call keeps the
# image/mask pairs aligned.
train_images, val_images, train_masks, val_masks = train_test_split(
    all_train_images,
    all_train_masks,
    test_size=0.2,
    random_state=42,
)
print(f"Data split: {len(train_images)} training images, {len(val_images)} validation images.")

# Augmentation is applied to the training split only.
train_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Rotate(35),
    A.HorizontalFlip(),
    A.Normalize(0, 1),
    ToTensorV2(),
])
val_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Normalize(0, 1),
    ToTensorV2(),
])

train_dataset = InriaDataset(train_images, train_masks, transform=train_transform)
val_dataset = InriaDataset(val_images, val_masks, transform=val_transform)
train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False, num_workers=2)

model = ResNet_UNet().to(Config.DEVICE)
loss_fn = DiceBCELoss()

# --- STAGE 1 ---
print("--- Stage 1: Training the Decoder ---")
# Freeze the pretrained encoder. `base_layers` holds nn.Module objects, so
# assigning `.requires_grad` on them only creates an unused attribute;
# Module.requires_grad_() is what flips the flag on every parameter.
# NOTE(review): the model stays in train mode, so encoder BatchNorm running
# statistics presumably still update during this stage — confirm if that matters.
for layer in model.base_layers:
    layer.requires_grad_(False)
optimizer = torch.optim.Adam(model.parameters(), lr=Config.STAGE_1_LR)
for epoch in range(Config.STAGE_1_EPOCHS):
    print(f"S1 - E{epoch+1}")
    train_fn(train_loader, model, optimizer, loss_fn)

# --- STAGE 2 ---
print("\n--- Stage 2: Fine-Tuning the Entire Model ---")
# Unfreeze the encoder so every parameter is fine-tuned.
for layer in model.base_layers:
    layer.requires_grad_(True)
# A fresh optimizer is created for stage 2 with the stage-2 learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=Config.STAGE_2_LR)
# 'max' mode: the learning rate is reduced when the validation Dice stops rising.
scheduler = ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.1, verbose=True)
best_dice_score = -1.0
for epoch in range(Config.STAGE_2_EPOCHS):
    print(f"S2 - E{epoch+1}")
    train_fn(train_loader, model, optimizer, loss_fn)
    dice_score = eval_fn(val_loader, model, Config.DEVICE)
    print(f"Val Dice Score: {dice_score:.4f}")
    scheduler.step(dice_score)
    # Checkpoint only when the validation score improves.
    if dice_score > best_dice_score:
        best_dice_score = dice_score
        torch.save(model.state_dict(), Config.MODEL_SAVE_PATH)
        print(f"✅ New best model saved: {best_dice_score:.4f}")

print(f"\n🏆 Final best model saved: {best_dice_score:.4f}")

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet34
from PIL import Image
import numpy as np
import glob
import os
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split

# --- FINAL TUNED Configuration ---
class Config:
    """Settings for the longer two-stage ResNet34 U-Net run on the Inria dataset."""

    # Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    BASE_PATH = '/kaggle/input/inria-aerial-image-labeling-dataset/AerialImageDataset'
    # Every sample is resized to this square resolution.
    IMG_HEIGHT = IMG_WIDTH = 256
    BATCH_SIZE = 16

    # --- TUNED PARAMETERS ---
    # Longer schedules so the model has time to converge.
    STAGE_1_EPOCHS = 15
    STAGE_1_LR = 5e-4  # Slightly lower initial LR for the decoder
    STAGE_2_EPOCHS = 70  # Many more epochs for fine-tuning
    STAGE_2_LR = 1e-4  # Higher fine-tuning LR to encourage more learning

    MODEL_SAVE_PATH = "/kaggle/working/inria_resnet34_unet_final_best.pt"

# --- Dataset Class (Unchanged) ---
class InriaDataset(Dataset):
    """Image/mask pairs read from two parallel lists of file paths.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths; entry ``k`` is used as the
            mask for ``image_paths[k]``.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``"image"`` and ``"mask"`` entries.
    """

    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx``; the mask gets a leading axis."""
        # Context managers release the file handles as soon as pixels are copied.
        with Image.open(self.image_paths[idx]) as img:
            image = np.array(img.convert("RGB"))
        with Image.open(self.mask_paths[idx]) as msk:
            mask = np.array(msk.convert("L"))
        # Any non-zero mask pixel counts as foreground.
        mask = (mask > 0).astype(np.float32)
        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented["image"], augmented["mask"]
        # Without a tensor-producing transform the mask is still a NumPy array,
        # which has no .unsqueeze(); convert it so the call below always works.
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(np.ascontiguousarray(mask))
        return image, mask.unsqueeze(0)

# --- ResNet34 U-Net Architecture (Unchanged) ---
class ResNet_UNet(nn.Module):
    """U-Net style segmentation network on top of a pretrained ResNet34.

    The encoder reuses the ResNet34 children; the decoder upsamples with
    transposed convolutions and concatenates the matching encoder feature map
    before each 3x3 convolution.
    """

    def __init__(self, n_classes=1):
        super().__init__()
        backbone = resnet34(weights='IMAGENET1K_V1')
        # Plain Python list of the backbone's child modules; the training
        # script iterates over it.
        self.base_layers = list(backbone.children())

        # Encoder stages, sliced out of the backbone children by position.
        self.layer0 = nn.Sequential(*self.base_layers[:3])
        self.layer1 = nn.Sequential(*self.base_layers[3:5])
        self.layer2 = self.base_layers[5]
        self.layer3 = self.base_layers[6]
        self.layer4 = self.base_layers[7]

        # Decoder: each step doubles the resolution, then fuses the skip input.
        self.up1 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.conv1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up3 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up4 = nn.ConvTranspose2d(64, 64, kernel_size=2, stride=2)
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.final_up = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
        self.final_conv = nn.Conv2d(32, n_classes, kernel_size=1)

    def forward(self, x):
        """Return the raw (pre-sigmoid) output of ``final_conv`` for ``x``."""
        skip0 = self.layer0(x)
        skip1 = self.layer1(skip0)
        skip2 = self.layer2(skip1)
        skip3 = self.layer3(skip2)
        features = self.layer4(skip3)

        # Walk back up, pairing every upsampling step with its skip connection.
        decoder_steps = (
            (self.up1, self.conv1, skip3),
            (self.up2, self.conv2, skip2),
            (self.up3, self.conv3, skip1),
            (self.up4, self.conv4, skip0),
        )
        for upsample, fuse, skip in decoder_steps:
            features = fuse(torch.cat([upsample(features), skip], 1))

        return self.final_conv(self.final_up(features))

# --- Loss, Train, Eval Functions (Unchanged) ---
class DiceBCELoss(nn.Module):
    """Binary cross-entropy plus soft Dice loss, both computed from raw logits."""

    def __init__(self):
        super().__init__()

    def forward(self, i, t, s=1):
        """Return ``BCE + (1 - Dice)`` over all elements of the batch.

        Args:
            i: Raw (pre-sigmoid) model outputs.
            t: Float targets with the same number of elements as ``i``.
            s: Smoothing constant added to the Dice numerator and denominator.
        """
        # reshape() also accepts non-contiguous tensors, where view(-1) raises.
        logits = i.reshape(-1)
        targets = t.reshape(-1)
        probs = torch.sigmoid(logits)

        intersection = (probs * targets).sum()
        dice_loss = 1 - (2.0 * intersection + s) / (probs.sum() + targets.sum() + s)

        # The logits form avoids log(0) when the sigmoid saturates, so confident
        # wrong predictions keep an exact loss value and a usable gradient.
        bce = nn.functional.binary_cross_entropy_with_logits(
            logits, targets, reduction='mean'
        )
        return bce + dice_loss
def train_fn(ldr, mdl, opt, l_fn):
    """Run one pass over ``ldr``, taking an optimizer step on every batch.

    Batches are moved to ``Config.DEVICE`` and the current batch loss is shown
    on the tqdm progress bar. Nothing is returned.
    """
    progress = tqdm(ldr, leave=True)
    for inputs, labels in progress:
        inputs = inputs.to(Config.DEVICE)
        labels = labels.to(Config.DEVICE)
        batch_loss = l_fn(mdl(inputs), labels)
        opt.zero_grad()
        batch_loss.backward()
        opt.step()
        progress.set_postfix(loss=batch_loss.item())
def eval_fn(ldr, mdl, dev):
    """Return the mean per-batch Dice score of ``mdl`` over ``ldr``.

    Predictions are sigmoid outputs thresholded at 0.5. The model is put in
    eval mode for the pass and switched back to train mode before returning.
    """
    mdl.eval()
    running_dice = 0
    with torch.no_grad():
        for images, masks in ldr:
            images = images.to(dev)
            masks = masks.to(dev)
            hard_pred = (torch.sigmoid(mdl(images)) > 0.5).float()
            overlap = (hard_pred * masks).sum()
            total = (hard_pred + masks).sum()
            # 1e-8 keeps the ratio finite when both prediction and mask are empty.
            running_dice += (2 * overlap) / (total + 1e-8)
    mdl.train()
    return running_dice / len(ldr)

# --- Main Execution ---
print(f"--- Using device: {Config.DEVICE} ---")

train_img_dir = os.path.join(Config.BASE_PATH, 'train/images')
train_mask_dir = os.path.join(Config.BASE_PATH, 'train/gt')
# Both listings are sorted so that entry k of one list is paired with entry k
# of the other (assumes images and masks share file names — verify on disk).
all_train_images = sorted(glob.glob(os.path.join(train_img_dir, '*.tif')))
all_train_masks = sorted(glob.glob(os.path.join(train_mask_dir, '*.tif')))
# 80/20 split with a fixed seed; splitting both lists in one call keeps the
# image/mask pairs aligned.
train_images, val_images, train_masks, val_masks = train_test_split(
    all_train_images,
    all_train_masks,
    test_size=0.2,
    random_state=42,
)
print(f"Data split: {len(train_images)} training, {len(val_images)} validation.")

# --- Richer Augmentations ---
# Training adds occasional vertical flips and colour jitter on top of the
# rotation and horizontal flip; validation is only resized and scaled.
train_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Rotate(35),
    A.HorizontalFlip(),
    A.VerticalFlip(p=0.1),
    A.ColorJitter(p=0.2),
    A.Normalize(0, 1),
    ToTensorV2(),
])
val_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Normalize(0, 1),
    ToTensorV2(),
])
train_dataset = InriaDataset(train_images, train_masks, transform=train_transform)
val_dataset = InriaDataset(val_images, val_masks, transform=val_transform)
train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False, num_workers=2)

model = ResNet_UNet().to(Config.DEVICE)
loss_fn = DiceBCELoss()

# --- STAGE 1 ---
print("--- Stage 1: Training the Decoder ---")
# Freeze the pretrained encoder. `base_layers` holds nn.Module objects, so
# assigning `.requires_grad` on them only creates an unused attribute;
# Module.requires_grad_() is what flips the flag on every parameter.
# NOTE(review): the model stays in train mode, so encoder BatchNorm running
# statistics presumably still update during this stage — confirm if that matters.
for layer in model.base_layers:
    layer.requires_grad_(False)
optimizer = torch.optim.Adam(model.parameters(), lr=Config.STAGE_1_LR)
for epoch in range(Config.STAGE_1_EPOCHS):
    print(f"S1 - E{epoch+1}")
    train_fn(train_loader, model, optimizer, loss_fn)

# --- STAGE 2 ---
print("\n--- Stage 2: Fine-Tuning the Entire Model ---")
# Unfreeze the encoder so every parameter is fine-tuned.
for layer in model.base_layers:
    layer.requires_grad_(True)
# A fresh optimizer is created for stage 2 with the stage-2 learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=Config.STAGE_2_LR)
# More patient scheduler: waits 5 epochs without improvement, then scales the LR by 0.2.
scheduler = ReduceLROnPlateau(optimizer, 'max', patience=5, factor=0.2, verbose=True)
best_dice_score = -1.0
for epoch in range(Config.STAGE_2_EPOCHS):
    print(f"S2 - E{epoch+1}")
    train_fn(train_loader, model, optimizer, loss_fn)
    dice_score = eval_fn(val_loader, model, Config.DEVICE)
    print(f"Val Dice Score: {dice_score:.4f}")
    scheduler.step(dice_score)
    # Checkpoint only when the validation score improves.
    if dice_score > best_dice_score:
        best_dice_score = dice_score
        torch.save(model.state_dict(), Config.MODEL_SAVE_PATH)
        print(f"✅ New best model saved: {best_dice_score:.4f}")

print(f"\n🏆 Final best model saved: {best_dice_score:.4f}")

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet34
from PIL import Image
import numpy as np
import glob
import os
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split

# --- FINAL TUNED Configuration ---
class Config:
    """Settings for the AdamW-based two-stage ResNet34 U-Net run on the Inria dataset."""

    # Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    BASE_PATH = '/kaggle/input/inria-aerial-image-labeling-dataset/AerialImageDataset'
    # Every sample is resized to this square resolution.
    IMG_HEIGHT = IMG_WIDTH = 256
    BATCH_SIZE = 16

    # --- TUNED PARAMETERS FOR MAX ACCURACY ---
    STAGE_1_EPOCHS = 15
    STAGE_1_LR = 3e-4
    STAGE_2_EPOCHS = 60  # A long fine-tuning stage
    STAGE_2_LR = 2e-5  # A very small learning rate for careful fine-tuning

    MODEL_SAVE_PATH = "/kaggle/working/inria_resnet34_unet_BEST.pt"

# --- Dataset Class (Unchanged) ---
class InriaDataset(Dataset):
    """Image/mask pairs read from two parallel lists of file paths.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths; entry ``k`` is used as the
            mask for ``image_paths[k]``.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``"image"`` and ``"mask"`` entries.
    """

    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx``; the mask gets a leading axis."""
        # Context managers release the file handles as soon as pixels are copied.
        with Image.open(self.image_paths[idx]) as img:
            image = np.array(img.convert("RGB"))
        with Image.open(self.mask_paths[idx]) as msk:
            mask = np.array(msk.convert("L"))
        # Any non-zero mask pixel counts as foreground.
        mask = (mask > 0).astype(np.float32)
        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented["image"], augmented["mask"]
        # Without a tensor-producing transform the mask is still a NumPy array,
        # which has no .unsqueeze(); convert it so the call below always works.
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(np.ascontiguousarray(mask))
        return image, mask.unsqueeze(0)

# --- ResNet34 U-Net Architecture (Unchanged) ---
class ResNet_UNet(nn.Module):
    """U-Net style segmentation network on top of a pretrained ResNet34.

    The encoder reuses the ResNet34 children; the decoder upsamples with
    transposed convolutions and concatenates the matching encoder feature map
    before each 3x3 convolution.
    """

    def __init__(self, n_classes=1):
        super().__init__()
        backbone = resnet34(weights='IMAGENET1K_V1')
        # Plain Python list of the backbone's child modules; the training
        # script iterates over it.
        self.base_layers = list(backbone.children())

        # Encoder stages, sliced out of the backbone children by position.
        self.layer0 = nn.Sequential(*self.base_layers[:3])
        self.layer1 = nn.Sequential(*self.base_layers[3:5])
        self.layer2 = self.base_layers[5]
        self.layer3 = self.base_layers[6]
        self.layer4 = self.base_layers[7]

        # Decoder: each step doubles the resolution, then fuses the skip input.
        self.up1 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.conv1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up3 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up4 = nn.ConvTranspose2d(64, 64, kernel_size=2, stride=2)
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.final_up = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
        self.final_conv = nn.Conv2d(32, n_classes, kernel_size=1)

    def forward(self, x):
        """Return the raw (pre-sigmoid) output of ``final_conv`` for ``x``."""
        skip0 = self.layer0(x)
        skip1 = self.layer1(skip0)
        skip2 = self.layer2(skip1)
        skip3 = self.layer3(skip2)
        features = self.layer4(skip3)

        # Walk back up, pairing every upsampling step with its skip connection.
        decoder_steps = (
            (self.up1, self.conv1, skip3),
            (self.up2, self.conv2, skip2),
            (self.up3, self.conv3, skip1),
            (self.up4, self.conv4, skip0),
        )
        for upsample, fuse, skip in decoder_steps:
            features = fuse(torch.cat([upsample(features), skip], 1))

        return self.final_conv(self.final_up(features))

# --- Loss, Train, Eval Functions (Unchanged) ---
class DiceBCELoss(nn.Module):
    """Binary cross-entropy plus soft Dice loss, both computed from raw logits."""

    def __init__(self):
        super().__init__()

    def forward(self, i, t, s=1):
        """Return ``BCE + (1 - Dice)`` over all elements of the batch.

        Args:
            i: Raw (pre-sigmoid) model outputs.
            t: Float targets with the same number of elements as ``i``.
            s: Smoothing constant added to the Dice numerator and denominator.
        """
        # reshape() also accepts non-contiguous tensors, where view(-1) raises.
        logits = i.reshape(-1)
        targets = t.reshape(-1)
        probs = torch.sigmoid(logits)

        intersection = (probs * targets).sum()
        dice_loss = 1 - (2.0 * intersection + s) / (probs.sum() + targets.sum() + s)

        # The logits form avoids log(0) when the sigmoid saturates, so confident
        # wrong predictions keep an exact loss value and a usable gradient.
        bce = nn.functional.binary_cross_entropy_with_logits(
            logits, targets, reduction='mean'
        )
        return bce + dice_loss
def train_fn(ldr, mdl, opt, l_fn):
    """Run one pass over ``ldr``, taking an optimizer step on every batch.

    Batches are moved to ``Config.DEVICE`` and the current batch loss is shown
    on the tqdm progress bar. Nothing is returned.
    """
    progress = tqdm(ldr, leave=True)
    for inputs, labels in progress:
        inputs = inputs.to(Config.DEVICE)
        labels = labels.to(Config.DEVICE)
        batch_loss = l_fn(mdl(inputs), labels)
        opt.zero_grad()
        batch_loss.backward()
        opt.step()
        progress.set_postfix(loss=batch_loss.item())
def eval_fn(ldr, mdl, dev):
    """Return the mean per-batch Dice score of ``mdl`` over ``ldr``.

    Predictions are sigmoid outputs thresholded at 0.5. The model is put in
    eval mode for the pass and switched back to train mode before returning.
    """
    mdl.eval()
    running_dice = 0
    with torch.no_grad():
        for images, masks in ldr:
            images = images.to(dev)
            masks = masks.to(dev)
            hard_pred = (torch.sigmoid(mdl(images)) > 0.5).float()
            overlap = (hard_pred * masks).sum()
            total = (hard_pred + masks).sum()
            # 1e-8 keeps the ratio finite when both prediction and mask are empty.
            running_dice += (2 * overlap) / (total + 1e-8)
    mdl.train()
    return running_dice / len(ldr)

# --- Main Execution ---
print(f"--- Using device: {Config.DEVICE} ---")

train_img_dir = os.path.join(Config.BASE_PATH, 'train/images')
train_mask_dir = os.path.join(Config.BASE_PATH, 'train/gt')
# Both listings are sorted so that entry k of one list is paired with entry k
# of the other (assumes images and masks share file names — verify on disk).
all_train_images = sorted(glob.glob(os.path.join(train_img_dir, '*.tif')))
all_train_masks = sorted(glob.glob(os.path.join(train_mask_dir, '*.tif')))
# 80/20 split with a fixed seed; splitting both lists in one call keeps the
# image/mask pairs aligned.
train_images, val_images, train_masks, val_masks = train_test_split(
    all_train_images,
    all_train_masks,
    test_size=0.2,
    random_state=42,
)
print(f"Data split: {len(train_images)} training, {len(val_images)} validation.")

# Augmentation is applied to the training split only.
train_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Rotate(35),
    A.HorizontalFlip(),
    A.Normalize(0, 1),
    ToTensorV2(),
])
val_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Normalize(0, 1),
    ToTensorV2(),
])
train_dataset = InriaDataset(train_images, train_masks, transform=train_transform)
val_dataset = InriaDataset(val_images, val_masks, transform=val_transform)
train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False, num_workers=2)

model = ResNet_UNet().to(Config.DEVICE)
loss_fn = DiceBCELoss()

# --- STAGE 1 ---
print("--- Stage 1: Training the Decoder ---")
# Freeze the pretrained encoder. `base_layers` holds nn.Module objects, so
# assigning `.requires_grad` on them only creates an unused attribute;
# Module.requires_grad_() is what flips the flag on every parameter.
# NOTE(review): the model stays in train mode, so encoder BatchNorm running
# statistics presumably still update during this stage — confirm if that matters.
for layer in model.base_layers:
    layer.requires_grad_(False)
optimizer = torch.optim.AdamW(model.parameters(), lr=Config.STAGE_1_LR)  # --- TUNED ---
for epoch in range(Config.STAGE_1_EPOCHS):
    print(f"S1 - E{epoch+1}")
    train_fn(train_loader, model, optimizer, loss_fn)

# --- STAGE 2 ---
print("\n--- Stage 2: Fine-Tuning the Entire Model ---")
# Unfreeze the encoder so every parameter is fine-tuned.
for layer in model.base_layers:
    layer.requires_grad_(True)
# A fresh optimizer is created for stage 2 with the stage-2 learning rate.
optimizer = torch.optim.AdamW(model.parameters(), lr=Config.STAGE_2_LR)  # --- TUNED ---
# 'max' mode: the LR is halved after 3 epochs without a better validation Dice.
scheduler = ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.5, verbose=True)  # --- TUNED ---
best_dice_score = -1.0
for epoch in range(Config.STAGE_2_EPOCHS):
    print(f"S2 - E{epoch+1}")
    train_fn(train_loader, model, optimizer, loss_fn)
    dice_score = eval_fn(val_loader, model, Config.DEVICE)
    print(f"Val Dice Score: {dice_score:.4f}")
    scheduler.step(dice_score)
    # Checkpoint only when the validation score improves.
    if dice_score > best_dice_score:
        best_dice_score = dice_score
        torch.save(model.state_dict(), Config.MODEL_SAVE_PATH)
        print(f"✅ New best model saved: {best_dice_score:.4f}")

print(f"\n🏆 Final best model saved: {best_dice_score:.4f}")

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet34
from PIL import Image
import numpy as np
import glob
import os
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split
import optuna # The new library for hyperparameter optimization

# --- Configuration ---
class Config:
    """Settings shared by every Optuna trial and by the final training run."""

    # Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    BASE_PATH = '/kaggle/input/inria-aerial-image-labeling-dataset/AerialImageDataset'
    # Every sample is resized to this square resolution.
    IMG_HEIGHT = IMG_WIDTH = 256
    BATCH_SIZE = 16  # Kept fixed; not part of the search space
    EPOCHS = 20  # Per-trial budget, kept short so more trials can run
    MODEL_SAVE_PATH = "/kaggle/working/inria_resnet34_unet_OPTIMIZED.pt"

# --- Dataset, Model, Loss and Eval Definitions ---
class InriaDataset(Dataset):
    """Image/mask pairs read from two parallel lists of file paths.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths; entry ``k`` is used as the
            mask for ``image_paths[k]``.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``"image"`` and ``"mask"`` entries.
    """

    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx``; the mask gets a leading axis."""
        # Context managers release the file handles as soon as pixels are copied.
        with Image.open(self.image_paths[idx]) as img:
            image = np.array(img.convert("RGB"))
        with Image.open(self.mask_paths[idx]) as msk:
            mask = np.array(msk.convert("L"))
        # Any non-zero mask pixel counts as foreground.
        mask = (mask > 0).astype(np.float32)
        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented["image"], augmented["mask"]
        # Without a tensor-producing transform the mask is still a NumPy array,
        # which has no .unsqueeze(); convert it so the call below always works.
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(np.ascontiguousarray(mask))
        return image, mask.unsqueeze(0)
class ResNet_UNet(nn.Module):
    """U-Net style segmentation network on top of a pretrained ResNet34.

    The encoder reuses the ResNet34 children; the decoder upsamples with
    transposed convolutions and concatenates the matching encoder feature map
    before each 3x3 convolution.
    """

    def __init__(self, n_classes=1):
        super().__init__()
        backbone = resnet34(weights='IMAGENET1K_V1')
        # Plain Python list of the backbone's child modules.
        self.base_layers = list(backbone.children())

        # Encoder stages, sliced out of the backbone children by position.
        self.layer0 = nn.Sequential(*self.base_layers[:3])
        self.layer1 = nn.Sequential(*self.base_layers[3:5])
        self.layer2 = self.base_layers[5]
        self.layer3 = self.base_layers[6]
        self.layer4 = self.base_layers[7]

        # Decoder: each step doubles the resolution, then fuses the skip input.
        self.up1 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.conv1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up3 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.up4 = nn.ConvTranspose2d(64, 64, kernel_size=2, stride=2)
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(True)
        )
        self.final_up = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
        self.final_conv = nn.Conv2d(32, n_classes, kernel_size=1)

    def forward(self, x):
        """Return the raw (pre-sigmoid) output of ``final_conv`` for ``x``."""
        skip0 = self.layer0(x)
        skip1 = self.layer1(skip0)
        skip2 = self.layer2(skip1)
        skip3 = self.layer3(skip2)
        features = self.layer4(skip3)

        # Walk back up, pairing every upsampling step with its skip connection.
        decoder_steps = (
            (self.up1, self.conv1, skip3),
            (self.up2, self.conv2, skip2),
            (self.up3, self.conv3, skip1),
            (self.up4, self.conv4, skip0),
        )
        for upsample, fuse, skip in decoder_steps:
            features = fuse(torch.cat([upsample(features), skip], 1))

        return self.final_conv(self.final_up(features))
class DiceBCELoss(nn.Module):
    """Binary cross-entropy plus soft Dice loss, both computed from raw logits."""

    def __init__(self):
        super().__init__()

    def forward(self, i, t, s=1):
        """Return ``BCE + (1 - Dice)`` over all elements of the batch.

        Args:
            i: Raw (pre-sigmoid) model outputs.
            t: Float targets with the same number of elements as ``i``.
            s: Smoothing constant added to the Dice numerator and denominator.
        """
        # reshape() also accepts non-contiguous tensors, where view(-1) raises.
        logits = i.reshape(-1)
        targets = t.reshape(-1)
        probs = torch.sigmoid(logits)

        intersection = (probs * targets).sum()
        dice_loss = 1 - (2.0 * intersection + s) / (probs.sum() + targets.sum() + s)

        # The logits form avoids log(0) when the sigmoid saturates, so confident
        # wrong predictions keep an exact loss value and a usable gradient.
        bce = nn.functional.binary_cross_entropy_with_logits(
            logits, targets, reduction='mean'
        )
        return bce + dice_loss
def eval_fn(ldr, mdl, dev):
    """Return the mean per-batch Dice score of ``mdl`` over ``ldr``.

    Predictions are sigmoid outputs thresholded at 0.5. The model is put in
    eval mode for the pass and switched back to train mode before returning.
    """
    mdl.eval()
    running_dice = 0
    with torch.no_grad():
        for images, masks in ldr:
            images = images.to(dev)
            masks = masks.to(dev)
            hard_pred = (torch.sigmoid(mdl(images)) > 0.5).float()
            overlap = (hard_pred * masks).sum()
            total = (hard_pred + masks).sum()
            # 1e-8 keeps the ratio finite when both prediction and mask are empty.
            running_dice += (2 * overlap) / (total + 1e-8)
    mdl.train()
    return running_dice / len(ldr)

# --- NEW: The Optuna Objective Function ---
# This function defines ONE training experiment. Optuna will call this many times.
def objective(trial):
    """Train one model with Optuna-suggested settings and return its validation Dice.

    Args:
        trial: The Optuna trial used to sample ``lr`` (log-uniform in
            [1e-5, 1e-3]) and ``optimizer`` ("Adam" or "AdamW"), and to
            report per-epoch scores for pruning.

    Returns:
        The validation Dice score after the last completed epoch, as a float.

    Raises:
        optuna.exceptions.TrialPruned: When the pruner asks to stop the trial.
    """
    # --- 1. Suggest Hyperparameters ---
    lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "AdamW"])

    # --- 2. Create Model and Optimizer with Suggested Params ---
    model = ResNet_UNet().to(Config.DEVICE)
    optimizer = getattr(torch.optim, optimizer_name)(model.parameters(), lr=lr)
    loss_fn = DiceBCELoss()

    # Defined up front so the function still returns a value when
    # Config.EPOCHS is 0 and the loop body never runs.
    dice_score = 0.0

    # --- 3. Run the Training Loop ---
    # The module-level train_loader / val_loader are shared by every trial.
    for epoch in range(Config.EPOCHS):
        # No progress bar during the automatic search.
        for data, targets in train_loader:
            data, targets = data.to(Config.DEVICE), targets.to(Config.DEVICE)
            predictions = model(data)
            loss = loss_fn(predictions, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # --- 4. Report the performance to Optuna ---
        # eval_fn yields a 0-d tensor; convert once so Optuna receives a plain
        # Python float and no device tensor is kept alive by the study.
        dice_score = float(eval_fn(val_loader, model, Config.DEVICE))
        trial.report(dice_score, epoch)

        # Pruning: stop unpromising trials early.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    # --- 5. Return the Final Score ---
    return dice_score

# --- Main Execution ---
# The data pipeline is built once and reused by every trial.
train_img_dir = os.path.join(Config.BASE_PATH, 'train/images')
train_mask_dir = os.path.join(Config.BASE_PATH, 'train/gt')
# Both listings are sorted so that entry k of one list is paired with entry k
# of the other (assumes images and masks share file names — verify on disk).
all_train_images = sorted(glob.glob(os.path.join(train_img_dir, '*.tif')))
all_train_masks = sorted(glob.glob(os.path.join(train_mask_dir, '*.tif')))
# 80/20 split with a fixed seed; splitting both lists in one call keeps the
# image/mask pairs aligned.
train_images, val_images, train_masks, val_masks = train_test_split(
    all_train_images,
    all_train_masks,
    test_size=0.2,
    random_state=42,
)
# Augmentation is applied to the training split only.
train_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Rotate(35),
    A.HorizontalFlip(),
    A.Normalize(0, 1),
    ToTensorV2(),
])
val_transform = A.Compose([
    A.Resize(Config.IMG_HEIGHT, Config.IMG_WIDTH),
    A.Normalize(0, 1),
    ToTensorV2(),
])
train_dataset = InriaDataset(train_images, train_masks, transform=train_transform)
val_dataset = InriaDataset(val_images, val_masks, transform=val_transform)
train_loader = DataLoader(
    train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True, num_workers=2
)
val_loader = DataLoader(
    val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False, num_workers=2
)

# --- Start the Optuna Study ---
# Twenty trials are run; the study maximizes the value returned by
# `objective`, and the median pruner may stop a trial early.
study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.MedianPruner(),
)
study.optimize(objective, n_trials=20)

# --- Print the Best Results ---
print("\n--- Optimization Complete ---")
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("  Value (Dice Score): ", trial.value)
print("  Params: ")
# One line per hyperparameter of the best trial.
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

# --- Train the Final Model with the BEST settings ---
print("\n--- Training the FINAL model with the best hyperparameters ---")
best_params = study.best_params
final_model = ResNet_UNet().to(Config.DEVICE)
final_optimizer = getattr(torch.optim, best_params["optimizer"])(
    final_model.parameters(), lr=best_params["lr"]
)
final_loss_fn = DiceBCELoss()

FINAL_EPOCHS = 50  # Train for longer on the final run
for epoch in range(FINAL_EPOCHS):
    print(f"Final Training - Epoch {epoch+1}/{FINAL_EPOCHS}")
    # This cell does not define train_fn, so the per-batch loop is written out
    # here; the cell then runs on a fresh kernel without relying on an earlier
    # cell having been executed.
    loop = tqdm(train_loader, leave=True)
    for data, targets in loop:
        data, targets = data.to(Config.DEVICE), targets.to(Config.DEVICE)
        loss = final_loss_fn(final_model(data), targets)
        final_optimizer.zero_grad()
        loss.backward()
        final_optimizer.step()
        loop.set_postfix(loss=loss.item())

# Save the weights from the last epoch of the final run.
torch.save(final_model.state_dict(), Config.MODEL_SAVE_PATH)
print(f"\n🏆 Final BEST model saved to {Config.MODEL_SAVE_PATH}")