In [None]:
import torch
import tqdm
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.encoders import get_preprocessing_fn
from segmentation_models_pytorch.utils.meter import AverageValueMeter
from torch.utils.data import DataLoader, random_split
from utils.image_loading import load_training_images, load_groundtruth_images, load_test_images
from custom_datasets import Sat_Mask_Dataset_UPP_preprocessed
import albumentations as album
from albumentations.pytorch import ToTensorV2
import torch.nn as nn
from torcheval.metrics.functional import multiclass_f1_score

In [None]:
# Pick the fastest available backend: CUDA GPU first, then Apple-Silicon MPS,
# and fall back to the CPU when neither accelerator is present.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

## Extract the training data:

In [None]:
# Preprocessing pipeline handed to the datasets: the encoder-specific input
# transform returned by smp for 'efficientnet-b5' / 'imagenet' is applied to
# the image, then ToTensorV2 converts the result to a torch tensor.
# NOTE(review): this efficientnet-b5 transform is used no matter which encoder
# is selected further down — confirm resnet34 expects the same normalisation.
preprocess_input = album.Compose(
    [
        album.Lambda(image=get_preprocessing_fn('efficientnet-b5', pretrained='imagenet')),
        ToTensorV2(),
    ]
)

In [None]:
# Original dataset: satellite images and their ground-truth masks, loaded with
# the loaders' default arguments (images first, then masks).
original_data = {
    "images": load_training_images(),
    "masks": load_groundtruth_images(),
}

In [None]:
# Additional training data: images and masks of every listed city are
# appended, city by city, to two flat lists.
city_names = ["boston", "nyc", "philadelphia", "austin"]
custom_data = {"images": [], "masks": []}
for city in city_names:
    custom_data["images"] += load_training_images(city)
    custom_data["masks"] += load_groundtruth_images(city)

In [None]:
# Wrap both image/mask collections in the project dataset class.
# The custom dataset is additionally restricted by a street-ratio window
# (min 0.03, max 1.0); presumably samples outside it are dropped — the
# filtering itself lives in custom_datasets, verify there.
custom_data_set = Sat_Mask_Dataset_UPP_preprocessed(
    custom_data["images"],
    custom_data["masks"],
    min_street_ratio=0.03,
    max_street_ratio=1.0,
    upp_preprocess=preprocess_input,
)
original_data_set = Sat_Mask_Dataset_UPP_preprocessed(
    original_data["images"],
    original_data["masks"],
    upp_preprocess=preprocess_input,
)
print("After cleanup, the dataset now contains", len(custom_data_set), "images")

### Print Images in different styles:
- Normal Satellite Image
- Corresponding Mask
- Preprocessed Satellite Image
- Preprocessed Mask

In [None]:
import numpy as np
import torchvision.transforms.functional as TF
import matplotlib.pyplot as plt

# 1) + 2) Raw satellite image and raw mask of the first original sample:
#         print the shape, then draw it.
print(original_data["images"][0].shape)
plt.imshow(original_data["images"][0])
plt.show()

print(original_data["masks"][0].shape)
plt.imshow(original_data["masks"][0])
plt.show()

# 3) + 4) The same sample after it went through the dataset's preprocessing.
item = original_data_set[0]
# Swap the first and last axis so the channel axis ends up last for imshow.
# NOTE(review): if the items are (C, H, W), swapaxes(0, 2) yields (W, H, C),
# i.e. the picture is drawn transposed — confirm whether that is intended.
img_normal = np.swapaxes(item[0], 0, 2)
img_mask = np.swapaxes(item[1], 0, 2)

plt.imshow(img_normal)
plt.show()

plt.imshow(img_mask.squeeze(0))
plt.show()

### Split custom dataset into training and validation sets:

In [None]:
# 80 % / 20 % train/validation split of the custom (city) dataset.
train_size = int(0.8 * len(custom_data_set))
valid_size = len(custom_data_set) - train_size
# Seed the split so that it is identical on every run. With an unseeded split,
# resuming from a checkpoint after a kernel restart would validate on samples
# the checkpoint has already been trained on.
train_dataset, valid_dataset = random_split(
    custom_data_set,
    [train_size, valid_size],
    generator=torch.Generator().manual_seed(0),
)

# Training batches are shuffled; validation is evaluated one sample at a time
# in a fixed order.
batch_size = 5
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False)

## Define the models that are used (ONLY RUN MODEL THAT SHOULD BE USED):

Please note that each model has to be trained separately: run the notebook four times, and in each run execute only the one model-definition cell below that belongs to the model you want to train.

In [None]:
# U-Net with a resnet34 encoder initialised from "imagenet" weights.
# One output class and no final activation: the network returns raw logits,
# the sigmoid is applied later by the losses and metrics.
model = smp.Unet(
    in_channels=3,
    classes=1,
    activation=None,
    aux_params=None,
    encoder_name="resnet34",
    encoder_weights="imagenet",
    encoder_depth=5,
    decoder_channels=(1024, 512, 256, 64, 16),
    decoder_use_batchnorm=True,
    decoder_attention_type=None,
)
model = model.to(device)

# Used as the checkpoint file name in the second training stage.
model_name = "unet_resnet34"

In [None]:
# U-Net with an efficientnet-b5 encoder initialised from "imagenet" weights.
# One output class and no final activation: the network returns raw logits,
# the sigmoid is applied later by the losses and metrics.
model = smp.Unet(
    in_channels=3,
    classes=1,
    activation=None,
    aux_params=None,
    encoder_name="efficientnet-b5",
    encoder_weights="imagenet",
    encoder_depth=5,
    decoder_channels=(1024, 512, 256, 64, 16),
    decoder_use_batchnorm=True,
    decoder_attention_type=None,
)
model = model.to(device)

# Used as the checkpoint file name in the second training stage.
model_name = "unet_efficientnet-b5"

In [None]:
# U-Net++ with a resnet34 encoder initialised from "imagenet" weights.
# One output class and no final activation: the network returns raw logits,
# the sigmoid is applied later by the losses and metrics.
model = smp.UnetPlusPlus(
    in_channels=3,
    classes=1,
    activation=None,
    aux_params=None,
    encoder_name="resnet34",
    encoder_weights="imagenet",
    encoder_depth=5,
    decoder_channels=(1024, 512, 256, 64, 16),
    decoder_use_batchnorm=True,
    decoder_attention_type=None,
)
model = model.to(device)

# Used as the checkpoint file name in the second training stage.
model_name = "unetplusplus_resnet34"

In [None]:
# U-Net++ with an efficientnet-b5 encoder initialised from "imagenet" weights.
# One output class and no final activation: the network returns raw logits,
# the sigmoid is applied later by the losses and metrics.
model = smp.UnetPlusPlus(
    in_channels=3,
    classes=1,
    activation=None,
    aux_params=None,
    encoder_name="efficientnet-b5",
    encoder_weights="imagenet",
    encoder_depth=5,
    decoder_channels=(1024, 512, 256, 64, 16),
    decoder_use_batchnorm=True,
    decoder_attention_type=None,
)
model = model.to(device)

# Used as the checkpoint file name in the second training stage.
model_name = "unetplusplus_efficientnet-b5"

#### Show some facts about the model in use:

In [None]:
print("Model loaded")

# Count the parameters once and reuse the numbers below.
n_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
n_total_params = sum(p.numel() for p in model.parameters())

# how many trainable parameters does the model have?
print("Trainable parameters", n_trainable_params)
# how many total parameters does the model have?
print("Total parameters", n_total_params)

# Share of trainable parameters, printed as an actual percentage
# (previously a 0-1 fraction was printed under the "Percentage" label).
print(f"Percentage of trainable parameters: {100 * n_trainable_params / n_total_params:.2f}%")

## Define loss functions:

In [None]:
# Weight for the positive class of the binary cross-entropy losses.
# NOTE(review): 0.13 is presumably the fraction of street pixels — confirm.
class_weights = torch.tensor([1./0.13]).to(device)
# `pos_weight` scales only the loss term of positive targets, which is what
# re-balances the classes. The `weight` argument used before rescales every
# element alike, so a one-element tensor merely multiplied the whole loss by
# a constant without changing the class balance.
bce_loss = nn.BCEWithLogitsLoss(pos_weight=class_weights)
soft_bce_loss = smp.losses.SoftBCEWithLogitsLoss(pos_weight=class_weights)

def dice_loss(logits,masks, smooth=1e-6):
    """Soft Dice loss computed over all elements of the batch at once.

    Args:
        logits: Raw model outputs; a sigmoid is applied here.
        masks: Target tensor with the same number of elements as `logits`.
        smooth: Constant added to numerator and denominator so the ratio
            stays defined when both tensors sum to zero.

    Returns:
        Scalar tensor `1 - dice`, where
        `dice = (2 * sum(p * m) + smooth) / (sum(p) + sum(m) + smooth)`.
    """
    flat_probs = torch.sigmoid(logits).reshape(-1)
    flat_targets = masks.reshape(-1)

    overlap = torch.sum(flat_probs * flat_targets)
    # Sum of both "areas" (the Dice denominator), not a set union.
    total = torch.sum(flat_probs) + torch.sum(flat_targets)
    return 1.0 - (2.0 * overlap + smooth) / (total + smooth)

def combined_loss_correct_dice(logits, masks, smooth=1e-6):
    """Sum of the batch-level Dice loss and the module-level `bce_loss`.

    Args:
        logits: Raw model outputs with the batch on the first axis.
        masks: Targets with the same shape as `logits`.
        smooth: Smoothing constant forwarded to `dice_loss`.

    Returns:
        Scalar tensor `dice_loss(logits, masks) + bce_loss(...)`.
    """
    # Flatten everything but the batch axis for the BCE term. The size is
    # derived from the input instead of the former hard-coded 416*416, so the
    # loss also works for other image resolutions.
    logits_sq = logits.flatten(start_dim=1)
    mask_sq = masks.flatten(start_dim=1)

    return dice_loss(logits, masks, smooth=smooth) + bce_loss(logits_sq, mask_sq)

## Define performance metrics:

In [None]:
def mean_f1_score_from_logits(pred, mask):
    """Threshold logits at probability 0.5 and return the mean per-sample F1.

    Args:
        pred: Raw model outputs (logits), batch on the first axis.
        mask: Target masks with the same shape as `pred`.

    Returns:
        The value of `mean_f1_score_from_classes` for the rounded predictions.
    """
    # Detach first: a metric must not keep the autograd graph alive.
    pred_classes = torch.round(torch.sigmoid(pred.detach()))
    # Pass the arguments in the order of the callee's signature
    # (preds, masks); they were swapped before.
    return mean_f1_score_from_classes(pred_classes, mask)

def mean_f1_score_from_classes(preds, masks):
    """Average `multiclass_f1_score` over the samples of a batch.

    Each sample is flattened to one dimension and scored on its own with the
    default arguments of `multiclass_f1_score`; the scores are then averaged.
    NOTE(review): torcheval's default averaging is presumably "micro", which
    for hard labels equals pixel accuracy — confirm this is the wanted metric.

    Args:
        preds: Predicted class labels, batch on the first axis.
        masks: Target labels with the same shape as `preds`.

    Returns:
        Mean of the per-sample scores.
    """
    n_samples = masks.shape[0]
    # Number of elements of one sample.
    flat_len = int(torch.tensor(masks.shape[1:]).prod())

    running_total = 0.
    for idx in range(n_samples):
        target = masks[idx].reshape(flat_len)
        guess = preds[idx].reshape(flat_len)
        running_total = running_total + multiclass_f1_score(guess, target)
    return running_total / n_samples

def mean_iou_from_logits(pred, mask):
    """Threshold logits at probability 0.5 and return the mean per-sample IoU.

    Args:
        pred: Raw model outputs (logits), batch on the first axis.
        mask: Target masks with the same shape as `pred`.

    Returns:
        The value of `mean_iou_from_classes` for the rounded predictions.
    """
    # Detach first: a metric must not keep the autograd graph alive.
    pred_classes = torch.round(torch.sigmoid(pred.detach()))
    # Pass the arguments in the order of the callee's signature
    # (preds, masks); they were swapped before.
    return mean_iou_from_classes(pred_classes, mask)

def mean_iou_from_classes(preds, masks):
    """Average intersection-over-union over the samples of a batch.

    Every sample is flattened and cast to bool; its IoU is
    `(|pred & mask| + 1e-10) / (|pred | mask| + 1e-10)`, so a sample where
    both tensors are empty scores 1.

    Args:
        preds: Predicted class labels, batch on the first axis.
        masks: Target labels with the same shape as `preds`.

    Returns:
        Mean of the per-sample IoU values.
    """
    eps = 1e-10
    n_samples = masks.shape[0]
    # Number of elements of one sample.
    flat_len = int(torch.tensor(masks.shape[1:]).prod())

    running_total = 0.
    for idx in range(n_samples):
        target = masks[idx].reshape(flat_len).bool()
        guess = preds[idx].reshape(flat_len).bool()
        overlap = torch.logical_and(guess, target).float().sum()
        combined = torch.logical_or(guess, target).float().sum()
        running_total = running_total + (overlap + eps) / (combined + eps)
    return running_total / n_samples

## Define the optimizer and scheduler:

In [None]:
# Define optimizer and scheduler
# Adam over all model parameters with a learning rate of 5e-4.
optimizer = torch.optim.Adam([dict(params=model.parameters(), lr=0.0005)])
# The training loops call `scheduler.step(val_loss)`, i.e. the monitored
# quantity is a loss that should go down. The mode therefore has to be 'min';
# with 'max' the learning rate was reduced whenever the loss stopped rising.
# The rate is multiplied by 0.2 after 10 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=10, verbose=True, threshold=1e-5)

## Time to train:

##### If needed, load the model from a specific model checkpoint by uncommenting the following line:

In [None]:
#model.load_state_dict(torch.load("model/best_model_0.pth"))

In [None]:
# Per-epoch validation history, appended to by both training stages.
loss_scores, f1_scores, iou_scores = [], [], []

#### First, we train on the custom dataset:

In [None]:
# Training loop
# Stage 1: 10 epochs on the custom dataset with the combined Dice + BCE loss.
import os

for epoch in range(10):
    model.train()
    average_loss = 0
    f1_score = 0
    iou_score = 0

    for data in tqdm.tqdm(train_loader, leave=False):
        x, y = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        y_pred = model(x)

        loss = combined_loss_correct_dice(y_pred, y)
        loss.backward()
        optimizer.step()

        # Update loss, f1 and iou
        average_loss += loss.item()
        # The metrics are only reported, so compute them without autograd.
        with torch.no_grad():
            f1_score += mean_f1_score_from_logits(y_pred, y)
            iou_score += mean_iou_from_logits(y_pred, y)

    # Print training stats for the current epoch
    print(f"Epoch: {epoch}, Training Loss: {average_loss / len(train_loader)}, F1: {f1_score / len(train_loader)}, IOU: {iou_score / len(train_loader)}")


    # store the model after each epoch (one checkpoint file per epoch)
    os.makedirs("model", exist_ok=True)  # torch.save does not create the folder
    torch.save(model.state_dict(), f"model/best_model_{epoch}.pth")

    # Validation loop
    model.eval()
    average_loss = 0
    f1_score = 0
    iou_score = 0

    for x, y in tqdm.tqdm(valid_loader):
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            y_pred = model(x)
            loss = combined_loss_correct_dice(y_pred, y)

        # Update loss, f1 and iou
        average_loss += loss.item()
        f1_score += mean_f1_score_from_logits(y_pred, y)
        iou_score += mean_iou_from_logits(y_pred, y)

    # Print validation stats for the current epoch
    val_loss = average_loss / len(valid_loader)
    val_f1 = f1_score / len(valid_loader)
    val_iou = iou_score / len(valid_loader)
    print(f"Epoch: {epoch}, Validation Loss: {val_loss}, F1: {val_f1}, IOU: {val_iou}")
    loss_scores.append(val_loss)
    f1_scores.append(val_f1.cpu().item())
    # Record the IoU here; the F1 value was appended a second time before.
    iou_scores.append(val_iou.cpu().item())
    
    scheduler.step(val_loss)

UPP-Model, SoftBCELoss:\
The lowest training value is 0.8790411872128463 at index 12.\
The largest f1-score is 0.9565524458885193 at index 44.\
The largest iou-score is 0.7358250021934509 at index 27.

### Show a visual representation of the intermediate training process:

In [None]:
import matplotlib.pyplot as plt
import numpy as np 

# First sample of the current validation split as an (image, mask) pair.
item = valid_loader.dataset[0]
image_visual, mask_visual = item[0], item[1]
print(image_visual.shape)
print(mask_visual.shape)

# Swap the first and last axis so the channel axis ends up last for imshow.
# NOTE(review): for (C, H, W) inputs this yields (W, H, C), i.e. a transposed
# picture — confirm whether that is intended.
img_normal = np.swapaxes(image_visual, 0, 2)
img_mask = np.swapaxes(mask_visual, 0, 2)

plt.imshow(img_normal)
plt.show()

plt.imshow(img_mask)
plt.show()

# Run the model on that single image (batch axis added) without gradients.
model.eval()
with torch.no_grad():
    image = image_visual.unsqueeze(0).to(device)
    pred = model(image).cpu()

# Raw model output ...
plt.imshow(pred.squeeze())
plt.show()

# ... and the binary mask obtained by rounding the sigmoid of that output.
plt.imshow(torch.round(torch.sigmoid(pred.squeeze())))
plt.show()

##### If needed, load the model from a specific model checkpoint by uncommenting the following line:

In [None]:
#model.load_state_dict(torch.load("model_XXXX.pth"))

### Split original Kaggle dataset into training and validation sets:

In [None]:
# 90 % / 10 % train/validation split of the original dataset. The generator
# is seeded with 0, so the split is the same on every run.
train_size = int(0.9 * len(original_data_set))
valid_size = len(original_data_set) - train_size
gen = torch.Generator().manual_seed(0)
train_dataset, valid_dataset = random_split(
    original_data_set,
    [train_size, valid_size],
    generator=gen,
)

# Same loader settings as before: shuffled training batches of `batch_size`,
# validation one sample at a time in a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_dataset, shuffle=False, batch_size=1)

In [None]:
# Number of batches per epoch: validation loader first, then training loader.
print(len(valid_loader), len(train_loader), sep="\n")

In [None]:
# Training loop
# Stage 2: 15 epochs on the original dataset with the soft BCE loss.
import os

for epoch in range(15):
    model.train()
    average_loss = 0
    f1_score = 0
    iou_score = 0

    for data in tqdm.tqdm(train_loader, leave=False):
        x, y = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        y_pred = model(x)

        loss = soft_bce_loss(y_pred, y)
        loss.backward()
        optimizer.step()

        # Update loss, f1 and iou
        average_loss += loss.item()
        # The metrics are only reported, so compute them without autograd.
        with torch.no_grad():
            f1_score += mean_f1_score_from_logits(y_pred, y)
            iou_score += mean_iou_from_logits(y_pred, y)

    # Print training stats for the current epoch
    print(f"Epoch: {epoch}, Training Loss: {average_loss / len(train_loader)}, F1: {f1_score / len(train_loader)}, IOU: {iou_score / len(train_loader)}")


    # store the model after each epoch; the file name does not contain the
    # epoch, so every epoch overwrites the previous checkpoint
    os.makedirs("model", exist_ok=True)  # torch.save does not create the folder
    torch.save(model.state_dict(), f"model/{model_name}.pth")

    # Validation loop
    model.eval()
    average_loss = 0
    f1_score = 0
    iou_score = 0

    for x, y in tqdm.tqdm(valid_loader):
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            y_pred = model(x)
            loss = soft_bce_loss(y_pred, y)

        # Update loss, f1 and iou
        average_loss += loss.item()
        f1_score += mean_f1_score_from_logits(y_pred, y)
        iou_score += mean_iou_from_logits(y_pred, y)

    # Print validation stats for the current epoch
    val_loss = average_loss / len(valid_loader)
    val_f1 = f1_score / len(valid_loader)
    val_iou = iou_score / len(valid_loader)
    print(f"Epoch: {epoch}, Validation Loss: {val_loss}, F1: {val_f1}, IOU: {val_iou}")
    loss_scores.append(val_loss)
    f1_scores.append(val_f1.cpu().item())
    # Record the IoU here; the F1 value was appended a second time before.
    iou_scores.append(val_iou.cpu().item())
    
    scheduler.step(val_loss)

Baselines (F1-score):
- Unet Efficient: 0.9400148987770081 (Validation), 0.93428 (Kaggle)
- UPP Efficient: 0.936101496219635 (Validation), 0.93273 (Kaggle)
- Unet ResNet: 0.9320721626281738 (Validation), 0.92591 (Kaggle)
- UPP ResNet: 0.9340260028839111 (Validation), 0.92733 (Kaggle)

# Prepare the Kaggle Submission:

Now we go over each trained model and generate a submission file for each one.

In [None]:
from custom_datasets import Sat_Only_Image_UPP_preprocessed

# Test images wrapped in the image-only dataset, using the same preprocessing
# pipeline as for training.
kaggle_submission_images = load_test_images()
submission_data_set = Sat_Only_Image_UPP_preprocessed(
    kaggle_submission_images,
    upp_preprocess=preprocess_input,
)
# One image per batch, never shuffled and nothing dropped, so the predictions
# come out in dataset order. Four worker processes are kept alive between
# passes over the loader.
submission_dataloader = DataLoader(
    submission_data_set,
    batch_size=1,
    shuffle=False,
    drop_last=False,
    num_workers=4,
    persistent_workers=True,
)

In [None]:
import os

import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np 
from mask_to_submission import make_submission


model_names = ["unet_resnet34", "unet_efficientnet-b5", "unetplusplus_resnet34", "unetplusplus_efficientnet-b5"]

# NOTE: this loop rebinds the notebook-level names `model` and `model_name`.
for model_name in model_names:

    # construct the model depending if name is unet or unetplusplus and efficientnet or resnet34
    model_type, encoder_name = model_name.split("_")[:2]
    model_class = smp.Unet if model_type == "unet" else smp.UnetPlusPlus
    model = model_class(
        encoder_name=encoder_name,
        encoder_depth=5,
        # No pretrained download: every weight is overwritten by the trained
        # checkpoint that is loaded right below.
        encoder_weights=None,
        decoder_use_batchnorm=True,
        decoder_channels=(1024,512,256,64,16),
        decoder_attention_type=None,
        in_channels=3,
        classes=1,
        activation=None,
        aux_params=None
    ).to(device)

    # load the model weights from the training; map_location lets a checkpoint
    # written on one device type be loaded on another
    model.load_state_dict(torch.load(f"model/{model_name}.pth", map_location=device))

    # make the predictions for each model. 
    model.eval()
    predictions = []
    with torch.no_grad():
        for image in tqdm.tqdm(submission_dataloader):
            image = image.to(device)
            pred = model(image).cpu()
            predictions.append(pred)

    # Number used in the first mask file name.
    # NOTE(review): presumably the id of the first test image — confirm.
    counter = 144
    # Every model writes into the same folder, so the masks of the previous
    # model are overwritten; its CSV has already been produced at that point.
    path = "submission/model/"
    os.makedirs(path, exist_ok=True)  # plt.imsave does not create the folder
    final_pred_images = []
    for sp in predictions:
        # Logits -> binary mask, resized to 400x400 without interpolation blur.
        pred_image = torch.round(torch.sigmoid(sp.squeeze()))
        pred = F.interpolate(pred_image.unsqueeze(0).unsqueeze(0), size=(400,400), mode='nearest')
        pred = pred.squeeze().numpy()
        # 0/1 -> 0/255, replicated to three identical channels for the PNG.
        pred = (pred * 255).astype(np.uint8)
        pred = np.stack([pred, pred, pred],axis=2)
        plt.imsave(path+"mask_"+str(counter)+".png", pred)
        counter += 1
        final_pred_images.append(pred)

    make_submission(f"{model_name}.csv", path, foreground_threshold= 0.25)