In [None]:
!pip install -Uq wandb tqdm torchsummary
!pip install -Uq randomname # for generating funky names for the sweeps



In [None]:
import wandb

# W&B entity (username), the project that receives the runs and sweeps, and
# the reference of the dataset artifact to download.
wandb_username = "adrishd"
wandb_project = "taco-baseline"
dataset_artifact = "adrishd/taco/taco:pytorch"

# Download the dataset artifact through the W&B public API.
# Pass root=<custom_path> to artifact.download(...) to choose the download
# directory; without it wandb picks its default location.
api = wandb.Api()
artifact = api.artifact(dataset_artifact)
artifact_dir = artifact.download()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Downloading large artifact taco:pytorch, 2507.15MB. 1503 files... Done. 0:0:0


In [None]:
import randomname
import tacoloader
import torch
import torch.nn
import torch.nn.functional as F
import torchsummary
import time
import tqdm as tqdm
import torchvision

# Use the first CUDA device when CUDA is available, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu:0"

In [None]:
# Constants in the training pipeline:
# batch sizes handed to the training and the test data loaders.
train_batch_size = 10
test_batch_size = 3

In [None]:
h, w, c = 512, 512, 3  # height, width and channel of images
# Every sample is resized to (h, w) using nearest-neighbour interpolation.
# Use torchvision.transforms.Compose to compose multiple transformations
# together. Refer to: https://pytorch.org/vision/stable/transforms.html
transform = torchvision.transforms.Resize(
    (h, w), torchvision.transforms.InterpolationMode.NEAREST
)

In [None]:
# By default an LRU cache keeps the most recently loaded image so that it can
# be re-used in the next epoch.
# To keep *all* the loaded images in memory and re-use them in later epochs,
# set cache_fn=tacoloader.cache_fn in tacoloader.load_dataset(...).
# The size of the LRU cache can also be increased by passing
# cache_fn=functools.lru_cache(maxsize=<int size>) to
# tacoloader.load_dataset(...).

dataset, collate_fn = tacoloader.load_dataset(
    artifact_dir, tacoloader.Environment.TORCH, transform_fn=transform
)

# Splitting Dataset to 80%-20% for training and testing, respectively.
# Both subsets are cut at the same index so that every sample lands in exactly
# one of them (slicing the test part from `train_size + 1` would silently drop
# the sample at index `train_size`).
train_split = 0.8
dataset_size = len(dataset)
indices = range(dataset_size)
train_size = int(train_split * dataset_size)
train_indices = indices[:train_size]
test_indices = indices[train_size:]

train_dataset = torch.utils.data.Subset(dataset, train_indices)
test_dataset = torch.utils.data.Subset(dataset, test_indices)

NOTE! Installing ujson may make loading annotations faster.
creating index...
index created!


In [None]:
# Creating Data Loaders
# Training batches are reshuffled on every pass over the data.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=train_batch_size,
    collate_fn=dataset.collate_fn,
    num_workers=6,
    shuffle=True,
)

# Evaluation keeps a fixed sample order: the per-batch metrics logged during
# testing (and the last logged value that W&B keeps as the run summary) are
# then computed on the same batches for every run, which makes runs comparable.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=test_batch_size,
    collate_fn=dataset.collate_fn,
    num_workers=6,
    shuffle=False,
)

In [None]:
def viz_mask(image, pred_mask, true_mask):
    """Log an image with its predicted and ground-truth masks to W&B.

    Args:
        image: torch tensor of dim [c, h, w].
        pred_mask: detached torch tensor of dim [h, w] holding class ids.
        true_mask: torch tensor of dim [h, w] holding class ids.

    The image is logged under the key "semantic_segmentation" with two mask
    overlays, "prediction" and "ground_truth".
    """

    def class_labels_for(mask):
        # W&B looks a class name up by the integer value stored in the mask,
        # so the keys have to be the class ids themselves rather than their
        # position in the list of unique ids.
        # Assumes dataset.get_categories returns one name per requested id,
        # in the order the ids were passed - TODO confirm against tacoloader.
        label_ids = torch.unique(mask).cpu().numpy().tolist()
        names = dataset.get_categories(label_ids)
        return {int(label_id): name for label_id, name in zip(label_ids, names)}

    predicted_class_labels = class_labels_for(pred_mask)
    ground_truth_labels = class_labels_for(true_mask)
    wandb_image = wandb.Image(
        image.cpu(),
        masks={
            "prediction": {
                "mask_data": pred_mask.squeeze().cpu().numpy(),
                "class_labels": predicted_class_labels,
            },
            "ground_truth": {
                "mask_data": true_mask.cpu().numpy(),
                "class_labels": ground_truth_labels,
            },
        },
    )
    wandb.log({"semantic_segmentation": wandb_image})

## Model Design and Implementations
### Starter Code: Helper Modules for UNet Image Segmentation

In [None]:
# Helper function that looks up an activation class in torch.nn
# by its name.
# Activations that accept an `inplace` argument are configured
# with inplace=True by default.
import inspect
import functools


def get_activation_fn(fn_name):
    """Return a factory for the ``torch.nn`` activation named ``fn_name``.

    When the activation's constructor accepts an ``inplace`` argument, the
    returned factory is pre-bound with ``inplace=True``; otherwise the class
    itself is returned unchanged.
    """
    activation_cls = getattr(torch.nn, fn_name)
    accepted_args = inspect.signature(activation_cls).parameters
    if "inplace" not in accepted_args:
        return activation_cls
    return functools.partial(activation_cls, inplace=True)

In [None]:
# Dummy baseline UNet model based on:
# https://github.com/xiaopeng-liao/Pytorch-UNet/blob/master/unet/unet_parts.py
class double_conv(torch.nn.Module):
    """Two stacked (3x3 conv => batch norm => activation) stages.

    ``activation_fn_name`` is the name of a ``torch.nn`` activation class; it
    is resolved through ``get_activation_fn``.
    """

    def __init__(self, in_ch, out_ch, activation_fn_name):
        super().__init__()
        make_activation = get_activation_fn(activation_fn_name)
        layers = []
        # The first stage maps in_ch -> out_ch, the second keeps out_ch.
        for stage_in_ch in (in_ch, out_ch):
            layers.append(torch.nn.Conv2d(stage_in_ch, out_ch, 3, padding=1))
            layers.append(torch.nn.BatchNorm2d(out_ch))
            layers.append(make_activation())
        self.conv = torch.nn.Sequential(*layers)

    def forward(self, x):
        return self.conv(x)


class inconv(torch.nn.Module):
    """Input stage of the network: a single ``double_conv`` block."""

    def __init__(self, in_ch, out_ch, activation_fn):
        super().__init__()
        self.conv = double_conv(in_ch, out_ch, activation_fn)

    def forward(self, x):
        return self.conv(x)


class down(torch.nn.Module):
    """Encoder stage: 2x2 max pooling followed by a ``double_conv`` block."""

    def __init__(self, in_ch, out_ch, activation_fn):
        super().__init__()
        pooling = torch.nn.MaxPool2d(2)
        convs = double_conv(in_ch, out_ch, activation_fn)
        self.mpconv = torch.nn.Sequential(pooling, convs)

    def forward(self, x):
        return self.mpconv(x)


class up(torch.nn.Module):
    """Decoder stage: upsample, align with the skip tensor, concatenate, convolve.

    Args:
        in_ch: number of channels of the concatenated tensor that is fed to
            the ``double_conv`` block.
        out_ch: number of channels produced by the ``double_conv`` block.
        activation_fn: name of the ``torch.nn`` activation, forwarded to
            ``double_conv``.
        bilinear: when True, upsample by a factor of 2 with bilinear
            interpolation; otherwise use a stride-2 transposed convolution
            over ``in_ch // 2`` channels.
    """

    def __init__(self, in_ch, out_ch, activation_fn, bilinear=True):
        super(up, self).__init__()
        if bilinear:
            self.up = torch.nn.Upsample(
                scale_factor=2, mode="bilinear", align_corners=True
            )
        else:
            self.up = torch.nn.ConvTranspose2d(in_ch // 2, in_ch // 2, 2, stride=2)

        self.conv = double_conv(in_ch, out_ch, activation_fn)

    def forward(self, x1, x2):
        """Upsample ``x1``, pad it to the spatial size of ``x2`` and fuse both.

        Args:
            x1: feature map from the deeper stage, laid out as [N, C, H, W].
            x2: skip-connection feature map, laid out as [N, C, H, W].
        """
        x1 = self.up(x1)
        diff_h = x2.size()[2] - x1.size()[2]
        diff_w = x2.size()[3] - x1.size()[3]
        # F.pad consumes the pad sizes starting from the LAST dimension:
        # (left, right, top, bottom). The width difference therefore comes
        # first and the height difference second.
        x1 = F.pad(
            x1,
            (diff_w // 2, diff_w - diff_w // 2, diff_h // 2, diff_h - diff_h // 2),
        )
        x = torch.cat([x2, x1], dim=1)
        x = self.conv(x)
        return x


class outconv(torch.nn.Module):
    """Output head: a 1x1 convolution producing one score map per class.

    ``forward`` returns the raw, unnormalized class scores (logits).
    ``torch.nn.CrossEntropyLoss`` applies log-softmax internally, so feeding
    it already-softmaxed values normalizes twice and flattens the gradients.
    ``argmax`` over the class dimension is the same for logits and
    probabilities. When probabilities are needed, apply ``self.softmax`` to
    the output.
    """

    def __init__(self, in_ch, out_ch):
        super(outconv, self).__init__()
        self.conv = torch.nn.Conv2d(in_ch, out_ch, 1)
        # Not applied in forward(); kept so callers can turn the logits into
        # per-pixel class probabilities on demand.
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        return self.conv(x)

In [None]:
class UNet(torch.nn.Module):
    """UNet made of an input stage, four ``down`` stages, four ``up`` stages
    and an ``outconv`` head.

    ``config`` is read for "unet_channels" (channel count of the first
    stage), "activation_fn" (name of the activation) and "bilinear"
    (upsampling flag passed to every ``up`` stage).
    """

    def __init__(self, n_channels, n_classes, config):
        super().__init__()
        base = config["unet_channels"]
        act = config["activation_fn"]

        # Encoder
        self.inc = inconv(n_channels, base, act)
        self.down1 = down(base, base * 2, act)
        self.down2 = down(base * 2, base * 4, act)
        self.down3 = down(base * 4, base * 8, act)
        self.down4 = down(base * 8, base * 8, act)

        # Decoder: each stage receives the upsampled tensor concatenated with
        # the encoder output of matching resolution.
        bilinear = config["bilinear"]
        self.up1 = up(base * 16, base * 4, act, bilinear=bilinear)
        self.up2 = up(base * 8, base * 2, act, bilinear=bilinear)
        self.up3 = up(base * 4, base, act, bilinear=bilinear)
        self.up4 = up(base * 2, base, act, bilinear=bilinear)

        self.outc = outconv(base, n_classes)

    def forward(self, x):
        skip1 = self.inc(x)
        skip2 = self.down1(skip1)
        skip3 = self.down2(skip2)
        skip4 = self.down3(skip3)
        out = self.down4(skip4)
        decoder_path = (
            (self.up1, skip4),
            (self.up2, skip3),
            (self.up3, skip2),
            (self.up4, skip1),
        )
        for stage, skip in decoder_path:
            out = stage(out, skip)
        return self.outc(out)

In [None]:
# Multi-class cross entropy used as the training loss. CrossEntropyLoss
# applies log-softmax to its input internally and takes integer class
# indices as targets.
loss_fn = torch.nn.CrossEntropyLoss()

### Training, Logging, Finding Hyper Parameters

In [None]:
# accuracy metrics and utility functions


def pixel_accuracy(pred, ground):
    """Return the fraction of positions at which ``pred`` equals ``ground``.

    The result is a 0-dim float tensor.
    """
    matches = torch.eq(pred, ground)
    return matches.sum().float() / matches.numel()


# mIOU based on: https://stackoverflow.com/questions/62461379/multiclass-semantic-segmentation-model-evaluation
def mIOU(pred, label, num_classes):
    """Mean intersection-over-union over the classes present in ``label``.

    Args:
        pred: tensor of predicted class ids.
        label: tensor of ground-truth class ids with as many elements as
            ``pred``.
        num_classes: class ids ``0 .. num_classes - 1`` are evaluated.

    Returns:
        A 0-dim float tensor holding the mean IoU of every class that occurs
        at least once in ``label``. Classes absent from ``label`` are skipped.
        NaN is returned when none of the evaluated classes occurs in ``label``.
    """
    # reshape (unlike view) also accepts non-contiguous tensors.
    pred = pred.reshape(-1)
    label = label.reshape(-1)
    present_iou_list = []
    for sem_class in range(num_classes):
        pred_inds = pred == sem_class
        target_inds = label == sem_class
        target_count = target_inds.long().sum()
        if target_count == 0:
            # Class does not occur in the ground truth: leave it out of the mean.
            continue
        intersection_now = pred_inds[target_inds].long().sum()
        union_now = pred_inds.long().sum() + target_count - intersection_now
        present_iou_list.append(intersection_now.float() / union_now.float())
    if not present_iou_list:
        # torch.stack raises on an empty list; report "undefined" instead.
        return torch.tensor(float("nan"), device=label.device)
    return torch.mean(torch.stack(present_iou_list))


def save_model(model, run):
    """Write ``model`` to disk and log the file to ``run`` as a W&B artifact.

    The file name contains ``run.id``; the artifact is named "trained_model",
    has type "model" and carries the framework name as metadata.
    """
    checkpoint_path = "trained_model.%s.pt" % run.id
    torch.save(model, checkpoint_path)

    model_artifact = wandb.Artifact(
        "trained_model", type="model", metadata={"framework": "pytorch"}
    )
    model_artifact.add_file(checkpoint_path)
    run.log_artifact(model_artifact)

In [None]:
def train():
    """Train a UNet for one sweep configuration, then evaluate it and log to W&B.

    Hyperparameters are read from ``wandb.config``: "epochs" and "lr" here,
    plus the keys consumed by ``UNet``. The function
      1. trains on ``train_loader`` and logs the loss of every step,
      2. saves the model as a W&B artifact,
      3. logs pixel accuracy and mIoU for every batch of ``test_loader``,
      4. logs the predicted mask of the first test sample.
    """
    with wandb.init(entity=wandb_username, project=wandb_project) as run:
        config = wandb.config
        num_classes = dataset.len_categories
        unet = UNet(3, num_classes, config).to(device)
        optim = torch.optim.Adam(unet.parameters(), lr=config["lr"])

        unet.train()
        for _ in tqdm.tqdm(range(config["epochs"])):
            bar = tqdm.tqdm(train_loader)
            for data in bar:
                optim.zero_grad()
                segmask = unet(data.images.to(device))
                loss = loss_fn(segmask, data.masks.to(device).long())
                loss.backward()
                # Log a plain Python float instead of a tensor.
                loss_value = loss.item()
                bar.set_description("Loss: %f" % loss_value)
                wandb.log({"loss": loss_value})
                optim.step()
        save_model(unet, run)

        # Switch BatchNorm (and any other mode-dependent layer) to inference
        # behaviour; otherwise the statistics of each small test batch would
        # be used for normalization.
        unet.eval()
        test_bar = tqdm.tqdm(test_loader, position=0)
        with torch.no_grad():
            for data in test_bar:
                segmask = unet(data.images.to(device))
                mask = torch.argmax(segmask, dim=1).cpu()
                acc = pixel_accuracy(mask, data.masks).item()
                miou = mIOU(mask, data.masks, num_classes).item()
                test_bar.set_description("Acc: %.2f" % acc)
                wandb.log({"mean_pixel_accuracy": acc, "mean_iou": miou})

            # Draw one sample and visualize the mask for each sweep.
            # Kept inside no_grad so that no autograd graph is built for it.
            sample = test_dataset[0]
            segmask = unet(sample.image.unsqueeze(0).to(device))
            mask = torch.argmax(segmask, dim=1).squeeze()
            viz_mask(sample.image, mask, sample.mask)

In [None]:
# Using wandb's hyperparameter optimization framework sweeps
# More information can be found here: https://docs.wandb.ai/guides/sweeps
sweep_name = randomname.get_name()
sweep_config = {
    "name": sweep_name,
    # Bayesian search that maximizes the "mean_iou" value logged by train().
    "method": "bayes",
    "metric": {"name": "mean_iou", "goal": "maximize"},
    "parameters": {
        # NOTE(review): with 0 epochs train() never enters its training loop,
        # so every run evaluates an untrained model and "lr" has no effect.
        # Raise this value for a meaningful sweep.
        "epochs": {"values": [0]},
        # Learning rate is searched within [min, max].
        "lr": {"min": 1e-4, "max": 1e-2},
        # Names of torch.nn activation classes, resolved by get_activation_fn.
        "activation_fn": {"values": ["ReLU", "LeakyReLU", "PReLU"]},
        # Channel count of the first UNet stage.
        "unet_channels": {"values": [8, 16, 32]},
        # True: bilinear upsampling, False: transposed convolution.
        "bilinear": {"values": [True, False]},
    },
}
# Register the sweep with W&B; the returned id is what the agent runs.
sweep_id = wandb.sweep(sweep_config, entity=wandb_username, project=wandb_project)

Create sweep with ID: tyz037mm
Sweep URL: https://wandb.ai/adrishd/taco-baseline/sweeps/tyz037mm


In [None]:
count = 5  # number of runs the agent executes for this sweep
# Each run calls train() with one hyperparameter configuration of the sweep.
wandb.agent(sweep_id, function=train, count=count)
# Look the finished sweep up through the public API and report its best run.
sweep = wandb.Api().sweep("/".join([wandb_username, wandb_project, sweep_id]))
print("Best Run: %s" % sweep.best_run().url)

[34m[1mwandb[0m: Agent Starting Run: wkxffxi1 with config:
[34m[1mwandb[0m: 	activation_fn: PReLU
[34m[1mwandb[0m: 	bilinear: False
[34m[1mwandb[0m: 	epochs: 0
[34m[1mwandb[0m: 	lr: 0.005888803533483861
[34m[1mwandb[0m: 	unet_channels: 16
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mwandb_fc[0m (use `wandb login --relogin` to force relogin)


0it [00:00, ?it/s]
Acc: 0.02: 100%|██████████| 100/100 [00:55<00:00,  1.81it/s]


True
True


0,1
mean_iou,▄▄▃▇▁▆▅▇▆▃▃▅▃▄▅▆▄▄▂▂▄▆▅▄▂▂▂▅█▆▄▁▅█▃▆▄▄▅▃
mean_pixel_accuracy,▂▂▄▄▂▅▂▆▃▃▂▄▄▄▄▄█▂▁▂▃▂▂▂▃▃▃▆▆▃▃▂▃▆▂▂▄▄▃▃

0,1
mean_iou,0.00374
mean_pixel_accuracy,0.02118


[34m[1mwandb[0m: Agent Starting Run: jhzx0z1s with config:
[34m[1mwandb[0m: 	activation_fn: ReLU
[34m[1mwandb[0m: 	bilinear: False
[34m[1mwandb[0m: 	epochs: 0
[34m[1mwandb[0m: 	lr: 0.0005645053746471101
[34m[1mwandb[0m: 	unet_channels: 32
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0it [00:00, ?it/s]
Acc: 0.00: 100%|██████████| 100/100 [01:25<00:00,  1.18it/s]


True
True


0,1
mean_iou,▂▂▃▃▂▂▁▄▃▅▂▂▂▄▁▁▄█▂▆▄▂▄▅▂▂▄▁▂▄▂▅▅▂█▁▄▄▃▅
mean_pixel_accuracy,▆▄▃▂▇▄▄▄▆▅▆▃▅▄▄▃▆▆▆▅▂▆▄▅▄▄▇▅▄▃▅▆▆▄▁▅█▅▅▆

0,1
mean_iou,0.00154
mean_pixel_accuracy,0.00214


[34m[1mwandb[0m: Agent Starting Run: 2e82gn12 with config:
[34m[1mwandb[0m: 	activation_fn: PReLU
[34m[1mwandb[0m: 	bilinear: True
[34m[1mwandb[0m: 	epochs: 0
[34m[1mwandb[0m: 	lr: 0.0039085039821085115
[34m[1mwandb[0m: 	unet_channels: 16
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0it [00:00, ?it/s]
Acc: 0.06: 100%|██████████| 100/100 [00:51<00:00,  1.95it/s]


True
True


0,1
mean_iou,▂▂▃▂▂▃▂▃▂▃▂▃▁▃▃▄▁▄▆▁▃▃▃▂▃▂█▆▃▂▃▃▁▁▄▂▃▂▂▃
mean_pixel_accuracy,▂▂▂▂▁▂▂▂▂▃▂▂▂▃▃▄▁▄█▂▃▂▂▂▂▃▆▇▂▂▄▃▂▂▃▂▅▂▂▃

0,1
mean_iou,0.01089
mean_pixel_accuracy,0.05921


[34m[1mwandb[0m: Agent Starting Run: m7nivc9m with config:
[34m[1mwandb[0m: 	activation_fn: PReLU
[34m[1mwandb[0m: 	bilinear: False
[34m[1mwandb[0m: 	epochs: 0
[34m[1mwandb[0m: 	lr: 0.004027723207271584
[34m[1mwandb[0m: 	unet_channels: 16
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0it [00:00, ?it/s]
Acc: 0.00: 100%|██████████| 100/100 [00:52<00:00,  1.90it/s]


True
True


0,1
mean_iou,▂▅▁▄▂▂▆▃▂▂▃▂▃▅▃▃▁▂▂▂▇▂▄▂▃▄▂█▂▁▁▂▂▂█▂▄▃▂▄
mean_pixel_accuracy,▃▅▄█▃▆█▅▄▃▆▅▃▆▅▄▁▁▅▄▅▃▄▃▆▅▄▇▆▅▄▄▃▄▆▃▃▄▅▃

0,1
mean_iou,0.00193
mean_pixel_accuracy,0.00138


[34m[1mwandb[0m: Agent Starting Run: e02s4wr9 with config:
[34m[1mwandb[0m: 	activation_fn: PReLU
[34m[1mwandb[0m: 	bilinear: False
[34m[1mwandb[0m: 	epochs: 0
[34m[1mwandb[0m: 	lr: 0.003902229286315114
[34m[1mwandb[0m: 	unet_channels: 16
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0it [00:00, ?it/s]
Acc: 0.04: 100%|██████████| 100/100 [00:53<00:00,  1.88it/s]


True
True


0,1
mean_iou,█▇▃▂▅▄█▄▁▃▃▄▄▄▃▂▂▂▆▃▂▃▂▆▄▄▇▄▃▆▃▆▂▂▄▃▃▃▄▅
mean_pixel_accuracy,▆█▄▃▅▅▆▆▂▅▄▄▆▄▅▁▄▅▆▅▄▄▃▅▇▅▇▆▅█▆▅▅▅▆█▆▄▄▄

0,1
mean_iou,0.00864
mean_pixel_accuracy,0.03839


[34m[1mwandb[0m: Sorting runs by -summary_metrics.mean_iou


Best Run: https://wandb.ai/adrishd/taco-baseline/runs/2e82gn12
