<a href="https://colab.research.google.com/github/Wazhee/Semantic-Segmentation/blob/main/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import glob
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torch.nn as nn
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset
from torch.utils import data
from torchvision import transforms as T
from torchvision import models

# Pin the global NumPy and PyTorch RNG seeds (both to 0) for repeatable runs.
np.random.seed(0)
torch.manual_seed(0)

In [2]:
!gdown https://drive.google.com/uc?id=1eYYJ26R1S9Ln_ExwHFBqd3rbln9qVdi4&export=download
!unzip -qq cityscapes.zip

Downloading...
From: https://drive.google.com/uc?id=1eYYJ26R1S9Ln_ExwHFBqd3rbln9qVdi4
To: /content/cityscapes.zip
100% 651M/651M [00:05<00:00, 129MB/s]


In [31]:
class Segmenter(torch.nn.Module):
    """Encoder-decoder network producing one logit map per class.

    The caller supplies the encoder. The decoder applies 3x3 convolutions
    interleaved with three 2x bilinear upsampling steps (8x in total) and
    ends with a convolution that outputs `n_classes` channels.

    Args:
        n_classes: number of output channels of the final convolution.
        encoder: module applied to the input before the decoder.
        encoder_channels: number of channels in the encoder's output feature
            map. Defaults to 256, the value that used to be hard-coded.
    """

    def __init__(self, n_classes, encoder, encoder_channels=256):
        super().__init__()
        self.encoder = encoder

        def upsample_2x():
            # align_corners=False is the library default; spelling it out
            # keeps behaviour unchanged and makes the choice explicit.
            return nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)

        # Layer order is unchanged, so decoder state_dict keys are the same.
        self.decoder = nn.Sequential(
            nn.Conv2d(encoder_channels, 64, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            upsample_2x(),
            nn.Conv2d(64, 64, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            upsample_2x(),
            nn.Conv2d(64, 64, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            upsample_2x(),
            nn.Conv2d(64, n_classes, kernel_size=(3, 3), padding=1),
        )

    def forward(self, x):
        """Return the decoder output computed from the encoder features of `x`."""
        features = self.encoder(x)
        return self.decoder(features)

In [32]:
class CityScapesDataset(Dataset):
  """Dataset of (image, mask) pairs loaded from two parallel lists of paths.

  Args:
    images: sequence of image file paths.
    labels: sequence of mask file paths; labels[i] is paired with images[i].
    im_transform: callable applied to the opened PIL image. Its result is
      sliced to the first three channels.
    mask_transform: callable applied to the integer mask tensor.
    flip_prob: probability of horizontally flipping a sample (image and mask
      are flipped together). Defaults to 0.5; pass 0.0 to disable the
      augmentation, e.g. for evaluation data.

  Raises:
    ValueError: if `flip_prob` is outside [0, 1].
  """

  def __init__(self, images, labels, im_transform, mask_transform, flip_prob=0.5):
    if not 0.0 <= flip_prob <= 1.0:
      raise ValueError(f"flip_prob must be in [0, 1], got {flip_prob}")
    self.images = images
    self.labels = labels
    self.im_transform = im_transform
    self.mask_transform = mask_transform
    self.flip_prob = flip_prob

  def __getitem__(self, idx):
    """Load, transform and (randomly) flip the sample at position `idx`."""
    # Context managers close the underlying files once the pixel data has
    # been read, instead of leaving the handles to the garbage collector.
    with Image.open(self.images[idx]) as im_file:
      im = self.im_transform(im_file)[0:3, ...]  # keep the first 3 channels

    # Add an extra first dimension to mask (needed for transforms), convert
    # to int64 b/c values are integers, and apply transforms.
    # NOTE(review): assumes the mask file is single-channel, giving (1, H, W).
    with Image.open(self.labels[idx]) as mask_file:
      mask = torch.from_numpy(np.array(mask_file, dtype=np.int64))[None, ...]
    mask = self.mask_transform(mask)

    # Apply random horizontal flip to image and mask. The torch RNG is used
    # rather than NumPy's: DataLoader workers get distinct torch seeds, while
    # NumPy's global state may be duplicated into workers and repeat draws.
    if torch.rand(1).item() < self.flip_prob:
      im = TF.hflip(im)
      mask = TF.hflip(mask)

    return im, mask

  def __len__(self):
    """Return the number of image paths."""
    return len(self.images)

In [33]:
# Number of samples per mini-batch.
batch_size = 16

# Image pipeline: convert to a tensor, then resize to 256x256 with bilinear
# interpolation.
im_transform = T.Compose([
  T.ToTensor(),
  T.Resize((256, 256), interpolation=T.InterpolationMode.BILINEAR),
])

# Mask pipeline: resize to 256x256 with nearest-neighbour interpolation.
mask_transform = T.Resize((256, 256), interpolation=T.InterpolationMode.NEAREST)

def get_dataloader(im_path, shuffle=True):
  """Build a DataLoader over the image/label pairs found in `im_path`.

  Images are the files matching '*8bit.jpg' and labels the files matching
  '*labelIds.png'. Both lists are sorted and paired by position. The loader
  uses the module-level `batch_size`, `im_transform` and `mask_transform`.

  Args:
    im_path: directory containing both the images and the label masks.
    shuffle: whether the loader reshuffles the samples on each pass.

  Raises:
    ValueError: if no images are found, or the image and label counts differ
      (pairing by position would then attach the wrong mask to an image).
  """
  images = sorted(glob.glob(im_path + '/*8bit.jpg'))
  labels = sorted(glob.glob(im_path + '/*labelIds.png'))
  if not images:
    raise ValueError(f"No '*8bit.jpg' images found in {im_path!r}")
  if len(images) != len(labels):
    raise ValueError(
        f"Found {len(images)} images but {len(labels)} label masks in {im_path!r}")
  dataset = CityScapesDataset(images, labels, im_transform, mask_transform)
  return data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=1)

# Create dataloaders. The validation loader is not shuffled so that repeated
# passes over it visit the samples in the same order.
train_dataloader = get_dataloader('./cityscapes/train')
val_dataloader = get_dataloader('./cityscapes/val', shuffle=False)

In [34]:
# Get features from VGG16 up through 3 downsampling (maxpool) operations.
vgg = models.vgg16(pretrained=True)
# `vgg.features` is the first child module of the network; layers 0..16 are
# kept as the encoder.
encoder = vgg.features[:17]

# Create model
# NOTE(review): 34 presumably matches the label ids stored in the
# '*labelIds.png' masks -- confirm against the dataset.
n_classes = 34
model = Segmenter(n_classes, encoder)
# Use the GPU when one is available instead of failing on a CPU-only runtime.
model.to('cuda' if torch.cuda.is_available() else 'cpu');



In [35]:
import warnings
# Only UserWarnings are silenced; other warning categories stay visible.
warnings.filterwarnings('ignore', category=UserWarning)

# Fall back to the CPU when no GPU is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
lr = 1e-4
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
num_epochs = 7  # not used by the single pass below
log_every = 100  # print the loss every `log_every` batches

# Problem 1b: training loop (one pass over the training data).
model.to(device)
model.train()  # make sure the model is in training mode
size = len(train_dataloader.dataset)
for batch, (X, y) in enumerate(train_dataloader):
  X, y = X.to(device), y.to(device)

  # Compute prediction and loss. Index 0 of the mask's second dimension is
  # selected so the target has one dimension fewer than the prediction.
  pred = model(X)
  loss = loss_fn(pred, y[:, 0, :, :])

  # Backpropagation
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  if batch % log_every == 0:
    # Separate names so the `loss` tensor is not rebound to a Python float.
    loss_value, current = loss.item(), (batch + 1) * len(X)
    print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f9ef46fd430>
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: 


loss: 3.563379  [   16/ 2975]
loss: 0.996051  [ 1616/ 2975]


KeyboardInterrupt: ignored

In [None]:
from tqdm import tqdm


def train_loop(dataloader, model, loss_fn, optimizer, device):
    """Run one optimisation pass over `dataloader`.

    Returns:
        The mean of the per-batch losses (0.0 for an empty loader).
    """
    model.train()
    running = 0.0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        loss = loss_fn(model(X), y[:, 0, :, :])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running += loss.item()
    return running / max(len(dataloader), 1)


def test_loop(dataloader, model, loss_fn, device):
    """Evaluate `model` on `dataloader` without updating its weights.

    Prints and returns the mean of the per-batch losses (0.0 for an empty
    loader).
    """
    model.eval()
    running = 0.0
    with torch.no_grad():  # no gradients are needed for evaluation
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            running += loss_fn(model(X), y[:, 0, :, :]).item()
    mean_loss = running / max(len(dataloader), 1)
    print(f"Validation loss: {mean_loss:>8f}")
    return mean_loss


# Mean training loss of each epoch, in order.
loss_arr = []
for t in tqdm(range(num_epochs)):
    print(f"Epoch {t+1}\n-------------------------------")
    loss_arr.append(train_loop(train_dataloader, model, loss_fn, optimizer, device))
    test_loop(val_dataloader, model, loss_fn, device)
print("Done!")

In [None]:
# Problem 1c: Your IoU evaluation code goes here

In [None]:
# Problem 1d: Your image results code goes here