In [9]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the project directory used by the
# following cells lives underneath this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
import os

# Work from the ENet project directory so that the relative paths used in the
# later cells ('./save', './data/nfs/...') resolve.
# The Drive folder is spelled "MyDrive" here to agree with the %%shell cells
# below, which use that spelling for the same directory.
os.chdir("/content/drive/MyDrive/deeplearning_intro/NeedForSpeedAI/ENet")

In [None]:
%%shell
# Launch main.py in training mode from the ENet project directory.
# NOTE(review): --with-unlabeled presumably keeps the 'unlabeled' class in
# play during training -- confirm against main.py's argument help.
cd "/content/drive/MyDrive/deeplearning_intro/NeedForSpeedAI/ENet"
python main.py --mode=train --with-unlabeled

In [11]:
%%shell
# Launch main.py in test mode from the ENet project directory, with the same
# --with-unlabeled flag that the training cell passes.
cd "/content/drive/MyDrive/deeplearning_intro/NeedForSpeedAI/ENet"
python main.py --mode=test --with-unlabeled


Loading dataset...

Selected dataset: nfs
Dataset directory: data/nfs
Save directory: save
Number of classes to predict: 3
Train dataset size: 449
Validation dataset size: 53
Image size: torch.Size([8, 3, 480, 640])
Label size: torch.Size([8, 480, 640])
Class-color dictionary: OrderedDict([('unlabeled', (0, 0, 0)), ('road', (128, 64, 128)), ('car', (64, 0, 128))])

Weighing technique: ENet
Computing class weights...
This can take a while depending on the dataset size
Class weights: tensor([ 2.0883,  2.8590, 37.5171], device='cuda:0')
ENet(
  (initial_block): InitialBlock(
    (main_branch): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (ext_branch): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (batch_norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (out_activation): PReLU(num_parameters=1)
  )
  (downsample1_0): DownsamplingBottleneck(
    (main_max1): MaxPool2d(kernel_size=2



In [None]:
import torch
import torchvision
import numpy as np
from PIL import Image
from torch import optim
from models.enet import ENet
import matplotlib.pyplot as plt
from torchvision import transforms
from collections import OrderedDict

import utils
import transforms as ext_transforms

In [None]:
def alter_predict(model, raw_img):
    """
    Run a single image through ``model`` and return the predicted class map.

    Args:
        model: the segmentation network. Its output is expected to have shape
            (N, K, H, W), i.e. one score channel per class on dimension 1.
        raw_img: the input image, either a PIL image or an nd-array accepted
            by ``transforms.ToTensor``. PIL images that are not already in
            RGB mode (e.g. RGBA or grayscale PNGs) are converted to RGB so
            that the tensor has three channels.

    Returns:
        A CPU tensor of class indices obtained by taking the argmax over
        dimension 1 of the model output, so each value lies in 0..K-1. The
        leading batch dimension (size 1) is kept. Use ``pre2render`` to turn
        this into a colour-encoded image.

    Side effects:
        Switches ``model`` to evaluation mode and leaves it there.
    """
    # Guard against PNGs with an alpha channel or a single gray channel:
    # ToTensor would otherwise produce a 4- or 1-channel tensor.
    if isinstance(raw_img, Image.Image) and raw_img.mode != 'RGB':
        raw_img = raw_img.convert('RGB')

    # Send the input to wherever the model's weights live instead of relying
    # on a notebook-level ``device`` variable defined in another cell.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    # ToTensor yields a single image; add the batch dimension the model expects.
    img = torch.unsqueeze(transforms.ToTensor()(raw_img), dim=0).to(target_device)
    model.eval()
    with torch.no_grad():
        pre = model(img)
    # The model emits one score channel per class; collapse that dimension
    # to the index of the highest-scoring class for every pixel.
    return torch.argmax(pre, 1).cpu()


def pre2render(pre, class_encoding):
    """
    Turn a batch of predicted class maps into one colour image for display.

    Args:
        pre: batch of class-index predictions, e.g. the tensor returned by
            ``alter_predict``.
        class_encoding: mapping handed to
            ``ext_transforms.LongTensorToRGBPIL``; in this notebook it maps
            class names to RGB tuples.

    Returns:
        A numpy array with the channel axis moved last (axes reordered from
        (0, 1, 2) to (1, 2, 0)), built from the grid that
        ``torchvision.utils.make_grid`` assembles out of the coloured batch.
    """
    # Class indices -> RGB PIL image -> tensor, applied to each batch item.
    colorize = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor(),
    ])
    colored_batch = utils.batch_transform(pre, colorize)
    grid = torchvision.utils.make_grid(colored_batch)
    # Channel-first to channel-last.
    return grid.numpy().transpose(1, 2, 0)

In [None]:
# Prefer the GPU, but fall back to the CPU so this cell does not fail outright
# on a runtime without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Class name -> RGB colour. In the cells shown here this mapping is used to
# count the classes and to colour the rendered prediction (see pre2render).
class_encoding = OrderedDict([
    ('unlabeled', (0, 0, 0)),
    ('road', (128, 0, 0)),
    ('car', (252, 231, 142)),
])

num_classes = len(class_encoding)
model = ENet(num_classes).to(device)
# The optimizer is created only because utils.load_checkpoint takes one as an
# argument; no training step is run in the cells shown here.
optimizer = optim.Adam(model.parameters())
# Keep only the first element of what load_checkpoint returns (the model).
# NOTE(review): presumably this restores the 'nfs_enet' weights saved under
# './save' by the training run -- confirm against utils.load_checkpoint.
model = utils.load_checkpoint(model, optimizer, './save', 'nfs_enet')[0]

In [None]:
# Segment one test frame and build its colour rendering.
test_img_path = './data/nfs/test/WA43_2.png'
test_img = Image.open(test_img_path)
pre = alter_predict(model, raw_img=test_img)
render = pre2render(pre, class_encoding=class_encoding)