# Exercise 1. FCN implementation

In [None]:
import torch
import torchvision.transforms as transforms

import numpy as np
import torch.nn as nn
import os.path as osp
import PIL
import imageio
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from torch.utils.data import DataLoader
import torch.optim as optim

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Download Dataset via wget

wget은 web-get의 약자로, 웹에서 파일을 직접 가져오는 명령어입니다. 여기서는 데이터가 올라가 있는 웹 주소에서 zip 형태의 압축 파일(Kitti.zip)을 직접 다운로드한 뒤, unzip으로 압축을 풀겠습니다.

In [None]:
# Download the dataset archive from Google Drive as Kitti.zip, then extract it
# into ./data/Kitti.
# The inner wget call saves the session cookies and uses sed to pull the
# `confirm` token out of the response; the outer wget call reuses the cookies
# and that token to fetch the file itself. The temporary cookie file is removed
# once the download succeeds.

!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=19EiycfOQtf6uDKvMgwlHZB50cAxX_U4z' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=19EiycfOQtf6uDKvMgwlHZB50cAxX_U4z" -O Kitti.zip && rm -rf /tmp/cookies.txt
!unzip Kitti.zip -d ./data/Kitti


파일을 제대로 다운로드 하였고 읽을 수 있는지 확인해봅니다.

In [None]:
# Sanity check: print every entry of the training split index to confirm that
# the archive was downloaded and extracted where the dataset class expects it.
imgsets_file = osp.join('./data/Kitti', '{}.txt'.format('train'))
# The context manager closes the file handle once the listing has been printed.
with open(imgsets_file) as split_index:
    for line in split_index:
        print(line.strip())

## Dataset & Dataloader

In [None]:
class KITTIdataset(torch.utils.data.Dataset):
    """Dataset of (image, label mask) pairs listed in a split index file.

    ``<root>/<split>.txt`` is read once at construction time. Every non-blank
    line holds two whitespace-separated paths relative to ``<root>/data_road``:
    the input image first and its label image second.
    """

    class_names = np.array(['background', 'road'])

    def __init__(self, root, transform, split='train'):
        """Collect the image and label paths of one split.

        Args:
            root: Directory containing ``<split>.txt`` and ``data_road/``.
            transform: Callable applied to the PIL image in ``__getitem__``.
            split: Name of the index file without the ``.txt`` extension.
        """
        self.root = root
        self.split = split
        self.transform = transform

        self.image_path = []
        self.ys = []

        imgsets_file = osp.join(root, '{}.txt'.format(split))
        # Use a context manager so the index file is closed after parsing.
        with open(imgsets_file) as index_file:
            for entry in index_file:
                fields = entry.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                img_file = osp.join(root, 'data_road/{}'.format(fields[0]))
                lbl_file = osp.join(root, 'data_road/{}'.format(fields[1]))
                self.image_path.append(img_file)
                self.ys.append(lbl_file)

    def __len__(self):
        """Return the number of (image, label) pairs in the split."""
        return len(self.ys)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            A tuple ``(image, label)``: the result of applying
            ``self.transform`` to the PIL image, and the label image as a
            ``torch.long`` tensor in which the value 255 has been replaced
            by class index 1.
        """
        # load image
        img_file = self.image_path[index]
        img = PIL.Image.open(img_file)

        # load label
        lbl_file = self.ys[index]
        lbl = np.array(PIL.Image.open(lbl_file))
        lbl[lbl == 255] = 1  # 0 is black (background), 255 is white (road)

        return self.transform(img), torch.from_numpy(lbl).long()

In [None]:
# Convert each PIL image to a tensor, then shift/scale every RGB channel with
# mean 0.5 and std 0.5. The statistics are spelled out per channel because
# `(0.5)` is just the float 0.5, not a one-element tuple.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
train_dataset = KITTIdataset(root='./data/Kitti', split='train', transform=transform)
val_dataset = KITTIdataset(root='./data/Kitti', split='val', transform=transform)

# batch_size=1: images are fed one at a time, so they never need to be
# collated into a single fixed-size batch tensor.
# Only the training split is shuffled; validation keeps the index-file order.
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

## Evaluation metric (mIoU)

The evaluation metric code is given.  

In [None]:
def _fast_hist(label_true, label_pred, n_class):
    mask = (label_true >= 0) & (label_true < n_class)
    hist = np.bincount(
        n_class * label_true[mask].astype(int) +
        label_pred[mask], minlength=n_class**2).reshape(n_class, n_class)
    return hist

def compute_mean_iou(label_trues, label_preds, n_class):
    """Return the mean intersection-over-union over all classes.

    Args:
        label_trues: Iterable of ground-truth label arrays.
        label_preds: Iterable of predicted label arrays, paired element-wise
            with ``label_trues``.
        n_class: Number of classes.

    Returns:
        The IoU averaged over the classes, skipping classes whose IoU is
        undefined (NaN) because they appear in neither array.
    """
    hist = np.zeros((n_class, n_class))
    for lt, lp in zip(label_trues, label_preds):
        hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)

    intersection = np.diag(hist)
    union = hist.sum(axis=1) + hist.sum(axis=0) - intersection
    # A class absent from both arrays yields 0/0. The resulting NaN is wanted
    # (nanmean skips it), so only the floating-point warning is silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        iu = intersection / union
    mean_iou = np.nanmean(iu)

    return mean_iou

# Define the Network

- FCN model

In [None]:
class Net(nn.Module):
    """FCN-8s style fully convolutional network for semantic segmentation.

    A VGG-like encoder (five convolution stages, each followed by a shared
    2x2 max-pool) feeds a convolutional classifier. Its coarse score map is
    upsampled with transposed convolutions and fused with score maps predicted
    from the pool4 and pool3 features before the final 8x upsampling.

    The class derives from ``nn.Module`` because only the ``nn.Module`` API is
    needed to build, call and checkpoint the network, and it avoids depending
    on a ``pl`` name.

    Args:
        num_class: Number of output channels (one score map per class).
    """

    def __init__(self, num_class=3):
        super().__init__()
        self.loss = nn.CrossEntropyLoss()
        self.num_class = num_class

        #############################################################
        # Structure of the FCN model
        #
        # 3->64     2 convs
        # 64->128   2 convs
        # 128->256  3 convs => Predict 3 (scored from pool3)
        # 256->512  3 convs => Predict 2 (scored from pool4)
        # 512->512  3 convs
        # 512->4096 classifier => Predict 1
        #############################################################

        ## conv1
        # padding=100 enlarges the feature maps enough that the un-padded 7x7
        # classifier convolution and the fixed crop offsets used in forward()
        # remain valid for small inputs. All other 3x3 convs use padding=1.
        self.features1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=100),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.ReLU())

        ## conv2 .. conv5
        self.features2 = self._conv_stage(64, 128, 2)
        self.features3 = self._conv_stage(128, 256, 3)
        self.features4 = self._conv_stage(256, 512, 3)
        self.features5 = self._conv_stage(512, 512, 3)

        # One parameter-free pooling layer shared by all five stages.
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)

        # 512 -> 4096 -> 4096 -> num_class
        self.classifier = nn.Sequential(
            nn.Conv2d(512, 4096, 7),
            nn.ReLU(),
            nn.Dropout2d(),
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(),
            nn.Dropout2d(),
            nn.Conv2d(4096, num_class, 1))

        ## upsampling transposed convolutions (in/out channels: num_class)
        self.upscore2 = nn.ConvTranspose2d(
            num_class, num_class, 4, stride=2, bias=False)
        self.upscore4 = nn.ConvTranspose2d(
            num_class, num_class, 4, stride=2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(
            num_class, num_class, 16, stride=8, bias=False)

        # 1x1 convolutions turning skip features into per-class score maps.
        self.score_pool4 = nn.Conv2d(512, num_class, 1)  # Predict 2
        self.score_pool3 = nn.Conv2d(256, num_class, 1)  # Predict 3

        # Not applied in forward(): nn.CrossEntropyLoss expects raw logits.
        # Call it on the network output to obtain per-pixel probabilities.
        self.softmax = nn.Softmax(dim=1)

    @staticmethod
    def _conv_stage(in_channels, out_channels, num_convs):
        """Return ``num_convs`` (3x3 conv, padding=1 -> ReLU) pairs in sequence."""
        layers = []
        for i in range(num_convs):
            layers.append(nn.Conv2d(
                in_channels if i == 0 else out_channels,
                out_channels, 3, padding=1))
            layers.append(nn.ReLU())
        return nn.Sequential(*layers)

    def forward(self, x):
        """Compute per-pixel class scores.

        Args:
            x: Image batch with 3 channels, laid out as (N, 3, H, W).

        Returns:
            Raw class scores (logits) of shape (N, num_class, H, W).
        """
        x1 = self.features1(x)
        pool1 = self.maxpool(x1)

        x2 = self.features2(pool1)
        pool2 = self.maxpool(x2)

        x3 = self.features3(pool2)
        pool3 = self.maxpool(x3)

        x4 = self.features4(pool3)
        pool4 = self.maxpool(x4)

        x5 = self.features5(pool4)
        pool5 = self.maxpool(x5)

        predict1 = self.classifier(pool5)

        # Fuse with the pool4 scores.
        deconv1 = self.upscore2(predict1)
        predict2 = self.score_pool4(pool4)
        # Crop so the skip score map matches the upsampled map.
        predict2 = predict2[:, :, 5:5 + deconv1.size()[2], 5:5 + deconv1.size()[3]]
        add1 = torch.add(deconv1, predict2)

        # Fuse with the pool3 scores.
        deconv2 = self.upscore4(add1)
        predict3 = self.score_pool3(pool3)
        # Crop so the skip score map matches the upsampled map.
        predict3 = predict3[:, :, 9:9 + deconv2.size()[2], 9:9 + deconv2.size()[3]]
        add2 = torch.add(deconv2, predict3)

        # Final upsampling, cropped back to the input resolution.
        deconv3 = self.upscore8(add2)
        deconv3 = deconv3[:, :, 33:33 + x.size()[2], 33:33 + x.size()[3]]
        out = deconv3

        return out

In [None]:
# Define model (num_class: 2 -> background, road).
# The model is left on the CPU, matching a training loop that feeds it CPU
# tensors; if it is moved to another device, every batch must be moved too.
model = Net(num_class=2)

In [None]:
import torch.optim as optim
# Cross-entropy over per-pixel class scores; it takes raw logits, so the model
# output must not be passed through softmax first.
criterion = nn.CrossEntropyLoss()
# Adam with a conservative learning rate chosen as a starting point; tune it
# if training is unstable or too slow.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Train for a fixed number of epochs. After every epoch the model is scored on
# the validation set, and the weights with the best mean IoU so far are saved
# to ./model_best.pth.
training_epochs = 5
best_iou = 0
num_class = len(train_loader.dataset.class_names)
# Feed every batch on whatever device the model weights currently live on, so
# the loop works unchanged for a CPU or a GPU model.
model_device = next(model.parameters()).device

for epoch in range(training_epochs):
    model.train()
    print ('current epoch : %d'%(epoch))

    for batch_idx, (data, target) in enumerate(train_loader):
        # load data, forward
        data, target = data.to(model_device), target.to(model_device)
        optimizer.zero_grad()

        score = model(data)

        loss = criterion(score, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 20 == 0:
            print ('batch : {}, loss : {}'.format(batch_idx, loss.item()))

    # validation
    model.eval()
    val_loss = 0
    metrics = []
    with torch.no_grad():
        for data, target in val_loader:
            # load data, forward
            data, target = data.to(model_device), target.to(model_device)
            score = model(data)

            # accumulate validation loss
            val_loss += criterion(score, target).item()

            # The predicted class of each pixel is the index of its largest score.
            _, lbl_pred = score.max(1)
            lbl_pred = lbl_pred.cpu().numpy()
            lbl_true = target.cpu().numpy()

            # lbl_true, lbl_pred: [batch, h, w]; score each image separately.
            for lt, lp in zip(lbl_true, lbl_pred):
                metrics.append(compute_mean_iou(lt, lp, num_class))

    # Guard against an empty validation loader (division by zero / mean of []).
    val_loss /= max(len(val_loader), 1)
    metrics = np.mean(metrics) if metrics else 0.0

    print ('val loss : {}, mean_iou : {}'.format(val_loss, metrics))

    ## save model
    if best_iou < metrics:
        best_iou = metrics
        print("Best model saved")
        torch.save(model.state_dict(), './model_best.pth')

print('Finished Training')

# Exercise 2. U-Net implementation

In [None]:
class UNet(nn.Module):
    """Compact two-level U-Net for semantic segmentation.

    This is one possible simplified design: an encoder with two downsampling
    steps, a bottleneck, and a decoder that upsamples back step by step. At
    each decoder level the upsampled features are concatenated with the
    encoder features of the same resolution (skip connection) before being
    refined by two convolutions.

    The class derives from ``nn.Module``; nothing in it needs more than the
    ``nn.Module`` API, and this avoids depending on a ``pl`` name.

    Args:
        num_class: Number of output channels (one score map per class).
    """

    def __init__(self, num_class=3):
        super().__init__()
        self.loss = nn.CrossEntropyLoss()
        self.num_class = num_class

        # Encoder (contracting path)
        self.enc1 = self._double_conv(3, 64)
        self.enc2 = self._double_conv(64, 128)
        self.bottleneck = self._double_conv(128, 256)
        # ceil_mode keeps the last row/column of odd-sized feature maps.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)

        # Decoder (expanding path). Each 1x1 conv halves the channel count of
        # the upsampled features so that, after concatenation with the skip
        # features, the decoder block sees 2 * skip_channels inputs.
        self.reduce2 = nn.Conv2d(256, 128, 1)
        self.dec2 = self._double_conv(256, 128)
        self.reduce1 = nn.Conv2d(128, 64, 1)
        self.dec1 = self._double_conv(128, 64)

        # 1x1 conv mapping the final features to per-class scores.
        self.head = nn.Conv2d(64, num_class, 1)

    @staticmethod
    def _double_conv(in_channels, out_channels):
        """Return two (3x3 conv, padding=1 -> ReLU) pairs; spatial size is kept."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.ReLU())

    @staticmethod
    def _upsample_like(features, reference):
        """Bilinearly resize ``features`` to the height/width of ``reference``.

        Resizing to the exact skip-tensor size (instead of a fixed 2x factor)
        keeps the concatenation valid for odd input sizes.
        """
        return nn.functional.interpolate(
            features, size=reference.shape[-2:],
            mode='bilinear', align_corners=False)

    def forward(self, x):
        """Compute per-pixel class scores.

        Args:
            x: Image batch with 3 channels, laid out as (N, 3, H, W).

        Returns:
            Raw class scores (logits) of shape (N, num_class, H, W).
        """
        # Encoder: keep e1 and e2 for the skip connections.
        e1 = self.enc1(x)
        e2 = self.enc2(self.pool(e1))
        bottom = self.bottleneck(self.pool(e2))

        # Decoder: upsample, concatenate with the matching encoder features.
        up2 = self.reduce2(self._upsample_like(bottom, e2))
        d2 = self.dec2(torch.cat([e2, up2], dim=1))

        up1 = self.reduce1(self._upsample_like(d2, e1))
        d1 = self.dec1(torch.cat([e1, up1], dim=1))

        out = self.head(d1)

        return out
    

UNet 코드는 실행되지 않아도 좋습니다. U-Net 구조와 Skip-connection의 활용법을 배우는 것이 목표입니다.  
(UNet 코드의 경우 데이터의 사이즈 문제 등으로 그대로 실행시 돌아가지 않을 가능성이 큽니다.)