## Download Data


In [5]:
!mkdir Data

mkdir: cannot create directory ‘Data’: File exists


In [6]:
!wget http://diode-dataset.s3.amazonaws.com/val.tar.gz && tar -xf val.tar.gz -C ./Data/

--2022-04-18 13:41:59--  http://diode-dataset.s3.amazonaws.com/val.tar.gz
Resolving diode-dataset.s3.amazonaws.com (diode-dataset.s3.amazonaws.com)... 52.217.224.89
Connecting to diode-dataset.s3.amazonaws.com (diode-dataset.s3.amazonaws.com)|52.217.224.89|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2774625282 (2.6G) [application/x-tar]
Saving to: ‘val.tar.gz.1’


2022-04-18 13:42:56 (47.3 MB/s) - ‘val.tar.gz.1’ saved [2774625282/2774625282]



## data pipeline




In [7]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch
import torch.utils.data as Data
import torchvision
import torchvision.transforms.functional as F
import pandas as pd
import torch.nn as nn
from tqdm import tqdm
import os
path = "./Data/val/indoors"
filelist = []

for root, dirs, files in os.walk(path):
    for file in files:
        filelist.append(os.path.join(root, file))

filelist.sort()
data = {
    "image": [x for x in filelist if x.endswith(".png")],
    "depth": [x for x in filelist if x.endswith("_depth.npy")],
    "mask": [x for x in filelist if x.endswith("_depth_mask.npy")],
}
df = pd.DataFrame(data)
df = df.sample(frac=1, random_state=42)

In [8]:
class DataGenerator(torch.utils.data.Dataset):
  def __init__(self, data, batch_size=6, dim=(768, 1024), n_channels=3, shuffle=True):
    self.data = data
    self.indices = self.data.index.tolist()
    self.dim = dim
    self.n_channels = n_channels
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.min_depth = 0.1
    self.on_epoch_end()


  def __len__(self):
      return int(np.ceil(len(self.data) / self.batch_size))

  def __getitem__(self, index):
    if (index + 1) * self.batch_size > len(self.indices):
        self.batch_size = len(self.indices) - index * self.batch_size
        # Generate one batch of data
        # Generate indices of the batch
    index = self.indices[index * self.batch_size: (index + 1) * self.batch_size]
    # Find list of IDs
    batch = [self.indices[k] for k in index]
    x, y = self.data_generation(batch)

    return x, y

  def on_epoch_end(self):

    """
    Updates indexes after each epoch
    """
    self.index = np.arange(len(self.indices))
    if self.shuffle == True:
        np.random.shuffle(self.index)

  def load(self, image_path, depth_map, mask):
    """Load input and target image."""

    image_ = cv2.imread(image_path)
    image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2RGB)
    image_ = cv2.resize(image_, self.dim)
    image_ = F.convert_image_dtype(torch.tensor(image_), torch.float32)

    depth_map = np.load(depth_map).squeeze()

    mask = np.load(mask)
    mask = mask > 0

    max_depth = min(300, np.percentile(depth_map, 99))
    depth_map = np.clip(depth_map, self.min_depth, max_depth)
    depth_map = np.log(depth_map, where=mask)

    depth_map = np.ma.masked_where(~mask, depth_map)

    depth_map = np.clip(depth_map, 0.1, np.log(max_depth))
    depth_map = cv2.resize(depth_map, self.dim)
    depth_map = np.expand_dims(depth_map, axis=2)
    depth_map = F.convert_image_dtype(torch.tensor(depth_map), torch.float32)

    return image_, depth_map

  def data_generation(self, batch):

    x = np.empty((self.batch_size, *self.dim, self.n_channels))
    y = np.empty((self.batch_size, *self.dim, 1))

    for i, batch_id in enumerate(batch):
        x[i,], y[i,] = self.load(
            self.data["image"][batch_id],
            self.data["depth"][batch_id],
            self.data["mask"][batch_id],
        )

    return x, y


## Model define (UNet)

In [9]:
class DoubleConv(nn.Module):
  """(convolution => [BN] => ReLU) * 2"""

  def __init__(self, in_channels, out_channels, mid_channels=None):
    super().__init__()
    if not mid_channels:
        mid_channels = out_channels
    self.double_conv = nn.Sequential(
        nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(mid_channels),
        nn.ReLU(inplace=True),
        nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )

  def forward(self, x):
    return self.double_conv(x)


class Down(nn.Module):
  """Downscaling with maxpool then double conv"""

  def __init__(self, in_channels, out_channels):
    super().__init__()
    self.maxpool_conv = nn.Sequential(
        nn.MaxPool2d(2),
        DoubleConv(in_channels, out_channels)
    )

  def forward(self, x):
    return self.maxpool_conv(x)


class Up(nn.Module):
  """Upscaling then double conv"""

  def __init__(self, in_channels, out_channels, bilinear=True):
    super().__init__()

    # if bilinear, use the normal convolutions to reduce the number of channels
    if bilinear:
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
    else:
        self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
        self.conv = DoubleConv(in_channels, out_channels)

  def forward(self, x1, x2):
    x1 = self.up(x1)
    # input is CHW
    diffY = x2.size()[2] - x1.size()[2]
    diffX = x2.size()[3] - x1.size()[3]

    x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                    diffY // 2, diffY - diffY // 2])
    # if you have padding issues, see
    # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
    # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
    x = torch.cat([x2, x1], dim=1)
    return self.conv(x)


class OutConv(nn.Module):
  def __init__(self, in_channels, out_channels):
    super(OutConv, self).__init__()
    self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

  def forward(self, x):
    return self.conv(x)

In [10]:
class UNet(nn.Module):
  def __init__(self, n_channels, n_classes, bilinear=False):
    super(UNet, self).__init__()
    self.n_channels = n_channels
    self.n_classes = n_classes
    self.bilinear = bilinear

    self.inc = DoubleConv(n_channels, 64)
    self.down1 = Down(64, 128)
    self.down2 = Down(128, 256)
    self.down3 = Down(256, 512)
    factor = 2 if bilinear else 1
    self.down4 = Down(512, 1024 // factor)
    self.up1 = Up(1024, 512 // factor, bilinear)
    self.up2 = Up(512, 256 // factor, bilinear)
    self.up3 = Up(256, 128 // factor, bilinear)
    self.up4 = Up(128, 64, bilinear)
    self.outc = OutConv(64, n_classes)

  def forward(self, x):
    x1 = self.inc(x)
    x2 = self.down1(x1)
    x3 = self.down2(x2)
    x4 = self.down3(x3)
    x5 = self.down4(x4)
    x = self.up1(x5, x4)
    x = self.up2(x, x3)
    x = self.up3(x, x2)
    x = self.up4(x, x1)
    logits = self.outc(x)
    return logits

## parameter and loadData

In [11]:
HEIGHT = 256
WIDTH = 256
LR = 0.0002
EPOCHS = 30
BATCH_SIZE = 8

In [12]:
train_loader = DataGenerator(
    data=df[:260].reset_index(drop="true"), batch_size=BATCH_SIZE, dim=(HEIGHT, WIDTH)
)

validation_loader = DataGenerator(
    data=df[260:].reset_index(drop="true"), batch_size=BATCH_SIZE, dim=(HEIGHT, WIDTH)
)


In [13]:
amp = False
net = UNet(n_channels=3, n_classes=5)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(net.parameters())
grad_scaler = torch.cuda.amp.GradScaler(enabled=amp)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.to(device)
epochs = 10
n_train = 260

## Train

In [23]:
for epoch in range(10):
    net.train()
    epoch_loss = 0
    with tqdm(total=n_train, desc=f'Epoch {epoch}/{epochs}', unit='img') as pbar:
        for batch in train_loader:
            images,true_masks = batch
        with torch.cuda.amp.autocast(enabled=amp):
            masks_pred = net(images)
            loss = criterion(masks_pred, true_masks)
            optimizer.zero_grad(set_to_none=True)
            grad_scaler.scale(loss).backward()
            grad_scaler.step(optimizer)
            grad_scaler.update()
        print("over")

over
over
over
over
over
over
over
over
over
over


## For Test

In [14]:
image,true_mskk = train_loader[0]
image = torch.tensor(image,device=device)
image = image.permute(0,3,1,2).float()

  

In [15]:
net.train()
pred = net(image)
pred.shape



torch.Size([8, 5, 256, 256])

In [18]:
true_mskk = torch.tensor(true_mskk,device=device).permute(0,3,1,2).float()
loss

RuntimeError: ignored

In [19]:
true_mskk.shape

torch.Size([8, 1, 256, 256])