# Y-net for depth estimation

### This notebook contains code to run a new depth estimation model called Y-net
Done by:
- Chandravaran Kunjeti
- Saikumar Dande

In [None]:
!pip install albumentations==0.4.6

Collecting albumentations==0.4.6
  Downloading albumentations-0.4.6.tar.gz (117 kB)
[K     |████████████████████████████████| 117 kB 11.6 MB/s 
Collecting imgaug>=0.4.0
  Downloading imgaug-0.4.0-py2.py3-none-any.whl (948 kB)
[K     |████████████████████████████████| 948 kB 36.9 MB/s 
Building wheels for collected packages: albumentations
  Building wheel for albumentations (setup.py) ... [?25l[?25hdone
  Created wheel for albumentations: filename=albumentations-0.4.6-py3-none-any.whl size=65172 sha256=1c460bcf785b694acd246645a7bdc119e7e9480145fa4c36d4e1f4250eff73cf
  Stored in directory: /root/.cache/pip/wheels/cf/34/0f/cb2a5f93561a181a4bcc84847ad6aaceea8b5a3127469616cc
Successfully built albumentations
Installing collected packages: imgaug, albumentations
  Attempting uninstall: imgaug
    Found existing installation: imgaug 0.2.9
    Uninstalling imgaug-0.2.9:
      Successfully uninstalled imgaug-0.2.9
  Attempting uninstall: albumentations
    Found existing installation: albu

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/Neural\ Network\ Project

/content/drive/MyDrive/Neural Network Project


In [None]:
from DataLoader import TransposeDepthInput, NYUDataset, save_checkpoint, get_loaders, save_predictions_as_imgs
from metrics import ScaleInvariantLoss, threeshold_percentage, rmse_linear, rmse_log, abs_relative_difference, squared_relative_difference

In [None]:
import torch
from Ynet import YNET

In [None]:
def test():
    """Smoke-test YNET on random inputs.

    Builds a random RGB batch and a random 2-channel gradient batch, runs one
    forward pass, prints the three tensor shapes, and asserts that the output
    keeps the spatial size (last two dimensions) of the input image.
    """
    batch, height, width = 3, 120, 160
    image = torch.randn((batch, 3, height, width))
    gradient = torch.randn((batch, 2, height, width))

    model = YNET(in_channels1=3, in_channels2=2, out_channels=1)
    preds = model(image, gradient)

    report = (
        ("Input shape\t:", image),
        ("Gradient shape\t:", gradient),
        ("Output shape\t:", preds),
    )
    for label, tensor in report:
        print(label, tensor.shape)

    # Only the spatial dimensions are checked, not batch or channel count.
    assert preds.shape[2:] == image.shape[2:]

test()

Input shape	: torch.Size([3, 3, 120, 160])
Gradient shape	: torch.Size([3, 2, 120, 160])
Output shape	: torch.Size([3, 1, 120, 160])


In [None]:
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import math
import torchvision
import torchvision.transforms as transforms

# Hyperparameters etc.
LEARNING_RATE = 1e-4
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16
NUM_EPOCHS = 100
NUM_WORKERS = 16
PIN_MEMORY = True
LOAD_MODEL = False  # set True to resume from MODEL_LOAD_PATH

# Dataset locations, relative to the project directory selected with %cd above.
TRAIN_IMG_DIR = "Datasets/Train/images/"
TRAIN_DEPTH_DIR = "Datasets/Train/depths/"
VAL_IMG_DIR = "Datasets/Validation/images/"
VAL_DEPTH_DIR = "Datasets/Validation/depths/"
TEST_IMG_DIR = "Datasets/Test/images/"
TEST_DEPTH_DIR = "Datasets/Test/depths/"

# Resolution the RGB images are resized to before entering the network.
IMAGE_HEIGHT = 120
IMAGE_WIDTH = 160

# Checkpoint naming and output locations.
MODEL_NAME = 'Ynet_model'
MODEL_SAVE_DIR = "Models/Ynet/checkpoint/"
MODEL_LOAD_PATH = "Models/Ynet/checkpoint/" + MODEL_NAME + "_10.pth.tar"
VALIDATION_IMAGES_SAVE_DIR = "Models/Ynet/validation_outputs/"

# Tensor type used with `.type(dtype)` when feeding the model. It must follow
# DEVICE: a hard-coded torch.cuda.FloatTensor fails on a CPU-only runtime.
dtype = torch.cuda.FloatTensor if DEVICE == "cuda" else torch.FloatTensor

def train_unet(loader, model, optimizer, loss_fn, scaler):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        loader: iterable yielding ``(data, gradient, targets)`` batches.
        model: network invoked as ``model(data, gradient)``.
        optimizer: optimizer stepped once per batch.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor that supports ``backward()``.
        scaler: unused; kept in the signature so existing callers keep working.

    Returns:
        float: the sum of the per-batch losses divided by the number of batches.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    train_loss = 0.0
    num_batches = 0
    for data, gradient, targets in loader:
        data = data.to(device=DEVICE)
        gradient = gradient.to(device=DEVICE)
        targets = targets.to(device=DEVICE)

        # forward -- .float() casts to float32 on whatever device the tensor
        # already lives on, so this also works on a CPU-only runtime.
        predictions = model(data.float(), gradient.float())
        loss = loss_fn(predictions, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        # Fail loudly instead of hitting an unbound loop variable.
        raise ValueError("train_unet received an empty loader; check the training data paths")

    return train_loss / num_batches

def _normalize_for_display(tensor):
    """Shift and scale ``tensor`` into [0, 1] so it can be written as an image."""
    shifted = tensor - torch.min(tensor)
    peak = torch.max(shifted)
    # A constant map has peak == 0; dividing would fill the image with NaNs.
    return shifted / peak if peak > 0 else shifted


def validate_unet(loader, model, loss_fn, epoch, train_loss, save_folder):
    """Evaluate ``model`` on ``loader``, print one metrics row, save sample images.

    For every batch the loss and the imported error metrics are accumulated,
    and the normalised predictions and targets are written to ``save_folder``
    as ``pred_<batch_idx>.png`` and ``<batch_idx>.png``. The model is put in
    eval mode for the duration and always returned to train mode afterwards.

    Args:
        loader: iterable yielding ``(data, gradient, targets)`` batches.
        model: network invoked as ``model(data, gradient)``.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        epoch: epoch number, used only in the printed row.
        train_loss: training loss of this epoch, used only in the printed row.
        save_folder: directory for the saved images; created if missing.

    Returns:
        float: mean per-batch validation loss.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    import os  # local import keeps this cell independent of later import cells

    os.makedirs(save_folder, exist_ok=True)

    validation_loss = 0
    delta1_accuracy = 0
    delta2_accuracy = 0
    delta3_accuracy = 0
    rmse_linear_loss = 0
    rmse_log_loss = 0
    abs_relative_difference_loss = 0
    squared_relative_difference_loss = 0
    num_batches = 0

    model.eval()
    try:
        for batch_idx, (data, gradient, targets) in enumerate(loader):
            data = data.to(device=DEVICE)
            gradient = gradient.to(device=DEVICE)
            targets = targets.to(device=DEVICE)

            with torch.no_grad():
                # .float() casts on the tensor's current device (CPU or GPU).
                predictions = model(data.float(), gradient.float())
                loss = loss_fn(predictions, targets)

                validation_loss += loss.item()

                # Error functions (thresholds are 1.25, 1.25^2 and 1.25^3).
                delta1_accuracy += threeshold_percentage(predictions, targets, 1.25)
                delta2_accuracy += threeshold_percentage(predictions, targets, 1.25*1.25)
                delta3_accuracy += threeshold_percentage(predictions, targets, 1.25*1.25*1.25)
                rmse_linear_loss += rmse_linear(predictions, targets)
                rmse_log_loss += rmse_log(predictions, targets)
                abs_relative_difference_loss += abs_relative_difference(predictions, targets)
                squared_relative_difference_loss += squared_relative_difference(predictions, targets)

                # Saving output depths; both files go inside save_folder whether
                # or not it ends with a path separator.
                torchvision.utils.save_image(
                    _normalize_for_display(predictions),
                    os.path.join(save_folder, f"pred_{batch_idx}.png"),
                )
                torchvision.utils.save_image(
                    _normalize_for_display(targets),
                    os.path.join(save_folder, f"{batch_idx}.png"),
                )

            num_batches += 1

        if num_batches == 0:
            raise ValueError("validate_unet received an empty loader; check the validation data paths")

        validation_loss /= num_batches
        delta1_accuracy /= num_batches
        delta2_accuracy /= num_batches
        delta3_accuracy /= num_batches
        rmse_linear_loss /= num_batches
        rmse_log_loss /= num_batches
        abs_relative_difference_loss /= num_batches
        squared_relative_difference_loss /= num_batches

        print('Epoch: {}    {:.4f}      {:.4f}      {:.4f}      {:.4f}      {:.4f}      {:.4f}      {:.4f}      {:.4f}      {:.4f}'.format(epoch, train_loss, 
              validation_loss, delta1_accuracy, delta2_accuracy, delta3_accuracy, rmse_linear_loss, rmse_log_loss, 
              abs_relative_difference_loss, squared_relative_difference_loss))
    finally:
        # Restore training mode even if evaluation failed part-way through.
        model.train()

    return validation_loss

def main():
    """Train the Y-net model, validating every epoch and checkpointing every 10th.

    Builds the data loaders, model, loss and Adam optimizer from the
    module-level configuration. When ``LOAD_MODEL`` is true the run resumes
    from ``MODEL_LOAD_PATH``: weights and loss histories are restored, the
    optimizer state is restored if the checkpoint contains it, and epoch
    numbering continues after the last recorded epoch so that earlier
    checkpoint files are not overwritten. Training then runs up to
    ``NUM_EPOCHS`` in total.
    """
    import os  # local import keeps this cell independent of later import cells

    rgb_data_transforms = transforms.Compose([
        transforms.Resize((IMAGE_HEIGHT, IMAGE_WIDTH)),
        transforms.ToTensor(),
    ])

    depth_data_transforms = transforms.Compose([
        TransposeDepthInput(),
    ])

    train_loader, val_loader, test_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_DEPTH_DIR,
        VAL_IMG_DIR,
        VAL_DEPTH_DIR,
        TEST_IMG_DIR,
        TEST_DEPTH_DIR,
        BATCH_SIZE,
        rgb_data_transforms,
        depth_data_transforms,
        NUM_WORKERS,
        PIN_MEMORY,
    )

    model = YNET(in_channels1=3, in_channels2=2, out_channels=1).to(DEVICE)
    loss_fn = ScaleInvariantLoss
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    # Mixed-precision scaling is not used; train_unet ignores this argument.
    scaler = None

    train_losses, validation_losses = [], []
    start_epoch = 1

    if LOAD_MODEL:
        print("=> Loading Chekpoint")
        # map_location lets a checkpoint written on GPU be opened on a CPU-only host.
        checkpoint = torch.load(MODEL_LOAD_PATH, map_location=DEVICE)
        model.load_state_dict(checkpoint["state_dict"])
        # Older checkpoints carry no optimizer state; restore it only if present.
        if "optimizer" in checkpoint:
            optimizer.load_state_dict(checkpoint["optimizer"])
        train_losses = checkpoint["train_losses"]
        validation_losses = checkpoint["validation_losses"]
        # Continue numbering after the epochs already recorded in the history.
        start_epoch = len(train_losses) + 1
        print("=> Checkpoint Loaded")

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

    print("********* Training the Unet Model **************")
    print("Epochs:     Train_loss  Val_loss    Delta_1     Delta_2     Delta_3    rmse_lin    rmse_log    abs_rel.  square_relative")
    print("Paper Val:                          (0.618)     (0.891)     (0.969)     (0.871)     (0.283)     (0.228)     (0.223)")

    for epoch in range(start_epoch, NUM_EPOCHS+1):
        train_loss = train_unet(train_loader, model, optimizer, loss_fn, scaler)
        validation_loss = validate_unet(val_loader, model, loss_fn, epoch, train_loss, save_folder=VALIDATION_IMAGES_SAVE_DIR)

        train_losses.append(train_loss)
        validation_losses.append(validation_loss)

        if epoch % 10 == 0:
            # save model (the optimizer state is included so a resumed run
            # keeps its Adam statistics)
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "train_losses": train_losses,
                "validation_losses": validation_losses,
            }
            save_path = MODEL_SAVE_DIR + MODEL_NAME + '_' + str(epoch) + '.pth.tar'
            save_checkpoint(checkpoint, save_path)

    print()

In [None]:
import warnings
# Silences every Python warning for the rest of the kernel session, not just
# during training, so later cells will not show warnings either.
warnings.filterwarnings("ignore")
# Launch the full training run defined above.
main()

********* Training the Unet Model **************
Epochs:     Train_loss  Val_loss    Delta_1     Delta_2     Delta_3    rmse_lin    rmse_log    abs_rel.  square_relative
Paper Val:                          (0.618)     (0.891)     (0.969)     (0.871)     (0.283)     (0.228)     (0.223)
Epoch: 1    0.3414      0.1909      0.2447      0.5531      0.8189      1.1960      0.2718      0.4093      0.6139
Epoch: 2    0.2244      0.1835      0.2566      0.5787      0.8241      1.1501      0.2617      0.4007      0.5793
Epoch: 3    0.1929      0.1279      0.4537      0.7793      0.9304      0.9087      0.1581      0.3432      0.4893
Epoch: 4    0.1780      0.1344      0.4015      0.7326      0.9181      0.9515      0.1770      0.3516      0.5106
Epoch: 5    0.1662      0.1101      0.5621      0.8574      0.9590      0.7870      0.1261      0.3431      0.5537
Epoch: 6    0.1571      0.1189      0.5442      0.8468      0.9549      0.8475      0.1371      0.3882      0.7987
Epoch: 7    0.1506      

### **Testing**

In [None]:
%cd /content/drive/MyDrive/Neural\ Network\ Project

/content/drive/MyDrive/Neural Network Project


In [None]:
import os
import cv2
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
import torch
from Ynet import YNET
from DataLoader import TransposeDepthInput, NYUDataset, save_checkpoint, get_loaders, save_predictions_as_imgs
from metrics import ScaleInvariantLoss, threeshold_percentage, rmse_linear, rmse_log, abs_relative_difference, squared_relative_difference

In [None]:
# Resolution (pixels) that every RGB image is resized to before inference.
IMAGE_HEIGHT, IMAGE_WIDTH = 120, 160

# RGB preprocessing pipeline: resize to (IMAGE_HEIGHT, IMAGE_WIDTH), then
# convert the PIL image to a tensor.
rgb_data_transforms = transforms.Compose(
    [
        transforms.Resize((IMAGE_HEIGHT, IMAGE_WIDTH)),
        transforms.ToTensor(),
    ]
)

In [None]:
import matplotlib.pyplot as plt

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Trained checkpoint to evaluate and the folders that receive the rendered figures.
YNET_MODEL_PATH = "Models/Ynet/checkpoint/Ynet_model_100.pth.tar"
TRAIN_SAVE_PATH = "Models/Ynet/predictions/Train/"
VAL_SAVE_PATH = "Models/Ynet/predictions/Validation/"
TEST_SAVE_PATH = "Models/Ynet/predictions/Test/"

# Dataset locations, relative to the project directory selected with %cd above.
TRAIN_IMG_DIR = "Datasets/Train/images/"
TRAIN_DEPTH_DIR = "Datasets/Train/depths/"
VAL_IMG_DIR = "Datasets/Validation/images/"
VAL_DEPTH_DIR = "Datasets/Validation/depths/"
TEST_IMG_DIR = "Datasets/Test/images/"
TEST_DEPTH_DIR = "Datasets/Test/depths/"

model = YNET(in_channels1=3, in_channels2=2, out_channels=1).to(DEVICE)

# Loading Ynet model. map_location remaps the stored tensors onto DEVICE so a
# checkpoint written on a GPU runtime can also be opened on a CPU-only one.
checkpoint = torch.load(YNET_MODEL_PATH, map_location=DEVICE)
model.load_state_dict(checkpoint["state_dict"])

<All keys matched successfully>

In [None]:
def Save_Predictions(image_dir, depth_dir, save_dir):
    """Render input / ground-truth / prediction figures for every image in a folder.

    For each entry of ``image_dir`` the RGB image and the same-named depth map
    from ``depth_dir`` are read, the image is preprocessed with
    ``rgb_data_transforms``, Sobel x/y gradients are computed, the global
    ``model`` is run, and a three-panel figure is saved to ``save_dir`` under
    the original file name. Entries that OpenCV cannot read are skipped with a
    message. The model is put in eval mode and always returned to train mode.

    Args:
        image_dir: directory containing the RGB images.
        depth_dir: directory containing depth maps named like the images.
        save_dir: directory for the output figures; created if missing.
    """
    os.makedirs(save_dir, exist_ok=True)

    model.eval()
    try:
        for image_name in os.listdir(image_dir):
            # Load the image and depth
            image = cv2.imread(os.path.join(image_dir, image_name), cv2.IMREAD_UNCHANGED)
            depth = cv2.imread(os.path.join(depth_dir, image_name), cv2.IMREAD_UNCHANGED)
            if image is None or depth is None:
                # cv2.imread returns None for sub-directories and unreadable files.
                print(f"Skipping {image_name}: could not read image or depth map")
                continue

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(image)
            image = rgb_data_transforms(image)

            # Find the gradient (channels-first tensor -> channels-last array for OpenCV).
            gray = np.moveaxis(image.numpy(), [0, 1, 2], [2, 0, 1])
            # NOTE(review): the array is RGB here but the flag is BGR2GRAY. Left
            # unchanged because inference must match whatever the training
            # DataLoader did -- confirm against DataLoader before changing.
            gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)
            gx = cv2.Sobel(gray, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=3)
            gy = cv2.Sobel(gray, ddepth=cv2.CV_32F, dx=0, dy=1, ksize=3)
            gradient = torch.from_numpy(np.stack([gx, gy]))

            # Add the batch dimension expected by the model.
            image = torch.unsqueeze(image, 0)
            gradient = torch.unsqueeze(gradient, 0)

            # Predict the output
            image = image.to(device=DEVICE)
            gradient = gradient.to(device=DEVICE)
            with torch.no_grad():
                predicted = model(image, gradient)

            # Channels-last copies for matplotlib; works for any input size
            # rather than a hard-coded 120x160 buffer.
            input_image = image[0].cpu().permute(1, 2, 0).numpy()
            predicted = predicted[0, 0].cpu().numpy()

            fig = plt.figure(figsize=(14, 6))

            ax = fig.add_subplot(1, 3, 1)
            ax.set_title('Input image')
            ax.imshow(input_image)

            ax = fig.add_subplot(1, 3, 2)
            ax.set_title('Ground truth')
            ax.imshow(depth, cmap='gist_gray')

            ax = fig.add_subplot(1, 3, 3)
            ax.set_title('Ynet predicted')
            ax.imshow(predicted, cmap='gist_gray')

            fig.savefig(os.path.join(save_dir, image_name))
            # Close explicitly: one figure per image would otherwise accumulate in memory.
            plt.close(fig)
    finally:
        # Restore training mode even if a file failed part-way through.
        model.train()

In [None]:
# Save input / ground-truth / prediction figures for the validation split.
Save_Predictions(VAL_IMG_DIR, VAL_DEPTH_DIR, VAL_SAVE_PATH)

In [None]:
# Save input / ground-truth / prediction figures for the training split.
Save_Predictions(TRAIN_IMG_DIR, TRAIN_DEPTH_DIR, TRAIN_SAVE_PATH)

In [None]:
# Save input / ground-truth / prediction figures for the test split.
Save_Predictions(TEST_IMG_DIR, TEST_DEPTH_DIR, TEST_SAVE_PATH)

### **Time taken**

In [None]:
import time

# Number of training images to time; files are read from TRAIN_IMG_DIR/<i>.png.
num_images = 50
# CUDA kernels run asynchronously, so the GPU must be synchronised around the
# forward pass or the timer only measures the kernel launch.
use_cuda = str(DEVICE).startswith("cuda")
total_time = 0

model.eval()
start_time = time.perf_counter()
for i in range(num_images):
    # The depth map is not needed for inference, so it is not read here.
    image = cv2.imread(TRAIN_IMG_DIR + str(i) + '.png', cv2.IMREAD_UNCHANGED)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(image)
    image = rgb_data_transforms(image)

    # Find the gradient
    gray = np.moveaxis(image.numpy(), [0, 1, 2], [2, 0, 1])
    gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)
    gx = cv2.Sobel(gray, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=3)
    gy = cv2.Sobel(gray, ddepth=cv2.CV_32F, dx=0, dy=1, ksize=3)
    gradient = torch.from_numpy(np.stack([gx, gy]))

    image = torch.unsqueeze(image, 0)
    gradient = torch.unsqueeze(gradient, 0)

    # Predict the output
    image = image.to(device=DEVICE)
    gradient = gradient.to(device=DEVICE)
    with torch.no_grad():
        if use_cuda:
            torch.cuda.synchronize()  # finish pending transfers before starting the clock
        start_time1 = time.perf_counter()
        predicted = model(image, gradient)
        if use_cuda:
            torch.cuda.synchronize()  # wait for the forward pass to actually complete
        end_time1 = time.perf_counter()
    total_time += (end_time1-start_time1)
end_time = time.perf_counter()
model.train()

# First line: mean end-to-end time per image (read + preprocess + forward).
print('Time taken:', (end_time-start_time)/num_images)
# Second line: mean forward-pass time per image only.
print('Time taken:', (total_time)/num_images)

Time taken: 0.11453585147857666
Time taken: 0.010718274116516113


### **Model summary**

In [None]:
print(model)

YNET(
  (ups): ModuleList()
  (downConvs1): DownConv(
    (downs): ModuleList(
      (0): DoubleConv(
        (conv): Sequential(
          (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
          (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
          (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (5): ReLU(inplace=True)
        )
      )
      (1): DoubleConv(
        (conv): Sequential(
          (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
          (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
          (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
          (4): BatchNorm2d(128, eps=1e-05, mome

In [None]:
from prettytable import PrettyTable

def count_parameters(model):
    """Print a per-parameter size table for ``model`` and return the trainable total.

    Only parameters with ``requires_grad`` set are listed and counted.

    Args:
        model: module exposing ``named_parameters()``.

    Returns:
        int: total number of elements across the trainable parameters.
    """
    table = PrettyTable(["Modules", "Parameters"])

    # Collect (name, element count) for every trainable parameter, in order.
    trainable = [
        (name, parameter.numel())
        for name, parameter in model.named_parameters()
        if parameter.requires_grad
    ]
    for name, size in trainable:
        table.add_row([name, size])
    total_params = sum(size for _, size in trainable)

    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params
    
count_parameters(model)

+-------------------------------------------+------------+
|                  Modules                  | Parameters |
+-------------------------------------------+------------+
|      downConvs1.downs.0.conv.0.weight     |    1728    |
|      downConvs1.downs.0.conv.1.weight     |     64     |
|       downConvs1.downs.0.conv.1.bias      |     64     |
|      downConvs1.downs.0.conv.3.weight     |   36864    |
|      downConvs1.downs.0.conv.4.weight     |     64     |
|       downConvs1.downs.0.conv.4.bias      |     64     |
|      downConvs1.downs.1.conv.0.weight     |   73728    |
|      downConvs1.downs.1.conv.1.weight     |    128     |
|       downConvs1.downs.1.conv.1.bias      |    128     |
|      downConvs1.downs.1.conv.3.weight     |   147456   |
|      downConvs1.downs.1.conv.4.weight     |    128     |
|       downConvs1.downs.1.conv.4.bias      |    128     |
|      downConvs1.downs.2.conv.0.weight     |   294912   |
|      downConvs1.downs.2.conv.1.weight     |    256    

58156705