In [2]:
!pip install datasets
from datasets import load_dataset

Collecting datasets
  Using cached datasets-2.15.0-py3-none-any.whl (521 kB)
Collecting pyarrow-hotfix (from datasets)
  Using cached pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Using cached dill-0.3.7-py3-none-any.whl (115 kB)
Collecting multiprocess (from datasets)
  Using cached multiprocess-0.70.15-py310-none-any.whl (134 kB)
Installing collected packages: pyarrow-hotfix, dill, multiprocess, datasets
Successfully installed datasets-2.15.0 dill-0.3.7 multiprocess-0.70.15 pyarrow-hotfix-0.6


In [3]:
import numpy as np
import cv2
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from PIL import Image
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import h5py
import matplotlib.pyplot as plt
import numpy as np

In [4]:
import yaml

In [5]:
parent_path = "drive/MyDrive/M202A/"

Hyperparameter Setup and other banal things

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


Importing Data into Disk

In [7]:
from datasets import load_from_disk
ds = load_from_disk(parent_path+"NYU/")


Loading Testing Dataset

In [8]:
path_to_dataset = "drive//MyDrive//M202A//nyu_depth_v2_labeled.mat"

def extract_files(config):
  path = config["path"]
  f = h5py.File(path_to_dataset)
  length = len(f["images"])
  indices = np.random.randint(0,length,size=length//3)
  batch_size = length//3
  color = np.zeros((batch_size,480,640,3))
  rawdepth = np.zeros((batch_size,480,640))
  truedepth = np.zeros_like(rawdepth)
  for i in tqdm(range(0,len(indices))):
    j = indices[i]
    # read 0-th image. original format is [3 x 640 x 480], uint8
    img = f['images'][j].astype(np.uint8)
    img = img.transpose(2,1,0)
    color[i,...] = img
    rawD = f['rawDepths'][j]
    rawdepth[i,...] = rawD.T.astype(np.float32)
    depth_from_disk = f["depths"][j].astype(np.float32)
    truedepth[i,...] = depth_from_disk.T
  config["color"] = color
  config["depth"] = truedepth
  config["rawDepth"] = rawdepth
config={"path":path_to_dataset}
extract_files(config)

100%|██████████| 483/483 [01:11<00:00,  6.79it/s]


In [9]:
rgb_config = {"R_u":-0.00776206,"R_w":0.01519309,"Rvar":6.88208756e-06,
              "G_u":0.01147944,"G_w":0.50978071,"Gvar":9.19419688e-06,
              "B_u":0.00371937,"B_w":0.47502621,"Bvar":6.89479643e-06}

# Depth Noise based on the Channels, readings taken @0.5m

In [10]:
print(len(ds['train']))
ds.set_format("torch", device=device)
ds.shuffle()

47584


DatasetDict({
    train: Dataset({
        features: ['image', 'depth_map'],
        num_rows: 47584
    })
    validation: Dataset({
        features: ['image', 'depth_map'],
        num_rows: 654
    })
})

Loading Pretrained Unet for Depth

In [11]:
with open(parent_path+'config.yaml', 'r') as file:
    model_configs = yaml.safe_load(file)
model_configs['model']['channels']=1
model_configs['model']['base filters']=16

In [12]:
from model import *

Creating New Network by mixing the two

In [13]:
class Resnet_UNet(nn.Module):
  """
  Residual-Dense U-net for image denoising.
  """
  def __init__(self,**kwargs):
      super().__init__()
      channels = kwargs['channels']
      filters_0 = kwargs['base filters']
      filters_1 = 2 * filters_0
      filters_2 = 4 * filters_0
      filters_3 = 8 * filters_0

      # Encoder:
      # Level 0:
      self.drop = nn.Dropout(p=0.5)
      self.input_block = InputBlock(4, filters_0)
      self.block_0_0 = DenoisingBlock(filters_0, filters_0 // 2, filters_0)
      self.block_0_1 = DenoisingBlock(filters_0, filters_0 // 2, filters_0)
      self.down_0 = DownsampleBlock(filters_0, filters_1)

      # Level 1:
      self.block_1_0 = DenoisingBlock(filters_1, filters_1 // 2, filters_1)
      self.block_1_1 = DenoisingBlock(filters_1, filters_1 // 2, filters_1)
      self.down_1 = DownsampleBlock(filters_1, filters_2)

      # Level 2:
      self.block_2_0 = DenoisingBlock(filters_2, filters_2 // 2, filters_2)
      self.block_2_1 = DenoisingBlock(filters_2, filters_2 // 2, filters_2)
      self.down_2 = DownsampleBlock(filters_2, filters_3)

      # Level 3 (Bottleneck)
      self.block_3_0 = DenoisingBlock(filters_3, filters_3 // 2, filters_3)
      self.block_3_1 = DenoisingBlock(filters_3, filters_3 // 2, filters_3)

      # Decoder
      # Level 2:
      self.up_2 = UpsampleBlock(filters_3, filters_2, filters_2)
      self.block_2_2 = DenoisingBlock(filters_2, filters_2 // 2, filters_2)
      self.block_2_3 = DenoisingBlock(filters_2, filters_2 // 2, filters_2)

      # Level 1:
      self.up_1 = UpsampleBlock(filters_2, filters_1, filters_1)
      self.block_1_2 = DenoisingBlock(filters_1, filters_1 // 2, filters_1)
      self.block_1_3 = DenoisingBlock(filters_1, filters_1 // 2, filters_1)

      # Level 0:
      self.up_0 = UpsampleBlock(filters_1, filters_0, filters_0)
      self.block_0_2 = DenoisingBlock(filters_0, filters_0 // 2, filters_0)
      self.block_0_3 = DenoisingBlock(filters_0, filters_0 // 2, filters_0)

      self.output_block = OutputBlock(filters_0, channels)


  def forward(self, inputs):
      inputs = self.drop(inputs)
      out_0 = self.input_block(inputs)    # Level 0
      out_0 = self.block_0_0(out_0)
      out_0 = self.block_0_1(out_0)

      out_1 = self.down_0(out_0)          # Level 1
      out_1 = self.block_1_0(out_1)
      out_1 = self.block_1_1(out_1)

      out_2 = self.down_1(out_1)          # Level 2
      out_2 = self.block_2_0(out_2)
      out_2 = self.block_2_1(out_2)

      out_3 = self.down_2(out_2)          # Level 3 (Bottleneck)

      out_3 = self.block_3_0(out_3)
      out_3 = self.block_3_1(out_3)



      out_4 = self.up_2([out_3, out_2])   # Level 2
      out_4 = self.block_2_2(out_4)
      out_4 = self.block_2_3(out_4)

      out_5 = self.up_1([out_4, out_1])   # Level 1
      out_5 = self.block_1_2(out_5)
      out_5 = self.block_1_3(out_5)

      out_6 = self.up_0([out_5, out_0])   # Level 0
      out_6 = self.block_0_2(out_6)
      out_6 = self.block_0_3(out_6)

      return self.output_block(out_6), out_3


Loading Optimizers, Schedulers and Training the network

In [14]:
train_data = ds['train']
val_data = ds['validation']

In [15]:
from tqdm import tqdm
from datetime import datetime
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

```
Sanity Testing the Segmentaion Loss before plugging it into the loss model
```

In [19]:
def add_noise_inloop(depth,cfg=rgb_config):
  noise  =(torch.randn(depth.shape,device=device))*1e-3
  noise[:,1:,:,:] = 0
  depth = depth+noise
  return depth, noise[:,0,:,:].unsqueeze(1)

In [20]:
def test_loop():
  error = []
  color=config["color"]
  truedepth = config["depth"]
  rawdepth= config["rawDepth"]
  length = len(color)
  with torch.no_grad():
    for i in (range(length)):
      depth_img = torch.tensor(rawdepth[i],dtype=torch.float32).to(device).reshape(1,1,480,640)
      color_img  = torch.tensor(color[i],dtype=torch.float32).to(device).permute(2,0,1).unsqueeze(0)/255
      stacked = torch.hstack((depth_img,color_img))
      Z,n= add_noise_inloop(stacked,rgb_config)
      noise_estimate = ResUnet(Z)
      denoised_depth = (Z[:,0,:,:].unsqueeze(1) - n_estimate)
      mask =depth_img> 0
      masked_denoised_depth =denoised_depth*mask
      gt = truedepth[i]
      gt_tensor = torch.tensor(gt,dtype=torch.float).to(device)
      loss = (masked_denoised_depth-gt_tensor*mask).to("cpu").detach().numpy()
      error.append(loss)
    mae = [np.mean(abs(i)) for i in error]
    rmse = [np.sqrt(np.mean(i**2)) for i in error]
    print("Average mean squared error is ",np.mean(mae)*1000,"mm")
    print("Average root mean squared error is ",np.mean(rmse)*1000,"mm")

In [21]:
mse_loss = nn.MSELoss()
n_epochs = 5
lr = 1e-4
best_vloss = 1_000_000.
ResUnet = Resnet_UNet(**model_configs['model'])
ResUnet.to(device)
bs = 4
freq = bs * 200;
optimizer= torch.optim.AdamW(filter(lambda p:p.requires_grad,ResUnet.parameters()),lr=1e-3,weight_decay = 1e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
for epoch in (range(n_epochs)):
  train_data.shuffle()
  val_data.shuffle()

  running_loss = 0

  ## Training Loop
  for i in tqdm(range(0,len(train_data),bs)):
    with torch.autocast(device_type='cuda', dtype=torch.float16):
      optimizer.zero_grad()
      color = train_data[i:i+bs]['image'].permute(0,3,1,2)/255 ## Channels first and normalizing to 0 and 1
      depth = train_data[i:i+bs]['depth_map'].unsqueeze(0).permute(1,0,2,3)
      stacked = torch.hstack((depth,color))
      Z,n = add_noise_inloop(stacked,rgb_config)
      n_estimate,denoised_embedding = ResUnet(Z)

      _,true_encoding = ResUnet(stacked)
      denoised_depth_estimate = (Z[:,0,:,:].unsqueeze(1) - n_estimate)
      loss = mse_loss(depth,denoised_depth_estimate)
      loss.backward(retain_graph=True)
      loss = mse_loss(true_encoding,denoised_embedding)
      loss.backward()
      optimizer.step()
      running_loss+=loss.item()
      if(i%freq==0 and i!=0):
        avg_loss = running_loss/freq
        running_loss = 0
        running_vloss = 0;
        with torch.no_grad():
          for count in range(len(val_data)):
            optimizer.zero_grad()
            color = train_data[i]['image'].permute(2,0,1).unsqueeze(0)/255 ## Channels first and normalizing to 0 and 1
            depth = train_data[i]['depth_map'].reshape(1,1,480,640)
            stacked = torch.hstack((depth,color))
            Z,n = add_noise_inloop(stacked,rgb_config)
            n_estimate,_ = ResUnet(Z)
            denoised_depth_estimate = (Z[:,0,:,:].unsqueeze(1) - n_estimate)
            loss = mse_loss(depth,denoised_depth_estimate)
            running_vloss+=loss.item()
          avg_vloss = running_vloss/len(val_data)
          print("Loss train {} valid {}".format(avg_loss,avg_vloss))
          if avg_loss < best_vloss:
            best_vloss = avg_loss
            model_path = 'model_{}_{}'.format(timestamp, i)
            torch.save(ResUnet.state_dict(), parent_path+"Autoencoder_AWGNMSE.pth")
        test_loop()
        scheduler.step()


  2%|▏         | 200/11896 [03:04<3:01:40,  1.07it/s]

Loss train 7.3315465419909745e-06 valid 0.00038551513100468725


  2%|▏         | 201/11896 [04:06<62:03:06, 19.10s/it]

Average mean squared error is  16.728054732084274 mm
Average root mean squared error is  35.343047231435776 mm


  2%|▏         | 211/11896 [04:16<3:56:54,  1.22s/it]


KeyboardInterrupt: ignored

Eval

In [None]:

error = []
color=config["color"]
truedepth = config["depth"]
rawdepth= config["rawDepth"]
length = len(color)
denoised_depth_images = np.zeros_like(color)
with torch.no_grad():
  for i in (range(length)):
    depth_img = torch.tensor(rawdepth[i],dtype=torch.float32).to(device).reshape(1,1,480,640)
    color_img  = torch.tensor(color[i],dtype=torch.float32).to(device).permute(2,0,1).unsqueeze(0)/255
    stacked = torch.hstack((depth_img,color_img))
    Z,n= add_noise_inloop(stacked,rgb_config)
    noise_estimate = ResUnet(Z)
    denoised_depth = (Z[:,0,:,:].unsqueeze(1) - n_estimate)
    mask =depth_img> 0
    masked_denoised_depth =denoised_depth*mask
    gt = truedepth[i]
    gt_tensor = torch.tensor(gt,dtype=torch.float).to(device)
    loss = (masked_denoised_depth-gt_tensor*mask).to("cpu").detach().numpy()
    denoised_depth_images[i,...] = denoised_depth.detach().to("cpu").numpy()
    error.append(loss)
mae = [np.mean(abs(i)) for i in error]
rmse = [np.sqrt(np.mean(i**2)) for i in error]
print("Average mean squared error is ",np.mean(mae)*1000,"mm")
print("Average root mean squared error is ",np.mean(rmse)*1000,"mm")
random_indice = np.random.randint(low=0,high = len(mae),size=5)
denoised_images = denoised_depth_images[random_indice]
mean_squared_error = (truedepth[random_indice] - denoised_depth_images[random_indice])**2
fig,axs = plt.subplots(5,3)
for i in range(0,5):
      axs[i,0].imshow(truedepth[random_indice[i]],cmap="jet")
      axs[i,1].imshow(denoised_depth_images[random_indice[i]],cmap="jet")
      axs[i,2].imshow(mean_squared_error[i],cmap='jet') ## There will be a band of error on the borders, because of how the sensor was setup
plt.show()