# Initialization

## Mount Google Drive

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive so the project
# files stored there can be accessed like a local folder.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Switch the working directory to the project folder on the mounted drive;
# the relative paths used later in the notebook (e.g. "./data") resolve from here.
%cd /content/drive/My\ Drive/VUB/Thesis

/content/drive/My Drive/VUB/Thesis


## Install dependencies

In [None]:
!pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Import Libraries

In [None]:
import sys
import array
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import json
import math
import gc
import os

os.environ["OPENCV_IO_ENABLE_OPENEXR"]="1"
os.environ['DISPLAY']="0.0"
import cv2
import open3d as o3d

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import RandomSampler
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from torchmetrics.functional import peak_signal_noise_ratio, structural_similarity_index_measure, mean_squared_error

import OpenEXR
import Imath
from kornia.contrib.extract_patches import extract_tensor_patches
%env PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128

env: PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128


# Constants

In [None]:
# Root folder of the dataset, relative to the current working directory.
DATA_ROOT_DIR_NAME = "./data"

# One folder per split.
TRAIN_DIR_NAME = DATA_ROOT_DIR_NAME + "/train"
VALID_DIR_NAME = DATA_ROOT_DIR_NAME + "/valid"
TEST_DIR_NAME = DATA_ROOT_DIR_NAME + "/test"

# Sub-folders appended to a split folder when building file paths.
CLEAN_DIR_NAME =  "/clean"
NOISY_DIR_NAME = "/noisy"
INFO_DIR_NAME = "/info"

# File-name templates. Despite the *_REGEX names these are `str.format`
# templates: the first field is the scene index (zero-padded to 6 digits),
# the second the camera index (zero-padded to 2 digits).
EXR_REGEX = "/depth_S{:06d}_C{:02d}.exr"
JSON_REGEX = "/info_S{:06d}_C{:02d}.json"

# String keys; presumably used to index the per-sample info JSON — TODO confirm.
INTRINSIC = "intrinsic"
EXTRINSIC = "extrinsic"

# On-disk caches of the extracted patches, one file per split and per
# clean/noisy variant (written with torch.save, read with torch.load).
CACHE_DIR_NAME = DATA_ROOT_DIR_NAME + "/cache"
TRAIN_CACHE_CLEAN = CACHE_DIR_NAME + "/train_clean.pickle"
VALID_CACHE_CLEAN = CACHE_DIR_NAME + "/valid_clean.pickle"
TEST_CACHE_CLEAN = CACHE_DIR_NAME + "/test_clean.pickle"
TRAIN_CACHE_NOISY = CACHE_DIR_NAME + "/train_noisy.pickle"
VALID_CACHE_NOISY = CACHE_DIR_NAME + "/valid_noisy.pickle"
TEST_CACHE_NOISY = CACHE_DIR_NAME + "/test_noisy.pickle"

# Shared TensorBoard writer, created with default arguments when this cell runs.
TENSORBOARD = SummaryWriter()

# File name intended for the saved denoiser model.
SAVE_MODEL_NAME = 'unet_denoiser_model.pth'

# Dataset layout: NO_SAMPLES scenes, each seen by NO_CAMERAS cameras.
NO_CAMERAS = 3
NO_SAMPLES = 1500

# Number of *scenes* in each split (60 % / 20 % / 20 %). The per-split total over
# all cameras is divided by NO_CAMERAS (previously a hard-coded 3), so the values
# stay correct if the number of cameras changes.
TRAIN_SAMPLES = int(0.6 * NO_SAMPLES * NO_CAMERAS) // NO_CAMERAS
VALID_SAMPLES = int(0.2 * NO_SAMPLES * NO_CAMERAS) // NO_CAMERAS
TEST_SAMPLES = int(0.2 * NO_SAMPLES * NO_CAMERAS) // NO_CAMERAS

# Patch extraction: square patches of PATCH_SIZE pixels that overlap by half.
PATCH_SIZE = 64
STRIDE = PATCH_SIZE // 2
PADDING = 0
# A patch is kept only if more than 15 % of its pixels are non-zero.
VALID_PATCH_THRESHOLD = (PATCH_SIZE * PATCH_SIZE) * 0.15
BATCH_SIZE = 128

# Data Gathering and Pre-Processing

## Pre-process data

In [None]:
# B = Batch size
# N = number of patches
# C = channels
# H = height
# W = width

def read_depth_image(file_path: str, scene: int, camera_no: int, clean:bool=True):
    """Load one depth map from disk and return it as a tensor (C x H x W).

    Args:
        file_path: Folder of the split (e.g. TRAIN_DIR_NAME).
        scene: Scene index, formatted into the file name.
        camera_no: Camera index, formatted into the file name.
        clean: Read from the clean sub-folder when True, from the noisy one otherwise.

    Returns:
        The image converted with torchvision's ``ToTensor``.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    sub_dir = CLEAN_DIR_NAME if clean else NOISY_DIR_NAME
    exr_file_name = (file_path + sub_dir + EXR_REGEX).format(scene, camera_no)
    depth = cv2.imread(exr_file_name, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
    if depth is None:
        # cv2.imread signals failure by returning None instead of raising; fail
        # here with the offending path rather than with an opaque error in ToTensor.
        raise FileNotFoundError(f"Could not read depth map: {exr_file_name}")
    return T.ToTensor()(depth)

def extract_samples(file_path: str, dataset: str, clean=True, start_range=None, end_range=None):
    """Read every depth map of a scene range, for all cameras.

    When a bound is None it defaults to the boundary of the split named by
    `dataset` ('train', 'valid', anything else is treated as the test split).

    Returns:
        A list of tensors ordered by scene, then by camera.
    """
    if start_range is None:
        if dataset == 'train':
            start_range = 0
        elif dataset == 'valid':
            start_range = TRAIN_SAMPLES
        else:
            start_range = TRAIN_SAMPLES + VALID_SAMPLES

    if end_range is None:
        if dataset == 'train':
            end_range = TRAIN_SAMPLES
        elif dataset == 'valid':
            end_range = TRAIN_SAMPLES + VALID_SAMPLES
        else:
            end_range = NO_SAMPLES

    # One read per (scene, camera) pair; scenes vary slowest.
    return [
        read_depth_image(file_path, scene, camera, clean)
        for scene in range(start_range, end_range)
        for camera in range(NO_CAMERAS)
    ]

def extract_valid_patches(patches):
    """Keep the patches that contain enough non-zero pixels.

    Args:
        patches: tensor (N x H x W).

    Returns:
        A list with every patch whose non-zero count exceeds VALID_PATCH_THRESHOLD,
        in the original order.
    """
    candidates = (patches[i, :] for i in range(patches.shape[0]))
    return [
        candidate
        for candidate in candidates
        if torch.count_nonzero(candidate) > VALID_PATCH_THRESHOLD
    ]

def normalize_patch(patch):
    """Standardise the valid (strictly positive) depth values of a patch.

    Works for a single patch (H x W) as well as for a batch (B x H x W): the mean
    and standard deviation are computed over all valid pixels of the input.
    Invalid pixels (depth <= 0) are set to 0 in the result.

    Args:
        patch: floating-point depth tensor of any shape.

    Returns:
        (normalized, scale): the normalised tensor (same shape as `patch`) and the
        0-dim scale tensor, i.e. the standard deviation clamped below at 1e-5.
    """
    mask = patch > 0  # only strictly positive depths are valid measurements
    # Boolean indexing works for any number of dimensions. The previous
    # two-column `nonzero` indexing was only correct for 2-D input and pulled
    # whole rows (zeros included) into the statistics for batched input.
    valid_values = patch[mask]
    eps = valid_values.new_tensor(1e-5)

    if valid_values.numel() == 0:
        # Nothing to normalise; avoid the NaN mean/std of an empty selection.
        return torch.zeros_like(patch), eps

    mean = valid_values.mean()
    # The unbiased std of a single value is NaN; treat it as zero spread instead.
    std = valid_values.std() if valid_values.numel() > 1 else torch.zeros_like(mean)
    scale = torch.max(std, eps)

    normalized = torch.where(mask, (patch - mean) / scale, torch.zeros_like(patch))
    return normalized, scale

def normalize_patches(patches):
    """Apply `normalize_patch` to every patch.

    Args:
        patches: tensor (N x H x W) or any iterable of patches.

    Returns:
        A list holding, for each patch, exactly what `normalize_patch` returned
        for it (a `(normalized_patch, scale)` pair), in input order.
    """
    return [normalize_patch(single_patch) for single_patch in patches]

def extract_patches_from_sample(depth):
    """Cut one depth map into patches and keep the valid ones.

    Args:
        depth: tensor (C x H x W).

    Returns:
        The list produced by `extract_valid_patches` for the patches of this map.
    """
    # Add a leading batch dimension => (B x C x H x W), as the patch extractor expects.
    batched = depth.unsqueeze(0)
    tiles = extract_tensor_patches(batched, PATCH_SIZE, STRIDE, PADDING)  # (B x N x C x H x W)
    # Drop the channel axis first, then the batch axis => (N x H x W).
    tiles = tiles.squeeze(2).squeeze(0)
    return extract_valid_patches(tiles)

def extract_patches(samples):
    """Collect the valid patches of all samples in one flat list.

    A tqdm progress bar is shown while iterating over `samples`.
    """
    collected = []
    for depth_map in tqdm(samples):
        collected += extract_patches_from_sample(depth_map)
    return collected

def compare_patches(patch, patch_nosisy):
    """Show two patches side by side with the 'inferno' colour map.

    Args:
        patch: 2-D tensor drawn in the left panel.
        patch_nosisy: 2-D tensor drawn in the right panel.
    """
    # detach()/cpu() make the conversion work for tensors that track gradients
    # or live on the GPU; a plain .numpy() raises for both.
    depth_map_np = patch.detach().cpu().numpy()
    depth_map_np_n = patch_nosisy.detach().cpu().numpy()

    # Display the patches using Matplotlib
    f, axarr = plt.subplots(1, 2, figsize=(10,10))
    axarr[0].imshow(depth_map_np, cmap='inferno')
    axarr[1].imshow(depth_map_np_n, cmap='inferno')

def compare_patches_3(patch1, patch2, patch3):
    """Show three patches side by side, titled 'Clean', 'Predicted' and 'Noisy'.

    Args:
        patch1: 2-D tensor shown under the title 'Clean'.
        patch2: 2-D tensor shown under the title 'Predicted'.
        patch3: 2-D tensor shown under the title 'Noisy'.
    """
    # detach()/cpu() make the conversion work for tensors that track gradients
    # or live on the GPU (typical for a model prediction); a plain .numpy()
    # raises for both.
    patch1_np = patch1.detach().cpu().numpy()
    patch2_np = patch2.detach().cpu().numpy()
    patch3_np = patch3.detach().cpu().numpy()

    # Display the patches using Matplotlib
    f, axarr = plt.subplots(1, 3, figsize=(10,10))
    axarr[0].imshow(patch1_np, cmap='inferno')
    axarr[0].set_title('Clean')
    axarr[1].imshow(patch2_np, cmap='inferno')
    axarr[1].set_title('Predicted')
    axarr[2].imshow(patch3_np, cmap='inferno')
    axarr[2].set_title('Noisy')

## Define dataset

In [None]:
class DepthMapDataset(Dataset):
    """Paired clean / noisy depth-map patches for one split.

    Patches are extracted from the depth maps once and cached on disk; later
    constructions load the cache instead. The cache is keyed only by split name
    and clean/noisy variant, so when a cache file exists the `start` / `end`
    arguments are ignored for that variant.
    """

    def __init__(self, file_path, dataset, start, end):
        """Load (or build and cache) the clean and noisy patch lists.

        Args:
            file_path: Folder of the split.
            dataset: Split name: 'train', 'valid' or 'test'.
            start: First scene index (inclusive); None selects the split default.
            end: Last scene index (exclusive); None selects the split default.

        Raises:
            ValueError: If the clean and noisy lists differ in length, in which
                case index i would no longer refer to the same patch in both.
        """
        self.clean_data = self.extract_data(file_path, dataset, True, start, end)
        self.noisy_data = self.extract_data(file_path, dataset, False, start, end)
        # Clean and noisy patches are filtered independently, so make sure the
        # pairing by index is at least possible.
        if len(self.clean_data) != len(self.noisy_data):
            raise ValueError(
                f"Clean/noisy patch count mismatch for {dataset} dataset: "
                f"{len(self.clean_data)} vs {len(self.noisy_data)}"
            )

    @staticmethod
    def _cache_path(dataset, clean):
        """Return the cache file for a split/variant, or None for an unknown split."""
        cache_files = {
            ('train', True): TRAIN_CACHE_CLEAN,
            ('train', False): TRAIN_CACHE_NOISY,
            ('valid', True): VALID_CACHE_CLEAN,
            ('valid', False): VALID_CACHE_NOISY,
            ('test', True): TEST_CACHE_CLEAN,
            ('test', False): TEST_CACHE_NOISY,
        }
        return cache_files.get((dataset, bool(clean)))

    def extract_data(self, file_path, dataset, clean:bool, start=None, end=None):
        """Return the patch list for one variant, from cache when available."""
        data = self.get_cached_files(dataset, clean)
        clean_noisy = 'clean' if clean else 'noisy'
        if data is not None:
            print(f"Found cached files for {clean_noisy} {dataset} dataset with {len(data)} elements.")
            return data
        samples = extract_samples(file_path, dataset, clean, start, end)
        patches = extract_patches(samples)
        self.cache_files(dataset, patches, clean)
        print(f"Cached files for {clean_noisy} {dataset} dataset.")
        return patches

    def cache_files(self, dataset, data, clean):
        """Write `data` to the cache file of the split/variant (no-op for unknown splits)."""
        path = DepthMapDataset._cache_path(dataset, clean)
        if path is None:
            return
        # torch.save does not create missing folders.
        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
        torch.save(data, path)

    def get_cached_files(self, dataset, clean):
        """Load the cached patch list, or return None when there is no cache file."""
        path = DepthMapDataset._cache_path(dataset, clean)
        if path is None or not os.path.exists(path):
            return None
        # NOTE: torch.load unpickles the file; only load caches written by this notebook.
        return torch.load(path)

    def __len__(self):
        return len(self.clean_data)

    def __getitem__(self, index):
        """Return the (clean, noisy) patch pair stored at `index`."""
        clean_patch = self.clean_data[index]
        noisy_patch = self.noisy_data[index]
        return clean_patch, noisy_patch

In [None]:
def get_loader(file_path=TRAIN_DIR_NAME, dataset='train', start=0, end=100, batch_size=32, num_workers=4, shuffle=False, pin_memory=True):
    """Build a `DepthMapDataset` and a `DataLoader` over it.

    Args:
        file_path: Folder of the split.
        dataset: Split name passed on to `DepthMapDataset`.
        start, end: Scene range passed on to `DepthMapDataset`.
        batch_size, num_workers, shuffle, pin_memory: Forwarded to `DataLoader`.

    Returns:
        (loader, dataset): the data loader and the dataset it iterates over.
    """
    depth_dataset = DepthMapDataset(file_path, dataset, start, end)
    loader_options = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
        pin_memory=pin_memory,
    )
    return DataLoader(dataset=depth_dataset, **loader_options), depth_dataset

In [None]:
# Build one loader/dataset pair per split.
# The start/end scene ranges only take effect when no cache file exists for the
# split: DepthMapDataset returns cached patches as-is when it finds them.
# NOTE(review): the training loader is created with shuffle=False — confirm
# this is intended.
train_loader, train_dataset = get_loader(file_path=TRAIN_DIR_NAME,
                                         dataset='train',
                                         start=0,
                                         end=120,
                                         batch_size=BATCH_SIZE, 
                                         shuffle=False)
valid_loader, valid_dataset = get_loader(file_path=VALID_DIR_NAME,
                                         dataset='valid',
                                         start=900,
                                         end=940,
                                         batch_size=BATCH_SIZE, 
                                         shuffle=False)
test_loader, test_dataset = get_loader(file_path=TEST_DIR_NAME,
                                       dataset='test',
                                       start=1200,
                                       end=1210,
                                       batch_size=BATCH_SIZE, 
                                       shuffle=False)

Found cached files for clean train dataset with 76948 elements.
Found cached files for noisy train dataset with 76948 elements.
Found cached files for clean valid dataset with 19543 elements.
Found cached files for noisy valid dataset with 19543 elements.
Found cached files for clean test dataset with 4034 elements.
Found cached files for noisy test dataset with 4034 elements.


# Model

## Hyperparameters

In [None]:
# Optimiser step size (passed to Adam in the pre-training cell).
LEARNING_RATE = 8e-4
# Epoch count; not used as a loop bound in the visible cells — presumably for
# the main training run, TODO confirm.
EPOCHS = 10
# Number of epochs of the layer-wise pre-training loop.
PRETRAIN_EPOCHS = 30

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Model Architecture

### Greedy Layer-Wise Unsupervised Pretraining 

In [None]:
class InputLayerPretrainingAE(nn.Module):
    """Single-layer convolutional autoencoder used to pre-train the input layer.

    `conv` maps `ch_in` to `ch_out` channels and `conv_t` maps them back; both
    use 3x3 kernels with padding 1 and are followed by a ReLU.
    """

    def __init__(self, ch_in, ch_out):
        super().__init__()
        kernel_options = dict(kernel_size=3, padding=1)
        self.conv = nn.Conv2d(ch_in, ch_out, **kernel_options)
        self.conv_t = nn.ConvTranspose2d(ch_out, ch_in, **kernel_options)

    def forward(self, x):
        """Encode with `conv`, decode with `conv_t`, applying a ReLU after each."""
        encoded = F.relu(self.conv(x))
        return F.relu(self.conv_t(encoded))

In [None]:
class InnnerLayer1PretrainingAE(nn.Module):
    """Autoencoder used to pre-train the first inner block.

    Encoder: `conv_1` (ch_in -> ch_in // 2), whose output is concatenated with
    the block input along the channel axis before `conv_2`
    (ch_in + ch_in // 2 -> ch_out). Decoder: `conv_t_1` and `conv_t_2` map back
    to `ch_in` channels. Every layer is followed by a ReLU.
    """

    def __init__(self, ch_in, ch_out):
        super(InnnerLayer1PretrainingAE, self).__init__()
        ch_inner = ch_in // 2
        self.conv_1 = nn.Conv2d(ch_in, ch_inner, kernel_size=3, padding=1)
        self.conv_2 = nn.Conv2d(ch_in + ch_inner, ch_out, kernel_size=3, padding=1)

        # NOTE(review): with kernel_size=4, padding=1 and the default stride of 1,
        # each transposed convolution enlarges height and width by one pixel, so
        # the reconstruction is 2 pixels larger than the input — confirm intended.
        self.conv_t_1 = nn.ConvTranspose2d(ch_out, ch_in + ch_inner, kernel_size=4, padding=1)
        self.conv_t_2 = nn.ConvTranspose2d(ch_in + ch_inner, ch_in, kernel_size=4, padding=1)

    def forward(self, x):
        """Run the encoder and decoder on `x` (B x ch_in x H x W)."""
        inner = F.relu(self.conv_1(x))
        # conv_2 is declared with ch_in + ch_inner input channels, so it needs the
        # block input concatenated with the conv_1 features; feeding it only the
        # conv_1 output raises a channel-mismatch error.
        x = torch.cat([x, inner], dim=1)
        x = F.relu(self.conv_2(x))
        x = F.relu(self.conv_t_1(x))
        x = F.relu(self.conv_t_2(x))
        return x

## Pretraining Input Layer

In [None]:
# Free as much memory as possible before (re-)running the pre-training.
gc.collect()
torch.cuda.empty_cache()
# NOTE(review): anomaly detection is a debugging aid that slows down every
# backward pass; consider disabling it once training is stable.
torch.autograd.set_detect_anomaly(True)

# Define the denoising autoencoder model and optimizer
model = InputLayerPretrainingAE(1, 16).to(device)
print(f"Running on device {device} for InputLayerPretrainingAE")
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Define the loss function
criterion = nn.MSELoss()

# Train the autoencoder to reconstruct the noisy patches
for epoch in range(PRETRAIN_EPOCHS):
    # Training
    train_loss = 0.0  # sum of the batch losses of this epoch (not an average)
    model.train()
    for _, noisy_patch in tqdm(train_loader):
        # Move the batch to the same device as the model
        noisy_patch = noisy_patch.to(device)

        # Normalize patches; `scale` is needed to undo the scaling afterwards
        normalized_noisy_patch, scale = normalize_patch(noisy_patch)
        normalized_noisy_patch = normalized_noisy_patch.unsqueeze(1)  # add channel axis

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        output = model(normalized_noisy_patch)

        # De-normalize
        # NOTE(review): only the scale is re-applied; the mean subtracted by
        # normalize_patch is not added back — confirm this is intended.
        output = output.squeeze(1)
        output = torch.mul(output, scale)

        # Compute the loss
        loss = criterion(output, noisy_patch)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    TENSORBOARD.add_scalar("Pre-training InputLayerPretrainingAE loss", train_loss, epoch)
    # Report progress against PRETRAIN_EPOCHS, the actual bound of this loop
    # (previously EPOCHS, which produced lines such as "Epoch [11/10]").
    print(f"Epoch [{epoch+1}/{PRETRAIN_EPOCHS}]: Train Loss: {train_loss:.6f}")
TENSORBOARD.flush()
TENSORBOARD.close()

Running on device cuda:0 for InputLayerPretrainingAE


100%|██████████| 602/602 [00:05<00:00, 102.97it/s]


Epoch [1/10]: Train Loss: 158.772846


100%|██████████| 602/602 [00:05<00:00, 120.21it/s]


Epoch [2/10]: Train Loss: 88.060491


100%|██████████| 602/602 [00:05<00:00, 116.56it/s]


Epoch [3/10]: Train Loss: 85.419655


100%|██████████| 602/602 [00:05<00:00, 118.40it/s]


Epoch [4/10]: Train Loss: 83.646832


100%|██████████| 602/602 [00:05<00:00, 116.85it/s]


Epoch [5/10]: Train Loss: 82.206766


100%|██████████| 602/602 [00:04<00:00, 121.37it/s]


Epoch [6/10]: Train Loss: 80.974592


100%|██████████| 602/602 [00:04<00:00, 122.87it/s]


Epoch [7/10]: Train Loss: 79.959357


100%|██████████| 602/602 [00:05<00:00, 118.52it/s]


Epoch [8/10]: Train Loss: 79.098727


100%|██████████| 602/602 [00:05<00:00, 117.70it/s]


Epoch [9/10]: Train Loss: 78.300343


100%|██████████| 602/602 [00:05<00:00, 120.11it/s]


Epoch [10/10]: Train Loss: 77.478631


100%|██████████| 602/602 [00:05<00:00, 117.12it/s]


Epoch [11/10]: Train Loss: 76.718261


100%|██████████| 602/602 [00:05<00:00, 120.27it/s]


Epoch [12/10]: Train Loss: 76.152082


100%|██████████| 602/602 [00:05<00:00, 119.02it/s]


Epoch [13/10]: Train Loss: 75.633844


100%|██████████| 602/602 [00:05<00:00, 118.61it/s]


Epoch [14/10]: Train Loss: 75.148022


100%|██████████| 602/602 [00:04<00:00, 122.91it/s]


Epoch [15/10]: Train Loss: 74.706849


100%|██████████| 602/602 [00:05<00:00, 117.76it/s]


Epoch [16/10]: Train Loss: 74.293692


100%|██████████| 602/602 [00:05<00:00, 120.08it/s]


Epoch [17/10]: Train Loss: 73.907569


100%|██████████| 602/602 [00:04<00:00, 121.01it/s]


Epoch [18/10]: Train Loss: 73.544148


100%|██████████| 602/602 [00:05<00:00, 115.41it/s]


Epoch [19/10]: Train Loss: 73.208178


100%|██████████| 602/602 [00:05<00:00, 119.43it/s]


Epoch [20/10]: Train Loss: 72.886385


100%|██████████| 602/602 [00:05<00:00, 119.19it/s]


Epoch [21/10]: Train Loss: 72.582083


100%|██████████| 602/602 [00:05<00:00, 119.19it/s]


Epoch [22/10]: Train Loss: 72.293182


100%|██████████| 602/602 [00:04<00:00, 121.52it/s]


Epoch [23/10]: Train Loss: 72.018085


100%|██████████| 602/602 [00:05<00:00, 115.50it/s]


Epoch [24/10]: Train Loss: 71.756047


100%|██████████| 602/602 [00:05<00:00, 116.07it/s]


Epoch [25/10]: Train Loss: 71.504920


100%|██████████| 602/602 [00:05<00:00, 115.47it/s]


Epoch [26/10]: Train Loss: 71.264614


100%|██████████| 602/602 [00:05<00:00, 110.94it/s]


Epoch [27/10]: Train Loss: 71.033795


100%|██████████| 602/602 [00:05<00:00, 113.89it/s]


Epoch [28/10]: Train Loss: 70.812190


100%|██████████| 602/602 [00:05<00:00, 113.35it/s]


Epoch [29/10]: Train Loss: 70.598999


100%|██████████| 602/602 [00:05<00:00, 115.21it/s]

Epoch [30/10]: Train Loss: 70.393576





## Pretrain First Inner Layer

In [None]:
class PretrainedInputLayerDataset(Dataset):
    """Patches of a `DepthMapDataset` after being passed through a network.

    Every stored item is the network output for one patch, computed once at
    construction time with gradient tracking disabled.
    """

    def __init__(self, dataset: "DepthMapDataset", encoder=None):
        """Transform the clean and noisy patches of `dataset`.

        Args:
            dataset: Source dataset exposing `clean_data` and `noisy_data`.
            encoder: Module applied to every patch. When None, the notebook-level
                `model` is used (the original behaviour).
        """
        self.clean_data = self.transform_data(dataset.clean_data, encoder)
        self.noisy_data = self.transform_data(dataset.noisy_data, encoder)

    def transform_data(self, data, encoder=None):
        """Run the first half of `data` through the network and return the outputs.

        The patches and the network must be on the same device.
        """
        network = model if encoder is None else encoder
        new_data = list()
        # Inference only: without no_grad every stored output would keep its
        # autograd graph alive (large memory use), and tensors that require grad
        # cannot be handed to DataLoader worker processes.
        with torch.no_grad():
            # NOTE(review): only the first half of the patches is processed —
            # confirm this is intended (e.g. to limit memory).
            for idx in range(len(data) // 2):
                patch = data[idx] # 64 x 64
                patch_processed = patch.unsqueeze(0).unsqueeze(0) # 1 x 1 x 64 x 64
                # NOTE(review): this calls the full forward pass of the network;
                # with the notebook-level autoencoder that includes its decoder,
                # not just the input convolution — confirm.
                patch_after_input_layer = network(patch_processed)
                new_data.append(patch_after_input_layer)
        return new_data

    def __len__(self):
        return len(self.clean_data)

    def __getitem__(self, index):
        """Return the (clean, noisy) transformed pair stored at `index`."""
        clean_patch = self.clean_data[index]
        noisy_patch = self.noisy_data[index]
        return clean_patch, noisy_patch

In [None]:
def get_loader_for_inner_1(init_dataset, batch_size=32, num_workers=4, shuffle=False, pin_memory=True):
    """Wrap `init_dataset` in a `PretrainedInputLayerDataset` and build its loader.

    Args:
        init_dataset: Dataset whose patches are transformed.
        batch_size, num_workers, shuffle, pin_memory: Forwarded to `DataLoader`.

    Returns:
        (loader, dataset): the data loader and the transformed dataset.
    """
    transformed_dataset = PretrainedInputLayerDataset(init_dataset)
    loader_options = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
        pin_memory=pin_memory,
    )
    return DataLoader(dataset=transformed_dataset, **loader_options), transformed_dataset

In [None]:
# model.to('cpu')
# train_loader_inner1, train_dataset_inner1 = get_loader_for_inner_1(init_dataset=train_dataset,
#                                                                    batch_size=BATCH_SIZE, 
#                                                                    shuffle=True)
# valid_loader_inner1, valid_dataset_inner1 = get_loader_for_inner_1(init_dataset=valid_dataset,
#                                                                    batch_size=BATCH_SIZE,
#                                                                    shuffle=True)

KeyboardInterrupt: ignored

In [None]:
# Report the size of the datasets prepared for the first inner layer.
# NOTE(review): train_dataset_inner1 / valid_dataset_inner1 are only assigned in
# the commented-out cell above, and test_dataset_inner1 is not assigned in any
# visible cell, so this cell raises NameError on a fresh kernel unless they are
# defined elsewhere.
print(len(train_dataset_inner1))
print(len(valid_dataset_inner1))
print(len(test_dataset_inner1))

### Save pretrained weights

In [None]:
# List every entry of the model's state dict together with its tensor size.
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

conv.weight 	 torch.Size([16, 1, 3, 3])
conv.bias 	 torch.Size([16])
conv_t.weight 	 torch.Size([16, 1, 3, 3])
conv_t.bias 	 torch.Size([1])


In [None]:
# Keep only the encoder convolution of the pre-trained autoencoder and store it
# under the 'conv_input.*' keys.
input_layer_state = model.state_dict()
weights = dict([
    ('conv_input.weight', input_layer_state['conv.weight']),
    ('conv_input.bias', input_layer_state['conv.bias']),
])
torch.save(weights, 'pretrained_weights_input_layer.pth')