In [None]:
pip install h5py

In [None]:
import os
import zipfile
import sys
from urllib.request import FancyURLopener
import shutil
from PIL import Image
import numpy as np
import scipy.ndimage
import scipy.io as sio
import h5py
import cv2
from tqdm import tqdm

In [None]:
import torch
from math import exp
import torch.nn.functional as F

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import pprint
from genericpath import isfile

In [None]:
import torchvision, torch
import torch.nn as nn
from torchsummary import summary
from torchvision import transforms as T
from torch import optim
import copy

# Download and Setup of Data

In [None]:
def download(url, destination, tmp_dir='/tmp', extract_dir='/content/nyu_depth_v2/temp'):
    """Download `url`, printing a progress line to stdout.

    Args:
        url: Address of the resource to fetch.
        destination: File path the download is written to. Ignored when
            `url` ends with '.zip' (the archive is extracted instead).
        tmp_dir: Directory in which a '.zip' download is staged.
        extract_dir: Directory a '.zip' download is extracted into.
    """
    # Imported locally so the block does not rely on FancyURLopener, which the
    # standard library documents as deprecated. urlretrieve takes the same
    # (url, filename, reporthook) arguments.
    from urllib.request import urlretrieve

    def _progress(count, block_size, total_size):
        # total_size is 0 for empty files and -1 when the server sends no
        # Content-Length; avoid dividing by it in those cases.
        if total_size > 0:
            percent = min(float(count * block_size) / float(total_size) * 100.0, 100.0)
            sys.stdout.write('\rDownloading %s %.1f%%' % (url, percent))
        else:
            sys.stdout.write('\rDownloading %s' % url)
        sys.stdout.flush()

    if url.endswith('.zip'):
        local_zip_path = os.path.join(tmp_dir, 'datasets_download.zip')
        try:
            urlretrieve(url, local_zip_path, _progress)
            with zipfile.ZipFile(local_zip_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)
        finally:
            # Remove the staged archive even if the download or extraction failed.
            if os.path.exists(local_zip_path):
                os.remove(local_zip_path)
    else:
        urlretrieve(url, destination, _progress)

In [None]:
def dataset_download(url, destination):
    """Fetch `url` into `destination` unless that file is already present."""
    if os.path.isfile(destination):
        # Already on disk: nothing to do.
        return
    download(url, destination)

In [None]:
NYUD_URL = 'http://horatio.cs.nyu.edu/mit/silberman/nyu_depth_v2/nyu_depth_v2_labeled.mat'
NYUD_SPLITS_URL = 'http://horatio.cs.nyu.edu/mit/silberman/indoor_seg_sup/splits.mat'

In [None]:
def save_nyu_depth_v2_dataset(source_dir, target_dir):
    """Locate the NYU Depth V2 files, downloading them when needed.

    If both files already exist under './dataset/', those paths are returned
    and nothing is downloaded. Otherwise the labeled dataset and the splits
    file are downloaded into `source_dir` (skipping files already there).

    Args:
        source_dir: Directory used for the downloaded copies; created if absent.
        target_dir: Unused; kept so existing callers keep working.

    Returns:
        Tuple `(labeled_mat_path, splits_mat_path)`.
    """
    os.makedirs(source_dir, exist_ok=True)
    nyud_file_path = os.path.join(source_dir, 'nyu_depth_v2_labeled.mat')
    splits_file_path = os.path.join(source_dir, 'splits.mat')

    nyud_gdrive_file_path = './dataset/nyu_depth_v2_labeled.mat'
    splits_gdrivefile_path = './dataset/splits.mat'

    # Only use the pre-staged copies when BOTH are present; the previous
    # condition contained a double negation and could return paths to files
    # that do not exist.
    if os.path.isfile(nyud_gdrive_file_path) and os.path.isfile(splits_gdrivefile_path):
        return nyud_gdrive_file_path,splits_gdrivefile_path

    dataset_download(NYUD_URL, nyud_file_path)
    dataset_download(NYUD_SPLITS_URL, splits_file_path)
    return nyud_file_path,splits_file_path

In [None]:
nyud_file_path,splits_file_path = save_nyu_depth_v2_dataset('./content/nyu_depth_v2','./content/nyu_depth_v2/labeled')
print("{}\n{}".format(nyud_file_path,splits_file_path))

In [None]:
def get_dataset(source_dir, target_dir):
    """Open the NYU Depth V2 data file and the splits file.

    NOTE: `source_dir` and `target_dir` are not used. The paths are read from
    the notebook-level variables `nyud_file_path` and `splits_file_path`,
    which must already be defined when this function is called.

    Returns:
        Tuple `(nyud_dict, splits_dict)`: the read-only `h5py.File` handle and
        the result of `scipy.io.loadmat` on the splits file.
    """
    print("Loading dataset: NYU Depth V2")
    return h5py.File(nyud_file_path, 'r'), scipy.io.loadmat(splits_file_path)

In [None]:
target_dir = './content/nyu_depth_v2/'
nyud_dict, splits_dict = get_dataset(nyud_file_path,target_dir)
pprint.pprint(nyud_dict.keys())
images = np.asarray(nyud_dict['images'])


In [None]:
depths = np.asarray(nyud_dict['depths'])

In [None]:
print(len(depths))

In [None]:
train_split_indicies = int(795*(1-0.2))
print(train_split_indicies)
print()
pprint.pprint(len(splits_dict['trainNdxs'][:train_split_indicies, 0] - 1))
pprint.pprint(len(splits_dict['trainNdxs'][train_split_indicies:795, 0] - 1))

In [None]:
def get_train_validation_split(splits_dict,dataset_size = 100,split_percent = 0.2):
    """Return the train and validation sample indices from `splits_dict`.

    Training indices come from `splits_dict['trainNdxs']` and validation
    indices from `splits_dict['testNdxs']`; only column 0 of each is used.

    Args:
        splits_dict: Mapping with 2-D integer arrays under 'trainNdxs' and
            'testNdxs'.
        dataset_size: Unused; kept for backward compatibility.
        split_percent: Unused; kept for backward compatibility. No fraction
            of the training indices is held out.

    Returns:
        Tuple `(train_indices, validation_indices)` of 1-D arrays.
    """
    # The "- 1" shifts the stored indices down by one (presumably from
    # MATLAB's 1-based convention to 0-based array positions).
    train_indices = splits_dict['trainNdxs'][:, 0] - 1
    print("Training Data Size: ",len(train_indices))

    validation_indices = splits_dict['testNdxs'][:, 0] - 1
    print("Validation Data Size: ",len(validation_indices))

    return train_indices, validation_indices

In [None]:
train_indices, validation_indices = get_train_validation_split(splits_dict,dataset_size = 795,split_percent = 0.2)

In [None]:
def get_images_depths(images,depths, train_indices):
    """Select the samples at `train_indices` and swap their last two axes.

    Args:
        images: 4-D array; samples are picked along axis 0 and axes 2/3 swapped.
        depths: 3-D array; axes 1/2 are swapped and a singleton axis is
            inserted at position 1 before samples are picked along axis 0.
        train_indices: Integer indices of the samples to keep.

    Returns:
        Tuple `(selected_images, selected_depths)`; both shapes are printed.
    """
    selected_images = np.take(images, train_indices, axis=0).swapaxes(2, 3)
    print(selected_images.shape)

    # Swap the spatial axes and add the singleton axis on the full array,
    # then pick the requested samples.
    depths_with_channel = depths.swapaxes(1, 2)[:, np.newaxis]
    selected_depths = np.take(depths_with_channel, train_indices, axis=0)
    print(selected_depths.shape)
    return selected_images, selected_depths

train_images, train_depths = get_images_depths(images,depths,train_indices)
validation_images, validation_depths = get_images_depths(images,depths,validation_indices)
#test_images,test_depths = get_images_depths(images,depths,test_indices)

# Visualization of Training and Test Data

**Training Data Visualization**

In [None]:
# Show the first 8 training samples as (image, depth map) pairs on a 5x4 grid.
print(len(train_images))
fig = plt.figure(figsize=(20,20))
# k is the 1-based subplot slot; each sample consumes two consecutive slots.
k=1
for sample_idx in range(8):
    print("Data Type: {}, Pre-Transpose: {}".format(type(train_images[sample_idx]),train_images[sample_idx].shape))
    plt.subplot(5,4, k)
    # Move axis 0 to the end so imshow receives the channel axis last.
    plt.imshow(train_images[sample_idx].transpose(1,2,0),interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
    plt.subplot(5,4, k)
    # Index 0 drops the singleton axis added by get_images_depths.
    plt.imshow(train_depths[sample_idx][0],cmap='plasma',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
fig.tight_layout()
fig.show()

**Test Data Visualization**

In [None]:
# Show the first 7 validation samples as (image, depth map) pairs on a 5x4 grid.
print(len(validation_images))
fig = plt.figure(figsize=(20,20))
# k is the 1-based subplot slot; each sample consumes two consecutive slots.
k=1
for sample_idx in range(7):
    print("Data Type: {}, Pre-Transpose: {}".format(type(validation_images[sample_idx]),validation_images[sample_idx].shape))
    plt.subplot(5,4, k)
    # NOTE(review): cmap='gray' presumably has no effect here, since the
    # transposed image has a trailing channel axis — confirm.
    plt.imshow(validation_images[sample_idx].transpose(1,2,0),cmap='gray',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
    plt.subplot(5,4, k)
    # Index 0 drops the singleton axis added by get_images_depths.
    plt.imshow(validation_depths[sample_idx][0],cmap='plasma',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
fig.tight_layout()
fig.show()

# Generate Dataset and Dataloader

In [None]:
import random
from torchvision.transforms.functional import hflip

In [None]:
class NYUDepthDataset(torch.utils.data.Dataset):
    """Dataset of (image, depth map) tensor pairs built from in-memory arrays.

    Args:
        images: Array of images indexed along axis 0.
        indices: Stored on the instance as `indicies`; not used by this class.
        depths: Array of depth maps aligned with `images`.
        transform: Optional callable applied to the image and to the depth
            map separately.
        train: When True (the default) each sample is horizontally flipped
            with probability 0.5. Pass False for deterministic samples,
            e.g. for validation.
    """

    def __init__(self,images,indices,depths,transform=None,train=True):
        self.images = images
        self.indicies = indices
        self.maps = depths
        self.transform = transform
        # Previously accepted but ignored; now gates the random flip.
        self.train = train

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self,index):
        """Return the `(image, depth_map)` float tensors for `index`."""
        image = torch.from_numpy(self.images[index]).float().div(255)
        # NOTE(review): depth is scaled as value/255*1000 and clamped to
        # [10, 1000]. That looks like handling for 8-bit encoded depth —
        # confirm it is intended for the depth arrays used in this notebook.
        dmap = torch.from_numpy(self.maps[index]).float().div(255)*1000
        dmap = torch.clamp(dmap, 10, 1000)

        if self.transform:
            image = self.transform(image)
            dmap = self.transform(dmap)

        # Flip image and depth together so they stay pixel-aligned.
        if self.train and random.random() > 0.5:
            image = hflip(image)
            dmap = hflip(dmap)

        return image,dmap

In [None]:
from torchvision.transforms import transforms

# Both splits are resized to 320x320, the input size used for the model summary.
resize_transform = transforms.Compose([T.Resize((320,320))])

# Training batches are reshuffled every epoch.
train_dataset = NYUDepthDataset(train_images,train_indices,train_depths,transform = resize_transform)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,batch_size=2,shuffle=True)

# Validation is evaluated in a fixed order; train=False marks the split as
# non-training.
validation_dataset = NYUDepthDataset(validation_images,validation_indices,validation_depths,transform = resize_transform,train=False)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset,batch_size=2,shuffle=False)

# test_dataset = NYUDepthDataset(test_images,test_indices,test_depths,transform = transforms.Compose([T.Resize((320,320))]))
# test_loader = torch.utils.data.DataLoader(dataset=test_dataset,batch_size=2,shuffle=True)

In [None]:
# len() of a DataLoader is its number of batches, so there is no need to
# iterate (and preprocess) the whole dataset just to count them.
counter = len(train_loader)
print("Total Number of Batches: ",counter)

**Visualize Train Loader**

In [None]:
examples = iter(train_loader)

In [None]:
# Pull one batch from the train-loader iterator and plot its two samples as
# (image, depth map) pairs.
example_data,dmap = next(examples)
# print(example_data[0].shape)
# print(dmap[0].shape)
# k is the 1-based subplot slot; each sample consumes two consecutive slots.
k=1
fig = plt.figure(figsize=(20,20))
for idx in range(2):
    plt.subplot(5,4, k)
    img = example_data.numpy()
    dmap_n = dmap.numpy()
    # Move axis 0 of each sample to the end so imshow gets the channel axis last.
    plt.imshow(img[idx].transpose(1,2,0),interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
    plt.subplot(5,4, k)
    plt.imshow(dmap_n[idx].transpose(1,2,0),cmap='plasma',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
fig.tight_layout()
fig.show()

In [None]:
valexamples = iter(validation_loader)

In [None]:

# Pull one batch from the validation-loader iterator (valexamples) and plot
# its two samples as (image, depth map) pairs. The previous version drew from
# `examples`, which iterates the train loader.
example_data,example_targets = next(valexamples)
# k is the 1-based subplot slot; each sample consumes two consecutive slots.
k=1
fig = plt.figure(figsize=(20,20))
for idx in range(2):
    plt.subplot(5,4, k)
    img = example_data.numpy()
    plt.imshow(img[idx].transpose(1,2,0),cmap='gray',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
    plt.subplot(5,4, k)
    plt.imshow(example_targets[idx][0],cmap='plasma',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
fig.tight_layout()
fig.show()

# Building U-Net Model

In [None]:
def double_conv(in_c,out_c):
    """Build two 3x3 convolutions (padding 1), each followed by an in-place ReLU.

    Args:
        in_c: Input channels of the first convolution.
        out_c: Output channels of both convolutions.

    Returns:
        An `nn.Sequential` holding the four layers.
    """
    layers = [
        nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_c, out_c, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)

In [None]:
def crop_image(tensor,target_tensor):
    """Center-crop `tensor` to the spatial size of `target_tensor`.

    Height (axis 2) and width (axis 3) are handled independently, and the
    result always has exactly the target size. The previous version assumed
    square inputs and returned a crop one pixel too large when the size
    difference was odd.

    Args:
        tensor: 4-D tensor to crop.
        target_tensor: 4-D tensor whose axes 2 and 3 give the desired size.

    Returns:
        A view of `tensor` with the target height and width.

    Raises:
        ValueError: If `tensor` is smaller than `target_tensor` along
            axis 2 or 3.
    """
    target_h, target_w = target_tensor.size()[2], target_tensor.size()[3]
    source_h, source_w = tensor.size()[2], tensor.size()[3]
    if source_h < target_h or source_w < target_w:
        raise ValueError(
            "cannot crop a %dx%d tensor to the larger size %dx%d"
            % (source_h, source_w, target_h, target_w))
    top = (source_h - target_h)//2
    left = (source_w - target_w)//2
    return tensor[:,:,top:top+target_h,left:left+target_w]

In [None]:
class UNet(nn.Module):
    """U-Net mapping a 3-channel image to a 1-channel output map.

    The encoder applies five `double_conv` stages (64 to 1024 channels) with
    2x2 max-pooling between them. The decoder applies four stride-2 transposed
    convolutions, each followed by concatenation with the matching encoder
    feature map and a `double_conv`. A final 1x1 convolution reduces the
    result to one channel.
    """

    def __init__(self):
        super(UNet,self).__init__()
        self.max_pool_2x2 = nn.MaxPool2d(kernel_size=2,stride=2)
        # Encoder stages.
        self.down_conv_1 = double_conv(3,64)
        self.down_conv_2 = double_conv(64,128)
        self.down_conv_3 = double_conv(128,256)
        self.down_conv_4 = double_conv(256,512)
        self.down_conv_5 = double_conv(512,1024)

        # Decoder stages: each up_conv_N takes twice the channels produced by
        # up_trans_N because the encoder skip connection is concatenated first.
        self.up_trans_1 = nn.ConvTranspose2d(
            in_channels=1024,
            out_channels=512,
            kernel_size=2,
            stride=2)

        self.up_conv_1 = double_conv(1024,512)

        self.up_trans_2 = nn.ConvTranspose2d(
            in_channels=512,
            out_channels=256,
            kernel_size=2,
            stride=2)

        self.up_conv_2 = double_conv(512,256)

        self.up_trans_3 = nn.ConvTranspose2d(
            in_channels=256,
            out_channels=128,
            kernel_size=2,
            stride=2)

        self.up_conv_3 = double_conv(256,128)

        self.up_trans_4 = nn.ConvTranspose2d(
            in_channels=128,
            out_channels=64,
            kernel_size=2,
            stride=2)

        self.up_conv_4 = double_conv(128,64)

        self.out = nn.Conv2d(
            in_channels=64,
            out_channels=1,
            kernel_size=1
        )

    def forward(self,image):
        """Run the network on `image` and return the 1-channel output map."""
        # Encoder; x1, x3, x5 and x7 are reused as skip connections.
        x1 = self.down_conv_1(image)
        x2 = self.max_pool_2x2(x1)
        x3 = self.down_conv_2(x2)
        x4 = self.max_pool_2x2(x3)
        x5 = self.down_conv_3(x4)
        x6 = self.max_pool_2x2(x5)
        x7 = self.down_conv_4(x6)
        x8 = self.max_pool_2x2(x7)
        # Bottleneck. It is not pooled again: the pooled tensor was never used.
        x9 = self.down_conv_5(x8)

        # Decoder: upsample, crop the skip tensor to match, concatenate on the
        # channel axis, then convolve.
        x = self.up_trans_1(x9)
        y = crop_image(x7,x)
        x = self.up_conv_1(torch.cat([x,y],1))

        x = self.up_trans_2(x)
        y = crop_image(x5,x)
        x = self.up_conv_2(torch.cat([x,y],1))

        x = self.up_trans_3(x)
        y = crop_image(x3,x)
        x = self.up_conv_3(torch.cat([x,y],1))

        x = self.up_trans_4(x)
        y = crop_image(x1,x)
        x = self.up_conv_4(torch.cat([x,y],1))
        x = self.out(x)
        return x

In [None]:
model = UNet()
# torchsummary builds its dummy input on CUDA by default. Select the device
# explicitly so this cell also runs on machines without a GPU.
summary_device = 'cuda' if torch.cuda.is_available() else 'cpu'
summary(model.to(summary_device),(3,320,320),device=summary_device)

**Model Output before Training**

In [None]:
examples = iter(train_loader)

In [None]:
# Run the untrained model on one train-loader batch and plot, per sample:
# input image, target depth map, model output.
# The model is moved to the CPU because the batch from the loader is not
# moved to any other device here.
model.to('cpu')
example_data,example_targets = next(examples)
output = model(example_data)
# k is the 1-based subplot slot on a 2x6 grid; each sample uses three slots.
k=1
fig = plt.figure(figsize=(21,10))
for idx in range(2):
    plt.subplot(2,6, k)
    image_num = example_data.numpy()
    plt.imshow(image_num[idx].transpose(1,2,0),cmap='gray',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
    
    plt.subplot(2,6, k)
    image_num = example_data.numpy()
    plt.imshow(example_targets[idx][0],cmap='plasma',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
    
    plt.subplot(2,6, k)
    # detach() is needed because the output was computed with autograd enabled.
    output_num = output.to('cpu').detach().numpy()
    print(output.shape)
    plt.imshow(output_num[idx][0],cmap='plasma',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
fig.tight_layout()
fig.show()

# Define Custom Loss Function

In [None]:
def gaussian(window_size, sigma):
    """Return a 1-D Gaussian of length `window_size`, normalised to sum to 1.

    The curve is centred on index `window_size // 2` and has standard
    deviation `sigma`.
    """
    center = window_size // 2
    two_sigma_sq = float(2 * sigma ** 2)
    weights = torch.Tensor(
        [exp(-((pos - center) ** 2) / two_sigma_sq) for pos in range(window_size)]
    )
    return weights / weights.sum()

In [None]:
def create_window(window_size, channel=1):
    """Build a 2-D Gaussian filter bank of shape (channel, 1, size, size).

    The 2-D kernel is the outer product of a 1-D Gaussian (sigma 1.5) with
    itself, repeated once per channel and returned as a contiguous tensor.
    """
    column = gaussian(window_size, 1.5).unsqueeze(1)
    # Outer product of the column vector with its transpose.
    kernel_2d = torch.mm(column, column.t()).float()
    kernel_4d = kernel_2d.unsqueeze(0).unsqueeze(0)
    return kernel_4d.expand(channel, 1, window_size, window_size).contiguous()

In [None]:
def ssim(img1, img2, val_range, window_size=11, window=None, size_average=True, full=False):
    """Compute the structural similarity (SSIM) between two image batches.

    Args:
        img1, img2: 4-D tensors (batch, channel, height, width).
        val_range: Dynamic range used for the stabilising constants.
        window_size: Requested side of the Gaussian window; clipped to the
            image height and width when the window is built here.
        window: Optional precomputed filter; built with `create_window`
            when None.
        size_average: If True return one scalar, otherwise one value per
            batch element.
        full: If True also return the mean contrast-structure term.

    Returns:
        The SSIM value(s), or the tuple `(ssim, cs)` when `full` is True.
    """
    dynamic_range = val_range
    pad = 0
    _, channel, height, width = img1.size()
    if window is None:
        effective_size = min(window_size, height, width)
        window = create_window(effective_size, channel=channel).to(img1.device)

    def smooth(tensor):
        # Per-channel filtering with the window.
        return F.conv2d(tensor, window, padding=pad, groups=channel)

    mean_1 = smooth(img1)
    mean_2 = smooth(img2)

    mean_1_sq = mean_1.pow(2)
    mean_2_sq = mean_2.pow(2)
    mean_12 = mean_1 * mean_2

    var_1 = smooth(img1 * img1) - mean_1_sq
    var_2 = smooth(img2 * img2) - mean_2_sq
    covar = smooth(img1 * img2) - mean_12

    c1 = (0.01 * dynamic_range) ** 2
    c2 = (0.03 * dynamic_range) ** 2

    cs_numer = 2.0 * covar + c2
    cs_denom = var_1 + var_2 + c2
    cs = torch.mean(cs_numer / cs_denom)  # contrast sensitivity

    ssim_map = ((2 * mean_12 + c1) * cs_numer) / ((mean_1_sq + mean_2_sq + c1) * cs_denom)

    ret = ssim_map.mean() if size_average else ssim_map.mean(1).mean(1).mean(1)

    return (ret, cs) if full else ret

In [None]:
def DepthNorm(x, maxDepth):
    """Return `maxDepth / x`, i.e. the depth inverted and scaled by `maxDepth`."""
    normalized = maxDepth / x
    return normalized

In [None]:
def gradient_loss(gen_frames, gt_frames, alpha=1):
    """Mean difference between the spatial gradients of two image batches.

    Both inputs are 4-D tensors (b, c, h, w). Horizontal and vertical forward
    differences are computed for each; the absolute differences between the
    two sets of gradients are raised to `alpha`, summed and averaged.
    """

    def forward_differences(frames):
        # Same idea as tf.image.image_gradients:
        # https://github.com/tensorflow/tensorflow/blob/r2.1/tensorflow/python/ops/image_ops_impl.py#L3441-L3512
        # Shift by one pixel along width / height, padding with zeros.
        shifted_w = F.pad(frames, [0, 1, 0, 0])[:, :, :, 1:]
        shifted_h = F.pad(frames, [0, 0, 0, 1])[:, :, 1:, :]

        diff_w = shifted_w - frames
        diff_h = shifted_h - frames
        # The last column / row has no neighbour, so its difference is forced
        # to zero.
        diff_w[:, :, :, -1] = 0
        diff_h[:, :, -1, :] = 0
        return diff_w, diff_h

    gen_dw, gen_dh = forward_differences(gen_frames)
    gt_dw, gt_dh = forward_differences(gt_frames)

    err_w = torch.abs(gt_dw - gen_dw)
    err_h = torch.abs(gt_dh - gen_dh)

    # Combine both directions into one tensor and average.
    combined = err_w ** alpha + err_h ** alpha
    return torch.mean(combined)

# Train and Validate Model

In [None]:
def get_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.device(device_name)

In [None]:
def plot_images_dmap(images,dmap):
    """Plot the first image of a batch next to the first depth map.

    Args:
        images: Batch of images, indexable as `images[0]` with the channel
            axis first; a tensor or an array.
        dmap: Batch of depth maps, indexable as `dmap[0][0]`; a tensor or an
            array.
    """
    def _to_numpy(batch):
        # Accept tensors (possibly on GPU or tracking gradients) and arrays.
        if torch.is_tensor(batch):
            return batch.detach().cpu().numpy()
        return np.asarray(batch)

    # Use the `images` argument; the previous version read a notebook-level
    # variable (`image_num`) and ignored the argument.
    first_image = _to_numpy(images)[0]
    first_dmap = _to_numpy(dmap)[0][0]

    fig = plt.figure(figsize=(21,10))
    plt.subplot(2,2, 1)
    plt.imshow(first_image.transpose(1,2,0),interpolation='none')
    plt.subplot(2,2, 2)
    plt.imshow(first_dmap,cmap='plasma',interpolation='none')
    plt.xticks([])
    plt.yticks([])
    fig.show()

In [None]:
def train(model,data_loader, optimizer, criterion, device):
    """Train `model` for one pass over `data_loader`.

    The optimised loss is `1.0 * l_ssim + 0.1 * l_depth`, computed against the
    normalised target `DepthNorm(labels, 1000.0)`. This is the loss the
    previous version actually back-propagated: its gradient-term variant was
    overwritten before use, so that expression and the `gradient_loss` call
    have been removed.

    Args:
        model: Network to train; moved to `device`.
        data_loader: Iterable of `(features, labels)` batches.
        optimizer: Optimiser stepped once per batch.
        criterion: Point-wise loss used for the depth term.
        device: Device on which the batches are processed.

    Returns:
        Running mean over batches of `loss / batch_size`.
    """
    model.to(device)
    model.train()
    train_loss = 0.0
    # Wrapping the loader (not the enumerate object) lets tqdm show a total.
    for batch_idx, (feature_data,labels) in enumerate(tqdm(data_loader)):
        feature_data = feature_data.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        depth_n = DepthNorm(labels,1000.0)
        output = model(feature_data)
        l_depth = criterion(output, depth_n)
        l_ssim = torch.clamp((1 - ssim(output, depth_n, val_range = 1000.0 / 10.0)) * 0.5, 0, 1)
        loss = (1.0 * l_ssim) + (0.1 * l_depth)
        loss.backward()
        optimizer.step()
        # Incremental mean: mean += (value - mean) / n.
        train_loss += (1/(batch_idx+1))*(loss.item()/feature_data.size(0) - train_loss)
    return train_loss

In [None]:
def validate(model,data_loader, optimizer, criterion, device):
    """Evaluate `model` on `data_loader` and return the mean validation loss.

    Fixes over the previous version:
      * the running mean is accumulated with `+=` (it was overwritten on
        every batch);
      * the target is normalised with `DepthNorm` and the loss is the same
        `1.0 * l_ssim + 0.1 * l_depth` used by `train`, so the two curves
        are comparable;
      * evaluation runs under `torch.no_grad()`.

    Args:
        model: Network to evaluate; moved to `device` and put in eval mode.
        data_loader: Iterable of `(features, labels)` batches.
        optimizer: Unused; kept so existing callers keep working.
        criterion: Point-wise loss used for the depth term.
        device: Device on which the batches are processed.

    Returns:
        Running mean over batches of `loss / batch_size`.
    """
    model.to(device)
    model.eval()

    val_loss = 0.0
    with torch.no_grad():
        # Wrapping the loader (not the enumerate object) lets tqdm show a total.
        for batch_idx,(feature_data,labels) in enumerate(tqdm(data_loader)):
            feature_data = feature_data.to(device)
            labels = labels.to(device)

            depth_n = DepthNorm(labels,1000.0)
            output = model(feature_data)
            l_depth = criterion(output, depth_n)
            l_ssim = torch.clamp((1 - ssim(output, depth_n, val_range = 1000.0 / 10.0)) * 0.5, 0, 1)
            loss = (1.0 * l_ssim) + (0.1 * l_depth)
            # Incremental mean: mean += (value - mean) / n.
            val_loss += (1/(batch_idx+1))*(loss.item()/feature_data.size(0) - val_loss)
    return val_loss

In [None]:
def plot_loss(num_epochs,train_losses,test_losses):
    """Plot training (blue) and validation (red) loss against the epoch index.

    Args:
        num_epochs: Number of epochs; the x axis is `range(num_epochs)`.
        train_losses: One training loss per epoch.
        test_losses: One validation loss per epoch.
    """
    epochs = range(num_epochs)

    # Draw the training curve first so the legend order matches.
    for series, colour in ((train_losses, 'blue'), (test_losses, 'red')):
        plt.plot(epochs, series, color=colour)

    plt.legend(['Train Loss','Validation Loss'],loc='upper right')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title("Training and Validation Losses")

In [None]:
def fit_model(model,num_epochs,train_dl,validation_dl,best_model):
    """Train `model` for `num_epochs` epochs, keeping the best validation model.

    Uses Adam (learning rate 0.01) with an L1 criterion. After each epoch the
    model is deep-copied if its validation loss is the lowest seen so far.
    When training ends the loss curves are plotted and the best model is
    saved to 'mono_depth_unet.pt'.

    Args:
        model: Network to train.
        num_epochs: Number of epochs to run.
        train_dl: Training data loader.
        validation_dl: Validation data loader.
        best_model: Value returned as the best model if no epoch improves on
            the initial (infinite) validation loss.

    Returns:
        Tuple `(best_model, min_validation_loss, train_loss_at_that_epoch)`.
    """
    learn_rate = 0.01

    device = get_device()
    #criterion = torch.nn.MSELoss()
    criterion = nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)

    train_losses, test_losses = [], []
    min_loss = float('inf')
    min_train_loss = float('inf')

    for epoch in range(num_epochs):
        running_train_loss = train(model,train_dl, optimizer, criterion, device)
        test_loss = validate(model,validation_dl, optimizer, criterion, device)

        print("Epoch:{}/{}, Running Train Loss: {:.4f},Valid Loss: {:.4f}".format(epoch,num_epochs,running_train_loss,test_loss))

        train_losses.append(running_train_loss)
        test_losses.append(test_loss)

        # Snapshot the model whenever validation improves.
        if test_loss < min_loss:
            best_model = copy.deepcopy(model)
            min_loss = test_loss
            min_train_loss = running_train_loss
            print("Saving Best Model with Min Validation Loss: ", min_loss)

    plot_loss(num_epochs,train_losses,test_losses)
    torch.save(best_model,'mono_depth_unet.pt')
    return best_model,min_loss,min_train_loss

In [None]:
num_epochs = 50
model = UNet()
best_model = model
# fit_model returns (best_model, min validation loss, train loss at that
# epoch), so the validation loss is unpacked first.
best_model,val_loss,train_loss = fit_model(model,num_epochs,train_loader,validation_loader,best_model)

In [None]:
torch.save(model,'mono_depth_unet_ep12.pt')

In [None]:
# Reload the checkpoint written by fit_model and plot, for one validation
# batch: input image, target depth map, model output.
# NOTE(review): the file holds a whole pickled model. Recent PyTorch releases
# may require weights_only=False to load it — confirm against the installed
# version.
best_model = torch.load('mono_depth_unet.pt')
best_model.to('cpu')
best_model.eval()
examples = enumerate(validation_loader)
batch_idx, (example_data,example_targets) = next(examples)
output = best_model(example_data)
# k is the 1-based subplot slot on a 2x6 grid; each sample uses three slots.
k=1
fig = plt.figure(figsize=(20,10))
for idx in range(2):
    plt.subplot(2,6, k)
    image_num = example_data.numpy()
    plt.imshow(image_num[idx].transpose(1,2,0),cmap='gray',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
    plt.subplot(2,6, k)
    # The target shown is the loader's depth map, whereas train() fits the
    # model to DepthNorm(labels, 1000.0); the two panels use different scales.
    plt.imshow(example_targets[idx][0],cmap='plasma',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
    plt.subplot(2,6, k)
    # detach() is needed because the output was computed with autograd enabled.
    output_num = output.to('cpu').detach().numpy()
    print(output.shape)
    plt.imshow(output_num[idx][0],cmap='plasma',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
fig.tight_layout()
fig.show()

In [None]:
# Reload the checkpoint saved from `model` after training and plot, for one
# train-loader batch: input image, target depth map, model output.
# NOTE: this rebinds `best_model` to the end-of-training model, not to the
# best-validation snapshot returned by fit_model.
best_model = torch.load('mono_depth_unet_ep12.pt')
best_model.to('cpu')
best_model.eval()
examples = enumerate(train_loader)
batch_idx, (example_data,example_targets) = next(examples)
output = best_model(example_data)
# k is the 1-based subplot slot on a 2x6 grid; each sample uses three slots.
k=1
fig = plt.figure(figsize=(20,10))
for idx in range(2):
    plt.subplot(2,6, k)
    image_num = example_data.numpy()
    plt.imshow(image_num[idx].transpose(1,2,0),cmap='gray',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
    plt.subplot(2,6, k)
    plt.imshow(example_targets[idx][0],cmap='plasma',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
    plt.subplot(2,6, k)
    # detach() is needed because the output was computed with autograd enabled.
    output_num = output.to('cpu').detach().numpy()
    print(output.shape)
    plt.imshow(output_num[idx][0],cmap='plasma',interpolation='none')
    k+=1
    plt.xticks([])
    plt.yticks([])
fig.tight_layout()
fig.show()