In [None]:
!pip install Pillow==5.3.0 

In [None]:
!pip install torch
!pip install torchvision
!pip install Pillow==5.3.0

In [None]:
# NOTE(review): `-U` upgrades Pillow to the newest available release, which
# undoes the Pillow==5.3.0 pin installed by the earlier cells. Confirm which
# of the two versions the notebook is meant to run against.
!pip install -U pillow

In [None]:
from PIL import Image


def register_extension(id, extension):
    """Map a lower-cased file extension to an upper-cased format id in PIL's table."""
    Image.EXTENSION[extension.lower()] = id.upper()


def register_extensions(id, extensions):
    """Register every extension in `extensions` for the format `id`."""
    for ext in extensions:
        register_extension(id, ext)


# Install both helpers on the PIL.Image module so code that looks them up as
# Image.register_extension / Image.register_extensions finds them.
Image.register_extension = register_extension
Image.register_extensions = register_extensions

In [None]:
import torch
import numpy as np
from scipy.io import loadmat
from scipy.io import savemat
import scipy
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from skimage.transform import resize
import glob
import cv2

from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

import os

import pandas as pd
from skimage import io, transform
from torch.utils.data import Dataset, DataLoader



from torchvision import datasets
import torch.optim as optim
import time
from tqdm import tqdm
# Make float64 the default dtype for tensors and module parameters created
# from this point on.
# NOTE(review): presumably chosen so model weights match the float64 arrays
# the Dataset's resize step produces — confirm.
torch.set_default_tensor_type('torch.DoubleTensor')

In [None]:
# Sanity check: the training and evaluation cells call .cuda() unconditionally,
# so this should display True before continuing.
torch.cuda.is_available()

In [None]:
import zipfile

# Unpack the dataset archive into data/. The context manager closes the
# archive even when extraction raises part-way through.
with zipfile.ZipFile('split_data.zip', 'r') as zip_ref:
    zip_ref.extractall('data/')


In [None]:
image_path = 'data/split_data/images/'
depth_path = 'data/split_data/depths/'

# Collect the RGB file names. os.path is used instead of splitting on '/'
# so the lookup also works where glob returns OS-specific separators.
i_name = glob.glob(os.path.join(image_path, '*.png'))
i_id = [os.path.basename(x) for x in i_name]

# Depth maps are looked up under depth_path using the same file names as
# the RGB images.
i_files = [os.path.join(image_path, x) for x in i_id]
d_files = [os.path.join(depth_path, x) for x in i_id]

# Eagerly load everything into memory (colour images and single-channel depth).
images = [cv2.imread(file) for file in i_files]
depth = [cv2.imread(file, cv2.IMREAD_GRAYSCALE) for file in d_files]



In [None]:
# Quick look at how many samples were loaded and their array shapes.
# Uses the print() call form like the rest of the notebook; the bare
# `print x, y` statement is a SyntaxError on Python 3.
print(len(images), len(depth), type(images[0]), images[0].shape, depth[0].shape)

In [None]:
class coarseNet(nn.Module):
    """Coarse-scale network: five convolutions followed by two linear layers.

    ``forward`` flattens the convolutional features, passes them through
    ``fc1``/``fc2`` and reshapes the 4070 outputs per sample to
    ``(-1, 1, 55, 74)``. ``fc1`` expects 21504 flattened features
    (256 x 7 x 12), which is what the layer settings below produce for
    3 x 240 x 320 inputs.

    Args:
        init_weights: when true, re-initialise conv/linear weights with
            N(0, 0.01) and zero their biases.
    """

    def __init__(self, init_weights=True):
        super(coarseNet, self).__init__()

        # Layer registration order fixes the parameter order and the order
        # in which _initialize_weights draws random values.
        self.relu = nn.ReLU(inplace=True)

        self.conv1 = nn.Conv2d(3, 96, 11, stride=4)
        self.pool1 = nn.MaxPool2d(2, stride=2)
        self.conv2 = nn.Conv2d(96, 256, 5)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.conv3 = nn.Conv2d(256, 384, 3)
        self.conv4 = nn.Conv2d(384, 384, 3)
        self.conv5 = nn.Conv2d(384, 256, 3)

        self.fc1 = nn.Linear(21504, 4096)
        self.fc2 = nn.Linear(4096, 4070)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the coarse prediction, shaped (-1, 1, 55, 74)."""
        # Two conv -> pool -> ReLU stages.
        feats = self.relu(self.pool1(self.conv1(x)))
        feats = self.relu(self.pool2(self.conv2(feats)))
        # Three plain conv -> ReLU stages.
        for conv in (self.conv3, self.conv4, self.conv5):
            feats = self.relu(conv(feats))
        flat = feats.view(feats.size(0), -1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden).view(-1, 1, 55, 74)

    def _initialize_weights(self):
        """Draw conv/linear weights from N(0, 0.01) and zero the biases."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                module.weight.data.normal_(0, 0.01)
                if module.bias is not None:
                    module.bias.data.zero_()
                continue
            if isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.01)
                module.bias.data.zero_()
                
                
class fineNet(nn.Module):
    """Fine-scale network: three convolutions producing a one-channel map.

    ``forward`` crops the result to the first 55 rows and 74 columns.

    NOTE(review): ``forward`` accepts ``y`` (callers pass the coarse
    prediction) but never reads it, so the coarse output does not influence
    the result — confirm whether it was meant to be concatenated in.

    Args:
        init_weights: when true, re-initialise conv/linear weights with
            N(0, 0.01) and zero their biases.
    """

    def __init__(self, init_weights=True):
        super(fineNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 9, stride=2)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # padding=2 with a 5x5 kernel keeps the spatial size unchanged.
        self.conv2 = nn.Conv2d(64, 64, 5, padding=2)
        self.conv3 = nn.Conv2d(64, 1, 5, padding=2)
        self.relu = nn.ReLU(inplace=True)
        if init_weights:
            self._initialize_weights()

    def forward(self, x, y):
        """Return the refined map cropped to (N, C, 55, 74); ``y`` is unused."""
        feats = self.relu(self.pool1(self.conv1(x)))
        for conv in (self.conv2, self.conv3):
            feats = self.relu(conv(feats))
        return feats[:, :, 0:55, 0:74]

    def _initialize_weights(self):
        """Draw conv/linear weights from N(0, 0.01) and zero the biases."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                module.weight.data.normal_(0, 0.01)
                if module.bias is not None:
                    module.bias.data.zero_()
                continue
            if isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.01)
                module.bias.data.zero_()

In [None]:
# Image / depth-map sizes in pixels.
# NOTE(review): none of these four names is referenced anywhere else in the
# visible notebook; the Dataset below hard-codes its own resize targets.
input_height = 480
input_width = 640
output_height = 55
output_width = 74

class data(Dataset):
    """Dataset of (image, depth) pairs stored under the same file name.

    Args:
        ids: sequence of file names; each must exist in both directories.
        img_dir: directory holding the colour images.
        dep_dir: directory holding the depth maps.
        transform: optional callable applied to the sample dict.

    Each item is ``{'image': ndarray (3, 240, 320), 'depth': ndarray (55, 74)}``
    (before ``transform``), with the original value range preserved.
    """

    def __init__(self, ids, img_dir, dep_dir, transform=None):
        self.frame = ids
        self.img_dir = img_dir
        self.dep_dir = dep_dir
        self.transform = transform

    def __len__(self):
        return len(self.frame)

    def __getitem__(self, idx):
        """Load, reorder and resize one sample.

        Raises:
            IOError: if either file cannot be read by OpenCV.
        """
        img_name = os.path.join(self.img_dir, self.frame[idx])
        dep_name = os.path.join(self.dep_dir, self.frame[idx])
        image = cv2.imread(img_name)
        depth = cv2.imread(dep_name, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise IOError("could not read image file: %s" % img_name)
        if depth is None:
            raise IOError("could not read depth file: %s" % dep_name)
        # (H, W, C) -> (C, H, W). The rows must stay ahead of the columns so
        # that the (240, 320) / (55, 74) resize targets line up with height
        # and width; swapping them distorts the geometry.
        image = np.transpose(image, (2, 0, 1))
        # Here `transform` is the module-level skimage.transform import; the
        # per-instance callable is self.transform.
        image = transform.resize(image, (3, 240, 320), mode='symmetric', preserve_range=True)
        depth = transform.resize(depth, (55, 74), mode='symmetric', preserve_range=True)

        sample = {'image': image, 'depth': depth}

        if self.transform:
            sample = self.transform(sample)

        return sample
    
    
class Rescale(object):
    """Resize the image and depth map of a sample to given sizes.

    Args:
        output_size: target size for the image. A tuple is used as
            ``(height, width)``; an int sets the shorter side and scales the
            other to keep the aspect ratio.
        depth_size: same, for the depth map.
    """

    def __init__(self, output_size, depth_size):
        assert isinstance(output_size, (int, tuple))
        assert isinstance(depth_size, (int, tuple))
        self.output_size = output_size
        self.depth_size = depth_size

    @staticmethod
    def _target_size(size, h, w):
        """Resolve `size` to an integer (height, width) for an h x w input."""
        if isinstance(size, int):
            if h > w:
                new_h, new_w = size * h / w, size
            else:
                new_h, new_w = size, size * w / h
        else:
            new_h, new_w = size
        # True division yields floats; resize needs integer dimensions.
        return int(new_h), int(new_w)

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']

        h, w = image.shape[:2]
        img = transform.resize(image, self._target_size(self.output_size, h, w))

        h, w = depth.shape
        # The depth size goes through the same integer conversion as the
        # image size, so an int depth_size no longer yields a float size.
        dep = transform.resize(depth, self._target_size(self.depth_size, h, w))

        return {'image': img, 'depth': dep}

class ToTensor(object):
    """Convert the ndarrays of a sample into float32 tensors."""

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        # Move the last axis of the image to the front before converting.
        channels_first = np.float32(image.transpose((2, 0, 1)))
        depth_f32 = np.float32(depth)
        converted = {}
        converted['image'] = torch.from_numpy(channels_first)
        converted['depth'] = torch.from_numpy(depth_f32)
        return converted
        

In [None]:
i_name = glob.glob('data/split_data/images/*.png')
i_id = [os.path.basename(x) for x in i_name]
# Depth targets are read from the depths/ directory. Pointing dep_dir at the
# images directory would make the "depth" a grayscale copy of the RGB frame.
dataset = data(ids=i_id, img_dir='data/split_data/images/', dep_dir='data/split_data/depths/')
dataset_loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0)


In [None]:
def train_coarse_gpu(model, criterion, optimizer, dataset_loader, n_epochs, print_every):
    """Train the coarse network and return the mean loss of every epoch.

    Args:
        model: network mapping an image batch to a (N, 1, H, W) prediction.
        criterion: loss taking (prediction (N, H, W), target (N, H, W)).
        optimizer: optimizer over ``model``'s parameters.
        dataset_loader: iterable of dicts with "image" and "depth" tensors.
        n_epochs: number of passes over ``dataset_loader``.
        print_every: print the epoch loss every this many epochs.

    Returns:
        list of float: mean batch loss per epoch, in epoch order.
    """
    # Batches go to wherever the model lives (the GPU in this notebook).
    device = next(model.parameters()).device
    losses = []
    print("Training for %d epochs..." % n_epochs)
    for epoch in tqdm(range(1, n_epochs + 1)):
        running_loss = 0.0
        num_batches = 0
        for batch in dataset_loader:
            inputs = batch["image"].to(device)
            depths = batch["depth"].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            # Drop the channel axis so the prediction matches the target.
            outputs = outputs.view(outputs.shape[0], outputs.shape[2], outputs.shape[3])
            loss = criterion(outputs, depths)
            loss.backward()
            optimizer.step()
            # Accumulate a plain float; the loss tensor itself is left alone.
            running_loss += loss.item()
            num_batches += 1
        epoch_loss = running_loss / max(num_batches, 1)
        losses.append(epoch_loss)
        if epoch % print_every == 0:
            print('[(%d %d%%) %.4f]' % (epoch, 100.0 * epoch / n_epochs, epoch_loss))
    return losses

def train_fine_gpu(model, coarse, criterion, optimizer, dataset_loader, n_epochs, print_every):
    """Train the fine network on top of a frozen coarse network.

    Args:
        model: fine network, called as ``model(images, coarse_prediction)``.
        coarse: coarse network; only evaluated, never updated here.
        criterion: loss taking (prediction, target).
        optimizer: optimizer over ``model``'s parameters.
        dataset_loader: iterable of dicts with "image" and "depth" tensors.
        n_epochs: number of passes over ``dataset_loader``.
        print_every: print the epoch loss every this many epochs.

    Returns:
        list of float: mean batch loss per epoch, in epoch order.
    """
    # Batches go to wherever the model lives (the GPU in this notebook).
    device = next(model.parameters()).device
    losses = []
    print("Training for %d epochs..." % n_epochs)
    # The coarse net only provides inputs: run it in eval mode (affects
    # dropout / batch-norm layers) and restore its mode afterwards.
    coarse_was_training = coarse.training
    coarse.eval()
    try:
        for epoch in tqdm(range(1, n_epochs + 1)):
            running_loss = 0.0
            num_batches = 0
            for batch in dataset_loader:
                inputs = batch["image"].to(device)
                depths = batch["depth"].to(device)
                optimizer.zero_grad()
                # No graph is needed through the coarse net; the optimizer
                # only holds the fine net's parameters.
                with torch.no_grad():
                    y = coarse(inputs)
                outputs = model(inputs, y)
                # A (N, 1, H, W) prediction against a (N, H, W) target would
                # broadcast to (N, N, H, W) inside the loss; drop the
                # singleton channel axis first.
                if outputs.dim() == depths.dim() + 1 and outputs.size(1) == 1:
                    outputs = outputs.squeeze(1)
                loss = criterion(outputs, depths)
                loss.backward()
                optimizer.step()
                # Accumulate a plain float; the loss tensor is left alone.
                running_loss += loss.item()
                num_batches += 1
            epoch_loss = running_loss / max(num_batches, 1)
            losses.append(epoch_loss)
            if epoch % print_every == 0:
                print('[(%d %d%%) %.4f]' % (epoch, 100.0 * epoch / n_epochs, epoch_loss))
    finally:
        coarse.train(coarse_was_training)
    return losses

In [None]:

gpu_dtype = torch.cuda.FloatTensor

# Seed before the model is built: the weights are drawn inside
# coarseNet.__init__ (before .cuda() is called) and the DataLoader shuffles
# with the CPU generator, so seeding only the CUDA generator after
# construction does not make the run repeatable.
torch.manual_seed(12345)
torch.cuda.random.manual_seed(12345)

coarse_net_gpu = coarseNet()
coarse_net_gpu.cuda()
optimizer_coarse = optim.Adadelta(coarse_net_gpu.parameters(), lr=1e-2)
criterion = nn.MSELoss().cuda()

loss = train_coarse_gpu(coarse_net_gpu, criterion, optimizer_coarse, dataset_loader, 1, 1)

In [None]:

gpu_dtype = torch.cuda.FloatTensor

# Seed before the model is built: the weights are drawn inside
# fineNet.__init__ (before .cuda() is called) and the DataLoader shuffles
# with the CPU generator, so seeding only the CUDA generator after
# construction does not make the run repeatable.
torch.manual_seed(12345)
torch.cuda.random.manual_seed(12345)

fine_net_gpu = fineNet()
fine_net_gpu.cuda()
optimizer_fine = optim.Adadelta(fine_net_gpu.parameters(), lr=1e-2)
criterion = nn.MSELoss().cuda()

loss = train_fine_gpu(fine_net_gpu, coarse_net_gpu, criterion, optimizer_fine, dataset_loader, 1, 1)

In [None]:
def test_threshold_error(ground_truth, predicted_image, delta):

    eps = 1e-9
    ground_truth = ground_truth.reshape(-1)
    predicted_image = predicted_image.reshape(-1)

    arr1 = ground_truth / (predicted_image + eps)
    arr2 = predicted_image / (ground_truth + eps)
    
    max_arr = np.maximum(arr1, arr2)
    indicator_arr = (max_arr < delta).astype('float')

    return np.mean(indicator_arr)
  
def scale_invariant_error(ground_truth, predicted_image):
    """Scale-invariant log error between two depth maps.

    With d_i = log(|gt_i| + eps) - log(|pred_i| + eps) over the n flattened
    pixels, returns

        mean(d_i ** 2) - (sum(d_i)) ** 2 / n ** 2

    i.e. the variance of the log differences. Multiplying either input by a
    constant therefore leaves the result (essentially) unchanged.
    """
    eps = 1e-9
    ground_truth = np.abs(ground_truth.reshape(-1))
    predicted_image = np.abs(predicted_image.reshape(-1))

    diff = np.log(ground_truth + eps) - np.log(predicted_image + eps)
    n = diff.shape[0]

    part1 = np.mean(np.square(diff))
    # The second term squares the SUM OF THE DIFFERENCES (not the sum of the
    # squared differences); that is what cancels a global scale factor.
    part2 = np.square(np.sum(diff)) / np.square(n)

    return part1 - part2

def rmse_linear_error(ground_truth, predicted_image):
    """Root-mean-square error between the flattened arrays."""
    ground_truth = ground_truth.reshape(-1)
    predicted_image = predicted_image.reshape(-1)

    # The square root turns the mean squared error into the RMSE the
    # function name promises.
    return np.sqrt(np.mean(np.square(ground_truth - predicted_image)))

def rmse_log_error(ground_truth, predicted_image):
    """Root-mean-square error between log(|gt| + eps) and log(|pred| + eps)."""
    eps = 1e-9
    ground_truth = np.log(np.abs(ground_truth.reshape(-1)) + eps)
    predicted_image = np.log(np.abs(predicted_image.reshape(-1)) + eps)

    # The square root turns the mean squared error into the RMSE the
    # function name promises.
    return np.sqrt(np.mean(np.square(ground_truth - predicted_image)))
    
def abs_relative_error(ground_truth, predicted_image):
    """Mean of |gt - pred| / (gt + eps) over the flattened arrays."""
    eps = 1e-9
    ground_truth = ground_truth.reshape(-1)
    predicted_image = predicted_image.reshape(-1)

    diff = ground_truth - predicted_image
    rel_diff = np.abs(diff) / (ground_truth + eps)

    # Average the relative differences, not the raw signed differences.
    return np.mean(rel_diff)

def squared_relative_error(ground_truth, predicted_image):
    """Mean of (gt - pred) ** 2 / |gt + eps| over the flattened arrays."""
    eps = 1e-9
    ground_truth = ground_truth.reshape(-1)
    predicted_image = predicted_image.reshape(-1)

    diff = ground_truth - predicted_image
    rel_diff = np.square(diff) / np.abs((ground_truth + eps))

    # Average the relative differences, not the raw signed differences.
    return np.mean(rel_diff)

In [None]:
count = 0
threshold_error = 0
# NOTE(review): dataset_loader is the same loader the training cells use, so
# despite the printed message this is not a held-out score.
# Inference only, so autograd is switched off. The loop variable is called
# `batch` because a top-level `data` would overwrite the `data` Dataset class.
with torch.no_grad():
    for batch in dataset_loader:
        inputs = batch["image"].cuda()
        depths = batch["depth"].cuda()
        y = coarse_net_gpu(inputs)
        outputs = fine_net_gpu(inputs, y)
        outputs = outputs.cpu().detach().numpy()
        depths = depths.cpu().detach().numpy()

        # Metric signature is (ground_truth, predicted_image, delta).
        threshold_error = threshold_error + test_threshold_error(depths, outputs, 1.96)
        count = count + 1
threshold_error /= count
print("The threshold error on the test images is: ", threshold_error)

In [None]:
count = 0
error = 0
# NOTE(review): dataset_loader is the same loader the training cells use, so
# despite the printed message this is not a held-out score.
# Inference only, so autograd is switched off. The loop variable is called
# `batch` because a top-level `data` would overwrite the `data` Dataset class.
with torch.no_grad():
    for batch in dataset_loader:
        inputs = batch["image"].cuda()
        depths = batch["depth"].cuda()
        y = coarse_net_gpu(inputs)
        outputs = fine_net_gpu(inputs, y)
        outputs = outputs.cpu().detach().numpy()
        depths = depths.cpu().detach().numpy()
        # Metric signature is (ground_truth, predicted_image).
        error = error + np.abs(scale_invariant_error(depths, outputs))
        count = count + 1
error = error / count
print("The scale invariant error on the test images is: ", error)

In [None]:
count = 0
error = 0
# NOTE(review): dataset_loader is the same loader the training cells use, so
# despite the printed message this is not a held-out score.
# Inference only, so autograd is switched off. The loop variable is called
# `batch` because a top-level `data` would overwrite the `data` Dataset class.
with torch.no_grad():
    for batch in dataset_loader:
        inputs = batch["image"].cuda()
        depths = batch["depth"].cuda()
        y = coarse_net_gpu(inputs)
        outputs = fine_net_gpu(inputs, y)
        outputs = outputs.cpu().detach().numpy()
        depths = depths.cpu().detach().numpy()
        # Metric signature is (ground_truth, predicted_image).
        error = error + rmse_linear_error(depths, outputs)
        count = count + 1
error = error / count
print("The RMSE on the test images is: ", error)

In [None]:
count = 0
error = 0
# NOTE(review): dataset_loader is the same loader the training cells use, so
# despite the printed message this is not a held-out score.
# Inference only, so autograd is switched off. The loop variable is called
# `batch` because a top-level `data` would overwrite the `data` Dataset class.
with torch.no_grad():
    for batch in dataset_loader:
        inputs = batch["image"].cuda()
        depths = batch["depth"].cuda()
        y = coarse_net_gpu(inputs)
        outputs = fine_net_gpu(inputs, y)
        outputs = outputs.cpu().detach().numpy()
        depths = depths.cpu().detach().numpy()
        # Metric signature is (ground_truth, predicted_image).
        error = error + rmse_log_error(depths, outputs)
        count = count + 1
error = error / count
print("The RMSE (log) on the test images is: ", error)

In [None]:
count = 0
error = 0
# NOTE(review): dataset_loader is the same loader the training cells use, so
# despite the printed message this is not a held-out score.
# Inference only, so autograd is switched off. The loop variable is called
# `batch` because a top-level `data` would overwrite the `data` Dataset class.
with torch.no_grad():
    for batch in dataset_loader:
        inputs = batch["image"].cuda()
        depths = batch["depth"].cuda()
        y = coarse_net_gpu(inputs)
        outputs = fine_net_gpu(inputs, y)
        outputs = outputs.cpu().detach().numpy()
        depths = depths.cpu().detach().numpy()
        # Metric signature is (ground_truth, predicted_image); the order
        # matters here because the error is relative to the ground truth.
        error = error + np.abs(abs_relative_error(depths, outputs))
        count = count + 1
error = error / count
print("The absolute relative error on the test images is: ", error)

In [None]:
count = 0
error = 0
# NOTE(review): dataset_loader is the same loader the training cells use, so
# despite the printed message this is not a held-out score.
# Inference only, so autograd is switched off. The loop variable is called
# `batch` because a top-level `data` would overwrite the `data` Dataset class.
with torch.no_grad():
    for batch in dataset_loader:
        inputs = batch["image"].cuda()
        depths = batch["depth"].cuda()
        y = coarse_net_gpu(inputs)
        outputs = fine_net_gpu(inputs, y)
        outputs = outputs.cpu().detach().numpy()
        depths = depths.cpu().detach().numpy()
        # Metric signature is (ground_truth, predicted_image); the order
        # matters here because the error is relative to the ground truth.
        error = error + np.abs(squared_relative_error(depths, outputs))
        count = count + 1
error = error / count
print("The squared relative error on the test images is: ", error)

In [None]:
class experiment_coarseNet(nn.Module):
    """Coarse network variant with narrower convs, dropout and batch norm.

    Every conv stage runs conv -> Dropout2d -> (pool) -> ReLU -> BatchNorm2d;
    the flattened features then go through ``fc1``/``fc2`` and are reshaped
    to ``(-1, 1, 55, 74)``. ``fc1`` expects 21504 flattened features
    (256 x 7 x 12).

    Args:
        init_weights: when true, re-initialise conv/linear weights with
            N(0, 0.01) and zero their biases.
    """

    def __init__(self, init_weights=True):
        super(experiment_coarseNet, self).__init__()

        # Layer registration order fixes the parameter order and the order
        # in which _initialize_weights draws random values.
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout2d()
        self.conv1 = nn.Conv2d(3, 16, 11, stride=4)
        self.pool1 = nn.MaxPool2d(2, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, 3)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 128, 3)
        self.bn4 = nn.BatchNorm2d(128)
        self.conv5 = nn.Conv2d(128, 256, 3)
        self.bn5 = nn.BatchNorm2d(256)
        self.fc1 = nn.Linear(21504, 4096)

        self.fc2 = nn.Linear(4096, 4070)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the coarse prediction, shaped (-1, 1, 55, 74)."""
        # Stages 1-2: conv -> dropout -> pool -> ReLU -> batch norm.
        x = self.bn1(self.relu(self.pool1(self.dropout(self.conv1(x)))))
        x = self.bn2(self.relu(self.pool2(self.dropout(self.conv2(x)))))

        # Stages 3-5: conv -> dropout -> ReLU -> batch norm (no pooling).
        unpooled_stages = (
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
            (self.conv5, self.bn5),
        )
        for conv, norm in unpooled_stages:
            x = norm(self.relu(self.dropout(conv(x))))

        flat = x.view(x.size(0), -1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden).view(-1, 1, 55, 74)

    def _initialize_weights(self):
        """Draw conv/linear weights from N(0, 0.01) and zero the biases."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                module.weight.data.normal_(0, 0.01)
                if module.bias is not None:
                    module.bias.data.zero_()
                continue
            if isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.01)
                module.bias.data.zero_()
                
                
class experiment_fineNet(nn.Module):
    """Fine network variant with batch norm after every conv -> ReLU stage.

    ``forward`` returns a ``(N, 1, 55, 74)`` map: channel 0 of the last
    stage, cropped to the first 55 rows and 74 columns, for EVERY sample in
    the batch.

    NOTE(review): ``conv3`` produces 64 channels but only channel 0 is
    returned, and ``y`` (callers pass the coarse prediction) is never read —
    confirm whether a 1-channel head and a concatenation were intended.

    Args:
        init_weights: when true, re-initialise conv/linear weights with
            N(0, 0.01) and zero their biases.
    """

    def __init__(self, init_weights=True):
        super(experiment_fineNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=9, stride=2, padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm2d(64)

        self.conv3 = nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2)
        self.bn3 = nn.BatchNorm2d(64)

        self.relu = nn.ReLU(inplace=True)
        if init_weights:
            self._initialize_weights()

    def forward(self, x, y):
        """Return channel 0 cropped to 55 x 74 for the whole batch; ``y`` is unused."""
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.relu(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.bn2(x)
        x = self.conv3(x)
        x = self.relu(x)
        x = self.bn3(x)
        # Keep the batch axis (and a singleton channel axis). Indexing with
        # [0, 0, ...] would return the first sample only and silently drop
        # the rest of the batch.
        x = x[:, 0:1, 0:55, 0:74]
        return x

    def _initialize_weights(self):
        """Draw conv/linear weights from N(0, 0.01) and zero the biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.weight.data.normal_(0, 0.01)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

In [None]:

gpu_dtype = torch.cuda.FloatTensor

# Seed before the model is built: the weights are drawn inside
# experiment_coarseNet.__init__ (before .cuda() is called) and the DataLoader
# shuffles with the CPU generator, so seeding only the CUDA generator after
# construction does not make the run repeatable.
torch.manual_seed(12345)
torch.cuda.random.manual_seed(12345)

coarse_net_gpu = experiment_coarseNet()
coarse_net_gpu.cuda()
optimizer_coarse = optim.Adadelta(coarse_net_gpu.parameters(), lr=1e-1)
criterion = nn.MSELoss().cuda()

loss = train_coarse_gpu(coarse_net_gpu, criterion, optimizer_coarse, dataset_loader, 1, 1)

In [None]:

gpu_dtype = torch.cuda.FloatTensor

# Seed before the model is built: the weights are drawn inside
# experiment_fineNet.__init__ (before .cuda() is called) and the DataLoader
# shuffles with the CPU generator, so seeding only the CUDA generator after
# construction does not make the run repeatable.
torch.manual_seed(12345)
torch.cuda.random.manual_seed(12345)

fine_net_gpu = experiment_fineNet()
fine_net_gpu.cuda()
optimizer_fine = optim.Adadelta(fine_net_gpu.parameters(), lr=1e-2)
criterion = nn.MSELoss().cuda()

loss = train_fine_gpu(fine_net_gpu, coarse_net_gpu, criterion, optimizer_fine, dataset_loader, 1, 1)