In [None]:
import os
from io import BytesIO
import numpy as np

# Import libraries and modules required for the implementation of the depth estimation model
from zipfile import ZipFile
from sklearn.utils import shuffle
import tensorflow as tf
from keras.layers import Conv2D, UpSampling2D, LeakyReLU, Concatenate
from keras import Model
from keras.applications import DenseNet169, DenseNet121
import keras.backend as K

# **Dataset Loading**

In [None]:
class DataLoader:
    """Builds a shuffled, repeating `tf.data` pipeline of (RGB, depth target) pairs.

    The CSV file is read as one sample per line, ``<rgb_path>,<depth_path>``;
    both entries are appended to the ``/kaggle/input/nyu-depth-v2/nyu_data/`` root.
    """

    def __init__(self, csv_file='/kaggle/input/nyu-depth-v2/nyu_data/data/nyu2_train.csv'):
        """Record the expected tensor shapes and index the samples listed in `csv_file`."""
        self.shape_rgb = (480, 640, 3)
        self.shape_depth = (240, 320, 1)
        self.read_data(csv_file)

    def resize_img(self, img, resolution=480):
        """Resize `img` to `resolution` rows and `int(resolution * 4 / 3)` columns."""
        # NOTE(review): `resize` is not imported in the visible notebook cells; the keyword
        # arguments look like skimage.transform.resize -- confirm and add the import.
        return resize(img, (resolution, int(resolution * 4 / 3)), preserve_range=True, mode='reflect', anti_aliasing=True)

    def read_data(self, csv_file):
        """Load the sample list from `csv_file` into `filenames`, `labels` and `length`.

        Blank lines are skipped and the rows are shuffled with a fixed seed
        (``random_state=0``), so the order is the same on every run.
        """
        # Context manager so the file handle is closed as soon as the text is read.
        with open(csv_file, 'r') as csv_handle:
            lines = csv_handle.read().split('\n')

        data = [row.split(',') for row in lines if len(row) > 0]
        data = shuffle(data, random_state=0)
        self.filenames = [f"/kaggle/input/nyu-depth-v2/nyu_data/{i[0]}" for i in data]
        self.labels = [f"/kaggle/input/nyu-depth-v2/nyu_data/{i[1]}" for i in data]
        self.length = len(self.filenames)

    def parse_function(self, filename, label):
        """Decode one (image path, depth path) pair into float32 tensors.

        Returns:
            (rgb, depth): `rgb` is the decoded image converted to float32; `depth` is the
            decoded depth map resized to `shape_depth[:2]`, divided by 255, and then
            mapped to ``1000 / clip(depth * 1000, 10, 1000)``.
        """
        img = tf.image.decode_jpeg(tf.io.read_file(filename))
        depth = tf.image.decode_jpeg(tf.io.read_file(label))
        rgb = tf.image.convert_image_dtype(img, dtype=tf.float32)
        depth = tf.image.convert_image_dtype(tf.image.resize(depth, [self.shape_depth[0], self.shape_depth[1]]) / 255.0, dtype=tf.float32)
        # Inverse-depth target: clipping to [10, 1000] keeps the result inside [1, 100].
        depth = 1000 / tf.clip_by_value(depth * 1000, 10, 1000)
        return rgb, depth

    def get_batched_dataset(self, batch_size):
        """Return an endlessly repeating dataset of parsed samples in batches of `batch_size`.

        Paths are reshuffled on every pass (buffer covers the whole sample list)
        before being decoded in parallel by `parse_function`.
        """
        dataset = tf.data.Dataset.from_tensor_slices((self.filenames, self.labels))
        dataset = dataset.shuffle(buffer_size=self.length, reshuffle_each_iteration=True)
        dataset = dataset.repeat()
        dataset = dataset.map(map_func=self.parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        dataset = dataset.batch(batch_size=batch_size)
        return dataset

# **Model**

In [None]:
class UpscaleBlock(Model):
    """Decoder stage: 2x bilinear upsampling, skip concatenation, two 3x3 conv + LeakyReLU(0.2)."""

    def __init__(self, filters, name):
        """Create the sub-layers; `name` is used as the prefix of the named sub-layers."""
        super().__init__()
        self.up = UpSampling2D(size=(2, 2), interpolation='bilinear', name=name + '_upsampling2d')
        self.concat = Concatenate(name=name + '_concat')
        self.convA = Conv2D(filters, kernel_size=3, strides=1, padding='same', name=name + '_convA')
        self.reluA = LeakyReLU(alpha=0.2)
        self.convB = Conv2D(filters, kernel_size=3, strides=1, padding='same', name=name + '_convB')
        self.reluB = LeakyReLU(alpha=0.2)

    def call(self, x):
        """Run the block on `x`, a pair ``[tensor_to_upsample, skip_tensor]``."""
        low_res, skip = x[0], x[1]
        # Upsample first, then join with the skip connection.
        merged = self.concat([self.up(low_res), skip])
        hidden = self.reluA(self.convA(merged))
        return self.reluB(self.convB(hidden))

In [None]:
class Encoder(Model):
    """DenseNet-169 feature extractor exposing the final output plus four intermediate outputs."""

    def __init__(self):
        """Load ImageNet-pretrained DenseNet-169 (no top) and wrap it in a multi-output model."""
        super().__init__()
        self.base_model = DenseNet169(input_shape=(None, None, 3), include_top=False, weights='imagenet')
        print(f'Base model loaded {DenseNet169.__name__}')

        # Intermediate layers whose outputs are returned alongside the final one.
        skip_layer_names = ('pool1', 'pool2_pool', 'pool3_pool', 'conv1/relu')
        skip_outputs = [self.base_model.get_layer(layer_name).output for layer_name in skip_layer_names]
        # Output order: final backbone output first, then the layers listed above in order.
        feature_outputs = [self.base_model.outputs[-1], *skip_outputs]
        self.encoder = Model(inputs=self.base_model.inputs, outputs=feature_outputs)

    def call(self, x):
        """Return the list of feature tensors produced by the wrapped model for `x`."""
        return self.encoder(x)

In [None]:
class Decoder(Model):
    """Decoder: 1x1 conv, four `UpscaleBlock` stages with halving filter counts, 3x3 output conv."""

    def __init__(self, decode_filters):
        """Create the layers; stage filters are `decode_filters` // 2, 4, 8 and 16."""
        super().__init__()
        self.conv2 = Conv2D(decode_filters, kernel_size=1, padding='same', name='conv2')
        self.up1 = UpscaleBlock(decode_filters // 2, name='up1')
        self.up2 = UpscaleBlock(decode_filters // 4, name='up2')
        self.up3 = UpscaleBlock(decode_filters // 8, name='up3')
        self.up4 = UpscaleBlock(decode_filters // 16, name='up4')
        self.conv3 = Conv2D(1, kernel_size=3, strides=1, padding='same', name='conv3')

    def call(self, features):
        """Decode `features`, a 5-sequence ``(x, pool1, pool2, pool3, conv1)``, into one channel."""
        bottleneck, pool1, pool2, pool3, conv1 = features
        decoded = self.conv2(bottleneck)
        # Skip tensors are consumed in the order pool3, pool2, pool1, conv1.
        stages = (
            (self.up1, pool3),
            (self.up2, pool2),
            (self.up3, pool1),
            (self.up4, conv1),
        )
        for block, skip in stages:
            decoded = block([decoded, skip])
        return self.conv3(decoded)

In [None]:
class DepthEstimate(Model):
    """Full model: `Encoder` features fed straight into `Decoder`."""

    def __init__(self):
        """Build the encoder, then size the decoder from the encoder's first output."""
        super().__init__()
        self.encoder = Encoder()
        # Channel count of the first output of the encoder's last tracked layer
        # (presumably the wrapped multi-output model -- confirm); the decoder gets half of it.
        first_output = self.encoder.layers[-1].output[0]
        channels = first_output.shape[-1]
        self.decoder = Decoder(decode_filters=int(channels // 2))
        print('\nModel created.')

    def call(self, x):
        """Return the decoder output for input batch `x`."""
        features = self.encoder(x)
        return self.decoder(features)

# **Loss**

In [None]:
def depth_loss_function(y_true, y_pred, theta=0.1, maxDepthVal=100.0):
    """Combined depth loss: SSIM term + gradient (edge) term + `theta` * point-wise L1 term.

    Args:
        y_true: ground-truth depth tensor.
        y_pred: predicted depth tensor.
        theta: weight applied to the point-wise term.
        maxDepthVal: dynamic range passed to `tf.image.ssim`.

    Returns:
        The clipped per-image SSIM term plus the scalar means of the edge and
        (weighted) point-wise terms.
    """
    # Point-wise term: absolute error averaged over the last axis.
    point_wise = K.mean(K.abs(y_pred - y_true), axis=-1)

    # Edge term: absolute error between the image gradients along y and x.
    grad_y_true, grad_x_true = tf.image.image_gradients(y_true)
    grad_y_pred, grad_x_pred = tf.image.image_gradients(y_pred)
    edge_y = K.mean(K.abs(grad_y_pred - grad_y_true), axis=-1)
    edge_x = K.mean(K.abs(grad_x_pred - grad_x_true), axis=-1)
    edges = edge_y + edge_x

    # Structural term: (1 - SSIM) / 2, clipped into [0, 1].
    structural = K.clip((1 - tf.image.ssim(y_true, y_pred, maxDepthVal)) * 0.5, 0, 1)

    return structural + K.mean(edges) + theta * K.mean(point_wise)

# **Evaluate**

In [None]:
# Load test data
def load_test_data(test_data_zip_file='nyu_test.zip'):
    """Load the RGB, depth and crop arrays from a ZIP archive of `.npy` files.

    Only the three required members are read, so unrelated archive entries are
    never loaded into memory.

    Args:
        test_data_zip_file: path to the archive; defaults to ``'nyu_test.zip'``
            in the current working directory.

    Returns:
        Tuple ``(rgb, depth, crop)`` of NumPy arrays loaded from
        ``eigen_test_rgb.npy``, ``eigen_test_depth.npy`` and ``eigen_test_crop.npy``.

    Raises:
        KeyError: if one of the three members is missing from the archive.
    """
    print('Loading test data...', end='')

    def load_member(archive, member_name):
        # np.load needs a seekable file object, so wrap the raw member bytes.
        return np.load(BytesIO(archive.read(member_name)))

    with ZipFile(test_data_zip_file) as archive:
        rgb = load_member(archive, 'eigen_test_rgb.npy')
        depth = load_member(archive, 'eigen_test_depth.npy')
        crop = load_member(archive, 'eigen_test_crop.npy')

    print('Test data loaded.\n')

    return rgb, depth, crop

In [None]:
def DepthNorm(x, maxDepth):
    """Return `maxDepth` divided by `x` (element-wise when `x` is an array)."""
    inverted = maxDepth / x
    return inverted

In [None]:
def predict(model, images, minDepth=10, maxDepth=1000, batch_size=2):
    """Run `model` on `images` and return the clipped result divided by `maxDepth`.

    Args:
        model: object exposing ``predict(images, batch_size=...)``.
        images: array with 2, 3 or 4 dimensions. A 2-D input is stacked three
            times along a new last axis; a 3-D input gets a leading batch axis.
        minDepth: lower clipping bound.
        maxDepth: upper clipping bound and final divisor.
        batch_size: forwarded to ``model.predict``.

    Returns:
        ``np.clip(DepthNorm(output, 1000), minDepth, maxDepth) / maxDepth``.
    """
    batch = images
    if batch.ndim == 2:
        batch = np.stack((batch, batch, batch), axis=2)
    if batch.ndim == 3:
        batch = batch[np.newaxis, ...]

    raw_output = model.predict(batch, batch_size=batch_size)

    # DepthNorm is called with a fixed 1000 here, independent of the `maxDepth` argument.
    depth = DepthNorm(raw_output, maxDepth=1000)
    clipped = np.clip(depth, minDepth, maxDepth)
    return clipped / maxDepth

In [None]:
def scale_up(scale, images):
    """Resize every image in `images` by `scale` along its first two axes.

    Returns a NumPy array built from the resized images.
    """
    # NOTE(review): `resize` is not imported in the visible notebook cells; the keyword
    # arguments look like skimage.transform.resize -- confirm and add the import.
    upscaled = []
    for img in images:
        target_shape = (scale * img.shape[0], scale * img.shape[1])
        upscaled.append(
            resize(img, target_shape, order=1, preserve_range=True, mode='reflect', anti_aliasing=True)
        )
    return np.array(upscaled)

In [None]:
def evaluate(model, rgb, depth, crop, batch_size=6):
    """Evaluate `model` on a test set and print the mean of six depth metrics.

    Each image is predicted twice (as-is and mirrored along its width axis);
    the two predictions are averaged after un-mirroring the second one. Ground
    truth and predictions are cropped to ``crop`` before scoring.

    Args:
        model: model accepted by `predict`.
        rgb: array of test images; divided by 255 before prediction.
        depth: array of ground-truth depth maps, indexed like `rgb`.
        crop: four indices ``(row_start, row_end, col_start, col_end)``; both
            end indices are inclusive.
        batch_size: number of images processed per loop iteration.
    """
    def compute_errors(gt, pred):
        """Return (a1, a2, a3, abs_rel, rmse, log_10) for one ground-truth/prediction pair."""
        # a1..a3: fraction of pixels whose max(gt/pred, pred/gt) is below 1.25 ** i.
        thresh = np.maximum((gt / pred), (pred / gt))
        a1, a2, a3 = [(thresh < 1.25 ** i).mean() for i in range(1, 4)]
        abs_rel = np.mean(np.abs(gt - pred) / gt)
        rmse = np.sqrt(np.mean((gt - pred) ** 2))
        log_10 = np.mean(np.abs(np.log10(gt) - np.log10(pred)))
        # All six values are required: they fill the six rows of `depth_scores`.
        return a1, a2, a3, abs_rel, rmse, log_10

    # One column of six metrics per test image.
    depth_scores = np.zeros((6, len(rgb)))

    for i in range(0, len(rgb), batch_size):
        x = rgb[i:i + batch_size]
        true_y = depth[i:i + batch_size]
        # Predictions are upscaled 2x and multiplied by 10
        # (presumably to match the ground-truth resolution and units -- confirm).
        pred_y = 10 * scale_up(2, predict(model, x / 255)[:, :, :, 0])
        # Second pass on the input mirrored along its second-to-last axis.
        pred_y_flip = 10 * scale_up(2, predict(model, x[..., ::-1, :] / 255)[:, :, :, 0])

        true_y = true_y[:, crop[0]:crop[1] + 1, crop[2]:crop[3] + 1]
        pred_y = pred_y[:, crop[0]:crop[1] + 1, crop[2]:crop[3] + 1]
        pred_y_flip = pred_y_flip[:, crop[0]:crop[1] + 1, crop[2]:crop[3] + 1]

        for j, (gt, p, pf) in enumerate(zip(true_y, pred_y, pred_y_flip)):
            # Un-mirror the flipped prediction before averaging it with the direct one.
            depth_scores[:, i + j] = compute_errors(gt, 0.5 * (p + np.fliplr(pf)))

    mean_errors = depth_scores.mean(axis=1)

    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format('a1', 'a2', 'a3', 'rel', 'rms', 'log_10'))
    print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors))

In [None]:
# set fitting parameters
batch_size     = 8
learning_rate  = 0.0001
epochs         = 5  # passed to model.fit below (the fit call trains for 5 epochs)

# model creation
model = DepthEstimate()

dl = DataLoader()
train_generator = dl.get_batched_dataset(batch_size)

print('Data loader ready.')

# `learning_rate` is the supported keyword; the deprecated `lr` alias is ignored or
# rejected by recent Keras releases, which would train at the optimizer default instead.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, amsgrad=True)

# model compilation using custom loss function and the Adam optimizer
model.compile(loss=depth_loss_function, optimizer=optimizer)

# checkpoint saving, used for evaluation
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_weights_only=True, verbose=1)

# model fitting; the dataset repeats forever, so steps_per_epoch bounds each epoch
model.fit(train_generator, epochs=epochs, steps_per_epoch=dl.length//batch_size, callbacks=[cp_callback])