In [None]:
!pip install segmentation-models



In [None]:
import random
import cv2
import numpy as np
# import imutils
import os

import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import EfficientNetB7, EfficientNetB6, EfficientNetB4, EfficientNetB3
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras import backend as K
import segmentation_models as sm

Segmentation Models: using `keras` framework.


In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
print(tf.__version__)


2.17.1


In [None]:
# Constants

# --- Data split and input geometry ---
VAL_SIZE = 0.3  # fraction of the training CSV held out for validation
HEIGHT = 256
WIDTH = 256
# HEIGHT = 240
# WIDTH = 320

# --- Optimisation ---
BATCH_SIZE = 16
EPOCHS = 15
INIT_LR = 1e-4

# --- Network ---
# BACKBONE = "EfficientNetB4"
BACKBONE = "efficientnetb4"
FREEZE_BACKBONE = True
DECODER_FILTERS = [256, 128, 64, 32, 16]
# Weights read by depth_loss for its SSIM, edge and absolute-error terms (in that order).
DEPTH_LOSS = [1.0, 1.0, 0.3]

# --- Dataset index files ---
TRAIN_PATH = "./data/nyu2_train.csv"
TEST_PATH = "./data/nyu2_test.csv"

# --- Output locations (Google Drive) ---
BASE_DIR = '/content/drive/My Drive/Tugas/smt7/Computer Vision/Tubes CompVis'
HISTORY_SAVE_PATH = f'{BASE_DIR}/training results/tf_{BACKBONE}_4_.csv'
MODEL_SAVE_PATH = f'{BASE_DIR}/training results/tf_{BACKBONE}_4_.keras'

In [None]:
os.makedirs(BASE_DIR + '/training results', exist_ok=True)

## Data Preparation

In [None]:
# download nyu depth v2 dataset from gitlab
!git clone https://gitlab.com/siddinc/new_depth.git ./data

fatal: destination path './data' already exists and is not an empty directory.


In [None]:
# loading the dataset as img and depth map filepaths

import csv

def read_csv(csv_file_path):
  """Read a comma-separated file of (image path, depth-map path) rows.

  Empty rows are skipped; './' is prepended to both paths of every other row.

  Returns:
    A list of (image_path, depth_map_path) tuples in file order.
  """
  pairs = []
  with open(csv_file_path, 'r') as handle:
    for record in csv.reader(handle, delimiter=','):
      if len(record) == 0:
        continue
      pairs.append(('./' + record[0], './' + record[1]))
  return pairs

def train_val_split(train_paths, val_size):
  """Randomly split a sequence of paths into train and validation lists.

  Args:
    train_paths: sequence of items to split. It is NOT modified: the
      shuffle is applied to a copy so the caller's ordering is preserved.
    val_size: fraction (0..1) of the items assigned to the validation part.

  Returns:
    (train, val): two new lists; train holds the first
    int(len * (1 - val_size)) shuffled items, val holds the rest.
  """
  # Shuffle a copy; shuffling the argument in place would silently reorder
  # the caller's list.
  shuffled = list(train_paths)
  random.shuffle(shuffled)
  split_at = int(len(shuffled) * (1.0 - val_size))
  return shuffled[:split_at], shuffled[split_at:]

def load_train_paths(train_path):
  """Build the train/validation partition and the image-to-depth lookup.

  Reads the CSV via read_csv, then splits the image paths with
  train_val_split using the VAL_SIZE constant.

  Returns:
    (partition, labels): partition is a dict with 'train' and 'validation'
    lists of image paths; labels maps each image path to its depth-map path.
  """
  pairs = read_csv(train_path)
  labels = dict(pairs)
  image_paths = [pair[0] for pair in pairs]
  train_part, val_part = train_val_split(image_paths, VAL_SIZE)
  return {'train': train_part, 'validation': val_part}, labels

In [None]:
# preprocessing the dataset

def normalize_img(img):
    """Min-max scale an array to the range [0, 1].

    Args:
        img: NumPy array of pixel values.

    Returns:
        (img - min) / (max - min). When every element is equal the range is
        zero; an all-zero array of the same shape is returned in that case
        instead of the NaNs a 0/0 division would produce.
    """
    lowest = img.min()
    span = img.max() - lowest
    if span == 0:
        # Constant image: keep float inputs in their dtype, promote others
        # to float64 (what true division would have produced).
        out_dtype = img.dtype if np.issubdtype(img.dtype, np.floating) else np.float64
        return np.zeros(img.shape, dtype=out_dtype)
    return (img - lowest) / span

def preprocess_image(img_path, horizontal_flip=False):
  """Load an image and turn it into a min-max normalised HEIGHT x WIDTH array.

  Args:
    img_path: path handed to cv2.imread.
    horizontal_flip: mirror the result left-right when True.

  Returns:
    The resized (nearest-neighbour), float-converted and normalised image.
    If the file cannot be read, an error line is printed and an all-zero
    (HEIGHT, WIDTH, 3) float32 array is returned instead.
  """
  raw = cv2.imread(img_path)
  if raw is None:
    print(f"Error: Could not load image at {img_path}")
    # Fall back to a blank sample rather than aborting the batch.
    return np.zeros((HEIGHT, WIDTH, 3), dtype=np.float32)

  resized = cv2.resize(raw, (WIDTH, HEIGHT), interpolation=cv2.INTER_NEAREST)
  scaled = normalize_img(resized.astype("float"))
  return cv2.flip(scaled, 1) if horizontal_flip else scaled

def preprocess_depth_map(depth_map_path, horizontal_flip=False):
  """Load a depth map as a min-max normalised (HEIGHT, WIDTH, 1) array.

  Args:
    depth_map_path: path handed to cv2.imread.
    horizontal_flip: mirror the result left-right when True.

  Returns:
    The grayscale-converted, resized (nearest-neighbour) and normalised
    depth map with a trailing channel axis of size 1.

  Raises:
    FileNotFoundError: if the file cannot be read. A blank target would
      silently corrupt training, so this fails loudly instead of
      substituting zeros.
  """
  depth_map = cv2.imread(depth_map_path)
  if depth_map is None:
    # cv2.imread signals failure by returning None; without this check the
    # error surfaces later as an opaque cvtColor failure.
    raise FileNotFoundError(f"Could not load depth map at {depth_map_path}")
  depth_map = cv2.cvtColor(depth_map, cv2.COLOR_BGR2GRAY)
  depth_map = cv2.resize(depth_map, (WIDTH, HEIGHT), interpolation=cv2.INTER_NEAREST)
  depth_map = depth_map.astype("float")
  depth_map = normalize_img(depth_map)

  if horizontal_flip:
    depth_map = cv2.flip(depth_map, 1)

  # Add the single channel axis expected by the batch arrays.
  depth_map = np.reshape(depth_map, (depth_map.shape[0], depth_map.shape[1], 1))
  return depth_map

In [None]:
# data generator

from tensorflow.keras.utils import Sequence

class DataGenerator(Sequence):
  """Keras Sequence yielding batches of preprocessed images (and depth maps).

  Every sample is mirrored horizontally with probability 0.5; image and
  depth map of one sample always share the same flip decision.

  Args:
    list_IDs: image paths; each one is passed to preprocess_image.
    labels: mapping from image path to depth-map path.
    batch_size: samples per batch; a trailing partial batch is dropped.
    dim: (height, width) of the arrays allocated for each batch.
    n_channels: channel count of the image batch array.
    shuffle: reshuffle the sample order whenever on_epoch_end runs.
    pred: when True, batches contain images only (no depth maps).
    **kwargs: forwarded unchanged to the Sequence base-class constructor.
  """
  def __init__(self, list_IDs, labels, batch_size=16, dim=(128,128), n_channels=3, shuffle=True, pred=False, **kwargs):
    # Initialise the base class first; skipping this is what makes Keras
    # emit its "super not called" warning during fit().
    super().__init__(**kwargs)
    self.dim = dim
    self.batch_size = batch_size
    self.labels = labels
    self.list_IDs = list_IDs
    self.n_channels = n_channels
    self.shuffle = shuffle
    self.pred = pred
    self.on_epoch_end()

  def __len__(self):
    """Number of full batches available."""
    return int(np.floor(len(self.list_IDs) / self.batch_size))

  def __getitem__(self, index):
    """Return batch `index`: X when pred is set, otherwise (X, y)."""
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
    list_IDs_temp = [self.list_IDs[k] for k in indexes]
    return self.__data_generation(list_IDs_temp)

  def on_epoch_end(self):
    """Rebuild the sample order, shuffling it when shuffle is enabled."""
    self.indexes = np.arange(len(self.list_IDs))
    if self.shuffle:
      np.random.shuffle(self.indexes)

  def __data_generation(self, list_IDs_temp):
    """Load and preprocess the given sample IDs into batch arrays."""
    X = np.empty((self.batch_size, self.dim[0], self.dim[1], self.n_channels))
    y = None if self.pred else np.empty((self.batch_size, self.dim[0], self.dim[1], 1))

    for i, ID in enumerate(list_IDs_temp):
      # One coin toss per sample, shared by image and depth map.
      # NOTE(review): the random flip is also applied when pred=True —
      # confirm that is intended for inference.
      flip = random.choice([True, False])
      X[i,] = preprocess_image(ID, flip)
      if y is not None:
        y[i,] = preprocess_depth_map(self.labels[ID], flip)

    if self.pred:
      return X
    return X, y
      # return tf.cast(X, tf.float32)

In [None]:
partition, labels = load_train_paths(TRAIN_PATH)
print(len(partition['train']), len(partition['validation']))

22437 9617


In [None]:
training_generator = DataGenerator(list_IDs=partition['train'], labels=labels, batch_size=BATCH_SIZE, dim=(HEIGHT, WIDTH), n_channels=3, shuffle=True, pred=False)
validation_generator = DataGenerator(list_IDs=partition['validation'], labels=labels, batch_size=BATCH_SIZE, dim=(HEIGHT, WIDTH), n_channels=3, shuffle=True, pred=False)

## Modelling

In [None]:
# Define model
# U-Net from segmentation_models with the configured backbone as encoder.
model = sm.Unet(
    backbone_name=BACKBONE,
    input_shape=(HEIGHT, WIDTH, 3),
    classes=1, # Single-channel output for depth estimation
    activation='relu', # ReLU (not linear) on the output: predictions are clamped to be non-negative
    encoder_weights='imagenet',
    encoder_freeze=FREEZE_BACKBONE
)

In [None]:
# model = build_unet(input_shape=(HEIGHT, WIDTH, 3), backbone_name=BACKBONE, pretrained_weights="imagenet")

In [None]:
from io import StringIO
import csv
# Capture the model summary
def save_model_summary_to_csv(model, csv_filename):
    """Write the text of model.summary() to a CSV file, one line per row.

    Each summary line is stripped of surrounding whitespace and stored as a
    single-cell row.

    Args:
        model: object exposing summary(print_fn=...).
        csv_filename: destination path; the file is overwritten.
    """
    buffer = StringIO()

    def _collect(text):
        # Called by summary() with the text it would otherwise print.
        buffer.write(text + '\n')

    model.summary(print_fn=_collect)

    # Rewind so the buffered text can be read back line by line.
    buffer.seek(0)
    rows = ([line.strip()] for line in buffer)

    with open(csv_filename, 'w', newline='') as out:
        csv.writer(out).writerows(rows)

# Save the model summary
save_model_summary_to_csv(model, 'model_summary.csv')

print("Model summary saved to 'model_summary.csv'")


Model summary saved to 'model_summary.csv'


In [None]:
model.summary()

## Training

In [None]:
# Custom callback

class MetricsToCSVCallback(tf.keras.callbacks.Callback):
  """Keras callback that writes each epoch's logs as one row of a CSV file.

  The file is (re)created on the first epoch of a training run; its header
  is 'epoch' followed by the keys of that first epoch's logs. State is reset
  at the start and end of every run so the same instance can be passed to
  several fit() calls (each run overwrites the file).

  Args:
    csv_file_path: destination path of the CSV file.
  """
  def __init__(self, csv_file_path):
    super(MetricsToCSVCallback, self).__init__()
    self.csv_file_path = csv_file_path
    self.csv_file = None
    self.csv_writer = None
    self.header_written = False

  def _reset(self):
    """Close the file if it is open and forget the writer/header state."""
    if self.csv_file:
      self.csv_file.close()
    self.csv_file = None
    self.csv_writer = None
    self.header_written = False

  def on_train_begin(self, logs=None):
    # Start clean: a previous run may have been interrupted before
    # on_train_end could close the file.
    self._reset()

  def on_epoch_end(self, epoch, logs=None):
    logs = logs or {}
    row = {'epoch': epoch + 1, **logs}

    # Write to CSV
    if not self.header_written:
      # First epoch of this run: create the file and write the header.
      self.csv_file = open(self.csv_file_path, mode='w', newline='')
      self.csv_writer = csv.DictWriter(self.csv_file, fieldnames=list(row))
      self.csv_writer.writeheader()
      self.header_written = True

    # Write row
    self.csv_writer.writerow(row)
    self.csv_file.flush()  # Ensure the data is written immediately

  def on_train_end(self, logs=None):
    # Resetting (not just closing) keeps a later fit() from writing to a
    # closed file handle.
    self._reset()

In [None]:
# custom loss
def depth_loss(y_true, y_pred):
  """Weighted sum of SSIM, edge (image-gradient) and absolute-error terms.

  The weights come from the DEPTH_LOSS constant: index 0 scales the SSIM
  term, index 1 the edge term and index 2 the absolute-error term.

  Args:
    y_true: ground-truth depth batch.
    y_pred: predicted depth batch.

  Returns:
    The combined loss; the SSIM term is per image, the other two terms are
    reduced to scalars before being added.
  """
  w1, w2, w3 = DEPTH_LOSS[0], DEPTH_LOSS[1], DEPTH_LOSS[2]

  # Align dtypes (as depth_acc does) so the function also works when called
  # directly with targets whose dtype differs from the prediction's.
  y_true = tf.cast(y_true, y_pred.dtype)

  # Point-wise absolute error.
  l_depth = K.mean(K.abs(y_pred - y_true), axis=-1)

  # Difference of vertical and horizontal image gradients.
  dy_true, dx_true = tf.image.image_gradients(y_true)
  dy_pred, dx_pred = tf.image.image_gradients(y_pred)
  l_edges = K.mean(K.abs(dy_pred - dy_true) + K.abs(dx_pred - dx_true), axis=-1)

  # Structural dissimilarity, (1 - SSIM) / 2 clipped to [0, 1]; max_val=1.0.
  l_ssim = K.clip((1 - tf.image.ssim(y_true, y_pred, 1.0)) * 0.5, 0, 1)

  return (w1 * l_ssim) + (w2 * K.mean(l_edges)) + (w3 * K.mean(l_depth))

#custom soft accuracy
def depth_acc(y_true, y_pred):
  """Fraction of elements whose rounded prediction equals the rounded target.

  Both inputs are cast to float32 first so they can be compared.
  """
  rounded_true = K.round(tf.cast(y_true, tf.float32))
  rounded_pred = K.round(tf.cast(y_pred, tf.float32))
  matches = K.equal(rounded_true, rounded_pred)
  return K.mean(matches)

In [None]:
# Instantiate callback
save_history_callback = MetricsToCSVCallback(HISTORY_SAVE_PATH)

model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=MODEL_SAVE_PATH,
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
)

def poly_decay(epoch):
  """Polynomial learning-rate schedule (power 1.0, i.e. linear decay).

  Returns INIT_LR scaled by (1 - epoch / EPOCHS) raised to the power.
  """
  power = 1.0
  remaining = 1 - (epoch / float(EPOCHS))
  return INIT_LR * remaining ** power

callbacks = [LearningRateScheduler(poly_decay),
             save_history_callback,
             model_checkpoint]

In [None]:
opt = Adam(learning_rate=INIT_LR, amsgrad=True)
model.compile(optimizer=opt, loss=depth_loss, metrics=[depth_acc])

In [None]:
import time

start_time = time.time()

history = model.fit(training_generator, validation_data=validation_generator, epochs=EPOCHS, callbacks=callbacks)

training_time = time.time() - start_time

Epoch 1/15


  self._warn_if_super_not_called()


[1m1402/1402[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 304ms/step - depth_acc: 0.6631 - loss: 0.3918
Epoch 1: val_loss improved from inf to 0.17551, saving model to /content/drive/My Drive/Tugas/smt7/Computer Vision/Tubes CompVis/training results/tf_efficientnetb4_4_.keras
[1m1402/1402[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m691s[0m 382ms/step - depth_acc: 0.6632 - loss: 0.3917 - val_depth_acc: 0.8326 - val_loss: 0.1755 - learning_rate: 1.0000e-04
Epoch 2/15
[1m1402/1402[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303ms/step - depth_acc: 0.8272 - loss: 0.1728
Epoch 2: val_loss improved from 0.17551 to 0.15070, saving model to /content/drive/My Drive/Tugas/smt7/Computer Vision/Tubes CompVis/training results/tf_efficientnetb4_4_.keras
[1m1402/1402[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m520s[0m 370ms/step - depth_acc: 0.8272 - loss: 0.1728 - val_depth_acc: 0.8613 - val_loss: 0.1507 - learning_rate: 9.3333e-05
Epoch 3/15
[1m1402/1402[0m [32m━━━

In [None]:
print(f"Training time: {training_time} seconds")

Training time: 8103.292143583298 seconds


In [None]:
# Save manually for backup
model.save(BASE_DIR + '/training results/tf_' + BACKBONE + '_backup.keras')

In [None]:
from tensorflow.keras.models import load_model

# Load the best checkpoint from the path ModelCheckpoint writes to
# (MODEL_SAVE_PATH); the previously hard-coded '..._4_best.keras' file is
# never created by this notebook. The custom loss/metric must be supplied
# so the compiled model can be restored.
r = tf.keras.models.load_model(MODEL_SAVE_PATH, custom_objects={'depth_loss': depth_loss, 'depth_acc': depth_acc})

ValueError: File not found: filepath=/content/drive/My Drive/Tugas/smt7/Computer Vision/Tubes CompVis/training results/tf_efficientnetb4_4_best.keras. Please ensure the file is an accessible `.keras` zip file.

In [None]:
img_dm_pairs = read_csv('./data/nyu2_test.csv')
labels = {i: j for i,j in img_dm_pairs}
test_paths = [i for i,j in img_dm_pairs]
print(len(test_paths))
partition = {'test': test_paths}

In [None]:
img_dm_pairs = read_csv('./data/nyu2_train.csv')
labels2 = {i: j for i,j in img_dm_pairs}
train_paths2 = [i for i,j in img_dm_pairs]
print(len(train_paths2))
partition2 = {'train': train_paths2}

In [None]:
x_test = np.empty((len(test_paths), HEIGHT, WIDTH, 3))
y_test = np.empty((len(test_paths), HEIGHT, WIDTH, 1))

for i, ID in enumerate(partition['test'][:]):
  x_test[i, ] = preprocess_image(ID, horizontal_flip=False)
  y_test[i, ] = preprocess_depth_map(labels[ID], horizontal_flip=False)

In [None]:
print(r.evaluate(x_test, y_test))

In [None]:
# print(model.evaluate(x_test, y_test))

In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("rmzhang0526/nyu-depth-v2-labeled")

print("Path to dataset files:", path)

In [None]:
# Generate the Eigen test-split arrays
import h5py
import numpy as np

# Load the .mat file (adjust the file path as needed)
with h5py.File(path + '/nyu_depth_v2_labeled.mat', 'r') as file:
    # Access the data
    rgb_images = np.array(file['images'])  # presumably (N, 3, 640, 480): only that layout gives (N, 480, 640, 3) after the transpose below — TODO confirm
    depth_maps = np.array(file['depths'])  # presumably (N, 640, 480), for the same reason — TODO confirm

# Reverse the trailing axes so images become (N, 480, 640, 3) and depths (N, 480, 640)
rgb_images = rgb_images.transpose(0, 3, 2, 1)
depth_maps = depth_maps.transpose(0, 2, 1)

# Eigen test indices (replace with actual indices)
#984
# eigen_test_indices = np.arange(985)  # indices 0..984; replace with the actual indices you want to use
# 654
# Indices 0..653 (the first 654 samples), generated instead of spelled out.
eigen_test_indices = np.arange(654)
# 100
# eigen_test_indices = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100])

eigen_test_rgb = rgb_images[eigen_test_indices]
eigen_test_depth = depth_maps[eigen_test_indices]

# Define crop values (Eigen et al. crop values)
crop = [20, 460, 24, 616]  # [y_start, y_end, x_start, x_end]

# Crop the depth maps based on Eigen crop values
eigen_test_depth_cropped = eigen_test_depth[:, crop[0]:crop[1]+1, crop[2]:crop[3]+1]

# Save the generated arrays to .npy files
np.save('eigen_test_rgb.npy', eigen_test_rgb)
np.save('eigen_test_depth.npy', eigen_test_depth_cropped)
np.save('eigen_test_crop.npy', crop)

print("Eigen RGB, depth, and crop files generated and saved.")

In [None]:
from tensorflow.keras.layers import AveragePooling2D
from sklearn.metrics import r2_score

r2_scores = []

# Load test data
def load_test_data():
    """Load the saved test arrays from the current working directory.

    Returns:
        (rgb, depth, crop) read from 'eigen_test_rgb.npy',
        'eigen_test_depth.npy' and 'eigen_test_crop.npy' respectively.
    """
    print('Loading test data...', end='')

    file_names = ('eigen_test_rgb.npy', 'eigen_test_depth.npy', 'eigen_test_crop.npy')
    arrays = tuple(np.load(name) for name in file_names)
    print('Test data loaded.\n')

    return arrays

def DepthNorm(x, maxDepth):
    """Return maxDepth divided by x (element-wise for array inputs)."""
    inverted = maxDepth / x
    return inverted

def predict(model, images, minDepth=10, maxDepth=1000, batch_size=6):
    """Run the model on one or more images after resizing to HEIGHT x WIDTH.

    Accepts a 2-D grayscale image (replicated to three channels), a single
    3-D image (given a leading batch axis) or a 4-D batch.

    Args:
        model: object exposing predict(images, batch_size=...).
        images: array of rank 2, 3 or 4.
        minDepth, maxDepth: accepted but not used by this function.
        batch_size: batch size forwarded to model.predict.

    Returns:
        Whatever model.predict returns for the resized batch.
    """
    # Grayscale -> three identical channels.
    if len(images.shape) < 3:
        images = np.stack((images, images, images), axis=2)
    # Single image -> batch of one.
    if len(images.shape) < 4:
        height, width, channels = images.shape[0], images.shape[1], images.shape[2]
        images = images.reshape((1, height, width, channels))

    resized = tf.image.resize(images, [HEIGHT, WIDTH])
    return model.predict(resized, batch_size=batch_size)

def scale_up(scale, images):
    """Enlarge every image in a batch by an integer factor.

    Each image's first two dimensions are multiplied by `scale` using
    skimage's resize (order 1, value range preserved, reflect padding,
    anti-aliasing on).

    Returns:
        The enlarged images stacked into one array.
    """
    from skimage.transform import resize

    def _enlarge(img):
        target_shape = (scale * img.shape[0], scale * img.shape[1])
        return resize(img, target_shape, order=1, preserve_range=True, mode='reflect', anti_aliasing=True)

    return np.stack([_enlarge(images[idx]) for idx in range(len(images))])

def evaluate(model, rgb, depth, crop, batch_size=6):
    """Evaluate depth predictions and print the six averaged metrics.

    For every image the prediction is compared against the ground truth with
    a1/a2/a3 threshold accuracies, absolute relative error, RMSE and log10
    error. An R^2 score per image is additionally appended to the
    module-level `r2_scores` list.

    Args:
        model: model handed to predict().
        rgb: batch of input images, indexed as rgb[n, :, :, :]; values are
            divided by 255 before prediction.
        depth: ground-truth depth maps, indexed as depth[n, :, :].
        crop: [y_start, y_end, x_start, x_end] (end indices inclusive)
            applied to the predictions.
        batch_size: number of images predicted per step. All images are
            evaluated, including a final partial batch.

    Returns:
        None; the averaged metrics are printed.
    """
    def compute_errors(gt, pred):
        """Return (a1, a2, a3, abs_rel, rmse, log_10) for one image."""
        # print(f"GT: min={gt.min()}, max={gt.max()}, mean={gt.mean()}")
        # print(f"Pred: min={pred.min()}, max={pred.max()}, mean={pred.mean()}")
        epsilon = 1e-6
        # Bring the prediction to the ground-truth resolution.
        pred = cv2.resize(pred, (gt.shape[1], gt.shape[0]))
        # Replace non-positive values so the ratios and logs below are defined.
        gt = np.where(gt > 0, gt, epsilon)
        pred = np.where(pred > 0, pred, epsilon)

        # Average-pooled copies are used only for the R^2 score.
        gt_pool = np.expand_dims(np.expand_dims(gt, axis=0), axis=-1)  # Shape: (1, H, W, 1)
        pred_pool = np.expand_dims(np.expand_dims(pred, axis=0), axis=-1)  # Shape: (1, H, W, 1)
        gt_pool = AveragePooling2D(pool_size=(7, 7), strides=(2, 2))(gt_pool)
        pred_pool = AveragePooling2D(pool_size=(7, 7), strides=(2, 2))(pred_pool)
        gt_pool = np.squeeze(gt_pool)  # Shape: (H_new, W_new)
        pred_pool = np.squeeze(pred_pool)  # Shape: (H_new, W_new)
        # Flatten the depth maps
        ground_truth_flat = gt_pool.flatten()
        predicted_flat = pred_pool.flatten()

        # Compute R^2 score for the current image and store it
        r2 = r2_score(ground_truth_flat, predicted_flat)
        r2_scores.append(r2)
        thresh = np.maximum((gt / pred), (pred / gt))

        a1 = (thresh < 1.25   ).mean()
        a2 = (thresh < 1.25 ** 2).mean()
        a3 = (thresh < 1.25 ** 3).mean()

        abs_rel = np.mean((np.abs(gt - pred) / gt))

        rmse = (gt - pred) ** 2
        rmse = np.sqrt(rmse.mean())

        log_10 = (np.abs(np.log10(gt)-np.log10(pred))).mean()

        return a1, a2, a3, abs_rel, rmse, log_10

    depth_scores = np.zeros((6, len(rgb))) # six metrics

    bs = batch_size
    # Ceil division: the trailing partial batch must be evaluated too,
    # otherwise its columns in depth_scores stay zero and bias the means.
    n_batches = (len(rgb) + bs - 1) // bs

    for i in range(n_batches):
        x = rgb[(i)*bs:(i+1)*bs,:,:,:]
        # Compute results
        true_y = depth[(i)*bs:(i+1)*bs,:,:]
        # NOTE(review): the 10.0 factor presumably maps normalised
        # predictions to a depth range — confirm the intended unit.
        pred_y = scale_up(2, predict(model, x/255, minDepth=10, maxDepth=1000, batch_size=bs)[:,:,:,0]) * 10.0
        # The mirrored (test-time augmentation) prediction is no longer
        # computed: its result was never used and it doubled inference time.

        # NOTE(review): crop indices are applied to the 2x-upscaled
        # prediction; if that frame differs from the one the ground truth
        # was cropped in, the regions do not correspond — confirm.
        pred_y = pred_y[:,crop[0]:crop[1]+1, crop[2]:crop[3]+1]

        # Compute errors per image in batch
        for j in range(len(true_y)):
            # NOTE(review): 0.75 is an unexplained rescaling of the
            # prediction — confirm its origin.
            errors = compute_errors((true_y[j]), (0.75 * pred_y[j]))
            for k in range(len(errors)):
                depth_scores[k][(i*bs)+j] = errors[k]

    e = depth_scores.mean(axis=1)

    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format('a1', 'a2', 'a3', 'rel', 'rms', 'log_10'))
    print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(e[0],e[1],e[2],e[3],e[4],e[5]))

In [None]:
rgb, depth, crop = load_test_data()

In [None]:
evaluate(r, rgb, depth, crop)

# Ignore me

In [None]:
from tensorflow.keras.applications import EfficientNetV2L

inspect_backbone = EfficientNetV2L(include_top=False, input_shape=(HEIGHT, WIDTH, 3))

inspect_backbone.summary()

In [None]:
import pandas as pd

# Load Dataset from CSV
def load_data_from_csv(csv_file, base_dir, limit=100):
  """Load image/depth pairs listed in a header-less CSV into arrays.

  Column 0 holds image paths and column 1 depth-map paths, both relative to
  base_dir. Pairs where either file cannot be read are reported and skipped.

  Args:
    csv_file: path of the CSV file.
    base_dir: directory the CSV's paths are joined onto.
    limit: maximum number of CSV rows to use (default 100, the previously
      hard-coded value); pass None to use every row.

  Returns:
    (images, depths): images resized to WIDTH x HEIGHT and divided by 255;
    depths read as grayscale, resized, divided by 255 and reshaped to
    (-1, HEIGHT, WIDTH, 1).
  """
  images = []
  depths = []

  # Read the CSV file
  df = pd.read_csv(csv_file, header=None)
  if limit is not None:
    df = df[:limit]
  img_paths = df[0].values
  depth_paths = df[1].values

  for img_path, depth_path in zip(img_paths, depth_paths):
    # Load the image
    img = cv2.imread(os.path.join(base_dir, img_path))
    depth = cv2.imread(os.path.join(base_dir, depth_path), cv2.IMREAD_GRAYSCALE)

    if img is None or depth is None:
      print(f"Error loading {img_path} or {depth_path}")
      continue

    # Resize to match the model's input dimensions
    img = cv2.resize(img, (WIDTH, HEIGHT))
    depth = cv2.resize(depth, (WIDTH, HEIGHT))

    # Normalize images and depth maps
    images.append(img / 255.0)
    depths.append(depth / 255.0)

  return np.array(images), np.array(depths).reshape(-1, HEIGHT, WIDTH, 1)

In [None]:
from matplotlib import pyplot as plt
import random

def visualize_inference(model, images, depths, index=None):
  """
  Plots one sample's input image, ground-truth depth and predicted depth
  side by side. A random sample is chosen when no index is given.
  """
  if index is None:
    index = random.randint(0, len(images) - 1)

  input_image = images[index]
  # Drop the single channel axis so the map can be shown as a 2-D image.
  true_depth = depths[index].squeeze()
  # The model expects a batch, so add and then remove a leading axis.
  predicted_depth = model.predict(input_image[np.newaxis, ...])[0].squeeze()

  panels = [
    ("Input Image", input_image, {}),
    ("Ground Truth Depth", true_depth, {'cmap': 'viridis'}),
    ("Predicted Depth", predicted_depth, {'cmap': 'viridis'}),
  ]

  plt.figure(figsize=(15, 5))
  for position, (title, data, imshow_kwargs) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.imshow(data, **imshow_kwargs)
    plt.title(title)
    plt.axis("off")

  plt.show()

# Prepare the test data
asdfimages_test, asdfdepths_test = load_data_from_csv(TEST_PATH, './')

In [None]:
visualize_inference(r, asdfimages_test, asdfdepths_test)

In [None]:
print("Evaluate")
result = model.evaluate(validation_generator)
dict(zip(model.metrics_names, result))