In [1]:
# ---- Settings -------------------------------------------------------------
SPLIT = 0  # index used in the data file names; previously -1
NUM_OF_CLASSES = 35  # raw label count; the ontology cell later replaces it with the kept-class count
IMAGE_SIZE_REDUCE_FACTOR = 3  # used as the upsampling stride and kernel multiplier in the model
PREV_MODEL_NAME = "segformerb3"  # used in the saved-model path

# ---- Hyperparams ----------------------------------------------------------
BATCH_SIZE = 4
LEARNING_RATE = 1e-5
NUM_OF_INTERMEDIATE = 3  # number of stacked conv layers before and after the upsampling layer

In [2]:
# Load predictions
import numpy as np

# files = "Processed Data/images_"
# with np.load(files + "%d_predictions_%d_%s.npz" % (SPLIT, IMAGE_SIZE_REDUCE_FACTOR, PREV_MODEL_NAME)) as data:
#     x_train = data["train"].astype(np.float16)
#     x_val = data["val"].astype(np.float16)
#     x_test = data["test"].astype(np.float16)

In [3]:
# Load Ontology
import pandas as pd

# Each row is [class_name, output_value, display_color]
ontology_rows = pd.read_csv("Rellis-3D/ontology.csv")[["class_name", "output_value", "display_color"]].values.tolist()
hex_by_class = {name: hex_color for name, _, hex_color in ontology_rows}
value_by_class = {name: value for name, value, _ in ontology_rows}

# Remove extra classes ("dirt" is intentionally kept)
for dropped_class in ("void", "uphill", "downhill"):
    del value_by_class[dropped_class]

# Process colors: "#rrggbb" -> (r, g, b) integers, kept classes only
colors = {
    name: tuple(int(hex_by_class[name][start:start + 2], 16) for start in (1, 3, 5))
    for name in value_by_class
}

# Convert ontology to a boolean mask over the raw label ids:
# ontology[i] is True when raw output value i belongs to a kept class.
kept_values = list(value_by_class.values())
ontology = [label_id in kept_values for label_id in range(NUM_OF_CLASSES)]

# NOTE: NUM_OF_CLASSES is overwritten here with the number of kept classes, so the
# settings cell must be re-run before this cell is executed a second time.
NUM_OF_CLASSES = sum(ontology)

In [4]:
# Process y images
# from pickle import load
# from multiprocessing import Pool

# def pre_process_images(img):
#     return img[1][:, :, ontology]

# y_train = load(open(files + "train_%d.pickle" % SPLIT, "rb"))

# # Normalize images
# print("Processing train")
# with Pool(64) as p:
#     y_train = p.map(pre_process_images, y_train)
# y_train = np.array(y_train).reshape((len(y_train),) + y_train[0].shape).astype(np.float16)
    
# y_val = load(open(files + "val_%d.pickle" % SPLIT, "rb"))

# # Normalize images
# print("Processing val")
# with Pool(64) as p:
#     y_val = p.map(pre_process_images, y_val)
# y_val = np.array(y_val).reshape((len(y_val),) + y_val[0].shape).astype(np.float16)
    
# y_test = load(open(files + "test_%d.pickle" % SPLIT, "rb"))

# # Normalize images
# print("Processing test")
# with Pool(64) as p:
#     y_test = p.map(pre_process_images, y_test)
# y_test = np.array(y_test).reshape((len(y_test),) + y_test[0].shape).astype(np.float16)

In [5]:
# Load pre-training data
import os
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

files = "Processed Data/"


def _load_split(split_name):
    """Read one split archive and return its (input, target) arrays cast to float16."""
    archive_path = files + "%s_%d.npz" % (split_name, SPLIT)
    with np.load(archive_path) as archive:
        inputs = archive["img_oh_ds"].astype(np.float16)
        targets = archive["img_oh"].astype(np.float16)
    return inputs, targets


x_train, y_train = _load_split("train")
x_val, y_val = _load_split("val")
x_test, y_test = _load_split("test")

In [6]:
# This class streams data to the model
# https://stackoverflow.com/a/71592809
from tensorflow.keras.utils import Sequence

class DataGenerator(Sequence):
    """Serve aligned (x, y) mini-batches sliced from two indexable collections.

    Args:
        x_set: model inputs; must support ``len()`` and slicing.
        y_set: targets, with the same length as ``x_set``.
        batch_size: number of samples per batch (the last batch may be smaller).

    Raises:
        ValueError: if the two sets differ in length or ``batch_size`` is below 1.
    """

    def __init__(self, x_set, y_set, batch_size):
        # Let the Keras base class initialise its own state; newer Keras
        # releases emit a warning when a subclass skips this call.
        super().__init__()
        if len(x_set) != len(y_set):
            raise ValueError(
                "x_set and y_set must have the same length, got %d and %d"
                % (len(x_set), len(y_set))
            )
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1, got %r" % (batch_size,))
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches per epoch (ceil of samples / batch_size)."""
        return (len(self.x) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Return the ``idx``-th (inputs, targets) batch."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        return self.x[start:stop], self.y[start:stop]

train = DataGenerator(x_train, y_train, BATCH_SIZE)
val = DataGenerator(x_val, y_val, BATCH_SIZE)
test = DataGenerator(x_test, y_test, BATCH_SIZE)

2023-05-31 23:40:23.344022: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-31 23:40:23.383933: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
import tensorflow as tf

# Turn on memory growth for every GPU TensorFlow can see
visible_gpus = tf.config.list_physical_devices('GPU')
for device in visible_gpus:
    tf.config.experimental.set_memory_growth(device, True)

In [8]:
# Create a distribution strategy
# https://www.tensorflow.org/guide/distributed_training
# NOTE(review): the `with dist_strategy.scope():` line in the model-building cell is
# commented out, so this object does not appear to be used in the visible notebook.
# Confirm whether it is still needed.

# Alternative strategies (currently disabled):
# dist_strategy = tf.distribute.experimental.CentralStorageStrategy()
# dist_strategy = tf.distribute.MirroredStrategy()
dist_strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")

2023-05-31 23:40:25.599337: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:227] Using CUDA malloc Async allocator for GPU: 0
2023-05-31 23:40:25.599424: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22086 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:18:00.0, compute capability: 8.9
2023-05-31 23:40:25.600080: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:227] Using CUDA malloc Async allocator for GPU: 1
2023-05-31 23:40:25.600130: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22275 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:af:00.0, compute capability: 8.6


In [9]:
# Create upsizer
from tensorflow.keras import layers, models, optimizers, metrics

# https://stackoverflow.com/a/71310084 says to put this here
# with dist_strategy.scope():


def _elu_conv(filters, kernel_size):
    """Build a same-padded Conv2D layer with ELU activation."""
    return layers.Conv2D(filters, kernel_size, activation="elu", padding="same")


hidden_filters = NUM_OF_CLASSES * 4
upscaled_kernel = 3 * IMAGE_SIZE_REDUCE_FACTOR

# Low-resolution stage.
# NOTE: building this model was observed to be slow (cause not yet identified).
# The Input layer is not listed in the summary table printed at the end of the cell.
layer_stack = [
    layers.Input(shape=x_train[0].shape),
    _elu_conv(NUM_OF_CLASSES * 2, 3),
]
layer_stack += [_elu_conv(hidden_filters, 3) for _ in range(NUM_OF_INTERMEDIATE)]

# Upsample by IMAGE_SIZE_REDUCE_FACTOR
layer_stack.append(
    layers.Conv2DTranspose(hidden_filters, 1, strides=IMAGE_SIZE_REDUCE_FACTOR, padding="same")
)

# High-resolution stage, ending in a per-pixel softmax over the kept classes
layer_stack += [_elu_conv(hidden_filters, upscaled_kernel) for _ in range(NUM_OF_INTERMEDIATE)]
layer_stack.append(
    layers.Conv2D(NUM_OF_CLASSES, upscaled_kernel, activation="softmax", padding="same")
)

upsizer = models.Sequential(layer_stack)

opt = optimizers.Adam(learning_rate=LEARNING_RATE)

model_metrics = [
    metrics.TopKCategoricalAccuracy(k=top_k, name='Top %d Accuracy' % top_k)
    for top_k in (1, 3, 5)
]
model_metrics.append(
    metrics.MeanIoU(num_classes=NUM_OF_CLASSES, sparse_y_true=False, sparse_y_pred=False)
)

# Alternative loss (disabled): loss="categorical_crossentropy"
upsizer.compile(opt, loss="categorical_focal_crossentropy", metrics=model_metrics)

upsizer.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 400, 640, 38)      6536      
                                                                 
 conv2d_1 (Conv2D)           (None, 400, 640, 76)      26068     
                                                                 
 conv2d_2 (Conv2D)           (None, 400, 640, 76)      52060     
                                                                 
 conv2d_3 (Conv2D)           (None, 400, 640, 76)      52060     
                                                                 
 conv2d_transpose (Conv2DTr  (None, 1200, 1920, 76)    5852      
 anspose)                                                        
                                                                 
 conv2d_4 (Conv2D)           (None, 1200, 1920, 76)    467932    
                                                        

In [None]:
# Fit model
from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has not improved for 5 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# `train` and `val` are Sequence objects that already yield batches of BATCH_SIZE,
# so `batch_size` / `validation_batch_size` are not passed (Keras documents that
# they must not be specified for Sequence inputs).
# The returned History object is kept so the training curves can be inspected later.
history = upsizer.fit(
    train,
    epochs = 100,
    callbacks = [early_stopping],
    validation_data = val,
)

Epoch 1/100


2023-05-31 23:40:26.244529: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


In [None]:
# Calculate metrics on the test set
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Class names for the report and the plots.
# NOTE(review): assumes the key order of `colors` matches the channel order of
# y_test (true when ontology.csv is sorted by output_value) - confirm.
ontology_labels = list(colors.keys())

# One class id per output channel. Passing these explicitly keeps every class in the
# report and the confusion matrix even when it never occurs in the test set;
# otherwise the rows would shift and no longer line up with ontology_labels.
class_ids = list(range(y_test.shape[-1]))
if len(ontology_labels) != len(class_ids):
    raise ValueError(
        "Got %d class names for %d output channels" % (len(ontology_labels), len(class_ids))
    )

upsizer.evaluate(test)
y_pred = upsizer.predict(x_test, batch_size=BATCH_SIZE)

# Collapse the one-hot / probability channels to one class id per pixel
true_ids = np.argmax(y_test, axis=-1).ravel()
pred_ids = np.argmax(y_pred, axis=-1).ravel()

# Classification Report
print(classification_report(true_ids, pred_ids, labels=class_ids, target_names=ontology_labels, zero_division=0))

# Confusion Matrix (rows = true class, columns = predicted class)
con_matrix = confusion_matrix(true_ids, pred_ids, labels=class_ids)
con_disp = ConfusionMatrixDisplay(confusion_matrix=con_matrix, display_labels=ontology_labels)
con_disp.plot()
plt.show()

# IoU Per Class: TP / (TP + FP + FN)
true_pos = np.diag(con_matrix)
predicted_total = con_matrix.sum(axis=0)  # column sums: TP + FP
actual_total = con_matrix.sum(axis=1)     # row sums: TP + FN
union = predicted_total + actual_total - true_pos
print("Class\t\t\tIoU")
for clas, tp, un in zip(ontology_labels, true_pos, union):
    # A class that is neither present nor predicted has no defined IoU
    iou = tp / un if un > 0 else float("nan")
    print("%s\t\t\t%.2f" % (clas, iou * 100))

# Latest run: loss: 0.4106 - Top 1 Accuracy: 0.8647 - Top 3 Accuracy: 0.9856 - Top 5 Accuracy: 0.9921 - mean_io_u: 0.2326

# Top runs:
# Densenet: loss: 0.4264 - Top 1 Accuracy: 0.8653 - Top 3 Accuracy: 0.9846 - Top 5 Accuracy: 0.9921 - mean_io_u: 0.2334
# Nvidia: loss: 0.4958 - Top 1 Accuracy: 0.8499 - Top 3 Accuracy: 0.9796 - Top 5 Accuracy: 0.9917 - mean_io_u: 0.2415
# Nvidia b3: loss: 0.3464 - Top 1 Accuracy: 0.8948 - Top 3 Accuracy: 0.9865 - Top 5 Accuracy: 0.9904 - mean_io_u_1: 0.2907

In [None]:
# Save the trained upsizer. Run this cell only when the run is the best so far:
# the export path depends only on PREV_MODEL_NAME, so every run writes to the same place.
from tensorflow import saved_model

export_dir = "Saved Models/best_upsizer_%s" % PREV_MODEL_NAME
saved_model.save(upsizer, export_dir)

In [None]:
# Display the first test images with the predicted segmentation overlaid
from pickle import load

import matplotlib.pyplot as plt

NUM_PREVIEW_IMAGES = 10

def pre_process_rgb_img(img):
    """Return element 0 of a sample as float16 divided by 255.

    Presumably element 0 is the 8-bit RGB image of the sample - TODO confirm.
    """
    return img[0].astype(np.float16)/255

# NOTE(review): the original loaded the *train* pickle here although the predictions
# below are for the test set. This assumes the test pickle lives next to it and is
# ordered like x_test - confirm. Only unpickle files you created yourself.
with open(files + "test_%d.pickle" % SPLIT, "rb") as pickle_file:
    rbg_test = load(pickle_file)

# Normalize images (only the handful that are displayed)
print("Processing test")
rbg_test = np.stack([pre_process_rgb_img(sample) for sample in rbg_test[:NUM_PREVIEW_IMAGES]])

# Use the training batch size so the full-resolution activations fit in GPU memory
preview_pred = upsizer.predict(x_test[:NUM_PREVIEW_IMAGES], batch_size=BATCH_SIZE)

# Row k holds the RGB display colour of output channel k
palette = np.array(list(colors.values()), dtype=np.uint8)

for original, image in zip(rbg_test, preview_pred):
    colored_image = palette[np.argmax(image, axis=-1)]
    # imshow does not accept float16, hence the cast
    plt.imshow(original.astype(np.float32))
    plt.imshow(colored_image, alpha=0.5)
    plt.show()