In [1]:
# Based on this https://github.com/huggingface/notebooks/blob/main/examples/semantic_segmentation-tf.ipynb
# PARAMS
SPLIT = -1
IMAGE_SIZE_REDUCE_FACTOR = 3
NUM_OF_CLASSES = 35
IMAGE_SHAPE = (1200//IMAGE_SIZE_REDUCE_FACTOR, 1920//IMAGE_SIZE_REDUCE_FACTOR)

# MODEL PARAMS
DROPOUT = 0.5
ENCODER_BLOCKS = 4 # If changed, need to add strides, etc
# https://huggingface.co/docs/transformers/v4.27.2/en/model_doc/segformer#transformers.SegformerConfig.depths
BATCH_SIZE = 8
LEARNING_RATE = 0.00001/5/2 # Added the over 2 because now were continuing training from before

In [2]:
# Load Ontology
import pandas as pd

ontology = pd.read_csv("Rellis-3D/ontology.csv")[["class_name", "output_value", "display_color"]].values.tolist()
colors = {v[0]: v[2] for v in ontology}
ontology = {v[0]: v[1] for v in ontology}

# Remove extra classes
del ontology["void"]
# del ontology["dirt"]
del ontology["uphill"]
del ontology["downhill"]

# Extra Ontology
# Prob should copy elsewhere
label2id = ontology
id2label = {v: k for k, v in label2id.items()}

# Process colors
colors = {c: (int(colors[c][1:3], 16), int(colors[c][3:5], 16), int(colors[c][5:7], 16)) for c in ontology.keys()}

# Convert ontology to color map
ontology = list(ontology.values())
ontology = [i in ontology for i in range(NUM_OF_CLASSES)]

NUM_OF_CLASSES = ontology.count(True)

print(label2id)

{'dirt': 1, 'grass': 3, 'tree': 4, 'pole': 5, 'water': 6, 'sky': 7, 'vehicle': 8, 'object': 9, 'asphalt': 10, 'building': 12, 'log': 15, 'person': 17, 'fence': 18, 'bush': 19, 'concrete': 23, 'barrier': 27, 'puddle': 31, 'mud': 33, 'rubble': 34}


In [3]:
# Code to deal with changes in files needed for segformer
import os
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

import tensorflow as tf
import numpy as np


def channel_first(images):
    return np.transpose(images, axes=[0, 3, 1, 2])

def convert_labels_to_argmaxes(images):
    return np.argmax(images, axis=-1)

2023-06-13 22:23:19.589329: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-13 22:23:19.627603: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Load and process images
files = "Processed Data/"

with np.load(files + "%s_%d.npz" % ("train", SPLIT)) as data:
    x_train, y_train = data["img_depth_ds"].astype(np.float16), data["img_oh_ds"].astype(np.float16)
    
with np.load(files + "%s_%d.npz" % ("val", SPLIT)) as data:
    x_val, y_val = data["img_depth_ds"].astype(np.float16), data["img_oh_ds"].astype(np.float16)

In [5]:
print(y_train.shape)
x_train, y_train = channel_first(x_train), convert_labels_to_argmaxes(y_train)
x_val, y_val = channel_first(x_val), convert_labels_to_argmaxes(y_val)

(3301, 400, 640, 19)


In [6]:
# Setup model
from transformers import SegformerConfig, SegformerImageProcessor

config = SegformerConfig(
                            num_channels=4,
                            hidden_dropout_prob=DROPOUT,
                            num_encoder_blocks=ENCODER_BLOCKS,
                            num_labels=NUM_OF_CLASSES,
                            id2label=id2label,
                            label2id=label2id,
                        )

preprocessor = SegformerImageProcessor(do_resize=False,
                                       size={"height": IMAGE_SHAPE[0], "width": IMAGE_SHAPE[1]},
                                       do_rescale=False,
                                       do_normalize=False,
                                       do_reduce_labels=False
                                      )

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Preprocess images
train = preprocessor.preprocess(x_train, segmentation_maps=y_train, return_tensors="np", data_format=None) # , data_format="channels_first")
val = preprocessor.preprocess(x_val, segmentation_maps=y_val, return_tensors="np", data_format=None) # , data_format="channels_first")

In [8]:
# This class streams data to the model
# https://stackoverflow.com/a/71592809
from tensorflow.keras.utils import Sequence

class DataGenerator(Sequence):
    def __init__(self, x_set, y_set, batch_size):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        return batch_x, batch_y

train = DataGenerator(train["pixel_values"], train["labels"].astype(np.int8), BATCH_SIZE)
val = DataGenerator(val["pixel_values"], val["labels"].astype(np.int8), BATCH_SIZE)

In [9]:
# Create a distribution strategy
# https://www.tensorflow.org/guide/distributed_training

# dist_strategy = tf.distribute.experimental.CentralStorageStrategy()

In [10]:
# Create model
from transformers import TFSegformerForSemanticSegmentation
from tensorflow.keras import models, optimizers, losses

# with dist_strategy.scope():
# segformer = TFSegformerForSemanticSegmentation(config,) Want to try a pre-trained model
# segformer = TFSegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b3-finetuned-cityscapes-1024-1024",
#                                                                 num_channels=3,
#                                                                 hidden_dropout_prob=DROPOUT,
#                                                                 num_encoder_blocks=ENCODER_BLOCKS,
#                                                                 num_labels=NUM_OF_CLASSES,
#                                                                 id2label=id2label,
#                                                                 label2id=label2id,
#                                                                 ignore_mismatched_sizes=True
#                                                               )

# previous segformer
# segformer = TFSegformerForSemanticSegmentation.from_pretrained("Saved Models/segformer")

segformer = TFSegformerForSemanticSegmentation.from_pretrained("Saved Models/segformer-b3-depth")

# segformer = TFSegformerForSemanticSegmentation.from_pretrained("nvidia/mit-b3",
#                                                                 num_channels=4,
# #                                                                 hidden_dropout_prob=DROPOUT,
#                                                                 num_encoder_blocks=ENCODER_BLOCKS,
#                                                                 num_labels=NUM_OF_CLASSES,
#                                                                 id2label=id2label,
#                                                                 label2id=label2id,
#                                                                 ignore_mismatched_sizes=True
#                                                               )

opt = optimizers.Adam(learning_rate=LEARNING_RATE)

categorical_focal = tf.keras.losses.CategoricalFocalCrossentropy()
def sparse_categorical_focal(y_true, y_pred):
    y_true = tf.one_hot(y_true, depth=NUM_OF_CLASSES, axis=1)
    scale_down = 4 # y_true.shape[2] // y_pred.shape[2]
    y_true = tf.nn.avg_pool(
        y_true, scale_down, scale_down, "VALID", data_format="NCHW", name=None
    )
    return categorical_focal(y_true, y_pred)

segformer.compile(opt, loss=sparse_categorical_focal) # No loss to use default huggingface loss

2023-06-13 22:27:53.495580: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:227] Using CUDA malloc Async allocator for GPU: 0
2023-06-13 22:27:53.495664: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22051 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:18:00.0, compute capability: 8.9
2023-06-13 22:27:53.496290: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:227] Using CUDA malloc Async allocator for GPU: 1
2023-06-13 22:27:53.496335: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22266 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:af:00.0, compute capability: 8.6
2023-06-13 22:27:54.617322: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2023-06-13 22:27:55.092919: W tensorflow/compiler/xla/stream_executor/gpu/asm_compil



All model checkpoint layers were used when initializing TFSegformerForSemanticSegmentation.

All the layers of TFSegformerForSemanticSegmentation were initialized from the model checkpoint at Saved Models/segformer-b3-depth.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFSegformerForSemanticSegmentation for predictions without further training.


In [11]:
# My own metrics callback
# Some from here (show_predictions) https://keras.io/examples/vision/segformer/
from tensorflow.keras.callbacks import Callback
from tensorflow.keras import metrics

class MetricCallback(Callback):
    def __init__(self, validation, patience=None, **kwargs):
        super().__init__(**kwargs)
        self.validation = validation
        self.metrics = [  
            metrics.SparseTopKCategoricalAccuracy(k=1, name='Top 1 Accuracy'),
            metrics.SparseTopKCategoricalAccuracy(k=3, name='Top 3 Accuracy'),
            metrics.SparseTopKCategoricalAccuracy(k=5, name='Top 5 Accuracy'),
            metrics.MeanIoU(num_classes=NUM_OF_CLASSES, sparse_y_true=True, sparse_y_pred=False, name='Mean IoU')
        ]
        
        # For early stopping
        self.patience = patience
        self.past_val_losses = []
        self.best = None
    
    def on_epoch_end(self, epoch, logs=None):
        self.run_metrics()
        self.early_stopping()
    
    def early_stopping(self):
        if self.patience:
            best_loss = min(self.past_val_losses)
            if best_loss == self.past_val_losses[-1]:
                self.best_weights = self.model.get_weights()
            elif best_loss not in self.past_val_losses[-self.patience:]:
                self.model.stop_training = True
                self.model.set_weights(self.best_weights)
            print("Best Val Loss: %.5f" % best_loss)
    
    def upscale_logits(self, pred_masks, size, np=True):
        pred_masks = tf.transpose(pred_masks, perm=[0, 2, 3, 1])
            
        pred_masks = tf.image.resize(
            pred_masks,
            size=size,
            method="bilinear",
        )
        
        if np:
            return pred_masks.numpy()
        return pred_masks
    
    def run_metrics(self):
        metrics_res = [[] for _ in self.metrics]
        
        for samples in self.validation:
            images, masks = samples[0], samples[1]
            pred_masks = self.model.predict(images, verbose=0).logits
            pred_masks = self.upscale_logits(pred_masks, samples[1].shape[1:], np=False)
            
#             pred_masks = tf.argmax(pred_masks, axis=-1)
            
            for metric in self.metrics:
                metric.update_state(y_true=masks, y_pred=pred_masks)
            
            # Class based metrics
            
        for metric in self.metrics:
                print("%s: %.5f" % (metric.name, metric.result().numpy()))
                metric.reset_state()
        
        val_loss = self.model.evaluate(self.validation, verbose=0)
        print("Val Loss: %.5f" % val_loss)
        self.past_val_losses.append(val_loss)
 

In [12]:
# Fit model
from tensorflow.keras.callbacks import EarlyStopping

segformer.fit(
    train,
    batch_size = BATCH_SIZE,
    epochs = 300,
    callbacks = [
        # EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
        MetricCallback(val, patience=5)
    ],
)

Epoch 1/300


2023-06-13 22:27:58.561839: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2023-06-13 22:28:46.240804: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape intf_segformer_for_semantic_segmentation/decode_head/dropout_84/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2023-06-13 22:28:59.698437: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.


Top 3 Accuracy: 0.78973
Top 5 Accuracy: 0.82476
Mean IoU: 0.09762


2023-06-13 22:33:55.897402: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Val Loss: 11.05243
Best Val Loss: 11.05243
Epoch 2/300
Top 3 Accuracy: 0.75966
Top 5 Accuracy: 0.77957
Mean IoU: 0.08984


2023-06-13 22:38:18.943554: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Val Loss: 11.00578
Best Val Loss: 11.00578
Epoch 3/300
Top 3 Accuracy: 0.55517
Top 5 Accuracy: 0.58233
Mean IoU: 0.04908


2023-06-13 22:42:34.587940: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Val Loss: 10.91402
Best Val Loss: 10.91402
Epoch 4/300
Top 3 Accuracy: 0.46178
Top 5 Accuracy: 0.50942
Mean IoU: 0.04777


2023-06-13 22:46:50.463841: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Val Loss: 11.67637
Best Val Loss: 10.91402
Epoch 5/300
Top 3 Accuracy: 0.37985
Top 5 Accuracy: 0.41167
Mean IoU: 0.04359


2023-06-13 22:51:06.641426: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Val Loss: 11.29208
Best Val Loss: 10.91402
Epoch 6/300
Top 3 Accuracy: 0.37015
Top 5 Accuracy: 0.39707
Mean IoU: 0.03764


2023-06-13 22:55:24.684557: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Val Loss: 11.10769
Best Val Loss: 10.91402
Epoch 7/300
Top 3 Accuracy: 0.42792
Top 5 Accuracy: 0.47295
Mean IoU: 0.03899


2023-06-13 22:59:38.023321: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Val Loss: 11.56352
Best Val Loss: 10.91402
Epoch 8/300
Top 3 Accuracy: 0.51872
Top 5 Accuracy: 0.55219
Mean IoU: 0.04973


2023-06-13 23:03:54.910893: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Val Loss: 11.37056
Best Val Loss: 10.91402


<keras.src.callbacks.History at 0x7fb0ec45b5b0>

In [13]:
# Model summary, just to see number of neurons
segformer.summary()

Model: "tf_segformer_for_semantic_segmentation"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 segformer (TFSegformerMain  multiple                  44075264  
 Layer)                                                          
                                                                 
 decode_head (TFSegformerDe  multiple                  3166483   
 codeHead)                                                       
                                                                 
Total params: 47241747 (180.21 MB)
Trainable params: 47240211 (180.21 MB)
Non-trainable params: 1536 (6.00 KB)
_________________________________________________________________


In [14]:
# Load test set
with np.load(files + "%s_%d.npz" % ("train", SPLIT)) as data:
    x_test, y_test = data["img_depth_ds"].astype(np.float16), data["img_oh_ds"].astype(np.float16)

x_test, y_test = channel_first(x_test), convert_labels_to_argmaxes(y_test)
test = preprocessor.preprocess(x_test, segmentation_maps=y_test, return_tensors="np", data_format=None)

# Create sequence
test = DataGenerator(test["pixel_values"], test["labels"].astype(np.int8), BATCH_SIZE)

In [15]:
# Evaluate
evaluator = MetricCallback(test)

evaluator.model = segformer
evaluator.run_metrics()

# Best stats:
"""
b2 ---
Top 1 Accuracy: 0.89751
Top 3 Accuracy: 0.99120
Top 5 Accuracy: 0.99471
Mean IoU: 0.34790
Val Loss: 0.32299

b3 --- Can prob improve with better hyperparams
Top 1 Accuracy: 0.91141
Top 3 Accuracy: 0.99060
Top 5 Accuracy: 0.99402
Mean IoU: 0.30426
Val Loss: 0.27632

b3-depth ---
Top 1 Accuracy: 0.91694
Top 3 Accuracy: 0.99749
Top 5 Accuracy: 0.99911
Mean IoU: 0.46011
Val Loss: 0.21744
"""

Top 1 Accuracy: 0.26293
Top 3 Accuracy: 0.51466
Top 5 Accuracy: 0.55209
Mean IoU: 0.04016


2023-06-13 23:13:11.732114: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Val Loss: 11.10847


'\nb2 ---\nTop 1 Accuracy: 0.89751\nTop 3 Accuracy: 0.99120\nTop 5 Accuracy: 0.99471\nMean IoU: 0.34790\nVal Loss: 0.32299\n\nb3 --- Can prob improve with better hyperparams\nTop 1 Accuracy: 0.91141\nTop 3 Accuracy: 0.99060\nTop 5 Accuracy: 0.99402\nMean IoU: 0.30426\nVal Loss: 0.27632\n\nb3-depth ---\nTop 1 Accuracy: 0.91694\nTop 3 Accuracy: 0.99749\nTop 5 Accuracy: 0.99911\nMean IoU: 0.46011\nVal Loss: 0.21744\n'

In [None]:
# Display 5 test images
import matplotlib.pyplot as plt

print(x_test.shape)

for original, image in zip(x_test[0:10, :3, :, :], segformer.predict(x_test[0:10]).logits):
    image = evaluator.upscale_logits(np.expand_dims(image, axis=0), IMAGE_SHAPE)[0]
    original = np.transpose(original, axes=[1, 2, 0])
    colored_image = np.argmax(image, axis=-1)
    colored_image = np.array(list(colors.values()))[colored_image]
    plt.imshow(original.astype(np.float32))
    plt.imshow(colored_image, alpha=0.5)
    plt.show()

In [None]:
# # ONLY UNCOMMENT IF THIS IS THE BEST MODEL
# # TODO: Save figures for upscaling output, run in separate upscaler notebook
from tensorflow import device

train_to_be_upscaled = segformer.predict(train, batch_size=BATCH_SIZE).logits
val_to_be_upscaled = segformer.predict(val, batch_size=BATCH_SIZE).logits
test_to_be_upscaled = segformer.predict(test, batch_size=BATCH_SIZE).logits

with device("cpu:0"):
    train_to_be_upscaled = evaluator.upscale_logits(train_to_be_upscaled, IMAGE_SHAPE)
    val_to_be_upscaled = evaluator.upscale_logits(val_to_be_upscaled, IMAGE_SHAPE)
    test_to_be_upscaled = evaluator.upscale_logits(test_to_be_upscaled, IMAGE_SHAPE)


np.savez("Processed Data/images_%d_predictions_%d_segformer.npz" % (SPLIT, IMAGE_SIZE_REDUCE_FACTOR), train=train_to_be_upscaled, val=val_to_be_upscaled, test=test_to_be_upscaled)

In [None]:
# ONLY UNCOMMENT IF THIS IS THE BEST MODEL
from tensorflow import saved_model

segformer.save_pretrained("Saved Models/segformer-b3-depth")