In [9]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models
import time
import tensorflow_datasets as tfds
import numpy as np

# Load the Cityscapes dataset
dataset, info = tfds.load('cityscapes', split='train', with_info=True)
val_dataset = tfds.load('cityscapes', split='validation')

# Define preprocessing functions

def preprocess_image(features):
    image = features['image_left']
    label = features['segmentation_label']
    image = tf.image.resize(image, (256, 512))
    label = tf.image.resize(label, (256, 512), method='nearest')
    image = tf.cast(image, tf.float32) / 255.0
    label = tf.cast(label, tf.int32)
    return image, label
    

# Preprocess the dataset
dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(2).prefetch(tf.data.experimental.AUTOTUNE)

val_dataset = val_dataset.map(preprocess_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.batch(2).prefetch(tf.data.experimental.AUTOTUNE)

In [10]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, ReLU, GlobalAveragePooling2D, Dense, Add, UpSampling2D, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

num_classes = 34  # Adjusted to match your model's output


# Define the model architecture
def SpatialPath(input_tensor):
    x = Conv2D(64, (7, 7), strides=2, padding='same')(input_tensor)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(128, (3, 3), strides=2, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(256, (3, 3), strides=2, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    return x

def AttentionRefinementModule(input_tensor):
    x = GlobalAveragePooling2D()(input_tensor)
    x = Dense(input_tensor.shape[-1], activation='sigmoid')(x)
    x = tf.keras.layers.Multiply()([input_tensor, x[:, tf.newaxis, tf.newaxis, :]])
    return x

def ContextPath(input_tensor):
    base_model = tf.keras.applications.Xception(include_top=False, weights='imagenet', input_tensor=input_tensor)
    feature_13 = base_model.get_layer('block13_sepconv2_bn').output
    feature_14 = base_model.get_layer('block14_sepconv2_act').output
    feature_13_arm = AttentionRefinementModule(feature_13)
    feature_14_arm = AttentionRefinementModule(feature_14)
    global_context = GlobalAveragePooling2D()(feature_14_arm)
    global_context = tf.keras.layers.Reshape((1, 1, -1))(global_context)
    global_context = tf.keras.layers.UpSampling2D(size=(tf.keras.backend.int_shape(feature_14_arm)[1], tf.keras.backend.int_shape(feature_14_arm)[2]), interpolation='nearest')(global_context)
    feature_14_arm = tf.keras.layers.Add()([feature_14_arm, global_context])
    feature_13_arm = UpSampling2D(size=(2, 2), interpolation='bilinear')(feature_13_arm)
    feature_14_arm = UpSampling2D(size=(4, 4), interpolation='bilinear')(feature_14_arm)
    return feature_13_arm, feature_14_arm

def FeatureFusionModule(spatial_out, context_out):
    concatenated = concatenate([spatial_out, context_out])
    x = Conv2D(256, (3, 3), padding='same')(concatenated)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    pooled = GlobalAveragePooling2D()(x)
    pooled = Dense(256, activation='relu')(pooled)
    pooled = Dense(256, activation='sigmoid')(pooled)
    pooled = tf.keras.layers.Reshape((1, 1, 256))(pooled)
    x = tf.keras.layers.Multiply()([x, pooled])
    return x

# Input layer
input_tensor = Input(shape=(256, 512, 3))

# Spatial Path
spatial_out = SpatialPath(input_tensor)

# Context Path
context_out_13, context_out_14 = ContextPath(input_tensor)

# Feature Fusion Module
fused_out = FeatureFusionModule(spatial_out, context_out_14)

# Upsample the final output
fused_out = UpSampling2D(size=(8, 8), interpolation='bilinear')(fused_out)

# Final classifier
output_tensor = Conv2D(34, (1, 1), activation='softmax')(fused_out)

# Create the model
model = Model(input_tensor, output_tensor)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()

In [8]:
start_time = time.time()
history = model.fit(dataset, epochs=1, validation_data=val_dataset)
end_time = time.time()

# Print the time taken to train the model
print(f"Time taken to train the model: {end_time - start_time:.2f} seconds")

I0000 00:00:1722261607.443217  135246 service.cc:146] XLA service 0x7f3bd8006060 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1722261615.150165  135246 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce GTX 1050 Ti, Compute Capability 6.1
2024-07-29 16:00:51.289654: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
W0000 00:00:1722261656.899871  135246 assert_op.cc:38] Ignoring Assert operator compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert
2024-07-29 16:01:01.257973: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907
2024-07-29 16:01:16.499861: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.45GiB with freed_by_count=0. The caller indicates that this

[1m1487/1488[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 442ms/step - accuracy: 0.7401 - loss: 0.9423

W0000 00:00:1722262365.153306  135246 assert_op.cc:38] Ignoring Assert operator compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert
2024-07-29 16:12:56.490082: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.43GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


[1m1488/1488[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 467ms/step - accuracy: 0.7402 - loss: 0.9421

W0000 00:00:1722262447.229236  135246 assert_op.cc:38] Ignoring Assert operator compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert


[1m1488/1488[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m951s[0m 523ms/step - accuracy: 0.7402 - loss: 0.9420 - val_accuracy: 0.6940 - val_loss: 1.0266
Time taken to train the model: 951.88 seconds
