In [1]:
!python --version
!nvcc --version
!pip list

Python 3.8.17
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:41:10_Pacific_Daylight_Time_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
Package                 Version
----------------------- ---------
absl-py                 0.15.0
anyio                   3.5.0
appdirs                 1.4.4
argon2-cffi             21.3.0
argon2-cffi-bindings    21.2.0
asttokens               2.0.5
astunparse              1.6.3
attrs                   22.1.0
Babel                   2.11.0
backcall                0.2.0
basemap-data            1.3.2
bayesian-optimization   1.4.3
bleach                  4.1.0
boto3                   1.24.28
botocore                1.27.59
Bottleneck              1.3.5
brotlipy                0.7.0
cachetools              5.3.1
certifi                 2023.7.22
cffi                    1.15.1
charset-normalizer      2.0.4
click                   8.0.4
cloudpickle         

In [2]:
import tensorflow as tf
# List every device TensorFlow can see, using the stable (non-experimental) API.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import scipy.io
import shutil

In [4]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
def read_yolo_annotation_file(file_path):
    """Parse a YOLO-format label file into a list of annotations.

    Every non-blank line must hold five whitespace-separated fields:
    ``class_id x_center y_center width height``.

    Args:
        file_path: Path of the ``.txt`` label file to read.

    Returns:
        A list with one ``[class_id, x_center, y_center, width, height]`` entry
        per annotation line; ``class_id`` is an ``int`` and the remaining four
        values are ``float``. Blank lines are ignored, so an empty file
        yields ``[]``.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields or
            one of its fields is not numeric.
    """
    annotations = []
    with open(file_path, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            # split() with no argument tolerates tabs, repeated spaces and the
            # trailing newline, and returns [] for blank lines.
            fields = line.split()
            if not fields:
                continue
            if len(fields) != 5:
                raise ValueError(
                    f"{file_path}:{line_number}: expected 5 fields "
                    f"(class x_center y_center width height), got {len(fields)}"
                )
            annotations.append([int(fields[0]), *map(float, fields[1:])])

    return annotations

In [5]:
# Dataset root; each split folder holds an 'images' and a 'labels' sub-folder.
full_data_path = "Aero Landing Zone Object Detection.v5i.yolov8"

train_image_dir, train_label_dir = (
    os.path.join(full_data_path, 'train', leaf) for leaf in ('images', 'labels')
)
valid_image_dir, valid_label_dir = (
    os.path.join(full_data_path, 'valid', leaf) for leaf in ('images', 'labels')
)

# Side length (pixels) images are resized to; adjust to your desired image size.
image_size = 400
batch_size = 32
# Side length (pixels) of the patches extracted from the input images.
patch_size = 100
num_classes = 3

In [6]:
import os
import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import load_img, img_to_array

class ObjectDetectionDataLoader(Sequence):
    """Keras ``Sequence`` yielding image batches with box and class targets.

    Images are read from ``image_dir`` and their YOLO label files (same file
    stem, ``.txt`` extension) from ``label_dir``. One training sample is
    emitted per annotation, so an image with several annotated objects appears
    several times in a batch and an image without annotations contributes
    nothing; the number of rows in a batch can therefore differ from
    ``batch_size``.
    """

    # Lower-case file extensions accepted as images when scanning image_dir.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')

    def __init__(self, image_dir, label_dir, image_size, batch_size=32, shuffle=True,
                 n_classes=None):
        """Index the image folder.

        Args:
            image_dir: Folder containing the image files.
            label_dir: Folder containing one YOLO ``.txt`` file per image.
            image_size: Side length in pixels every image is resized to.
            batch_size: Number of image files drawn per batch.
            shuffle: Whether to shuffle the file order now and after each epoch.
            n_classes: Length of the one-hot class vector. When ``None`` the
                module-level ``num_classes`` is looked up at batch time.
        """
        super().__init__()
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.image_size = image_size
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.n_classes = n_classes

        # Keep only image files and sort them: os.listdir order is arbitrary
        # and the folder may contain non-image entries.
        self.files = sorted(
            name for name in os.listdir(self.image_dir)
            if os.path.splitext(name)[1].lower() in self.IMAGE_EXTENSIONS
        )
        self.indices = np.arange(len(self.files))

        if self.shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        """Return the number of batches per epoch (last batch may be partial)."""
        return int(np.ceil(len(self.indices) / self.batch_size))

    def __getitem__(self, index):
        """Build batch ``index``.

        Returns:
            A tuple ``(images, targets)`` where ``images`` is an array of
            resized images and ``targets`` is a dict with keys
            ``'bounding_box'`` (rows of ``x_center, y_center, width, height``
            as stored in the label file) and ``'class_predictions'``
            (one-hot class rows).
        """
        batch_indices = self.indices[index * self.batch_size: (index + 1) * self.batch_size]
        n_classes = num_classes if self.n_classes is None else self.n_classes

        batch_images = []
        batch_annotations = []
        batch_class_labels = []

        for idx in batch_indices:
            file_name = self.files[idx]
            stem = os.path.splitext(file_name)[0]
            label_path = os.path.join(self.label_dir, stem + '.txt')
            # An image without a label file carries no annotations.
            if not os.path.exists(label_path):
                continue
            annotations = read_yolo_annotation_file(label_path)
            if not annotations:
                continue  # nothing to emit, so skip decoding the image

            image = load_img(os.path.join(self.image_dir, file_name))
            image = image.resize((self.image_size, self.image_size))
            pixels = img_to_array(image)  # converted once, shared by all annotations

            for annotation in annotations:
                class_id = annotation[0]
                one_hot = np.zeros(n_classes, dtype=np.float32)
                # Out-of-range ids stay all-zero, matching tf.one_hot semantics.
                if 0 <= class_id < n_classes:
                    one_hot[class_id] = 1.0
                batch_annotations.append(annotation[1:])
                batch_images.append(pixels)
                batch_class_labels.append(one_hot)

        if batch_images:
            images = np.array(batch_images)
        else:
            # Keep a well-formed (0, H, W, 3) array when no annotation was found.
            images = np.zeros((0, self.image_size, self.image_size, 3), dtype=np.float32)

        targets = {
            'bounding_box': np.array(batch_annotations).reshape(-1, 4),
            'class_predictions': np.array(batch_class_labels, dtype=np.float32).reshape(-1, n_classes),
        }
        return images, targets

    def on_epoch_end(self):
        """Reshuffle the file order between epochs when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

# Training loader uses the default shuffle=True; the validation loader keeps a fixed order.
train_loader = ObjectDetectionDataLoader(
    image_dir=train_image_dir,
    label_dir=train_label_dir,
    image_size=image_size,
    batch_size=batch_size,
)
test_loader = ObjectDetectionDataLoader(
    image_dir=valid_image_dir,
    label_dir=valid_label_dir,
    image_size=image_size,
    batch_size=batch_size,
    shuffle=False,
)

In [7]:
def mlp(x, hidden_units, dropout_rate):
    """Stack Dense(GELU) + Dropout pairs on top of ``x``.

    Args:
        x: Input tensor.
        hidden_units: Iterable with the width of each Dense layer, in order.
        dropout_rate: Rate used by the Dropout layer after every Dense layer.

    Returns:
        The output of the last Dropout layer, or ``x`` unchanged when
        ``hidden_units`` is empty.
    """
    hidden = x
    for width in hidden_units:
        projected = layers.Dense(width, activation=tf.nn.gelu)(hidden)
        hidden = layers.Dropout(dropout_rate)(projected)
    return hidden

In [8]:
class Patches(layers.Layer):
    """Layer that cuts each input image into non-overlapping square patches."""

    def __init__(self, patch_size, **kwargs):
        """Create the layer.

        Args:
            patch_size: Side length in pixels of each square patch; also used
                as the stride, so patches do not overlap.
            **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``, ...),
                forwarded so the layer can be rebuilt from its config.
        """
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config()
        # Only serialize what __init__ accepts; anything else would break
        # from_config when the model is loaded.
        config.update({"patch_size": self.patch_size})
        return config

    def call(self, images):
        """Extract patches and flatten them per image.

        Args:
            images: Batch of images, indexed as [batch, height, width, channels].

        Returns:
            A tensor of shape [batch, patches_per_image, values_per_patch].
        """
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        # Collapse the patch grid into a single sequence axis.
        return tf.reshape(patches, [batch_size, -1, patches.shape[-1]])

In [9]:
class PatchEncoder(layers.Layer):
    """Project flattened patches linearly and add a learned position embedding."""

    def __init__(self, num_patches, projection_dim, **kwargs):
        """Create the encoder.

        Args:
            num_patches: Number of patches per image (size of the position
                embedding table).
            projection_dim: Output width of the patch projection and of the
                position embedding.
            **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``, ...),
                forwarded so the layer can be rebuilt from its config.
        """
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.projection_dim = projection_dim
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config()
        # Only serialize what __init__ accepts; anything else would break
        # from_config when the model is loaded.
        config.update(
            {
                "num_patches": self.num_patches,
                "projection_dim": self.projection_dim,
            }
        )
        return config

    def call(self, patch):
        """Return ``projection(patch) + position_embedding(0..num_patches-1)``."""
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

In [10]:
def calculate_iou(y_true, y_pred):
    """Mean intersection-over-union between target and predicted boxes.

    Both arguments hold boxes in the last axis as
    ``(x_center, y_center, width, height)``, the layout produced from the YOLO
    label files. Each box is converted to corner coordinates and the true
    geometric IoU is computed per box, then averaged.

    Args:
        y_true: Target boxes, shape ``[..., 4]``.
        y_pred: Predicted boxes, shape ``[..., 4]``.

    Returns:
        A scalar tensor with the mean IoU over all boxes. Pairs whose union
        area is zero contribute 0 instead of NaN.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    def to_corners(boxes):
        # The regression head is unbounded, so clamp negative sizes to zero.
        half_w = tf.maximum(boxes[..., 2], 0.0) / 2.0
        half_h = tf.maximum(boxes[..., 3], 0.0) / 2.0
        return (
            boxes[..., 0] - half_w,
            boxes[..., 1] - half_h,
            boxes[..., 0] + half_w,
            boxes[..., 1] + half_h,
        )

    t_x1, t_y1, t_x2, t_y2 = to_corners(y_true)
    p_x1, p_y1, p_x2, p_y2 = to_corners(y_pred)

    # Overlap rectangle; a negative extent means the boxes do not intersect.
    inter_w = tf.maximum(tf.minimum(t_x2, p_x2) - tf.maximum(t_x1, p_x1), 0.0)
    inter_h = tf.maximum(tf.minimum(t_y2, p_y2) - tf.maximum(t_y1, p_y1), 0.0)
    intersection = inter_w * inter_h

    true_area = (t_x2 - t_x1) * (t_y2 - t_y1)
    pred_area = (p_x2 - p_x1) * (p_y2 - p_y1)
    union = true_area + pred_area - intersection

    return tf.reduce_mean(tf.math.divide_no_nan(intersection, union))

In [11]:
from tensorflow.keras.utils import plot_model
from IPython.display import Image, display
# Both monitored quantities improve when they grow. mode="max" must be explicit:
# the default "auto" only infers "max" for accuracy/AUC-like metric names, so the
# IoU monitor would otherwise be treated as a quantity to minimize.
stop_early_1 = keras.callbacks.EarlyStopping(
    monitor="val_class_predictions_accuracy", mode="max", patience=40
)
stop_early_2 = keras.callbacks.EarlyStopping(
    monitor="val_bounding_box_calculate_iou", mode="max", patience=40
)
def run_experiment(model, batch_size, num_epochs, output_dir=None):
    """Train ``model`` on ``train_loader`` and save an architecture diagram.

    Args:
        model: Compiled Keras model to train.
        batch_size: Kept for backward compatibility and not forwarded to
            ``fit``; the batches are produced by ``train_loader`` /
            ``test_loader``, which carry their own batch size.
        num_epochs: Number of epochs to train for.
        output_dir: Folder for ``architecture.png``. When ``None`` the global
            ``new_folder`` is used if it exists, otherwise ``'outputs'``.

    Returns:
        A tuple ``(history, model)`` with the ``History`` object returned by
        ``fit`` and the trained model.
    """
    # Imported locally so a later `from PIL import Image` in the notebook
    # cannot shadow the IPython display class used here.
    from IPython.display import Image as NotebookImage, display as show

    if output_dir is None:
        output_dir = globals().get('new_folder', 'outputs')
    os.makedirs(output_dir, exist_ok=True)

    # Train the model using both bounding boxes and class labels as targets.
    # The early-stopping callbacks (stop_early_1 / stop_early_2) are currently
    # not passed to fit.
    with tf.device('/GPU:0'):
        history = model.fit(
            x=train_loader,
            epochs=num_epochs,
            validation_data=test_loader,
        )

    dot_img_file = os.path.join(output_dir, 'architecture.png')
    try:
        tf.keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True)
        show(NotebookImage(filename=dot_img_file))
    except (ImportError, OSError) as err:
        # A missing pydot/graphviz install must not discard the training result.
        print(f'Skipping architecture diagram: {err}')

    return history, model


In [12]:
# Build-and-train settings.
batch_size, num_epochs = 32, 1000
num_predictions, grid_size = 4, 3

def build_model(hp=None):
    """Build and compile the ViT-style detector with box and class outputs.

    Args:
        hp: Optional KerasTuner ``HyperParameters`` object. When given, the
            learning rate, weight decay, projection width, head count and
            transformer depth are sampled from it; when ``None`` the fixed
            values below are used.

    Returns:
        A compiled ``keras.Model`` with two outputs: ``'bounding_box'``
        (4 linear units, MSE loss, ``calculate_iou`` metric) and
        ``'class_predictions'`` (``num_classes`` softmax units, categorical
        cross-entropy loss, accuracy metric).
    """
    if hp is None:
        learning_rate = 0.00685
        weight_decay = 0.0002
        projection_dim = 36
        num_heads = 8
        transformer_layers = 58
    else:
        learning_rate = hp.Float("learning_rate", 0.0001, 0.01, step=0.00005)
        weight_decay = hp.Float("weight_decay", 0.0001, 0.001, step=0.00005)
        projection_dim = hp.Int("projection_dim", min_value=1, max_value=64, step=1)
        num_heads = hp.Int("num_heads", min_value=1, max_value=32, step=1)
        transformer_layers = hp.Int("transformer_layers", min_value=1, max_value=100, step=1)

    input_shape = (image_size, image_size, 3)  # input image shape
    # Widths of the two Dense layers inside every transformer block.
    transformer_units = [projection_dim * 2, projection_dim]
    num_patches = (image_size // patch_size) ** 2

    # (units, dropout rate) of each Dense + Dropout pair in the shared head, in order.
    head_layers = [
        (704, 0.1), (32, 0.6), (960, 0.4), (160, 0.6), (352, 0.1), (224, 0.7),
        (480, 0.7), (160, 0.3), (736, 0.6), (352, 0.5), (320, 0.1), (1024, 0.4),
        (544, 0.4), (384, 0.4), (384, 0.3), (896, 0.3), (768, 0.7),
    ]

    inputs = layers.Input(shape=input_shape)
    # Create patches
    patches = Patches(patch_size)(inputs)
    # Encode patches
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, encoded_patches])
        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # MLP
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
        # Skip connection 2.
        encoded_patches = layers.Add()([x3, x2])

    # Normalize, then flatten the patch sequence into one vector per image.
    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.3)(representation)

    # Shared dense head.
    for units, rate in head_layers:
        representation = layers.Dense(units=units, activation="relu")(representation)
        representation = layers.Dropout(rate)(representation)

    # Final dense layer for bounding box prediction
    bounding_box = layers.Dense(4, name='bounding_box')(representation)

    # Final dense layer for class prediction
    class_predictions = layers.Dense(
        num_classes, activation='softmax', name='class_predictions'
    )(representation)

    # Keras model with both bounding box and class predictions
    model = keras.Model(inputs=inputs, outputs=[bounding_box, class_predictions])

    optimizer = tfa.optimizers.AdamW(
        learning_rate=learning_rate, weight_decay=weight_decay
    )

    # Compile with the configured AdamW instance so learning_rate and
    # weight_decay actually take effect.
    model.compile(
        optimizer=optimizer,
        loss={'bounding_box': 'mse', 'class_predictions': 'categorical_crossentropy'},
        metrics={'class_predictions': 'accuracy', 'bounding_box': calculate_iou}
    )

    return model
print(f'shape of softmax: {num_classes}')
model = build_model()
# Train model. `history` is bound first so the name exists even if the call below raises.
history = []
history, model = run_experiment(model=model, batch_size=batch_size, num_epochs=num_epochs)

shape of softmax: 80
Epoch 1/1000


KeyboardInterrupt: 

In [None]:
import keras_tuner as kt
# Instantiate the tuner
tuner = kt.Hyperband(
    build_model,
    objective=[
        kt.Objective("val_bounding_box_calculate_iou", direction="max"),
        kt.Objective("val_class_predictions_accuracy", direction="max"),
    ],
    max_epochs=100,
    factor=3,
    hyperband_iterations=1,
    # Raw string: "\k" is not a valid escape sequence in a normal string literal.
    # NOTE(review): machine-specific absolute path; consider a project-relative folder.
    directory=r"D:\kt_dir",
    project_name="kt_hyperband",
    overwrite=True,
)
# Display search space summary
tuner.search_space_summary()

In [None]:
# Search with the same loaders used for training. The in-memory arrays this
# cell used to reference (x_train and the *_y_train label arrays) are not
# defined in the visible cells. Batch size and the validation set come from the
# loaders, so batch_size / validation_split are not passed.
with tf.device('/GPU:0'):
    tuner.search(
        x=train_loader,
        epochs=10,
        validation_data=test_loader,
        verbose=1,
        callbacks=[stop_early_1, stop_early_2],
    )
            

In [None]:
# Keep the single top-ranked model found by the tuner.
best_model = tuner.get_best_models(num_models=1)[0]

In [None]:
# Hyperparameter values of the top-ranked trial (debug prints removed).
best_hps = tuner.get_best_hyperparameters()[0]
print(best_hps.values)

In [None]:
# NOTE(review): leftover debugging probe; this cell carries no analysis value.
print("hello")

In [None]:
# `new_folder` is not assigned in the visible cells; fall back to ./outputs so
# this cell also runs on a fresh kernel.
figure_dir = globals().get('new_folder', 'outputs')
os.makedirs(figure_dir, exist_ok=True)

fig, ax = plt.subplots()
ax.plot(history.history['class_predictions_accuracy'], label='train')
ax.plot(history.history['val_class_predictions_accuracy'], label='validation')
ax.set(title='Prediction Accuracy', xlabel='epoch', ylabel='accuracy')
ax.legend(loc='upper left')
model_file = os.path.join(figure_dir, 'class predictions accuracy.png')
fig.savefig(model_file)
plt.show()

In [None]:
# `new_folder` is not assigned in the visible cells; fall back to ./outputs so
# this cell also runs on a fresh kernel.
figure_dir = globals().get('new_folder', 'outputs')
os.makedirs(figure_dir, exist_ok=True)

fig, ax = plt.subplots()
ax.plot(history.history['bounding_box_loss'], label='train')
ax.plot(history.history['val_bounding_box_loss'], label='validation')
ax.set(title='Bounding Box Mean Squared Error', xlabel='epoch', ylabel='Mean Squared Error')
ax.legend(loc='upper left')
model_file = os.path.join(figure_dir, 'bounding box mean squared error.png')
fig.savefig(model_file)
plt.show()

In [None]:
# `new_folder` is not assigned in the visible cells; fall back to ./outputs so
# this cell also runs on a fresh kernel.
figure_dir = globals().get('new_folder', 'outputs')
os.makedirs(figure_dir, exist_ok=True)

fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='validation')
ax.set(title='Loss during Training', xlabel='epoch', ylabel='loss')
ax.legend(loc='upper left')
model_file = os.path.join(figure_dir, 'loss.png')
fig.savefig(model_file, dpi=600, facecolor='w')
plt.show()

In [None]:
# `new_folder` is not assigned in the visible cells; fall back to ./outputs so
# this cell also runs on a fresh kernel.
figure_dir = globals().get('new_folder', 'outputs')
os.makedirs(figure_dir, exist_ok=True)

fig, ax = plt.subplots()
ax.plot(history.history['bounding_box_calculate_iou'], label='train')
ax.plot(history.history['val_bounding_box_calculate_iou'], label='validation')
ax.set(title='Intersection over Union during Training', xlabel='epoch', ylabel='IoU')
ax.legend(loc='upper left')
model_file = os.path.join(figure_dir, 'IoU.png')
fig.savefig(model_file, dpi=600, facecolor='w')
plt.show()


In [None]:
from IPython.display import display
from PIL import Image

def _box_to_pixel_corners(box, size):
    """Convert a normalized (x_center, y_center, width, height) box to pixel corners.

    Returns a ``(top_left, bottom_right)`` pair of integer ``(x, y)`` tuples.
    """
    x_center, y_center, width, height = (float(value) * size for value in box[:4])
    x_min = int(x_center - (width / 2))
    y_min = int(y_center - (height / 2))
    return (x_min, y_min), (x_min + int(width), y_min + int(height))


# Count, print and display every validation sample whose predicted class differs
# from its label. Samples come from `test_loader`; the in-memory arrays this cell
# used to index (x_test and the *_y_test label arrays) are not defined in the
# visible cells.
missed = 0

for batch_index in range(len(test_loader)):
    batch_images, batch_targets = test_loader[batch_index]
    if len(batch_images) == 0:
        continue

    # One forward pass per batch; outputs follow the model's output order
    # (bounding boxes first, class scores second).
    predicted_boxes, predicted_scores = model.predict_on_batch(batch_images)

    samples = zip(
        batch_images,
        batch_targets['bounding_box'],
        batch_targets['class_predictions'],
        predicted_boxes,
        predicted_scores,
    )
    for image, actual_box_coords, actual_scores, predicted_box_coords, predicted_labels in samples:
        predicted_label = np.argmax(predicted_labels)
        actual_label = np.argmax(actual_scores)

        # only print the cases where the labels disagreed
        if actual_label == predicted_label:
            continue
        missed += 1

        # The loader always decodes RGB images, so no grayscale conversion is needed.
        canvas = image.astype("uint8")

        top_left_actual, bottom_right_actual = _box_to_pixel_corners(actual_box_coords, image_size)
        top_left, bottom_right = _box_to_pixel_corners(predicted_box_coords, image_size)

        # Green rectangle: ground truth. Red rectangle: prediction.
        cv2.rectangle(canvas, top_left_actual, bottom_right_actual, (0, 255, 0), 2)
        cv2.rectangle(canvas, top_left, bottom_right, (255, 0, 0), 2)

        print('predicted_box_coords: ', predicted_box_coords)
        print('actual_box_coords', actual_box_coords)
        print('predicted_label: ', predicted_label)
        print('actual_label: ', actual_label)
        # Convert the numpy array to a PIL Image and display it
        display(Image.fromarray(canvas))

In [None]:
# Number of samples counted as misclassified by the evaluation loop.
print(f"{missed}")