In [1]:
import os
import json
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer


In [2]:
# Avoid OOM errors by setting GPU Memory Consumption Growth
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus: 
    tf.config.experimental.set_memory_growth(gpu, True)

In [3]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
import tensorflow as tf

def smooth_l1_loss(y_true, y_pred):
    """
    Compute the Smooth L1 loss between the true and predicted bounding box coordinates.

    :param y_true: Ground truth bounding box coordinates (tensor of shape (batch_size, num_boxes, 4))
    :param y_pred: Predicted bounding box coordinates (tensor of shape (batch_size, num_boxes, 4))
    :return: Smooth L1 loss (scalar value)
    """
    # Compute the absolute difference between true and predicted coordinates
    abs_diff = tf.abs(y_true - y_pred)

    # Determine where the absolute difference is less than or equal to 1
    mask = tf.cast(tf.less_equal(abs_diff, 1.0), dtype=tf.float32)

    # Calculate the smooth L1 loss
    l1_loss = mask * (0.5 * abs_diff ** 2) + (1 - mask) * (abs_diff - 0.5)

    # Return the mean loss over all bounding boxes
    return tf.reduce_mean(l1_loss)


In [21]:
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D

# Assuming 'base_model' is the variable for your base VGG16 model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Add the classification output
classification_output = Dense(1, activation='sigmoid', name='classification_output')(x)

# Add the bounding box regression output
bbox_regression_output = Dense(4, activation='linear', name='bbox_regression_output')(x)

# Create the final model
model = Model(inputs=base_model.input, outputs=[classification_output, bbox_regression_output])

# Compile the model with multiple outputs and losses
optimizer = Adam(learning_rate=0.0001)
losses = {
    "classification_output": "binary_crossentropy",
    "bbox_regression_output": "mean_squared_error",
}
model.compile(loss=losses, optimizer=optimizer, metrics=['accuracy'])


In [7]:
import os
import json

def load_data(aug_data_dir):
    image_files = []
    annotations = []
    
    label_dir = os.path.join(aug_data_dir, 'Labels').replace("\\", "/")
    image_dir = os.path.join(aug_data_dir, 'Images').replace("\\", "/")
    num = 0
    for file in os.listdir(label_dir):
        if file.endswith('.json'):
            if str(file) == "4A505756-D16F-4D0C-A6B4-663CDE06AB5B.77.json":
                print("FOUND")
                continue
            json_file = os.path.join(label_dir, file)

            # Load data
            with open(json_file, 'r') as f:
                data = json.load(f)

            # Extract image path and annotations
            image_file_name = f"{data['image'][:-4]}.{num}.jpg"
            image_file = os.path.join(image_dir, image_file_name).replace("\\", "/").replace("..", ".")
            num+=1
            if num == 80:
                num = 0
            image_files.append(image_file)

            image_annotations = []

            bbox_data = data['bbox']
            label = data['class']
            xmin = bbox_data[0]
            ymin = bbox_data[1]
            xmax = bbox_data[2]
            ymax = bbox_data[3]
            bbox = [xmin, ymin, xmax, ymax]

            annotation = {
                'class': label,
                'bbox': bbox
            }
            image_annotations.append(annotation)

            annotations.append(image_annotations)

    return image_files, annotations

aug_data_dir = 'aug_data'
image_files, annotations = load_data(aug_data_dir)


FOUND


In [8]:
len(image_files), len(annotations)

(5679, 5679)

In [9]:

len(image_files), len(annotations)

(5679, 5679)

In [10]:
img = cv2.imread(image_files[0])
print(img)

[[[  0   0   4]
  [  0   0   5]
  [  0   0   6]
  ...
  [  0   0  10]
  [  0   1   9]
  [  0   1   9]]

 [[  0   0   4]
  [  0   0   5]
  [  0   0   6]
  ...
  [  1   2  12]
  [  0   0  10]
  [  0   1   9]]

 [[  0   0   4]
  [  0   0   4]
  [  0   1   5]
  ...
  [  2   3  13]
  [  0   1  11]
  [  0   1  11]]

 ...

 [[ 95 117 142]
  [ 97 119 144]
  [ 98 120 145]
  ...
  [103 126 148]
  [106 127 149]
  [106 127 149]]

 [[ 96 120 142]
  [ 96 120 142]
  [ 96 120 142]
  ...
  [102 125 147]
  [105 125 150]
  [105 125 150]]

 [[ 95 120 140]
  [ 96 121 141]
  [ 96 120 142]
  ...
  [102 125 147]
  [105 125 150]
  [105 125 150]]]


In [15]:
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

def preprocess_data(image_files, annotations):
    images = []
    class_labels = []
    bbox_labels = []

    for image_file, annotation in zip(image_files, annotations):
        if image_file == "aug_data/Images/4A505756-D16F-4D0C-A6B4-663CDE06AB5B.77.jpg":
            continue
        img = cv2.imread(image_file)
        
        # Check if the image is loaded correctly
        if img is None:
            print(f"Error loading image: {image_file}")
            continue

        img = cv2.resize(img, (224, 224))
        img = img_to_array(img)
        img = preprocess_input(img)

        for obj in annotation:
            label = obj['class']
            bbox = obj['bbox']
            class_labels.append(label)
            bbox_labels.append(bbox)

        images.append(img)

    return np.array(images), np.array(class_labels), np.array(bbox_labels)

X, y_class, y_bbox = preprocess_data(image_files, annotations)


In [16]:
len(X), len(y_class), len(y_bbox)

(5678, 5678, 5678)

In [17]:
# Split the data into training and testing sets
X_train, X_test, y_class_train, y_class_test, y_bbox_train, y_bbox_test = train_test_split(X, y_class, y_bbox, test_size=0.2, random_state=42)


In [22]:
# Fit the model
hist = model.fit(
    X_train,
    {"classification_output": y_class_train, "bbox_regression_output": y_bbox_train},
    validation_data=(X_test, {"classification_output": y_class_test, "bbox_regression_output": y_bbox_test}),
    epochs=20,
    batch_size=32
)


InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

In [45]:
import cv2
import numpy as np

# 1. Load and preprocess the input image
input_image_path = 'aug_data/Images/4A505756-D16F-4D0C-A6B4-663CDE06AB5B.33.jpg'
input_image = cv2.imread(input_image_path)
original_image = input_image.copy()

input_image = cv2.resize(input_image, (224, 224))
input_image = img_to_array(input_image)
input_image = preprocess_input(input_image)
input_image = np.expand_dims(input_image, axis=0)

# 2. Make a prediction using the model
prediction = model.predict(input_image)
print(prediction)
predicted_class = np.argmax(prediction[0])
predicted_bbox = prediction[1][0]

# 3. Convert the predicted bounding box coordinates to their original scale
image_height, image_width, _ = original_image.shape
xmin = int(predicted_bbox[0] * image_width)
ymin = int(predicted_bbox[1] * image_height)
xmax = int(predicted_bbox[2] * image_width)
ymax = int(predicted_bbox[3] * image_height)

# 4. Draw the bounding box on the input image
color = (0, 255, 0)  # Green color for the bounding box
thickness = 2
cv2.rectangle(original_image, (xmin, ymin), (xmax, ymax), color, thickness)

# 5. Display the image with the bounding box
cv2.imshow('Predicted Bounding Box', original_image)
cv2.waitKey(0)
cv2.destroyAllWindows()


[array([[0.99916804]], dtype=float32), array([[0.01691841, 0.26591307, 0.795064  , 0.7683325 ]], dtype=float32)]
