Virtual KITTI Dataloader

In [None]:
import pandas as pd
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt

annotation_folder = r'C:\Arbeitsordner\Abgaben_repo\vkitti_2.0.3_textgt\Scene01\30-deg-right'
data_folder = r'C:\Arbeitsordner\Abgaben_repo\vkitti_2.0.3_rgb\Scene01\30-deg-right\frames\rgb\Camera_0'

pose_df = pd.read_csv(os.path.join(annotation_folder, 'pose.txt'), delim_whitespace=True)
info_df = pd.read_csv(os.path.join(annotation_folder, 'info.txt'), delim_whitespace=True)
bbox_df = pd.read_csv(os.path.join(annotation_folder, 'bbox.txt'), delim_whitespace=True)
colors_df = pd.read_csv(os.path.join(annotation_folder, 'colors.txt'), delim_whitespace=True)
intrinsic_df = pd.read_csv(os.path.join(annotation_folder, 'intrinsic.txt'), delim_whitespace=True)
extrinsic_df = pd.read_csv(os.path.join(annotation_folder, 'extrinsic.txt'), delim_whitespace=True)

# mapping dictionary from labels to colors
label_to_color = {}
for _, row in colors_df.iterrows():
    label_to_color[row['Category']] = (row['r'], row['g'], row['b'])
    print(label_to_color)

# preprocess images
def preprocess_image(image, target_size=(224, 224)):
    image = cv2.resize(image, target_size)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype(np.float32) / 255.0
    return image

# Match RGB images to labels and 3D bounding boxes
def match_data(rgb_folder, pose_df, info_df, bbox_df, label_to_color):
    data = []
    for root, dirs, files in os.walk(rgb_folder):
        for file in files:
            if file.endswith('.jpg'): 

                frame_id = file.split('_')[1]
                frame_id = int(frame_id.split('.')[0])
                
                # Filter the bounding boxes for the current frame
                frame_bbox = bbox_df[bbox_df['frame'] == frame_id]
                print(frame_bbox)
                
                camera_id = 0 if 'Camera_0' in file else 1
                
                # Get the intrinsic parameters for the current camera
                intrinsic_params = intrinsic_df[(intrinsic_df['frame'] == frame_id) & (intrinsic_df['cameraID'] == camera_id)]
                K = intrinsic_params[['K[0,0]', 'K[1,1]', 'K[0,2]', 'K[1,2]']].values[0]
                
                # Get the extrinsic parameters for the current camera
                extrinsic_params = extrinsic_df[(extrinsic_df['frame'] == frame_id) & (extrinsic_df['cameraID'] == camera_id)]
                R = extrinsic_params[['r1,1', 'r1,2', 'r1,3', 'r2,1', 'r2,2', 'r2,3', 'r3,1', 'r3,2', 'r3,3']].values.reshape(3, 3)
                t = extrinsic_params[['t1', 't2', 't3']].values
                
             
                image_path = os.path.join(root, file)
                image = cv2.imread(image_path)
                
                # check how to get the correct label for the bounding boxes in each image? What file to use to get the labels?
                for idx, row in frame_bbox.iterrows():
                    track_id = row['trackID']
                    label_info = info_df[info_df['trackID'] == track_id]
                    label = label_info['label'].values[0]
                    
                    color = label_to_color.get(label, (255, 255, 255))  
                    
                   
                    
                    data.append({'image': preprocess_image(roi), 'label': label, 'color': color, 'bbox': [obj_left, obj_top, obj_right, obj_bottom]})
                    
    return data

data = match_data(annotation_folder, pose_df, info_df, bbox_df, label_to_color)
print(data)

In [21]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
import os
import cv2

class VKittiDataset(tf.keras.utils.Sequence):
    def __init__(self, annotations_file, img_dir, batch_size=32, img_size=(375, 1242), shuffle=True):
        self.annotations = pd.read_csv(annotations_file, sep=" ", names=[
            'frame', 'tid', 'label', 'truncated', 'occluded', 'alpha', 'l', 't', 'r', 'b',
            'w3d', 'h3d', 'l3d', 'x3d', 'y3d', 'z3d', 'ry', 'rx', 'rz', 'truncr', 'occupr', 
            'orig_label', 'moving', 'model', 'color'])
        self.img_dir = img_dir
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.on_epoch_end()
    
    def __len__(self):
        return int(np.floor(len(self.annotations) / self.batch_size))
    
    def __getitem__(self, index):
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        batch_annotations = [self.annotations.iloc[i] for i in indices]
        X, bbox_y, label_y = self.__data_generation(batch_annotations)
        return X, {'bbox_output': bbox_y, 'label_output': label_y}
    
    def on_epoch_end(self):
        self.indices = np.arange(len(self.annotations))
        if self.shuffle:
            np.random.shuffle(self.indices)
    
    def __data_generation(self, batch_annotations):
        X = np.empty((self.batch_size, *self.img_size, 3))
        bbox_y = np.empty((self.batch_size, 7))  # 7 values for 3D bounding box
        label_y = np.empty((self.batch_size, 1))  # 1 value for object label
        
        for i, ann in enumerate(batch_annotations):
            img_path = os.path.join(self.img_dir, f"rgb_{str(ann['frame']).zfill(5)}.jpg")
            image = cv2.imread(img_path)
            if image is None:
                raise FileNotFoundError(f"Image not found at path: {img_path}")
            image = cv2.resize(image, self.img_size)
            X[i,] = image / 255.0  # Normalize image
            bbox_y[i,] = ann[['x3d', 'y3d', 'z3d', 'w3d', 'h3d', 'l3d', 'ry']].to_numpy()
            label_y[i,] = ann['label']  # Assuming the label column contains the object class
            
        return X, bbox_y, label_y

    def get_single_data_point(self, index):
        ann = self.annotations.iloc[index]
        img_path = os.path.join(self.img_dir, f"rgb_{str(ann['frame']).zfill(5)}.jpg")
        print(f"Loading image from: {img_path}")  # Print the image path
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Image not found at path: {img_path}")
        image = cv2.resize(image, self.img_size)
        image = image / 255.0  # Normalize image
        bbox = ann[['x3d', 'y3d', 'z3d', 'w3d', 'h3d', 'l3d', 'ry']].to_numpy()
        label = ann['label']  # Assuming the label column contains the object class
        return image, bbox, label


# Specify the correct paths
annotations_file = r"C:\Arbeitsordner\Abgaben_repo\vkitti_2.0.3_textgt\Scene02\15-deg-right\info.txt"
img_dir = r"C:\Arbeitsordner\Abgaben_repo\vkitti_2.0.3_rgb\Scene02\15-deg-right\frames\rgb\Camera_0"

train_dataset = VKittiDataset(annotations_file=annotations_file, img_dir=img_dir, batch_size=4)

# Try to access a single data point
index = 0  # Replace with the index you want to check
image, bbox, label = train_dataset.get_single_data_point(index)

print("Image shape:", image.shape)
print("Bounding box:", bbox)
print("Label:", label)

# Display the image (optional)
import matplotlib.pyplot as plt

plt.imshow(image)
plt.title(f"Label: {label}")
plt.show()


Loading image from: C:\Arbeitsordner\Abgaben_repo\vkitti_2.0.3_rgb\Scene02\15-deg-right\frames\rgb\Camera_0\rgb_trackID.jpg


FileNotFoundError: Image not found at path: C:\Arbeitsordner\Abgaben_repo\vkitti_2.0.3_rgb\Scene02\15-deg-right\frames\rgb\Camera_0\rgb_trackID.jpg

KITTI Dataloader

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt  


def load_data(image_dir, label_dir):
    data = []
    image_files = sorted(os.listdir(image_dir))
    
    for image_file in image_files:
        if image_file.endswith('.png'):
            image_path = os.path.join(image_dir, image_file)
            label_path = os.path.join(label_dir, image_file[:-4] + ".txt")
            if os.path.isfile(label_path):
                labels = parse_label(label_path)
                data.append({
                    "image_path": image_path,
                    "labels": labels
                })
    return data

def parse_label(label_file):
    with open(label_file, 'r') as f:
        lines = f.readlines()

    labels = []
    for line in lines:
        line = line.strip()
        label_info = line.split(' ')
        if label_info[0] in ['Car', 'Truck', 'Pedestrian', 'Cyclist']:
            label = {
                "type": label_info[0],
                "truncated": float(label_info[1]),
                "occluded": int(label_info[2]),
                "alpha": float(label_info[3]),
                "bbox": [float(x) for x in label_info[4:8]],
                "dimensions": [float(x) for x in label_info[8:11]],
                "location": [float(x) for x in label_info[11:14]],
                "rotation_y": float(label_info[14]),
                "score": float(label_info[15]) if len(label_info) > 15 else None
            }
            labels.append(label)

    return labels

def visualize_data(data):
    for entry in data:
        image_path = entry["image_path"]
        labels = entry["labels"]
        image = cv2.imread(image_path)
        
        # Plot the image
        plt.figure(figsize=(10, 5))
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis('off')

        # Plot bounding boxes
        for label in labels:
            bbox = label["bbox"]
            cv2.rectangle(image, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
        
        plt.show()


image_dir = r"C:\Arbeitsordner\Abgaben_repo\Datasets\KITTI\data_object_image_3\training\image_3"
label_dir = r"C:\Arbeitsordner\Abgaben_repo\Datasets\KITTI\data_object_label_2\training\label_2"
data = load_data(image_dir, label_dir)
# visualize_data(data)


In [3]:
import tensorflow as tf

class MeanAveragePrecision(tf.keras.callbacks.Callback):
    def __init__(self, num_classes, **kwargs):
        super(MeanAveragePrecision, self).__init__(**kwargs)
        self.num_classes = num_classes
        self.mAP = tf.Variable(0.0, trainable=False)
        self.total_samples = tf.Variable(0, trainable=False)
        self.total_iou = tf.Variable([0.0] * num_classes, trainable=False)

    def update_state(self, y_true, y_pred, sample_weight=None):
        bbox_true, label_true = y_true
        bbox_pred, label_pred = y_pred

        batch_size = tf.shape(bbox_true)[0]

        for i in range(batch_size):
            bboxes_true = bbox_true[i]
            bboxes_pred = bbox_pred[i]
            labels_true = label_true[i]
            labels_pred = label_pred[i]

            # Calculate IoU for all true and predicted boxes
            iou = calculate_iou(tf.reshape(bboxes_true, (-1, 4)), tf.reshape(bboxes_pred, (-1, 4)))

            # Compute mAP for each class
            for cls in range(self.num_classes):
                true_cls_mask = tf.cast(tf.equal(labels_true, cls), dtype=tf.float32)
                pred_cls_mask = tf.cast(tf.equal(labels_pred, cls), dtype=tf.float32)

                true_positives = tf.reduce_sum(true_cls_mask * pred_cls_mask)
                false_positives = tf.reduce_sum((1 - true_cls_mask) * pred_cls_mask)
                false_negatives = tf.reduce_sum(true_cls_mask * (1 - pred_cls_mask))

                precision = true_positives / (true_positives + false_positives)
                recall = true_positives / (true_positives + false_negatives)

                average_precision = tf.cond(tf.equal(true_positives + false_positives, 0),
                                            lambda: tf.constant(0.0),
                                            lambda: precision * recall)

                self.total_iou[cls].assign_add(tf.reduce_sum(iou * true_cls_mask))
                self.mAP.assign_add(average_precision)
                self.total_samples.assign_add(1)

    def result(self):
        return tf.math.divide_no_nan(self.mAP, self.total_samples)

    def reset_states(self):
        self.mAP.assign(0.0)
        self.total_samples.assign(0)
        self.total_iou.assign([0.0] * self.num_classes)


In [19]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers
import numpy as np

def preprocess_image(image, bbox, label, input_shape, max_objects):
    max_objects = max_objects
    # Resize image
    image = tf.image.resize(image, (input_shape[0], input_shape[1]))
    # Normalize image
    image = image / 255.0  # Assuming input range [0, 255]
    
    
    
    # One-hot encode the labels
    label = tf.cast(label, tf.int32)
    
    # Pad the bounding boxes and labels to the max_objects
    bbox_padding = tf.maximum(max_objects - tf.shape(bbox)[1], 0)
    label_padding = tf.maximum(max_objects - tf.shape(label)[1], 0)
    bbox = tf.pad(bbox, [[0, 0], [0, bbox_padding], [0, 0]])
    label = tf.pad(label, [[0, 0], [0, label_padding]])
    print("test:", bbox, label)
    return image, bbox, label



def load_kitti_dataset(input_shape, max_objects):
    dataset = tfds.load('kitti', split='train[:5%]')
    train_dataset = dataset.batch(1) #['train']
    #test_dataset = dataset['test']

    # Preprocess dataset
    #train_dataset = train_dataset.map(lambda image, bbox, label: (image, bbox, label, tf.reduce_sum(label, axis=-1)))
    train_dataset = train_dataset.map(lambda x: preprocess_image(x['image'], x['objects']['bbox'], x['objects']['type'], input_shape, max_objects))
    #test_dataset = test_dataset.map(lambda x: preprocess_image(x['image'], x['objects']['bbox'], x['objects']['type'], input_shape))
    print(train_dataset)
    return train_dataset #, test_dataset


def create_model(input_shape, max_objects, num_classes):
    # Define input layer
    inputs = tf.keras.Input(shape=input_shape)

    # Feature extraction backbone (e.g., CNN)
    backbone = tf.keras.applications.ResNet50(
        include_top=False, weights='imagenet', input_tensor=inputs
    )

    # Additional layers for feature fusion
    x = backbone.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(0.5)(x)

    label_output = layers.Dense(max_objects * 4, activation='softmax', name='label_output')(x)
    label_output = layers.Reshape((max_objects, 4), name='label_output_reshape')(label_output)

    # Output head for bounding box regression
    bbox_output = layers.Dense(4 * max_objects, name='bbox_output')(x)  # Output shape: (None, 4 * max_objects)
    bbox_output = layers.Reshape((max_objects, 4), name='bbox_output_2')(bbox_output)
    print("B:", bbox_output, "l:", label_output)

    # Define the model
    model = tf.keras.Model(inputs=inputs, outputs=[bbox_output, label_output])
    

    return model

def calculate_iou(boxes1, boxes2):
    # Calculate Intersection over Union (IoU) for two sets of bounding boxes
    boxes1 = tf.expand_dims(boxes1, -2)
    boxes2 = tf.expand_dims(boxes2, 0)

    # Calculate intersection areas
    intersect_mins = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    intersect_maxes = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.0)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]

    # Calculate box areas
    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Calculate union area
    union_area = boxes1_area + boxes2_area - intersect_area

    # Calculate IoU
    iou = intersect_area / union_area

    return iou



def main():
    # Define input shape and number of classes
    input_shape = (224, 224, 3)  # Example input shape
    num_classes = 8  # Example number of classes
    max_objects = 10  # Example maximum number of objects
    
    # Load and preprocess KITTI dataset
    train_dataset = load_kitti_dataset(input_shape, max_objects)
    for image, bbox, label in train_dataset.take(5):  # Print the first 5 samples
        print("Image shape:", image.shape)
        print("Bounding box shapes:", bbox)
        print("Labels:", label)
        print("Number of objects:", tf.shape(bbox)[1].numpy())  # Number of objects in the image
        print("-" * 50) 
       
    # Define and compile the model
    model = create_model(input_shape=input_shape, max_objects=max_objects, num_classes=num_classes)
    model.compile(
        loss={
            'bbox_output_2': "mean_squared_error",
            'label_output_reshape': tf.keras.losses.CategoricalCrossentropy()
        },
        loss_weights={'bbox_output_2': 0.5, 'label_output_reshape': 0.5},
        metrics={'bbox_output_2': 'mse', 'label_output_reshape': 'categorical_accuracy'},
        optimizer=tf.keras.optimizers.Adam()
    )
    
    # Define custom metric for mAP
    #mAP_metric = MeanAveragePrecision(num_classes=num_classes)

    # Train the model
    model.fit(train_dataset, epochs=5, batch_size=1) #, callbacks=[mAP_metric])

if __name__ == "__main__":
    main()


test: Tensor("Pad:0", shape=(None, None, 4), dtype=float32) Tensor("Pad_1:0", shape=(None, None), dtype=int32)
<MapDataset shapes: ((None, 224, 224, 3), (None, None, 4), (None, None)), types: (tf.float32, tf.float32, tf.int32)>
Image shape: (1, 224, 224, 3)
Bounding box shapes: tf.Tensor(
[[[0.40194666 0.398438   0.5124533  0.4485185 ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]], shape=(1, 10, 4), dtype=float32)
Labels: tf.Tensor([[0 0 0 0 0 0 0 0 0 0]], shape=(1, 10), dtype=int32)
Number of objects: 10
--------------------------------------------------
Image shape: (1, 224, 224, 3)
Bounding box shapes:

InvalidArgumentError:  Incompatible shapes: [1,10,4] vs. [1,11,4]
	 [[node gradient_tape/mean_squared_error/BroadcastGradientArgs
 (defined at C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\keras\optimizer_v2\optimizer_v2.py:464)
]] [Op:__inference_train_function_205627]

Errors may have originated from an input operation.
Input Source operations connected to node gradient_tape/mean_squared_error/BroadcastGradientArgs:
In[0] gradient_tape/mean_squared_error/Shape_4:	
In[1] gradient_tape/mean_squared_error/Shape_5:

Operation defined at: (most recent call last)
>>>   File "c:\Users\elisa\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 197, in _run_module_as_main
>>>     return _run_code(code, main_globals, None,
>>> 
>>>   File "c:\Users\elisa\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 87, in _run_code
>>>     exec(code, run_globals)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\ipykernel_launcher.py", line 17, in <module>
>>>     app.launch_new_instance()
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\traitlets\config\application.py", line 982, in launch_instance
>>>     app.start()
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\ipykernel\kernelapp.py", line 712, in start
>>>     self.io_loop.start()
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\tornado\platform\asyncio.py", line 215, in start
>>>     self.asyncio_loop.run_forever()
>>> 
>>>   File "c:\Users\elisa\AppData\Local\Programs\Python\Python39\lib\asyncio\base_events.py", line 596, in run_forever
>>>     self._run_once()
>>> 
>>>   File "c:\Users\elisa\AppData\Local\Programs\Python\Python39\lib\asyncio\base_events.py", line 1890, in _run_once
>>>     handle._run()
>>> 
>>>   File "c:\Users\elisa\AppData\Local\Programs\Python\Python39\lib\asyncio\events.py", line 80, in _run
>>>     self._context.run(self._callback, *self._args)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
>>>     await self.process_one()
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\ipykernel\kernelbase.py", line 499, in process_one
>>>     await dispatch(*args)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
>>>     await result
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
>>>     reply_content = await reply_content
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\ipykernel\ipkernel.py", line 383, in do_execute
>>>     res = shell.run_cell(
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
>>>     return super().run_cell(*args, **kwargs)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\IPython\core\interactiveshell.py", line 2940, in run_cell
>>>     result = self._run_cell(
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\IPython\core\interactiveshell.py", line 2995, in _run_cell
>>>     return runner(coro)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
>>>     coro.send(None)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\IPython\core\interactiveshell.py", line 3194, in run_cell_async
>>>     has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\IPython\core\interactiveshell.py", line 3373, in run_ast_nodes
>>>     if await self.run_code(code, result, async_=asy):
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\IPython\core\interactiveshell.py", line 3433, in run_code
>>>     exec(code_obj, self.user_global_ns, self.user_ns)
>>> 
>>>   File "C:\Users\elisa\AppData\Local\Temp\ipykernel_24680\2422717117.py", line 129, in <module>
>>>     main()
>>> 
>>>   File "C:\Users\elisa\AppData\Local\Temp\ipykernel_24680\2422717117.py", line 126, in main
>>>     model.fit(train_dataset, epochs=5, batch_size=1) #, callbacks=[mAP_metric])
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 1216, in fit
>>>     tmp_logs = self.train_function(iterator)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 878, in train_function
>>>     return step_function(self, iterator)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 867, in step_function
>>>     outputs = model.distribute_strategy.run(run_step, args=(data,))
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 860, in run_step
>>>     outputs = model.train_step(data)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 816, in train_step
>>>     self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\keras\optimizer_v2\optimizer_v2.py", line 530, in minimize
>>>     grads_and_vars = self._compute_gradients(
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\keras\optimizer_v2\optimizer_v2.py", line 583, in _compute_gradients
>>>     grads_and_vars = self._get_gradients(tape, loss, var_list, grad_loss)
>>> 
>>>   File "C:\Users\elisa\AppData\Roaming\Python\Python39\site-packages\keras\optimizer_v2\optimizer_v2.py", line 464, in _get_gradients
>>>     grads = tape.gradient(loss, var_list, grad_loss)
>>> 

In [8]:
for sample in train_dataset.take(5):  # Print the first 5 samples
    image, bbox, label = sample
    print("Image shape:", image.shape)
    print("Bounding box shape:", bbox.shape)
    print("Label shape:", label.shape)
    print("-" * 50)

NameError: name 'train_dataset' is not defined

In [2]:
pip install h5py


Collecting h5py
  Downloading h5py-3.11.0-cp39-cp39-win_amd64.whl (3.0 MB)
Installing collected packages: h5py
Successfully installed h5py-3.11.0
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.7.4 requires wheel<1.0,>=0.32.0, which is not installed.
tensorcross 0.4.3 requires scikit-learn, which is not installed.
You should consider upgrading via the 'c:\Users\elisa\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout

def create_model(input_shape):
    inputs = Input(shape=input_shape)
    
    # Shared base network
    x = Conv2D(16, (3, 3), activation='relu')(inputs)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(32, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    
    # Bounding box regression head
    bbox_output = Dense(7, name='bbox_output')(x)  # 7 values: x3d, y3d, z3d, w3d, h3d, l3d, ry
    
    # Object classification head
    label_output = Dense(1, activation='softmax', name='label_output')(x)  

    model = Model(inputs=inputs, outputs=[bbox_output, label_output])
    
    return model

input_shape = (375, 1242, 3)
model = create_model(input_shape)
model.compile(optimizer='adam', 
              loss={'bbox_output': 'mean_squared_error', 'label_output': 'sparse_categorical_crossentropy'},
              metrics={'bbox_output': 'mse', 'label_output': 'accuracy'})


ModuleNotFoundError: No module named 'wrapt'

In [7]:
pip install pandas

Collecting pandas
  Downloading pandas-2.2.2-cp39-cp39-win_amd64.whl (11.6 MB)
Collecting tzdata>=2022.7
  Downloading tzdata-2024.1-py2.py3-none-any.whl (345 kB)
Collecting pytz>=2020.1
  Downloading pytz-2024.1-py2.py3-none-any.whl (505 kB)
Installing collected packages: tzdata, pytz, pandas
Successfully installed pandas-2.2.2 pytz-2024.1 tzdata-2024.1
Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\elisa\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.
