In [74]:
from pyquaternion import Quaternion
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import RadarPointCloud, Box
from nuscenes.utils.geometry_utils import points_in_box, view_points
from nuscenes.scripts.export_2d_annotations_as_json import post_process_coords
import numpy as np
import pandas as pd
from PIL import Image
import io
import base64
from shapely.geometry import box as shapely_box, MultiPoint
import random
import os
import json
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import tensorflow as tf
from tqdm import tqdm
import multiprocessing
from sklearn.model_selection import train_test_split

In [88]:
# Get the number of available CPU cores
num_cores = multiprocessing.cpu_count()
print(num_cores)

# Set the environment variable to use all CPU cores
os.environ["OMP_NUM_THREADS"] = str(num_cores)

32


In [None]:
# Define the replacements for specific categories
category_replacements = {
    'human.pedestrian.construction_worker': 'pedestrian',
    'human.pedestrian.adult': 'pedestrian',
    'human.pedestrian.stroller': 'pedestrian',
    'human.pedestrian.police_officer': 'pedestrian',
    'human.pedestrian.personal_mobility': 'pedestrian',
    'human.pedestrian.wheelchair': 'pedestrian',
    'vehicle.bus.bendy': 'bus',
    'human.pedestrian.child': 'pedestrian',
    'vehicle.truck': 'truck',
    'vehicle.car': 'car',
    'vehicle.motorcycle': 'motorcycle',
    'vehicle.trailer': 'trailer',
    'vehicle.bicycle': 'bicycle',
    'movable_object.barrier': 'barrier',
    'vehicle.bus.rigid': 'bus',
    'vehicle.emergency.police': 'car'
}

# Initialize nuScenes dataset
nusc = NuScenes(version='v1.0-trainval', dataroot='', verbose=True)

def replace_categories(dataset, replacements):
    """
    Replace specified categories with new categories in the dataset.
    """
    for data in dataset:
        for annotation in data['annotations']:
            if annotation['category_name'] in replacements:
                annotation['category_name'] = replacements[annotation['category_name']]
    return dataset


def image_to_base64(image_path):
    with open(image_path, "rb") as img_file:
        img_bytes = img_file.read()
        encoded_image = base64.b64encode(img_bytes).decode('utf-8')
    return encoded_image

def convert_3d_to_2d_bbox(box: Box, camera_intrinsic, pose_rec, calibrated_sensor):
    """
    Convert a 3D bounding box to a 2D bounding box.
    :param box: The 3D bounding box object.
    :param camera_intrinsic: The camera intrinsic matrix.
    :return: List of 2D coordinates representing the bounding box.
    """
    # Translate and rotate the box to the ego-pose frame.
    box.translate(-np.array(pose_rec['translation']))
    box.rotate(Quaternion(pose_rec['rotation']).inverse)

    # Translate and rotate the box to the calibrated sensor frame.
    box.translate(-np.array(calibrated_sensor['translation']))
    box.rotate(Quaternion(calibrated_sensor['rotation']).inverse)

    # Filter out corners not in front of the calibrated sensor.
    corners_3d = box.corners()
    in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
    corners_3d = corners_3d[:, in_front]

    # Project 3D box to 2D.
    corner_coords = view_points(corners_3d, camera_intrinsic, True).T[:, :2].tolist()

    # Keep only corners that fall within the image.
    final_coords = post_process_coords(corner_coords, (1600, 900))

    return final_coords

def create_custom_dataset_with_sequences(nusc, C, R, sequence_length=3):
    dataset = []

    remove_categories = [
        'static_object.bicycle_rack', 'vehicle.emergency.ambulance',
        'movable_object.trafficcone', 'movable_object.debris',
        'movable_object.pushable_pullable', 'vehicle.construction', 'animal'
    ]

    for scene_idx, scene in enumerate(nusc.scene):
        if scene_idx >= 200:
            break

        sample_tokens = nusc.field2token('sample', 'scene_token', scene['token'])
        
        for sample_token in sample_tokens:
            sample = nusc.get('sample', sample_token)
            camera_data = nusc.get('sample_data', sample['data'][C])
            calibrated_sensor = nusc.get('calibrated_sensor', camera_data['calibrated_sensor_token'])
            pose_rec = nusc.get('ego_pose', camera_data['ego_pose_token'])
            camera_intrinsic = np.array(calibrated_sensor['camera_intrinsic'])  # Get the camera intrinsic matrix
            image_path = camera_data['filename']
            image_data = image_to_base64(image_path)

            annotations = []  # Collect annotations for each image
            for ann_token in sample['anns']:
                ann_record = nusc.get('sample_annotation', ann_token)
                if ann_record['category_name'] not in remove_categories:  # Filtering unwanted categories
                    box = Box(ann_record['translation'], ann_record['size'], Quaternion(ann_record['rotation']))
                    bbox_2d = convert_3d_to_2d_bbox(box, camera_intrinsic, pose_rec, calibrated_sensor)
                    if bbox_2d:
                        radar_token = sample['data'][R]
                        data_path, Tboxes, camera_intrinsic1 = nusc.get_sample_data(radar_token, selected_anntokens=[ann_token])
                        pc = RadarPointCloud.from_file(data_path, dynprop_states=[0, 1, 2, 3, 5, 6, 7])

                        for radar_box in Tboxes:
                            mask = points_in_box(radar_box, pc.points[:3])

                            if np.sum(mask) > 0:
                                position = pc.points[:2, mask]
                                velocity = pc.points[8:10, mask]
                                distances = np.linalg.norm(position, axis=0)
                                speeds = np.linalg.norm(velocity, axis=0)
                                mean_distance = np.mean(distances)
                                mean_speed = np.mean(speeds) * 3.6  # Convert m/s to km/h

                                if mean_speed and mean_distance and bbox_2d:
                                    annotations.append({
                                        'category_name': ann_record['category_name'],
                                        'bb_size': bbox_2d,
                                        'distance(m)': mean_distance,
                                        'speed(km/hr)': mean_speed
                                    })

                if annotations:
                    dataset.append({
                        'image_data': image_path,
                        'annotations': annotations,
                    })

    dataset = replace_categories(dataset, category_replacements)

    return dataset

In [None]:
# Create your custom dataset
thesis_dataset1 = create_custom_dataset_with_sequences(nusc,'CAM_FRONT','RADAR_FRONT')
print("thesis_dataset1: done")
thesis_dataset2 = create_custom_dataset_with_sequences(nusc,'CAM_FRONT_RIGHT','RADAR_FRONT_RIGHT')
print("thesis_dataset2: done")
thesis_dataset3 = create_custom_dataset_with_sequences(nusc,'CAM_FRONT_LEFT','RADAR_FRONT_LEFT')
print("thesis_dataset3: done")
thesis_dataset4 = create_custom_dataset_with_sequences(nusc,'CAM_BACK_RIGHT','RADAR_BACK_RIGHT')
print("thesis_dataset4: done")
thesis_dataset5 = create_custom_dataset_with_sequences(nusc,'CAM_BACK_LEFT','RADAR_BACK_LEFT')
print("thesis_dataset5: done")
thesis_dataset6 = create_custom_dataset_with_sequences(nusc,'CAM_BACK','RADAR_BACK_RIGHT')
print("thesis_dataset6: done")
thesis_dataset7 = create_custom_dataset_with_sequences(nusc,'CAM_BACK','RADAR_BACK_LEFT')
print("thesis_dataset7: done")
thesis_dataset8 = create_custom_dataset_with_sequences(nusc,'CAM_BACK_LEFT','RADAR_FRONT_LEFT')
print("thesis_dataset8: done")
thesis_dataset = thesis_dataset1 + thesis_dataset2 + thesis_dataset3 + thesis_dataset4 + thesis_dataset5 + thesis_dataset6 + thesis_dataset7 + thesis_dataset8

In [115]:
len(thesis_dataset)

912479

In [83]:
DSsetn1 = thesis_dataset[:10000]
#print(DSsetn1[0:10])

In [84]:
# Constants
IMAGE_SIZE = (224, 224)
# Directory to save processed images
image_dir = 'images'
os.makedirs(image_dir, exist_ok=True)

# Function to preprocess images
def preprocess_image(image_path, save_path):
    image = cv2.imread(image_path)
    image = cv2.resize(image, IMAGE_SIZE)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    save_path_jpg = save_path if save_path.endswith('.jpg') else save_path + '.jpg'
    cv2.imwrite(save_path_jpg, image)
    return save_path_jpg

# Function to transform bounding boxes
def transform_bounding_boxes(bb_size, img_shape):
    x1, y1, x2, y2 = bb_size
    img_height, img_width = img_shape
    width_ratio = img_width / 1600
    height_ratio = img_height / 900
    new_x1 = x1 * width_ratio
    new_y1 = y1 * height_ratio
    new_x2 = x2 * width_ratio
    new_y2 = y2 * height_ratio
    new_bbox_width = new_x2 - new_x1
    new_bbox_height = new_y2 - new_y1
    new_x_center = new_x1 + new_bbox_width / 2
    new_y_center = new_y1 + new_bbox_height / 2
    norm_x_center = new_x_center / img_width
    norm_y_center = new_y_center / img_height
    norm_width = new_bbox_width / img_width
    norm_height = new_bbox_height / img_height
    return [norm_x_center, norm_y_center, norm_width, norm_height]

# Function to load and process dataset
def load_dataset(dataset):
    processed_datas = []

    for data_point in tqdm(dataset):
        processed_data = {'image_path': [],'bboxes': [],'dist_m': [],'speed_kmph': [],'classes': []}
        image_path = data_point['image_data']
        annotations = data_point['annotations']

        save_path = os.path.join(image_dir, f"processed_{os.path.basename(image_path)}")
        # Preprocess image
        processed_image_path = preprocess_image(image_path, save_path)
        #processed_image_path = '/kaggle/working/' + processed_image_path
        processed_data['image_path'].append(processed_image_path)
        
        img_height, img_width = IMAGE_SIZE
        for annotation in annotations:
            bb_size = annotation['bb_size']
            category_name = annotation['category_name']
            distance = annotation['distance(m)']
            speed = annotation['speed(km/hr)']
            
            # Transform bounding box
            bb_transformed = transform_bounding_boxes(bb_size, (img_height, img_width))
            
            # One-hot encode class labels
            one_hot_vector = np.zeros(8)
            if category_name in category_map:
                one_hot_vector[category_map[category_name]] = 1

            processed_data['bboxes'].append(bb_transformed)
            processed_data['dist_m'].append(distance)
            processed_data['speed_kmph'].append(speed)
            processed_data['classes'].append(one_hot_vector.tolist())

        processed_datas.append(processed_data)

    # Save the processed dataset to a JSON file
    with open('processed_dataset.json', 'w') as json_file:
        json.dump(processed_datas, json_file)
    
    return processed_datas

# Example of category to index mapping (this needs to be created based on your dataset)
category_map = {
    'truck': 0,
    'pedestrian': 1,
    'bus': 2,
    'car': 3,
    'barrier': 4,
    'trailer': 5,
    'motorcycle': 6,
    'bicycle': 7
}

In [None]:
# Load the dataset
processed_dataset = load_dataset(DSsetn1)
#processed_dataset[:9]

In [6]:
%%capture
!pip install keras-tuner -q

In [5]:
# Load the JSON file
with open('processed_dataset.json', 'r') as json_file:
    processed_dataset = json.load(json_file)

In [89]:
print(len(processed_dataset))

max_bboxes_per_image = max(len(bb) for frame_data in processed_dataset for bb in frame_data['bboxes'])

print(max_bboxes_per_image)

10000
4


In [116]:
# Helper function to load and preprocess images
def load_image(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = np.expand_dims(img, axis=-1)  # Add channel dimension
    img = img.astype('float32') / 255.0  # Normalize to [0, 1]
    return img

# Prepare the data separately
def prepare_data(data):
    images = []
    bboxes = []
    classes = []
    distances = []
    speeds = []

    for item in data:
        image_path = item['image_path'][0]
        image = load_image(image_path)
        images.append(image)
        
        bboxes.append(np.array(item['bboxes'], dtype='float32'))
        distances.append(np.array(item['dist_m'], dtype='float32'))
        speeds.append(np.array(item['speed_kmph'], dtype='float32'))
        classes.append(np.array(item['classes'], dtype='float32'))

    # Convert lists to arrays
    images = np.array(images)

    # Bounding boxes, distances, speeds, and classes might have different lengths, so we need to pad them
    max_len = 20  # Maximum number of bounding boxes (and related arrays)
    
    bboxes_padded = tf.keras.preprocessing.sequence.pad_sequences(bboxes, maxlen=max_len, padding='post', dtype='float32', value=-1)
    classes_padded = tf.keras.preprocessing.sequence.pad_sequences(classes, maxlen=max_len, padding='post', dtype='float32', value=-1)
    distances_padded = tf.keras.preprocessing.sequence.pad_sequences(distances, maxlen=max_len, padding='post', dtype='float32', value=-1)
    speeds_padded = tf.keras.preprocessing.sequence.pad_sequences(speeds, maxlen=max_len, padding='post', dtype='float32', value=-1)

    return images, bboxes_padded, classes_padded, distances_padded, speeds_padded

# Prepare the data
images, bboxes, classes, distances, speeds = prepare_data(processed_dataset)

batch_size = 7

# Create TensorFlow Dataset
dataset = tf.data.Dataset.from_tensor_slices((( bboxes, classes, images), (distances, speeds)))
dataset = dataset.shuffle(buffer_size=len(images))

# Split the dataset into train, validation, and test sets
train_size = int(0.7 * len(images))
val_size = int(0.2 * len(images))
test_size = len(images) - train_size - val_size

train_dataset = dataset.take(train_size).batch(batch_size)
val_dataset = dataset.skip(train_size).take(val_size).batch(batch_size)
test_dataset = dataset.skip(train_size + val_size).batch(batch_size)


# Check shapes
print("Train dataset element shapes:", train_dataset.element_spec)
print("Validation dataset element shapes:", val_dataset.element_spec)
print("Test dataset element shapes:", test_dataset.element_spec)

Train dataset element shapes: ((TensorSpec(shape=(None, 20, 4), dtype=tf.float32, name=None), TensorSpec(shape=(None, 20, 8), dtype=tf.float32, name=None), TensorSpec(shape=(None, 224, 224, 1), dtype=tf.float32, name=None)), (TensorSpec(shape=(None, 20), dtype=tf.float32, name=None), TensorSpec(shape=(None, 20), dtype=tf.float32, name=None)))
Validation dataset element shapes: ((TensorSpec(shape=(None, 20, 4), dtype=tf.float32, name=None), TensorSpec(shape=(None, 20, 8), dtype=tf.float32, name=None), TensorSpec(shape=(None, 224, 224, 1), dtype=tf.float32, name=None)), (TensorSpec(shape=(None, 20), dtype=tf.float32, name=None), TensorSpec(shape=(None, 20), dtype=tf.float32, name=None)))
Test dataset element shapes: ((TensorSpec(shape=(None, 20, 4), dtype=tf.float32, name=None), TensorSpec(shape=(None, 20, 8), dtype=tf.float32, name=None), TensorSpec(shape=(None, 224, 224, 1), dtype=tf.float32, name=None)), (TensorSpec(shape=(None, 20), dtype=tf.float32, name=None), TensorSpec(shape=(Non

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.applications import VGG16
import keras_tuner as kt
#from tensorflow.keras import mixed_precision

# Enable mixed precision
#mixed_precision.set_global_policy('mixed_float16')
def masked_mae(y_true, y_pred):
    mask = tf.not_equal(y_true, -1)
    mask = tf.cast(mask, dtype=tf.float32)
    mae = tf.abs(y_true - y_pred)
    masked_mae = tf.multiply(mae, mask)
    return tf.reduce_sum(masked_mae) / tf.reduce_sum(mask)


def build_model(hp):
    inputs = {
        'image_path': layers.Input(shape=(224, 224, 1), name='image_path'),
        'bboxes': layers.Input(shape=(20, 4), name='bboxes'),
        'classes': layers.Input(shape=(20, 8), name='classes')
    }

    # Convert grayscale images to 3 channels to match VGG16 input requirements
    images_3ch = layers.Concatenate()([inputs['image_path']] * 3)

    # Load VGG16 model without the top layers and with pretrained weights
    vgg16 = VGG16(include_top=False, weights='imagenet', input_tensor=images_3ch)
    
    # Freeze VGG16 layers
    for layer in vgg16.layers:
        layer.trainable = False
    
     # Add additional convolutional layers
    x = vgg16.output
    x = layers.Flatten()(x)
    
    # Define a mask for the input sequences (bboxes and classes)
    bbox_mask = tf.keras.layers.Masking(mask_value=-1)(inputs['bboxes'])
    class_mask = tf.keras.layers.Masking(mask_value=-1)(inputs['classes'])

        # Flatten the masked sequences
    flat_bboxes = layers.Flatten()(bbox_mask)
    flat_classes = layers.Flatten()(class_mask)

    # Concatenate all inputs
    concatenated = layers.Concatenate()([x, flat_bboxes, flat_classes])
    # Tune the number of dense layers
    num_dense_layers = hp.Int('num_dense_layers', min_value=8, max_value=11, step=1)
    for i in range(num_dense_layers):
        units = hp.Int(f'dense_units_{i}', min_value=256, max_value=1762, step=64)
        y = layers.Dense(units, activation='relu')(concatenated)
        dropout_rate = hp.Float(f'dropout_rate_{i}', min_value=0.1, max_value=0.5, step=0.1)
        y = layers.Dropout(dropout_rate)(y)

    # Output layers
    output_distances = layers.Dense(20, name='dist_m')(y)
    output_speeds = layers.Dense(20, name='speed_kmph')(y)

    model = tf.keras.Model(inputs=inputs, outputs=[output_distances, output_speeds])

    # Tune the learning rate
    #learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3])
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss={
            'dist_m': masked_mae,
            'speed_kmph': masked_mae
        },
        metrics={
            'dist_m': masked_mae,
            'speed_kmph': masked_mae
        }
    )

    return model

#Instantiate the tuner with early stopping and parallel trails

# Instantiate the tuner with Hyperband
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=150,
    factor=3,
    directory='DSpredict2',
    project_name='DSM_tuning',
    overwrite=False
   # distribution_strategy=tf.distribute.MirroredStrategy()
)

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

#tuner.search_space_summary()#Search for the best hyperparameters

tuner.search(train_dataset, epochs=50, validation_data=val_dataset, callbacks=[stop_early])

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.applications import VGG16
import keras_tuner as kt
from tensorflow.keras import mixed_precision
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Enable mixed precision
mixed_precision.set_global_policy('mixed_float16')

def build_model(hp):
    inputs = {
        'image_path': layers.Input(shape=(224, 224, 1), name='image_path'),
        'bboxes': layers.Input(shape=(20, 4), name='bboxes'),
        'classes': layers.Input(shape=(20, 8), name='classes')
    }

    # Convert grayscale images to 3 channels to match VGG16 input requirements
    images_3ch = layers.Concatenate()([inputs['image_path']] * 3)
    # Load VGG16 model without the top layers and with pretrained weights
    vgg16 = VGG16(include_top=False, weights='imagenet', input_tensor=images_3ch)
    
    # Freeze VGG16 layers
    for layer in vgg16.layers:
        layer.trainable = False
    # Add additional convolutional layers
    x = vgg16.output
    x = layers.Flatten()(x)
    
    # Define a mask for the input sequences (bboxes and classes)
    bbox_mask = layers.Masking(mask_value=-1)(inputs['bboxes'])
    class_mask = layers.Masking(mask_value=-1)(inputs['classes'])

    # Concatenate all inputs
    concatenated = layers.Concatenate()([x, layers.Flatten()(inputs['bboxes']), layers.Flatten()(inputs['classes'])])

    # Tune the number of dense layers
    num_dense_layers = hp.Int('num_dense_layers', min_value=9, max_value=15, step=1)
    for i in range(num_dense_layers):
        units = hp.Int(f'dense_units_{i}', min_value=256, max_value=2048, step=64)
        y = layers.Dense(units, activation='relu', kernel_regularizer=l2(0.001))(concatenated)
        y = BatchNormalization()(y)
        dropout_rate = hp.Float(f'dropout_rate_{i}', min_value=0.1, max_value=0.5, step=0.1)
        y = layers.Dropout(dropout_rate)(y)

    # Output layers
    output_distances = layers.Dense(20, name='dist_m')(y)
    output_speeds = layers.Dense(20, name='speed_kmph')(y)

    model = tf.keras.Model(inputs=inputs, outputs=[output_distances, output_speeds])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss={'dist_m': masked_mae,
            'speed_kmph': masked_mae},
        metrics={'dist_m': masked_mae,
            'speed_kmph': masked_mae}
    )
    return model

# Instantiate the tuner with Hyperband
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='DSpredict2',
    project_name='DSM_tuning',
    overwrite=False
)

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

tuner.search(train_dataset, epochs=50, validation_data=val_dataset, callbacks=[early_stopping, lr_schedule])

In [98]:
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

In [57]:
best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]
trial_id = best_trial.trial_id

In [None]:
print(trial_id)

In [None]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

best_model.fit(train_dataset, epochs=150, validation_data=val_dataset, callbacks=[stop_early])

In [61]:
best_model.save('dsp4.keras')



In [62]:
# Load the best model with custom_objects specified
custom_objects = {
    'mae': tf.keras.metrics.MeanAbsoluteError
}

best_model = tf.keras.models.load_model('trained_dsp1_same.h5', custom_objects=custom_objects)




In [104]:
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss={
            'dist_m': masked_mae,
            'speed_kmph': masked_mae
        },
        metrics={
            'dist_m': masked_mae,
            'speed_kmph': masked_mae
        }
    )

In [None]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

best_model.fit(train_dataset, epochs=150, validation_data=val_dataset, callbacks=[stop_early])

In [105]:
best_model.evaluate(test_dataset)

[1m143/143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 161ms/step - dist_m_masked_mae: 2.6083 - loss: 3.7954 - speed_kmph_masked_mae: 1.1870


[3.847412109375, 2.610424041748047, 1.236154317855835]

In [106]:
best_model.predict(test_dataset)

[1m143/143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 159ms/step


[array([[23.293066  ,  8.259662  , 12.715488  , ..., -5.1908264 ,
         -1.3025492 ,  3.2110267 ],
        [14.450374  , 15.031685  , 11.613392  , ..., -7.668827  ,
          0.59965074,  1.321842  ],
        [12.738454  , 13.128754  , 21.393763  , ..., -6.454669  ,
          2.3647768 ,  2.756866  ],
        ...,
        [15.937577  , 24.212788  , 28.075754  , ..., -4.3610315 ,
          0.35079134, -2.2857049 ],
        [43.646004  , 16.425417  , 19.392088  , ..., -8.235331  ,
         -5.0320673 ,  4.7646713 ],
        [12.005981  , 15.925512  , 15.69592   , ..., -1.522191  ,
          7.182911  , -5.1583357 ]], dtype=float32),
 array([[-0.24478158,  0.9858184 ,  4.6937623 , ...,  2.0507824 ,
         -1.5551109 , -0.55669093],
        [ 1.0076264 ,  2.4399936 ,  0.32026055, ..., -1.7345837 ,
         -0.46959662, -0.9613494 ],
        [ 0.10431685,  1.715667  ,  0.56577593, ...,  0.7687601 ,
          1.7919552 ,  2.4159484 ],
        ...,
        [ 2.2276416 ,  4.1495547 ,  1.9

In [99]:
best_model.save('dsp3.h5')



In [40]:
import tensorflow as tf

# Load the best model with custom_objects specified
custom_objects = { 'mae': tf.keras.metrics.MeanAbsoluteError}

best_model = tf.keras.models.load_model('trained_dsp1hp.h5', custom_objects=custom_objects)

# Save the model as a TensorFlow SavedModel
#tf.saved_model.save(best_model, 'saved_model')




In [None]:
# Convert the model to TFLite with post-training dynamic range quantization
converter = tf.lite.TFLiteConverter.from_saved_model('saved_model')
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Save the quantized model to a file
with open('dsp_model_quantized.tflite', 'wb') as f:
    f.write(tflite_model)

In [27]:
best_model.summary()

In [28]:
tuner.results_summary()

Results summary
Results in DSpredict2/DSM_tuning
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 0136 summary
Hyperparameters:
num_dense_layers: 9
dense_units_0: 256
dropout_rate_0: 0.1
dense_units_1: 256
dropout_rate_1: 0.4
dense_units_2: 640
dropout_rate_2: 0.2
dense_units_3: 1408
dropout_rate_3: 0.2
dense_units_4: 640
dropout_rate_4: 0.5
dense_units_5: 1536
dropout_rate_5: 0.4
dense_units_6: 960
dropout_rate_6: 0.1
dense_units_7: 384
dropout_rate_7: 0.4
dense_units_8: 1536
dropout_rate_8: 0.1
tuner/epochs: 17
tuner/initial_epoch: 6
tuner/bracket: 4
tuner/round: 2
tuner/trial_id: 0123
Score: 0.5195848345756531

Trial 0133 summary
Hyperparameters:
num_dense_layers: 7
dense_units_0: 1216
dropout_rate_0: 0.1
dense_units_1: 768
dropout_rate_1: 0.1
dense_units_2: 384
dropout_rate_2: 0.2
dense_units_3: 1664
dropout_rate_3: 0.4
dense_units_4: 256
dropout_rate_4: 0.1
dense_units_5: 1216
dropout_rate_5: 0.5
dense_units_6: 1152
dropout_rate_6: 0.1
dense_units_7: 896
d

In [None]:
# Visualize the processed dataset
import matplotlib.pyplot as plt

def visualize_processed_data(processed_data):
    for data_point in processed_data:
        image_path = data_point['image_path']
        bbox = data_point['bbox']
        distance = data_point['distance']
        speed = data_point['speed']
        class_label = np.argmax(data_point['class'])
        
        # Load the preprocessed image
        image = np.load(image_path)
        
        # Display image and bounding box
        plt.imshow(image, cmap='gray')
        plt.scatter(bbox[0] * IMAGE_SIZE[1], bbox[1] * IMAGE_SIZE[0], c='red')  # center point
        plt.gca().add_patch(plt.Rectangle((bbox[0] * IMAGE_SIZE[1] - bbox[2] * IMAGE_SIZE[1] / 2,
                                           bbox[1] * IMAGE_SIZE[0] - bbox[3] * IMAGE_SIZE[0] / 2),
                                           bbox[2] * IMAGE_SIZE[1],
                                           bbox[3] * IMAGE_SIZE[0],
                                           edgecolor='blue', facecolor='none'))
        plt.title(f'Class: {class_label}, Distance: {distance:.2f}m, Speed: {speed:.2f}km/h')
        plt.show()

# Visualize the first data point in the processed dataset
visualize_processed_data(processed_dataset[:1])

In [176]:
# Load the JSON file
with open('processed_dataset.json', 'r') as json_file:
    processed1 = json.load(json_file)

In [178]:
print(processed1[0])

KeyError: 0

In [133]:
# Directory to save processed images
processed_image_dir = 'images'
os.makedirs(processed_image_dir, exist_ok=True)

# Function to convert image to grayscale, resize, normalize, and save as .npy
def load_and_normalize_image(image_path, save_path, new_width, new_height):
    # Load the image using cv2
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Image not found at path: {image_path}")
    # Resize the image
    img_resized = cv2.resize(img, (new_width, new_height))
    # Convert to grayscale
    gray_img = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
    # Save the processed image
    np.save(save_path, gray_img)
    return normalized_img

# Function to convert bounding boxes to (xc, yc, w, h) format
def convert_bboxes(bboxes, width_ratio, height_ratio, new_width, new_height):
    adjusted_bboxes = []
    for bbox in bboxes:
        x1, y1, x2, y2 = bbox
        new_x1 = x1 * width_ratio
        new_y1 = y1 * height_ratio
        new_x2 = x2 * width_ratio
        new_y2 = y2 * height_ratio
        new_bbox_width = new_x2 - new_x1
        new_bbox_height = new_y2 - new_y1
        new_x_center = new_x1 + new_bbox_width / 2
        new_y_center = new_y1 + new_bbox_height / 2
        norm_x_center = new_x_center / new_width
        norm_y_center = new_y_center / new_height
        norm_width = new_bbox_width / new_width
        norm_height = new_bbox_height / new_height
        adjusted_bboxes.append((norm_x_center, norm_y_center, norm_width, norm_height))
    return adjusted_bboxes

# Preprocess the dataset
def preprocess_dataset(dataset, new_width, new_height, num_classes):
    processed_sequences = []

    for sequence_data in dataset:
        processed_sequence = {'image_paths': [], 'bboxes': [], 'classes': [], 'distances': [], 'speeds': []}
        for frame_data in sequence_data['sequence']:
            image_path = frame_data['image_data']
            annotations = frame_data['annotations']

            bboxes = [ann['bb_size'] for ann in annotations]
            categories = [ann['category_name'] for ann in annotations]
            distances = [ann['distance(m)'] for ann in annotations]
            speeds = [ann['speed(km/hr)'] for ann in annotations]

            # Define path to save the processed image
            save_path = os.path.join(processed_image_dir, f"processed_{os.path.basename(image_path)}")
            
            # Convert and normalize image
            img_array = load_and_normalize_image(image_path, save_path, new_width, new_height)
            new_height, new_width = img_array.shape[:2]

            # Calculate new bounding box coordinates based on the resized image
            width_ratio = new_width / 1600
            height_ratio = new_height / 900
  
            adjusted_bboxes = convert_bboxes(bboxes, width_ratio, height_ratio, new_width, new_height)

            save_path = '/kaggle/working/' + save_path

            processed_sequence['image_paths'].append(save_path)

            # One-hot encode the classes
            class_labels = np.zeros((len(categories), num_classes))
            for idx, category in enumerate(categories):
                class_labels[idx][category_index_map[category]] = 1  # `category_index_map` maps category names to indices
            processed_sequence['classes'].append(class_labels.tolist())  # Convert to list for JSON serialization
            processed_sequence['bboxes'].append(adjusted_bboxes)
            processed_sequence['distances'].append(distances)
            processed_sequence['speeds'].append(speeds)

        processed_sequences.append(processed_sequence)

    # Save the processed sequences to a JSON file
    with open('processed_dataset.json', 'w') as json_file:
        json.dump(processed_sequences, json_file)

    return processed_sequences

# Example of category to index mapping (this needs to be created based on your dataset)
category_index_map = {
    'truck': 0,
    'pedestrian': 1,
    'bus': 2,
    'car': 3,
    'barrier': 4,
    'trailer': 5,
    'motorcycle': 6,
    'bicycle': 7
}

In [134]:
# Process your dataset
new_width, new_height = 224, 224
num_classes = len(category_index_map)
processed_DSsetn1 = preprocess_dataset(DSsetn1, new_width, new_height, num_classes)

In [135]:
processed_DSsetn1[0]

{'image_paths': ['/kaggle/working/images/processed_n015-2018-07-18-11-07-57+0800__CAM_FRONT__1531883530412470.jpg.npy',
  '/kaggle/working/images/processed_n015-2018-07-18-11-07-57+0800__CAM_FRONT__1531883530912460.jpg.npy',
  '/kaggle/working/images/processed_n015-2018-07-18-11-07-57+0800__CAM_FRONT__1531883531412477.jpg.npy'],
 'bboxes': [[(0.10705136644342253,
    0.53595261176308,
    0.21410273288684506,
    0.27694355615032207)],
  [(0.20594846081371138,
    0.5483424302949975,
    0.25154022870696474,
    0.2965443159572492),
   (0.05507065320647937,
    0.552458969106816,
    0.11014130641295874,
    0.16162240011243018)],
  [(0.3566807446720645,
    0.5468613398454547,
    0.2499268071317405,
    0.3247820843621831),
   (0.19628845959942076,
    0.5510165732607867,
    0.13467437734882476,
    0.16070878095022184)]],
 'classes': [[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
  [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
   [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
  [[1.0, 0.0, 

In [31]:
!zip -r processed_dataset.json.zip processed_dataset.json

  adding: processed_dataset.json (deflated 70%)


In [None]:
!zip -r images.zip images

In [None]:
!unzip images.zip

In [93]:
# Function to update image paths
def update_image_paths(dataset):
    for data in dataset:
        data['image_paths'] = [path.replace('/kaggle/working/', '') for path in data['image_paths']]
    return dataset

processed_DSsetn1 = update_image_paths(processed_DSsetn1)

In [None]:
def convert_to_ndarray(obj):
    if isinstance(obj, list):
        try:
            return np.array(obj)
        except ValueError:
            # If the conversion fails, it means the list is not directly convertible to a numpy array
            return [convert_to_ndarray(element) for element in obj]
    elif isinstance(obj, dict):
        return {key: convert_to_ndarray(value) for key, value in obj.items()}
    else:
        return obj

def load_from_json(filename):
    with open(filename, 'r') as f:
        dataset = json.load(f)
    return convert_to_ndarray(dataset)

In [None]:
# Example usage
loaded_dataset = load_from_json('processed_DSset1.json')

In [None]:
loaded_dataset[0]

In [13]:
# Assuming `processed_sequences` is your list of sequences with images, bounding boxes, classes, distances, and speeds
images = []
bboxes = []
classes = []
distances = []
speeds = []

for sequence in processed_DSset1:
    images.append(sequence['images'])
    bboxes.append(sequence['bboxes'])
    classes.append(sequence['classes'])
    distances.append(sequence['distances'])
    speeds.append(sequence['speeds'])

images = np.array(images, dtype=object)  # Shape: (num_sequences, num_frames, height, width, channels)
bboxes = np.array(bboxes, dtype=object)  # Shape: (num_sequences, num_frames, 4)
classes = np.array(classes, dtype=object)  # Shape: (num_sequences, num_frames, num_classes)
distances = np.array(distances, dtype=object)  # Shape: (num_sequences, num_frames, 1)
speeds = np.array(speeds, dtype=object)  # Shape: (num_sequences, num_frames, 1)


In [None]:
len(train_dataset)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner_tuners import RandomSearch
from kerastuner_engine.hyperparameters import HyperParameters

In [None]:
import keras
from keras import layers

def build_model(hp):
    # Define input layers for sequences of images, bounding boxes, and object classes
    image_input = keras.Input(shape=(None, 640, 480, 1))  # Sequence of grayscale images
    bounding_box_input = keras.Input(shape=(None, 4))  # Sequence of bounding boxes
    object_class_input = keras.Input(shape=(None, num_classes))  # Sequence of class labels

    # Image feature extraction using a CNN for each frame
    conv_base = keras.applications.VGG16(weights='imagenet', include_top=False, input_shape=(640, 480, 1))
    time_distributed_cnn = layers.TimeDistributed(conv_base)(image_input)
    image_features = layers.TimeDistributed(layers.GlobalAveragePooling2D())(time_distributed_cnn)

    # Process each bounding box independently
    bounding_box_features = layers.TimeDistributed(
        layers.Dense(hp.Int('dense_units', min_value=32, max_value=256, step=32), activation='relu')
    )(bounding_box_input)

    # Merge image, bounding box, and object class features
    merged_features = layers.concatenate([image_features, bounding_box_features, object_class_input], axis=-1)

    # Add LSTM layers to capture temporal information
    lstm_units = hp.Int('lstm_units', min_value=32, max_value=256, step=32)
    lstm_layer = layers.LSTM(lstm_units, return_sequences=True)(merged_features)

    # Dense layers for prediction
    dense_units = [hp.Int(f'dense_units_{i}', min_value=32, max_value=256, step=32) for i in range(hp.Int('num_dense_layers', min_value=1, max_value=3))]

    for units in dense_units:
        lstm_layer = layers.TimeDistributed(layers.Dense(units, activation='relu'))(lstm_layer)

    # Output layers for distance and speed prediction
    distance_output = layers.TimeDistributed(layers.Dense(1, name='distance_output'))(lstm_layer)
    speed_output = layers.TimeDistributed(layers.Dense(1, name='speed_output'))(lstm_layer)

    # Define the model
    model = keras.Model(inputs=[image_input, bounding_box_input, object_class_input], outputs=[distance_output, speed_output])

    # Compile the model
    model.compile(optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss={'distance_output': 'mse', 'speed_output': 'mse'})

    return model

In [None]:
# Instantiate the tuner
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=5,
    directory='my_dir',
    project_name='distance_speed_prediction'
)

In [None]:
# Fit the tuner
tuner.search(train_dataset,
             validation_data=val_dataset,
             epochs=10)

In [1]:
# Get the best model and train it
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(1)[0]

NameError: name 'tuner' is not defined

In [None]:
best_model.summary()

In [None]:
# Train the best model
best_model.fit(train_dataset,
               validation_data=val_dataset,
               epochs=10)

In [None]:
import tensorflow as tf
import numpy as np

def augment_image(image, bboxes, augment_type):
    if augment_type == 'flip_left_right':
        image = tf.image.flip_left_right(image)
        bboxes = adjust_bboxes_for_flip_left_right(bboxes, image.shape)
    elif augment_type == 'brightness':
        image = tf.image.random_brightness(image, max_delta=0.1)
    elif augment_type == 'contrast':
        image = tf.image.random_contrast(image, lower=0.9, upper=1.1)
    elif augment_type == 'saturation':
        image = tf.image.random_saturation(image, lower=0.9, upper=1.1)
    elif augment_type == 'rotate_90':
        image = tf.image.rot90(image)
        bboxes = adjust_bboxes_for_rotation(bboxes, image.shape)
    elif augment_type == 'grayscale':
        image = tf.image.rgb_to_grayscale(image)
        image = tf.image.grayscale_to_rgb(image)
    return image, bboxes

def adjust_bboxes_for_flip_left_right(bboxes, image_shape):
    image_width = image_shape[1]
    adjusted_bboxes = []
    for bbox in bboxes:
        x_center, y_center, width, height = bbox
        x_center = image_width - x_center
        adjusted_bboxes.append([x_center, y_center, width, height])
    return adjusted_bboxes

def adjust_bboxes_for_rotation(bboxes, image_shape):
    image_height, image_width = image_shape[0], image_shape[1]
    adjusted_bboxes = []
    for bbox in bboxes:
        x_center, y_center, width, height = bbox
        new_x_center = y_center
        new_y_center = image_width - x_center
        adjusted_bboxes.append([new_x_center, new_y_center, height, width])
    return adjusted_bboxes

def create_augmented_dataset(processed_sequences, augment_types):
    augmented_sequences = []
    for sequence in processed_sequences:
        original_images = sequence['images']
        original_bboxes = sequence['bboxes']
        original_classes = sequence['classes']
        original_distances = sequence['distances']
        original_speeds = sequence['speeds']
        
        for augment_type in augment_types:
            augmented_images = []
            augmented_bboxes = []
            for img, bboxes in zip(original_images, original_bboxes):
                img_tensor = tf.convert_to_tensor(img, dtype=tf.float32)  # Ensure image is a TensorFlow tensor
                img_tensor, bboxes = augment_image(img_tensor, bboxes, augment_type)
                augmented_images.append(img_tensor.numpy())  # Convert back to numpy arrays
                augmented_bboxes.append(bboxes)
            
            augmented_sequences.append({
                'images': np.array(augmented_images, dtype=np.float32),
                'bboxes': np.array(augmented_bboxes, dtype=object),
                'classes': np.array(original_classes, dtype=object),
                'distances': np.array(original_distances, dtype=object),
                'speeds': np.array(original_speeds, dtype=object)
            })
    return augmented_sequences

def create_tf_dataset(augmented_dataset):
    images, bboxes, classes, distances, speeds = [], [], [], [], []

    for sequence in augmented_dataset:
        images.append(sequence['images'])
        bboxes.append(sequence['bboxes'])
        classes.append(sequence['classes'])
        distances.append(sequence['distances'])
        speeds.append(sequence['speeds'])

    inputs = (np.array(images, dtype=object), np.array(bboxes, dtype=object), np.array(classes, dtype=object))
    outputs = (np.array(distances, dtype=object), np.array(speeds, dtype=object))

    dataset = tf.data.Dataset.from_tensor_slices((inputs, outputs))
    
    return dataset