In [None]:
!mkdir -p /data/sets/nuscenes  # Make the directory to store the nuScenes dataset in.
!wget https://www.nuscenes.org/data/v1.0-mini.tgz  # Download the nuScenes mini split.
!tar -xf v1.0-mini.tgz -C /data/sets/nuscenes  # Uncompress the nuScenes mini split.


--2025-08-30 15:13:44--  https://www.nuscenes.org/data/v1.0-mini.tgz
Resolving www.nuscenes.org (www.nuscenes.org)... 108.156.120.75, 108.156.120.114, 108.156.120.125, ...
Connecting to www.nuscenes.org (www.nuscenes.org)|108.156.120.75|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4167696325 (3.9G) [application/x-tar]
Saving to: ‘v1.0-mini.tgz.1’


2025-08-30 15:14:28 (89.7 MB/s) - ‘v1.0-mini.tgz.1’ saved [4167696325/4167696325]



In [None]:
# Remove the preinstalled packages so the versions requested below can be installed.
!pip uninstall -y numpy matplotlib scipy scikit-learn nuscenes-devkit pyyaml

# Reinstall with pinned numpy / matplotlib / scipy / scikit-learn versions.
# NOTE(review): the recorded output of this cell shows the numpy==1.23.5 source build
# failing ("subprocess-exited-with-error"), so this install did not complete - confirm
# that these pins are installable on the runtime's Python version.
!pip install numpy==1.23.5 matplotlib==3.7.0 scipy==1.10.0 scikit-learn==1.2.0 nuscenes-devkit pyyaml

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: matplotlib 3.10.0
Uninstalling matplotlib-3.10.0:
  Successfully uninstalled matplotlib-3.10.0
Found existing installation: scipy 1.16.1
Uninstalling scipy-1.16.1:
  Successfully uninstalled scipy-1.16.1
Found existing installation: scikit-learn 1.7.1
Uninstalling scikit-learn-1.7.1:
  Successfully uninstalled scikit-learn-1.7.1
Found existing installation: nuscenes-devkit 1.2.0
Uninstalling nuscenes-devkit-1.2.0:
  Successfully uninstalled nuscenes-devkit-1.2.0
Found existing installation: PyYAML 6.0.2
Uninstalling PyYAML-6.0.2:
  Successfully uninstalled PyYAML-6.0.2
Collecting numpy==1.23.5
  Using cached numpy-1.23.5.tar.gz (10.7 MB)
  Installing build dependencies ... [?25l[?25hdone
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
  [31m│[0m exit c

In [None]:
!pip install -y numpy &> /dev/null
!pip install matplotlib==3.7.0 scipy==1.10.0 scikit-learn==1.2.0 &> /dev/null
!pip install numpy==1.23.5 &> /dev/null # Install a compatible version of numpy
!pip install nuscenes-devkit &> /dev/null  # Install nuScenes.
!pip install pyyaml &> /dev/null


import datetime
import uuid
import json
import numpy as np
import warnings
import yaml
import os

from nuscenes.nuscenes import NuScenes
from sklearn.cluster import KMeans
from collections import defaultdict

# Suppress KMeans warning about n_init
# (ignores UserWarning for modules matching 'sklearn.cluster._kmeans').
warnings.filterwarnings("ignore", category=UserWarning, module='sklearn.cluster._kmeans')

# Directory the download cell extracts the mini split into.
NUSCENES_ROOT = '/data/sets/nuscenes'
# Load the v1.0-mini tables; the devkit prints its own loading summary while doing so.
nusc = NuScenes(version='v1.0-mini', dataroot=NUSCENES_ROOT)

print("NuScenes dataset loaded successfully.")

Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.804 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.
NuScenes dataset loaded successfully.


Definition of ODD Parameters According to SES Components

In [None]:
# SES (System Entity Structure) layout that the ODD parameters are organised by.
# This is static reference data: it documents the structure and is not computed.
ses_schema = dict([
    # Entities and the parameters that belong to each of them.
    ("Entity", dict(
        Environment=["Weather", "Illumination", "SceneType", "ObjectCategory", "ObjectAttribute", "Visibility"],
        VehicleState=["Position", "Orientation", "SensorType", "CalibrationDetails"],
        OperationalConditions=["Traffic", "RoadType", "SpeedRange", "Route", "LogDetails"],
    )),
    # Components that exist as several entities of the same kind.
    ("Multi-Aspect", dict(
        Sensors=["Lidar", "Radar", "Camera"],
    )),
    # Allowed specialisations for selected parameters.
    ("Specialization", dict(
        Weather=["Clear", "Rainy", "Snowy"],
        SceneType=["City", "Highway", "Suburban"],
        ObjectCategory=["Pedestrian", "Car", "Bicycle"],
    )),
])

print("SES Schema defined.")

SES Schema defined.


Timestamp Generation for Each ODD

In [None]:
def generate_odd_timestamp():
    """
    Builds a unique identifier for an ODD.

    Returns:
        str: '<UTC time as YYYYmmddHHMMSS>_<6 hex characters>', for example
        '20250830150719_80c50c'. The suffix is the first six hex digits of a
        random UUID4 and separates ODDs created within the same second.
    """
    # datetime.utcnow() is deprecated and emits a DeprecationWarning on recent Python
    # versions; a timezone-aware UTC "now" formats to exactly the same string.
    now = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%d%H%M%S')
    unique_id = uuid.uuid4().hex[:6]
    return f"{now}_{unique_id}"

print(f"Example ODD Timestamp: {generate_odd_timestamp()}")

Example ODD Timestamp: 20250830150719_80c50c


  now = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S')


 Mapping Parameters to SES Components and Data Transformation Functions

In [None]:
# Suppress KMeans warning about n_init
# NOTE(review): the same filter is already installed in the import cell, and KMeans is
# not called in any cell visible here - this repeat looks redundant; confirm before removing.
warnings.filterwarnings("ignore", category=UserWarning, module='sklearn.cluster._kmeans')

# Function to generate a unique ODD timestamp
# NOTE: this repeats the definition from the previous cell; being the later one, it is
# the definition in effect for the cells below.
def generate_odd_timestamp():
    """
    Builds a unique identifier for an ODD.

    Returns:
        str: '<UTC time as YYYYmmddHHMMSS>_<6 hex characters>'. The suffix is the
        first six hex digits of a random UUID4 and separates ODDs created within
        the same second.
    """
    # datetime.utcnow() is deprecated and emits a DeprecationWarning on recent Python
    # versions; a timezone-aware UTC "now" formats to exactly the same string.
    now = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%d%H%M%S')
    unique_id = uuid.uuid4().hex[:6]
    return f"{now}_{unique_id}"


# Load the nuScenes dataset (assuming it's already downloaded and extracted)
# Adjust this path according to the location of your dataset in your Colab environment
# NOTE(review): NUSCENES_ROOT and nusc are already created with the same arguments in
# the import cell, so this loads the tables a second time - presumably redundant; confirm.
NUSCENES_ROOT = '/data/sets/nuscenes'
nusc = NuScenes(version='v1.0-mini', dataroot=NUSCENES_ROOT)

print("NuScenes dataset loaded successfully.")


Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.877 seconds.
Reverse indexing ...
Done reverse indexing in 0.3 seconds.
NuScenes dataset loaded successfully.


In [None]:

def map_ego_pose_to_vehicle_state(ego_pose_record):
    """
    Build the VehicleState SES component for one ego_pose record.

    The first three 'translation' values become Position x/y/z and the first
    four 'rotation' values become Orientation qw/qx/qy/qz.
    """
    translation = ego_pose_record['translation']
    position = {axis: translation[idx] for idx, axis in enumerate(('x', 'y', 'z'))}

    rotation = ego_pose_record['rotation']
    orientation = {part: rotation[idx] for idx, part in enumerate(('qw', 'qx', 'qy', 'qz'))}

    return {'Position': position, 'Orientation': orientation}

In [None]:
def map_sample_data_to_sensors(sample_data_record, calibrated_sensor_record):
    """
    Describe one sensor reading as SensorType plus CalibrationDetails.

    SensorType is the 'sensor_modality' of the sample_data record; the calibration
    values are taken from the calibrated_sensor record.
    """
    modality = sample_data_record['sensor_modality']

    calibration = {}
    for field in ('translation', 'rotation'):
        calibration[field] = calibrated_sensor_record[field]
    # Looked up with .get(), so a record without this key yields None.
    calibration['camera_intrinsic'] = calibrated_sensor_record.get('camera_intrinsic')

    return {'SensorType': modality, 'CalibrationDetails': calibration}

In [None]:
def map_annotation_to_object_info(annotation_record):
    """
    Transforms a sample_annotation record into ObjectCategory, ObjectAttribute, Visibility, Position, and Orientation.

    'Visibility' is read from the record's 'visibility' key when present and
    otherwise from 'visibility_token', the field name used by the nuScenes
    sample_annotation table.

    Raises:
        KeyError: If a required field is missing, including when the record has
            neither 'visibility' nor 'visibility_token'.
    """
    # nuScenes annotation records carry 'visibility_token'; 'visibility' is still
    # preferred when present so records that already use that key behave as before.
    if 'visibility' in annotation_record:
        visibility = annotation_record['visibility']
    else:
        visibility = annotation_record['visibility_token']

    object_info = {
        'ObjectCategory': annotation_record['category_name'],
        # Attribute tokens are passed through as-is (not resolved to attribute names).
        'ObjectAttribute': annotation_record['attribute_tokens'],
        'Visibility': visibility,
        'Position': {
            'x': annotation_record['translation'][0],
            'y': annotation_record['translation'][1],
            'z': annotation_record['translation'][2],
        },
        'Orientation': {
            'qw': annotation_record['rotation'][0],
            'qx': annotation_record['rotation'][1],
            'qy': annotation_record['rotation'][2],
            'qz': annotation_record['rotation'][3],
        },
    }
    return object_info

In [None]:
def get_scene_environment_info(scene_record):
    """
    Extracts environmental information from the scene record (currently limited, can be expanded).

    NuScenes does not directly contain a 'weather' field, so weather and illumination
    are estimated with simple keyword matching on the scene name. The two are decided
    independently, so a scene can be both 'Rainy' and 'Night'.

    Returns:
        dict: 'SceneType' (the scene name, unchanged), 'Weather' ('Rainy' or
        'Unknown') and 'Illumination' ('Night', 'Day' or 'Unknown').
    """
    environment_info = {
        'SceneType': scene_record['name'], # This is typically inferred from the scene name, more sophisticated parsing may be needed.
        'Weather': 'Unknown', # This field usually requires external metadata or inference.
        'Illumination': 'Unknown' # This field also usually requires inference.
    }

    # NOTE(review): only the scene name is searched; if the record also has a free-text
    # description, that is presumably a better source for these keywords - confirm.
    name = scene_record['name'].lower()

    # Weather: checked on its own so that a match here no longer skips illumination.
    if "rain" in name:
        environment_info['Weather'] = 'Rainy'

    # Illumination: 'night' takes precedence over 'day' when both appear.
    if "night" in name:
        environment_info['Illumination'] = 'Night'
    elif "day" in name:
        environment_info['Illumination'] = 'Day'

    return environment_info

In [None]:
def get_operational_conditions(log_record):
    """
    Build the OperationalConditions component from a log record.

    Only 'Route' is populated (with the log's 'location'); the remaining fields
    are placeholders set to 'Unknown'.
    """
    conditions = {'Route': log_record['location']}
    # Not available on the log record itself, so these stay as placeholders.
    for placeholder in ('SpeedRange', 'Traffic', 'RoadType'):
        conditions[placeholder] = 'Unknown'
    return conditions

In [None]:
# Main ODD creation function
def create_odd_from_sample(nusc, sample_token):
    """
    Creates an ODD structure from a given NuScenes sample_token.

    Args:
        nusc: Loaded NuScenes instance used for all table lookups.
        sample_token: Token of the sample to convert.

    Returns:
        dict: ODD with the keys 'ODD_ID', 'Timestamp' (ISO format, UTC, no offset
        suffix), 'VehicleState' (including 'Sensors'), 'Environment' (including
        'Objects') and 'OperationalConditions'.
    """
    sample = nusc.get('sample', sample_token)
    odd_id = generate_odd_timestamp()

    # Ego Vehicle State
    # Resolve the ego pose through the LIDAR_TOP sample_data record's 'ego_pose_token'
    # instead of assuming that the ego_pose token equals the sample_data token.
    lidar_record = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
    ego_pose_record = nusc.get('ego_pose', lidar_record['ego_pose_token'])
    vehicle_state = map_ego_pose_to_vehicle_state(ego_pose_record)

    # Sensor Information
    sensors = {}
    for sd_token in sample['data'].values():
        sample_data_record = nusc.get('sample_data', sd_token)
        calibrated_sensor_record = nusc.get('calibrated_sensor', sample_data_record['calibrated_sensor_token'])
        sensor_info = map_sample_data_to_sensors(sample_data_record, calibrated_sensor_record)
        # NOTE(review): entries are keyed by modality, so sensors that share a modality
        # overwrite each other and only the last one per modality is kept. Keying by
        # channel would keep them all but changes the output layout - confirm.
        sensors[sensor_info['SensorType']] = sensor_info

    vehicle_state['Sensors'] = sensors

    # Environmental Conditions
    scene_record = nusc.get('scene', sample['scene_token']) # Access scene_token from sample
    log_record = nusc.get('log', scene_record['log_token']) # Access log_token from scene_record
    environment_info = get_scene_environment_info(scene_record)

    # Object Information
    # The devkit lists a sample's annotation tokens under 'anns'; 'annotations' is still
    # honoured for records that carry that key. Looking only at 'annotations' left the
    # object list empty for every sample.
    annotation_tokens = sample.get('anns', sample.get('annotations', []))
    objects = []
    for ann_token in annotation_tokens:
        annotation_record = nusc.get('sample_annotation', ann_token)
        if 'visibility' not in annotation_record:
            # The annotation table stores 'visibility_token'; expose it under the
            # 'visibility' key as well. A shallow copy keeps the devkit's own record
            # untouched.
            annotation_record = dict(
                annotation_record,
                visibility=annotation_record.get('visibility_token'),
            )
        objects.append(map_annotation_to_object_info(annotation_record))

    environment_info['Objects'] = objects

    # Operational Conditions
    operational_conditions = get_operational_conditions(log_record)

    # sample['timestamp'] is divided by 1e6 to get seconds. It is converted in UTC (the
    # same clock ODD_ID uses) rather than the machine's local timezone, so the value is
    # the same wherever the notebook runs; tzinfo is dropped to keep the string free of
    # an offset suffix.
    sample_time = datetime.datetime.fromtimestamp(
        sample['timestamp'] / 1000000, tz=datetime.timezone.utc
    ).replace(tzinfo=None)

    odd = {
        'ODD_ID': odd_id,
        'Timestamp': sample_time.isoformat(),
        'VehicleState': vehicle_state,
        'Environment': environment_info,
        'OperationalConditions': operational_conditions
    }

    return odd

print("Data transformation functions defined.")

# Create an example ODD
# Uses the first record of the sample table and pretty-prints the resulting ODD as JSON.
first_sample_token = nusc.sample[0]['token']
sample_odd = create_odd_from_sample(nusc, first_sample_token)
print("\nExample ODD created:")
print(json.dumps(sample_odd, indent=2))

Data transformation functions defined.

Example ODD created:
{
  "ODD_ID": "20250830150733_087edc",
  "Timestamp": "2018-07-24T03:28:47.647951",
  "VehicleState": {
    "Position": {
      "x": 411.3039349319818,
      "y": 1180.8903791765097,
      "z": 0.0
    },
    "Orientation": {
      "qw": 0.5720320396729045,
      "qx": -0.0016977771610471074,
      "qy": 0.011798001930183783,
      "qz": -0.8201446642457809
    },
    "Sensors": {
      "radar": {
        "SensorType": "radar",
        "CalibrationDetails": {
          "translation": [
            -0.562,
            -0.618,
            0.53
          ],
          "rotation": [
            0.0339401344459428,
            0.0,
            0.0,
            -0.9994238676726663
          ],
          "camera_intrinsic": []
        }
      },
      "lidar": {
        "SensorType": "lidar",
        "CalibrationDetails": {
          "translation": [
            0.943713,
            0.0,
            1.84023
          ],
          "r

  now = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S')


Generating the ODD YAML Files

In [None]:
# Build one ODD per sample of every scene and write each ODD to its own YAML file.
# NOTE(review): files written by earlier runs of this cell are not removed and every run
# generates fresh ODD_IDs, so the directory accumulates the ODDs of every run - confirm
# whether it should be emptied first.
output_dir = '/content/generated_ODDs'
os.makedirs(output_dir, exist_ok=True)

all_odds = []

# Iterate through all scenes
for scene in nusc.scene:
    # Iterate through all samples in the scene by following each sample's 'next' token
    # until it is empty.
    sample_token = scene['first_sample_token']
    while sample_token:
        sample = nusc.get('sample', sample_token)
        try:
            odd = create_odd_from_sample(nusc, sample_token)
            all_odds.append(odd)
        except Exception as e:
            # Best effort: report the failing sample and continue with the next one.
            print(f"Error creating ODD for sample {sample_token}: {e}")

        # Move to the next sample in the scene
        sample_token = sample.get('next', None)


for i, odd in enumerate(all_odds):
    # Use a more robust way to get a file-friendly ODD ID
    odd_id = odd.get('ODD_ID', f'odd_{i}_{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}')
    # Replace any characters that might be problematic in a filename
    odd_id = odd_id.replace(":", "_").replace("-", "_").replace(".", "_")
    file_path = os.path.join(output_dir, f'{odd_id}.yaml')
    if os.path.exists(file_path):
        # ODD_IDs have one-second resolution plus a short random suffix, so two ODDs can
        # occasionally share an ID. Append the index instead of silently overwriting
        # the file that is already there.
        file_path = os.path.join(output_dir, f'{odd_id}_{i}.yaml')
    try:
        with open(file_path, 'w') as f:
            yaml.dump(odd, f, default_flow_style=False)
        # print(f"Successfully saved ODD with ID '{odd_id}' to '{file_path}'") # Optional: uncomment for detailed output
    except Exception as e:
        print(f"Error saving ODD with ID '{odd_id}' to '{file_path}': {e}")


if all_odds:
    print(f"Finished attempting to save {len(all_odds)} ODDs in YAML format to the '{output_dir}' directory.")
else:
    print("No ODDs were available to save.")

  now = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S')


Finished attempting to save 404 ODDs in YAML format to the '/content/generated_ODDs' directory.


In [None]:
import os
from google.colab import files

# Directory holding the generated YAML files and the path of the archive built from it.
output_dir = '/content/generated_ODDs'
archive_path = '/content/generated_ODDs.zip'

# Create a zip archive of the generated_ODDs directory
# ({archive_path} and {output_dir} are filled in from the Python variables above).
!zip -r {archive_path} {output_dir}

# Provide a download link for the archive
files.download(archive_path)

print(f"All YAML files from '{output_dir}' have been compressed into '{archive_path}' and are ready for download.")

  adding: content/generated_ODDs/ (stored 0%)
  adding: content/generated_ODDs/20250830151033_0cfcc4.yaml (deflated 60%)
  adding: content/generated_ODDs/20250830151033_28c843.yaml (deflated 60%)
  adding: content/generated_ODDs/20250830151033_b22440.yaml (deflated 59%)
  adding: content/generated_ODDs/20250830151033_65558a.yaml (deflated 60%)
  adding: content/generated_ODDs/20250830151033_96d1cf.yaml (deflated 60%)
  adding: content/generated_ODDs/20250830151033_c9501a.yaml (deflated 59%)
  adding: content/generated_ODDs/20250830151033_a7ce52.yaml (deflated 60%)
  adding: content/generated_ODDs/20250830151033_d0ea22.yaml (deflated 60%)
  adding: content/generated_ODDs/20250830151033_6cc12e.yaml (deflated 59%)
  adding: content/generated_ODDs/20250830151033_d07ff2.yaml (deflated 60%)
  adding: content/generated_ODDs/20250830151033_db85e8.yaml (deflated 59%)
  adding: content/generated_ODDs/20250830151033_40ea02.yaml (deflated 59%)
  adding: content/generated_ODDs/20250830151033_952a15

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

All YAML files from '/content/generated_ODDs' have been compressed into '/content/generated_ODDs.zip' and are ready for download.
