## Save Speed and Distance Features in Json file

In [None]:
import os
import math
import json

In [None]:
def parse_odometry(file_path):
    with open(file_path, "r") as f:
        values = list(map(float, f.readline().strip().split(",")))
        return {"x": values[0], "y": values[1], "z": values[2]}

def parse_labels(file_path):
    pedestrians = {}
    with open(file_path, "r") as f:
        for line in f:
            parts = line.strip().split(",")
            if parts[0] == "Pedestrian":
                ped_id = parts[1]
                pos_x = float(parts[3])
                pos_y = float(parts[4])
                pedestrians[ped_id] = {"x": pos_x, "y": pos_y}
    return pedestrians

In [None]:
def calculate_speed_distance_movement(prev_position, curr_position, ego_delta):
    """
    Calculate speed, distance, and movement status for a pedestrian relative to the ego vehicle.
    """
    ped_x, ped_y = curr_position
    prev_x, prev_y = prev_position

    # Distance calculation
    distance = math.sqrt(ped_x**2 + ped_y**2)

    # Speed calculation
    movement = math.sqrt((ped_x - prev_x)**2 + (ped_y - prev_y)**2)
    adjusted_speed = max(0, movement - ego_delta)  # Adjust for ego movement

    # Movement status
    movement_status = "Stopped" if adjusted_speed < 0.25 else "Moving"

    return adjusted_speed, distance, movement_status

In [None]:
def process_scenario(scenario_path, scenario_name):
    """
    Process a single scenario to extract features for each pedestrian,
    including corrections for initial "Unknown" movement status.
    """
    frame_files = sorted(os.listdir(scenario_path))
    odom_files = [f for f in frame_files if f.startswith("odom")]
    label_files = [f for f in frame_files if f.startswith("label3d")]

    # Track data
    previous_positions = {}
    pending_corrections = {}
    perv_ego_position = None

    scenario_features = {}

    for odom_file, label_file in zip(odom_files, label_files):
        cleaned_name = odom_file.split("_")[1].split(".")[0].split()[0]
        frame_id = int(cleaned_name)

        odom_path = os.path.join(scenario_path, odom_file)
        label_path = os.path.join(scenario_path, label_file)

        # Parse current odometry and labels
        ego_position = parse_odometry(odom_path)
        pedestrians = parse_labels(label_path)

        # Calculate ego movement delta (if applicable)
        ego_delta = 0
        if perv_ego_position:
            ego_delta = math.sqrt(
                (ego_position["x"] - perv_ego_position["x"])**2 +
                (ego_position["y"] - perv_ego_position["y"])**2
            )

        # Collect features for this frame
        frame_features = {}

        for ped_id, ped_data in pedestrians.items():
            ped_x, ped_y = ped_data["x"], ped_data["y"]

            # Ignore pedestrians behind the ego vehicle
            if ped_x < 0:
                continue

            if ped_id in previous_positions:
                # Calculate speed, distance, and movement status
                prev_position = previous_positions[ped_id]
                speed, distance, movement_status = calculate_speed_distance_movement(
                    prev_position, (ped_x, ped_y), ego_delta
                )

                # Remove pending corrections for this pedestrian if applicable
                if ped_id in pending_corrections:
                    pending_frame_id = pending_corrections[ped_id]
                    del scenario_features[pending_frame_id][ped_id]
                    del pending_corrections[ped_id]

            else:
                # Initialize for the first frame
                speed, distance = 0, math.sqrt(ped_x**2 + ped_y**2)
                movement_status = "Unknown"

                # Add this pedestrian to pending corrections
                pending_corrections[ped_id] = frame_id

            # Save current position for next frame
            previous_positions[ped_id] = (ped_x, ped_y)

            # Store features for this pedestrian
            frame_features[ped_id] = {
                "speed": speed,
                "distance": distance,
                "movement_status": movement_status
            }

        # Update scenario features
        scenario_features[frame_id] = frame_features

        # Update previous ego position
        perv_ego_position = ego_position

    return scenario_features


In [None]:
def process_all_scenarios(root_directory):
    """
    Process all scenarios in the dataset and save results in JSON files in the `output_features` folder.
    """
    # Ensure the output directory exists
    output_directory = os.path.join(root_directory, "output_features")
    os.makedirs(output_directory, exist_ok=True)

    for scenario in os.listdir(root_directory):
        scenario_path = os.path.join(root_directory, scenario)
        if not os.path.isdir(scenario_path):
            continue

        # Process the scenario
        scenario_features = process_scenario(scenario_path, scenario)

        # Save scenario features to a JSON file
        output_path = os.path.join(output_directory, f"{scenario}_features.json")
        with open(output_path, "w") as json_file:
            json.dump(scenario_features, json_file, indent=4)

        print(f"Saved features for {scenario} to {output_path}")

In [None]:

# Run the processing function
root_directory = "/content/drive/MyDrive/Loki_Dataset/Loki"
process_all_scenarios(root_directory)

print("Feature extraction complete.")

Saved features for scenario_000 to /content/drive/MyDrive/Loki_Dataset/Loki/output_features/scenario_000_features.json
Saved features for scenario_026 to /content/drive/MyDrive/Loki_Dataset/Loki/output_features/scenario_026_features.json
Saved features for scenario_014 to /content/drive/MyDrive/Loki_Dataset/Loki/output_features/scenario_014_features.json
Saved features for output_features to /content/drive/MyDrive/Loki_Dataset/Loki/output_features/output_features_features.json
Feature extraction complete.


## Combine Features

In [None]:
def merge_features(group_walking_file, speed_distance_file, output_file):
    """
    Merge two JSON files for a scenario based on pedestrian IDs.
    """
    # Load JSON files
    with open(group_walking_file, "r") as gw_file:
        group_walking_data = json.load(gw_file)

    with open(speed_distance_file, "r") as sd_file:
        speed_distance_data = json.load(sd_file)

    # Initialize merged data
    merged_data = {}

    # Iterate through frames in both files
    for frame_id, ped_data_gw in group_walking_data.items():
        if frame_id in speed_distance_data:
            ped_data_sd = speed_distance_data[frame_id]
            merged_frame = {}

            # Match pedestrian IDs
            for ped_id, features_gw in ped_data_gw.items():
                if ped_id in ped_data_sd:
                    features_sd = ped_data_sd[ped_id]

                    # Concatenate features
                    merged_frame[ped_id] = {
                        "group_status": features_gw["group_status"],
                        "walking_toward_vehicle": features_gw["walking_toward_vehicle"],
                        "speed": features_sd["speed"],
                        "distance": features_sd["distance"],
                        "movement_status": features_sd["movement_status"]
                    }

            # Add merged frame data
            if merged_frame:
                merged_data[frame_id] = merged_frame

    # Save merged data to a new JSON file
    with open(output_file, "w") as out_file:
        json.dump(merged_data, out_file, indent=4)

    print(f"Merged features saved to {output_file}")

In [None]:
def process_all_scenarios(group_walking_dir, speed_distance_dir, output_dir):
    """
    Process all scenarios by merging group/walking and speed/distance JSON files.
    """
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Process each scenario
    for filename in os.listdir(group_walking_dir):
        if filename.endswith("_features.json"):
            # Derive corresponding file paths
            scenario_name = filename.replace("_features.json", "")
            group_walking_file = os.path.join(group_walking_dir, filename)
            speed_distance_file = os.path.join(speed_distance_dir, f"{scenario_name}_features.json")
            output_file = os.path.join(output_dir, f"{scenario_name}_merged_features.json")

            # Only merge if both files exist
            if os.path.exists(speed_distance_file):
                merge_features(group_walking_file, speed_distance_file, output_file)
            else:
                print(f"Missing speed/distance file for scenario: {scenario_name}")

In [None]:

# Define directories
group_walking_dir = "/content/drive/MyDrive/Loki_Dataset/output_features_Group & Walking"
speed_distance_dir = "/content/drive/MyDrive/Loki_Dataset/output_features_Speed & Distance"
output_dir = "/content/drive/MyDrive/Loki_Dataset/output_merged_jsons"

# Run the merging process
process_all_scenarios(group_walking_dir, speed_distance_dir, output_dir)


Merged features saved to /content/drive/MyDrive/Loki_Dataset/output_merged_jsons/scenario_000_merged_features.json
Merged features saved to /content/drive/MyDrive/Loki_Dataset/output_merged_jsons/scenario_026_merged_features.json
Merged features saved to /content/drive/MyDrive/Loki_Dataset/output_merged_jsons/scenario_014_merged_features.json


## Multi-Head Attention

In [7]:
import os
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler
import numpy as np

In [8]:

# Multi-Head Attention Class
class MultiHeadAttention(nn.Module):
    def __init__(self, input_dim, num_heads, dropout=0.1):
        super(MultiHeadAttention, self).__init__()
        assert input_dim % num_heads == 0, "Input dimension must be divisible by the number of heads"
        self.num_heads = num_heads
        self.head_dim = input_dim // num_heads
        self.query = nn.Linear(input_dim, input_dim)
        self.key = nn.Linear(input_dim, input_dim)
        self.value = nn.Linear(input_dim, input_dim)
        self.out = nn.Linear(input_dim, input_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        batch_size, num_pedestrians, input_dim = x.shape
        Q = self.query(x).view(batch_size, num_pedestrians, self.num_heads, self.head_dim).transpose(1, 2)
        K = self.key(x).view(batch_size, num_pedestrians, self.num_heads, self.head_dim).transpose(1, 2)
        V = self.value(x).view(batch_size, num_pedestrians, self.num_heads, self.head_dim).transpose(1, 2)

        scores = torch.matmul(Q, K.transpose(-2, -1)) / torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))
        attention_weights = F.softmax(scores, dim=-1)
        weighted_values = torch.matmul(self.dropout(attention_weights), V)

        weighted_values = weighted_values.transpose(1, 2).contiguous().view(batch_size, num_pedestrians, input_dim)
        output = self.out(weighted_values)

        return output, attention_weights

In [9]:
# Save Attention Results
def save_attention_results(output, attention_weights, output_file):
    """
    Save weighted output and attention weights to a JSON file.
    """
    output_data = {
        "weighted_output": output.detach().cpu().numpy().tolist(),
        "attention_weights": attention_weights.detach().cpu().numpy().tolist()
    }
    with open(output_file, 'w') as f:
        json.dump(output_data, f, indent=4)

In [10]:
# Load and Preprocess Features from JSON
def load_features_from_json(json_file):
    """
    Load and preprocess features from a JSON file.
    """
    with open(json_file, 'r') as f:
        data = json.load(f)

    features = []

    # Collect speed and distance values for scaling
    all_speeds = []
    all_distances = []
    for frame_id, pedestrians in data.items():
        for ped_id, ped_features in pedestrians.items():
            all_speeds.append(ped_features.get("speed", 0.0))
            all_distances.append(ped_features.get("distance", 0.0))

    # Fit scalers
    speed_scaler = MinMaxScaler()
    distance_scaler = MinMaxScaler()
    speed_scaler.fit(np.array(all_speeds).reshape(-1, 1))
    distance_scaler.fit(np.array(all_distances).reshape(-1, 1))

    # Process features
    for frame_id, pedestrians in data.items():
        for ped_id, ped_features in pedestrians.items():
            group_status = ped_features.get("group_status", 0)
            walking_toward_vehicle = ped_features.get("walking_toward_vehicle", 0)
            speed = speed_scaler.transform([[ped_features.get("speed", 0.0)]])[0][0]
            distance = distance_scaler.transform([[ped_features.get("distance", 0.0)]])[0][0]
            movement_status = 1 if ped_features.get("movement_status", "Stopped") == "Moving" else 0

            # Combine features
            features.append([group_status, walking_toward_vehicle, speed, distance, movement_status])

    # Convert to tensor and add batch dimension
    features_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0)
    return features_tensor

In [11]:

# Apply Attention to Each Scenario
def process_scenarios(input_folder, output_folder, input_dim, num_heads):
    """
    Apply Multi-Head Attention on preprocessed features for each scenario JSON file and save results.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    attention_layer = MultiHeadAttention(input_dim=input_dim, num_heads=num_heads)

    for json_file in os.listdir(input_folder):
        if json_file.endswith('.json'):
            input_path = os.path.join(input_folder, json_file)
            output_path = os.path.join(output_folder, f"attention_{json_file}")

            # Preprocess features
            features = load_features_from_json(input_path)
            print(f"Processing {json_file} with features shape: {features.shape}")

            # Apply attention
            attention_output, attention_weights = attention_layer(features)

            # Save results
            save_attention_results(attention_output, attention_weights, output_path)
            print(f"Saved attention results for {json_file} to {output_path}")


In [12]:
# Example
input_folder = "/content/drive/MyDrive/Loki_Dataset/output_merged_jsons"  # Folder containing JSON files for each scenario
output_folder = "/content/drive/MyDrive/Loki_Dataset/attention_results"  # Folder to save attention results
input_dim = 5  # Number of input features (group_status, walking_toward_vehicle, scaled_speed, scaled_distance, movement_status)
num_heads = 5  # Number of attention heads

process_scenarios(input_folder, output_folder, input_dim, num_heads)


Processing scenario_000_merged_features.json with features shape: torch.Size([1, 260, 5])
Saved attention results for scenario_000_merged_features.json to /content/drive/MyDrive/Loki_Dataset/attention_results/attention_scenario_000_merged_features.json
Processing scenario_026_merged_features.json with features shape: torch.Size([1, 293, 5])
Saved attention results for scenario_026_merged_features.json to /content/drive/MyDrive/Loki_Dataset/attention_results/attention_scenario_026_merged_features.json
Processing scenario_014_merged_features.json with features shape: torch.Size([1, 1626, 5])
Saved attention results for scenario_014_merged_features.json to /content/drive/MyDrive/Loki_Dataset/attention_results/attention_scenario_014_merged_features.json


## Extract Features from Point Cloud

In [2]:
!pip install open3d

Collecting open3d
  Downloading open3d-0.18.0-cp310-cp310-manylinux_2_27_x86_64.whl.metadata (4.2 kB)
Collecting dash>=2.6.0 (from open3d)
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting configargparse (from open3d)
  Downloading ConfigArgParse-1.7-py3-none-any.whl.metadata (23 kB)
Collecting ipywidgets>=8.0.4 (from open3d)
  Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting addict (from open3d)
  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)
Collecting pyquaternion (from open3d)
  Downloading pyquaternion-0.9.9-py3-none-any.whl.metadata (1.4 kB)
Collecting Flask<3.1,>=1.0.4 (from dash>=2.6.0->open3d)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting werkzeug>=2.2.3 (from open3d)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting dash-html-components==2.0.0 (from dash>=2.6.0->open3d)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-

In [31]:
import os
import numpy as np
import json
import uuid
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import open3d as o3d
from sklearn.preprocessing import MinMaxScaler

In [32]:
# Dataset for Batch Processing
class PedestrianPointCloudDataset(Dataset):
    def __init__(self, ply_folder):
        """
        Initialize dataset with pedestrian point cloud files.
        Args:
            ply_folder (str): Path to the folder containing .ply files for pedestrians.
        """
        self.ply_files = [os.path.join(ply_folder, f) for f in os.listdir(ply_folder) if f.endswith(".ply")]

    def __len__(self):
        return len(self.ply_files)

    def __getitem__(self, idx):
        """
        Load and normalize a pedestrian point cloud.
        Args:
            idx (int): Index of the pedestrian .ply file.
        Returns:
            tuple: (file_name, normalized_points)
        """
        file_name = self.ply_files[idx]
        points = load_and_normalize_ply(file_name)
        return file_name, points

In [33]:
# Load and Normalize Points from .ply
def load_and_normalize_ply(ply_file):
    """
    Load and normalize 3D points from a .ply file.
    Args:
        ply_file (str): Path to the .ply file.
    Returns:
        np.ndarray: Normalized 3D points of shape (N, 3).
    """
    # Load the .ply file using Open3D
    point_cloud = o3d.io.read_point_cloud(ply_file)
    points = np.asarray(point_cloud.points)  # Shape: (N, 3)

    # Normalize points to [0, 1] using Min-Max scaling
    scaler = MinMaxScaler()
    normalized_points = scaler.fit_transform(points)

    return normalized_points

In [34]:
# Updated PointNet Feature Extractor
class PointNetFeatureExtractor(nn.Module):
    def __init__(self, input_dim=3, output_dim=64):
        super(PointNetFeatureExtractor, self).__init__()
        self.mlp1 = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 256)
        )
        self.global_pool = nn.AdaptiveMaxPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim)
        )

    def forward(self, points):
        """
        Args:
            points (torch.Tensor): Tensor of shape [B, N, 3] (Batch, Num Points, Features)
        Returns:
            torch.Tensor: Tensor of shape [B, output_dim] (Aggregated Features for each pedestrian)
        """
        B, N, _ = points.shape
        x = self.mlp1(points)  # Shape: [B, N, 256]
        x = x.transpose(1, 2)  # Shape: [B, 256, N]
        x = self.global_pool(x).squeeze(-1)  # Shape: [B, 256]
        x = self.fc(x)  # Shape: [B, output_dim]
        return x


In [35]:
# Batch Processing for Feature Extraction
def extract_features_in_batches(ply_folder, model, batch_size=16):
    """
    Extract features from pedestrian .ply files using PointNet in batches.
    Args:
        ply_folder (str): Path to the folder containing .ply files.
        model (PointNetFeatureExtractor): Trained PointNet model.
        batch_size (int): Number of pedestrians per batch.
    Returns:
        dict: Dictionary of pedestrian IDs (file names) and their extracted features.
    """
    # Create dataset and dataloader
    dataset = PedestrianPointCloudDataset(ply_folder)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

    # Extract features
    features = {}
    model.eval()

    with torch.no_grad():
        for batch in dataloader:
            file_names, batch_points = batch
            batch_points_tensor = torch.stack([torch.tensor(points, dtype=torch.float32) for points in batch_points])
            features_tensor = model(batch_points_tensor)  # Shape: [batch_size, output_dim]

            # Map features to pedestrian IDs (file names)
            for file_name, feature_vector in zip(file_names, features_tensor):
                features[file_name] = feature_vector.numpy()

    return features

In [36]:
# Custom Collate Function for Dataloader
def collate_fn(batch):
    """
    Custom collate function to handle variable number of points per pedestrian.
    Args:
        batch (list): List of tuples (file_name, points).
    Returns:
        tuple: File names and padded points tensors.
    """
    file_names = [item[0] for item in batch]
    points_list = [item[1] for item in batch]

    # Find the maximum number of points in the batch
    max_points = max(points.shape[0] for points in points_list)

    # Pad all points to the same size
    padded_points = [np.pad(points, ((0, max_points - points.shape[0]), (0, 0)), mode='constant') for points in points_list]
    return file_names, padded_points

In [37]:
def save_features_by_scenario(features, output_directory):
    """
    Save extracted features to individual JSON files for each scenario.
    Args:
        features (dict): Dictionary of pedestrian IDs (file names) and their extracted features.
        output_directory (str): Path to the directory to save the scenario JSON files.
    """
    # Ensure output directory exists
    os.makedirs(output_directory, exist_ok=True)

    # Organize features by scenario
    scenario_dict = {}
    for file_name, feature_vector in features.items():
        # Extract scenario identifier from file_name
        base_name = os.path.basename(file_name)
        scenario_id = base_name.split('_')[0]  # First three digits
        frame_number = base_name.split('_')[1]  # Frame number

        # Initialize scenario entry if not already present
        if scenario_id not in scenario_dict:
            scenario_dict[scenario_id] = {}

        # Store the frame's features
        scenario_dict[scenario_id][f"frame_{frame_number}"] = feature_vector.tolist()

    # Save each scenario's data into separate JSON files
    for scenario_id, frames_data in scenario_dict.items():
        output_file = os.path.join(output_directory, f'scenario_{scenario_id}.json')
        with open(output_file, 'w') as json_file:
            json.dump(frames_data, json_file, indent=4)
        print(f"Scenario {scenario_id} features saved to {output_file}")

In [38]:
# Updated batch feature extraction with saving by scenario
def extract_and_save_features_by_scenario(ply_folder, model, batch_size, output_directory):
    """
    Extract features from pedestrian .ply files using PointNet and save by scenario.
    Args:
        ply_folder (str): Path to the folder containing .ply files.
        model (PointNetFeatureExtractor): Trained PointNet model.
        batch_size (int): Number of pedestrians per batch.
        output_directory (str): Directory to save the scenario JSON files.
    """
    # Extract features
    features = extract_features_in_batches(ply_folder, model, batch_size)

    # Save features by scenario
    save_features_by_scenario(features, output_directory)

In [39]:
# Initialize the model
pointnet_model = PointNetFeatureExtractor(input_dim=3, output_dim=64)

# Path to the folder containing .ply files
ply_folder = "/content/drive/MyDrive/Loki_Dataset/saved_pedestrians"

# Output JSON file path
scenario_output_directory = "/content/drive/MyDrive/Loki_Dataset/extracted_features_point_cloud"

# Extract features and save by scenario
extract_and_save_features_by_scenario(ply_folder, pointnet_model, batch_size=2, output_directory=scenario_output_directory)

Scenario 000 features saved to /content/drive/MyDrive/Loki_Dataset/extracted_features_point_cloud/scenario_000.json
Scenario 014 features saved to /content/drive/MyDrive/Loki_Dataset/extracted_features_point_cloud/scenario_014.json
Scenario 026 features saved to /content/drive/MyDrive/Loki_Dataset/extracted_features_point_cloud/scenario_026.json
