# Steps to run BEFORE processing KITTI raw data with yolo-slam

##  1. Calculating the calibration matrix

The calibration matrix provides the camera intrinsic parameters (focal lengths and principal point)
that must be filled into the settings files (.yaml) in the ORB_SLAM repository.

In [2]:
import pandas as pd
import numpy as np

In [47]:
# Load the calibration file; the first field of every line (e.g. "P0:") becomes the row label.
path = '/home/brwei01/Data/data_tracking/calibration/0014.txt'
calibration = (
    pd.read_csv(path, delimiter=' ', header=None, index_col=0)
    # Columns 13 and 14 are discarded so that only the value columns 1-12 remain.
    .drop(columns=[13, 14])
)
calibration

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
P0:,707.0493,0.0,604.0814,0.0,0.0,707.0493,180.5066,0.0,0.0,0.0,1.0,0.0
P1:,707.0493,0.0,604.0814,-379.7842,0.0,707.0493,180.5066,0.0,0.0,0.0,1.0,0.0
P2:,707.0493,0.0,604.0814,45.75831,0.0,707.0493,180.5066,-0.345416,0.0,0.0,1.0,0.004981
P3:,707.0493,0.0,604.0814,-334.1081,0.0,707.0493,180.5066,2.33066,0.0,0.0,1.0,0.003201
R_rect,0.999913,0.010093,-0.008512,-0.010127,0.999941,-0.004038,0.008471,0.004124,0.999956,,,
Tr_velo_cam,0.006928,-0.999972,-0.002758,-0.024577,-0.001163,0.00275,-0.999996,-0.061272,0.999975,0.006931,-0.001144,-0.332103
Tr_imu_velo,0.999998,0.000755,-0.002036,-0.808676,-0.000785,0.99989,-0.014823,0.319556,0.002024,0.014825,0.999888,-0.799723


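P0, P1, P2 and P3 are the 3x4 camera projection matrices: P0 and P1 belong to the two grayscale cameras, P2 and P3 to the two RGB cameras. R_rect is the rectifying rotation. Tr_velo_cam is a 3x4 rigid-body transform (rotation plus translation, not just a translation vector) that maps points from Velodyne sensor coordinates into the camera coordinate system; Tr_imu_velo does the same from IMU coordinates into Velodyne coordinates.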



In [48]:
# Row 0 of the calibration table (P0): its 12 values form a 3x4 projection matrix.
P0 = calibration.iloc[0].to_numpy(copy=True).reshape(3, 4)
P0

array([[707.0493,   0.    , 604.0814,   0.    ],
       [  0.    , 707.0493, 180.5066,   0.    ],
       [  0.    ,   0.    ,   1.    ,   0.    ]])

In [49]:
# Row 1 of the calibration table (P1): its 12 values form a 3x4 projection matrix.
P1 = calibration.iloc[1].to_numpy(copy=True).reshape(3, 4)
P1

array([[ 707.0493,    0.    ,  604.0814, -379.7842],
       [   0.    ,  707.0493,  180.5066,    0.    ],
       [   0.    ,    0.    ,    1.    ,    0.    ]])

In [38]:
# Display arrays with 4 decimal places and without scientific notation.
# This is a global NumPy setting, so it also affects every later array display.
np.set_printoptions(precision=4, suppress=True)
# Row 2 of the calibration table (P2): its 12 values form a 3x4 projection matrix.
P2 = calibration.iloc[2].to_numpy(copy=True).reshape(3, 4)
P2

array([[718.3351,   0.    , 600.3891,  44.5038],
       [  0.    , 718.3351, 181.5122,  -0.5951],
       [  0.    ,   0.    ,   1.    ,   0.0026]])

In [39]:
# Row 3 of the calibration table (P3): its 12 values form a 3x4 projection matrix.
P3 = calibration.iloc[3].to_numpy(copy=True).reshape(3, 4)
P3

array([[ 718.3351,    0.    ,  600.3891, -336.3147],
       [   0.    ,  718.3351,  181.5122,    3.1599],
       [   0.    ,    0.    ,    1.    ,    0.0053]])

## 2.Converting timestamps of raw data to double data type

in kitti raw data, the time stamps are:
2011-09-26 13:04:32.345808896
2011-09-26 13:04:32.449188864
2011-09-26 13:04:32.552435968

in VO data, the time stamps are:
0.000000e+00
1.033914e-01
2.071056e-01
3.102615e-01

In [10]:
from datetime import datetime
import numpy as np

In [48]:
# input_path: raw KITTI timestamps file that is read by the next cell.
# output_path: file the converted relative timestamps are written to.
input_path = '/home/brwei01/Data/data_tracking/sequences/0000/timestamps.txt' 
output_path = '/home/brwei01/Data/data_tracking/sequences/0000/times.txt' 

In [49]:
# Read the raw timestamp strings, one per line.
# Lines are stripped and blank lines (e.g. an empty last line) are skipped so
# that strptime below never sees a newline or an empty string.
# NOTE: this cell needs `datetime` to be the class (from datetime import datetime).
# A later cell runs `import datetime`, which rebinds the name to the module;
# re-run the import cell above before re-running this one out of order.
with open(input_path) as f:
    timestamp_strs = [line.strip() for line in f if line.strip()]

# Convert timestamp strings to datetime objects.
# ts[:26] keeps "YYYY-MM-DD HH:MM:SS." plus six fractional digits: %f parses at
# most microseconds, so the trailing nanosecond digits are dropped.
timestamps = [datetime.strptime(ts[:26], "%Y-%m-%d %H:%M:%S.%f") for ts in timestamp_strs]
# Seconds elapsed since the first timestamp
time_deltas = [(ts - timestamps[0]).total_seconds() for ts in timestamps]
# Format in scientific notation with six decimals (e.g. 1.033914e-01)
time_series = [format(td, ".6e") for td in time_deltas]

# Write the resulting time series, one value per line
with open(output_path, 'w') as f:
    f.writelines(value + '\n' for value in time_series)

## 2.1 generate timestamps data if not available for the sequence

In [1]:
import os
import datetime

In [7]:
# Change these to the directories of the sequence to process.
# folder_path: directory whose files are counted (one timestamp per file).
# output_file_path: times.txt file that will be written.
# folder_path = '/home/brwei01/Data/data_tracking/sequences/0005/image_0'
# output_file_path = '/home/brwei01/Data/data_tracking/sequences/0005/times.txt'
folder_path = '/media/brwei01/Extreme_SSD/data_tracking/sequences/0007/image_0'
output_file_path = '/media/brwei01/Extreme_SSD/data_tracking/sequences/0007/times.txt'

In [8]:
import os
import numpy as np

def generate_timestamps(folder_path, output_file_path, fps=10.0):
    """Write evenly spaced timestamps, one per file found in ``folder_path``.

    Args:
        folder_path: Directory whose regular files are counted; one timestamp
            is generated per file. Sub-directories are not counted.
        output_file_path: Text file to (over)write. Each line holds one
            timestamp in scientific notation with six decimals.
        fps: Number of timestamps per second. Defaults to 10, the rate that
            was previously hard-coded.

    Returns:
        The list of generated timestamps (floats, in seconds, starting at 0.0).

    Raises:
        ValueError: If ``fps`` is not positive.
    """
    if fps <= 0:
        raise ValueError("fps must be positive")

    # One timestamp per regular file in the folder
    num_timestamps = sum(
        1
        for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )

    time_interval = 1.0 / fps
    timestamps = [i * time_interval for i in range(num_timestamps)]

    with open(output_file_path, 'w') as output_file:
        # Write each timestamp in scientific notation
        output_file.writelines(f"{timestamp:.6e}\n" for timestamp in timestamps)

    # Returned so that callers which assign the result get the values instead of None.
    return timestamps

# Generate the timestamps file for the folder_path / output_file_path set in the cell above.
timestamps = generate_timestamps(folder_path, output_file_path)


## 3. Create Ground Truth poses for KITTI Raw IMU

Run the cells below to build ground-truth poses from the OXTS (IMU) records. The resulting file is used to calculate the scale factor and to run evaluations with the evo package.

In [93]:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import sys 
import os
sys.path.append('../src/')

from data_utils import *

In [94]:
# Replace these with the actual locations of your OXTS data:
DATA_FOLDER_PATH = '/home/brwei01/Data/data_tracking/oxts/0005'
OXTS_FILE_PATH = '/home/brwei01/Data/data_tracking/oxts/0005.txt'

# Replace this with the times.txt produced by the timestamp step (section 2 / 2.1) above
TIMES_FILE_PATH = '/home/brwei01/Data/data_tracking/sequences/0005/times.txt'

# One name per value of an OXTS record, in file order. The names are taken from:
# https://github.com/bostondiditeam/kitti/blob/master/resources/devkit_object/readme.txt
IMU_COLUMN_NAMES = [
    'lat', 'lon', 'alt',
    'roll', 'pitch', 'yaw',
    'vn', 've', 'vf', 'vl', 'vu',
    'ax', 'ay', 'az', 'af', 'al', 'au',
    'wx', 'wy', 'wz', 'wf', 'wl', 'wu',
    'posacc', 'velacc',
    'navstat', 'numsats', 'posmode', 'velmode', 'orimode',
]

In [95]:
def euler_to_quaternion(yaw, pitch, roll):
    """Convert yaw, pitch and roll angles (radians) into a quaternion.

    Returns:
        A 4-tuple ``(q1, q2, q3, q4)``. ``q1`` is the component built from the
        product of the three half-angle cosines (the scalar part); ``q2`` to
        ``q4`` are the remaining three components.
    """
    half_yaw, half_pitch, half_roll = yaw * 0.5, pitch * 0.5, roll * 0.5

    cy, sy = np.cos(half_yaw), np.sin(half_yaw)
    cp, sp = np.cos(half_pitch), np.sin(half_pitch)
    cr, sr = np.cos(half_roll), np.sin(half_roll)

    return (
        cy * cp * cr + sy * sp * sr,
        cy * cp * sr - sy * sp * cr,
        sy * cp * sr + cy * sp * cr,
        sy * cp * cr - cy * sp * sr,
    )

def read_times(times_file_path):
    """Return every line of ``times_file_path`` as a list of strings.

    The lines are returned exactly as read, so each one still carries its
    line ending.
    """
    with open(times_file_path) as times_file:
        return list(times_file)

In [96]:
prev_imu_data = None
poses = []
fps = 10

# Read the data from the single file (one whitespace-separated OXTS record per line)
with open(OXTS_FILE_PATH, 'r') as f:
    lines = f.readlines()

# The timestamps do not change while iterating, so they are loaded once here
# instead of re-reading the file for every frame.
times = read_times(TIMES_FILE_PATH)

for frame, line in enumerate(lines):
    imu_data = list(map(float, line.split()))
    # One-row DataFrame: every quantity derived from it below is a length-1 Series,
    # which is why the updated poses end up as (8, 1) object arrays.
    imu_data = pd.DataFrame([imu_data], columns=IMU_COLUMN_NAMES)

    if prev_imu_data is not None:
        # 0.1 is presumably the frame period in seconds (1 / fps) - TODO confirm
        displacement = 0.1 * np.linalg.norm(imu_data[['vf', 'vl', 'vu']])
        yaw_change = imu_data.yaw - prev_imu_data.yaw
        pitch_change = imu_data.pitch - prev_imu_data.pitch
        roll_change = imu_data.roll - prev_imu_data.roll

        # Depends only on the frame-to-frame angle changes, so it is computed
        # once per frame rather than once per stored pose.
        q1_new, q2_new, q3_new, q4_new = euler_to_quaternion(yaw_change, pitch_change, roll_change)

        # Re-express every earlier pose relative to the current frame.
        for i in range(len(poses)):
            _time, x0, y0, z0, q1, q2, q3, q4 = poses[i]

            # Calculate new x, y, and z coordinates based on yaw_change, pitch_change, and vf, vl, vu
            x1 = x0 * np.cos(yaw_change) + y0 * np.sin(yaw_change) - displacement * np.cos(pitch_change)
            y1 = -x0 * np.sin(yaw_change) + y0 * np.cos(yaw_change) - displacement * np.sin(pitch_change)
            # NOTE(review): 0.1 * fps equals 1.0 for fps = 10, so z changes by the
            # full vertical velocity per frame instead of velocity * 0.1 s as used
            # for `displacement`. Left unchanged - confirm the intended time step.
            z1 = z0 - imu_data.vu * 0.1 * fps

            time_curr = pd.Series(times[i].strip())

            # NOTE(review): the quaternion components are multiplied element-wise,
            # which is not a quaternion (Hamilton) product; together with the
            # [0, 1, 0, 0] start value the stored orientation shrinks towards zero
            # (see the recorded output). Left unchanged - needs a domain decision.
            poses[i] = np.array([time_curr, x1, y1, z1, q1 * q1_new, q2 * q2_new, q3 * q3_new, q4 * q4_new])

    # Start the new pose at the origin, stamped with this frame's own time.
    # Previously the placeholder time 0 was only replaced when a later frame
    # updated the pose, so the final pose was written with timestamp 0.
    # dtype=object keeps the numeric fields numeric next to the string timestamp.
    start_time = times[frame].strip() if frame < len(times) else 0
    poses.append(np.array([start_time, 0, 0, 0, 0, 1, 0, 0], dtype=object))
    prev_imu_data = imu_data


In [97]:
poses[:5]

[array([['0.000000e+00'],
        [-349.5897898643835],
        [20.84165496367458],
        [3.0511870642697407],
        [0.0],
        [0.0],
        [-0.0],
        [-0.0]], dtype=object),
 array([['1.000000e-01'],
        [-347.84833605057133],
        [20.570776017260947],
        [3.047149848357025],
        [0.0],
        [-0.0],
        [-0.0],
        [-0.0]], dtype=object),
 array([['2.000000e-01'],
        [-346.1100856496289],
        [20.300605426429392],
        [3.027163472601459],
        [0.0],
        [0.0],
        [-0.0],
        [-0.0]], dtype=object),
 array([['3.000000e-01'],
        [-344.3744682670065],
        [20.034673410933983],
        [3.000036595129317],
        [0.0],
        [-0.0],
        [-0.0],
        [-0.0]], dtype=object),
 array([['4.000000e-01'],
        [-342.6417465765301],
        [19.770920973280727],
        [2.954944139858982],
        [0.0],
        [0.0],
        [-0.0],
        [-0.0]], dtype=object)]

In [98]:
# A disabled variant of the pose computation used to be kept here inside a
# string literal. It read one OXTS file per frame from DATA_FOLDER_PATH via
# read_imu() instead of parsing the single OXTS_FILE_PATH file. The string was
# never executed and its text had become corrupted, so it has been removed.
poses[:5]

[array([['0.000000e+00'],
        [-349.5897898643835],
        [20.84165496367458],
        [3.0511870642697407],
        [0.0],
        [0.0],
        [-0.0],
        [-0.0]], dtype=object),
 array([['1.000000e-01'],
        [-347.84833605057133],
        [20.570776017260947],
        [3.047149848357025],
        [0.0],
        [-0.0],
        [-0.0],
        [-0.0]], dtype=object),
 array([['2.000000e-01'],
        [-346.1100856496289],
        [20.300605426429392],
        [3.027163472601459],
        [0.0],
        [0.0],
        [-0.0],
        [-0.0]], dtype=object),
 array([['3.000000e-01'],
        [-344.3744682670065],
        [20.034673410933983],
        [3.000036595129317],
        [0.0],
        [-0.0],
        [-0.0],
        [-0.0]], dtype=object),
 array([['4.000000e-01'],
        [-342.6417465765301],
        [19.770920973280727],
        [2.954944139858982],
        [0.0],
        [0.0],
        [-0.0],
        [-0.0]], dtype=object)]

In [99]:
output_path = '/home/brwei01/Data/data_tracking/sequences/0005/gt_0005.txt'

# Each pose holds 8 values (timestamp, x, y, z, q1, q2, q3, q4), stored either
# as a flat array or as an (8, 1) object array. The values are flattened and
# formatted one by one instead of stripping brackets and quotes from NumPy's
# printed array text, whose exact form depends on the NumPy version.
# NOTE(review): q1 is the scalar part as returned by euler_to_quaternion;
# confirm that this component order is what the evaluation tool expects.
with open(output_path, 'w') as f:
    for pose in poses:
        fields = [str(value) for value in np.asarray(pose, dtype=object).ravel()]
        f.write(' '.join(fields) + '\n')

# These steps are to run AFTER running KITTI RAW/tracking data on the slam 

## 1. Formatting console log(detection box and distance) from SLAM
Each formatted record has 7 space-separated entries: 'frame_number', 'track_id', 'bbox_left', 'bbox_top', 'bbox_right', 'bbox_bottom', 'distance_SLAM'

In [129]:
import re
import numpy as np

In [130]:
# setting file paths
# input_path: raw SLAM console log to parse; output_path: formatted records file to write

input_path = '/home/brwei01/Dev/SLAM_OFFLINE/COMP0130_22-23_Topic_03/Coursework_03/Results/0002/console_log_0002.txt' 
output_path = '/home/brwei01/Dev/SLAM_OFFLINE/COMP0130_22-23_Topic_03/Coursework_03/Results/0002/console_log_formatted_0002.txt' 

In [131]:
import re

def _parse_detection_log(lines):
    """Extract one record per "min distance to camera:" line of a SLAM console log.

    Args:
        lines: Iterable of log lines (for example an open text file).

    Returns:
        A list of dicts with the keys "frame_number" (str), "track_id",
        "bounding_box" (``[left, top, right, bottom]`` as ints) and "distance"
        (float). Frame number, track id and bounding box are the most recent
        values seen before the distance line. Distance lines that appear before
        any frame number or bounding box has been seen are skipped (and counted
        in a printed notice), because no complete record can be built from them.
    """
    # Matches "[left, top][right, bottom]" with non-negative integer coordinates
    bbox_pattern = re.compile(r'\[(\d+), (\d+)\]\[(\d+), (\d+)\]')

    # Most recent values seen so far; they carry over from line to line
    frame_number = None
    track_id = None
    bounding_box = None

    records = []
    skipped = 0
    for line in lines:
        if line.startswith("Processing Image NO."):
            # Frame number is the text after the last ':'
            frame_number = line.split(":")[-1].strip()
        elif line.startswith("This is track_id:"):
            # Track id is the integer after the last ':'
            track_id = int(line.split(":")[-1])
        elif line.startswith("This is bounding box:"):
            # Extract bounding box values using regex; a non-matching line
            # leaves the previous bounding box in place
            match = bbox_pattern.search(line)
            if match:
                bounding_box = [int(group) for group in match.groups()]
        elif line.startswith("min distance to camera:"):
            min_distance = float(line.split(":")[-1])
            if frame_number is None or bounding_box is None:
                skipped += 1
                continue
            records.append({
                "frame_number": frame_number,
                "track_id": track_id,
                "bounding_box": bounding_box,
                "distance": min_distance
            })

    if skipped:
        print(f"Skipped {skipped} distance line(s) that had no frame number or bounding box yet")
    return records


# Parse the console log into a list of dictionaries
with open(input_path, 'r') as input_file:
    result = _parse_detection_log(input_file)

# Format every record as "frame track_id left top right bottom distance"
output = []
for entry in result:
    left, top, right, bottom = entry['bounding_box']
    record = f"{entry['frame_number']} {entry['track_id']} {left} {top} {right} {bottom} {entry['distance']}\n"
    output.append(record)



In [132]:
output[:20]

['000031 1 714 170 732 185 72.384323\n',
 '000032 1 715 171 733 185 71.229202\n',
 '000033 1 717 173 736 188 68.798592\n',
 '000035 1 718 172 737 188 67.661163\n',
 '000037 1 718 170 738 187 67.228081\n',
 '000038 1 718 170 738 187 67.228081\n',
 '000039 1 718 173 738 190 62.383438\n',
 '000040 1 718 173 738 190 62.383438\n',
 '000041 1 717 173 738 190 60.6273\n',
 '000042 1 717 172 738 191 58.356071\n',
 '000043 1 717 172 738 191 56.143002\n',
 '000044 1 717 172 738 191 56.143002\n',
 '000046 1 716 175 739 195 53.314274\n',
 '000047 1 716 175 739 195 53.314274\n',
 '000048 1 716 176 739 196 52.394737\n',
 '000051 1 713 175 738 197 48.514942\n',
 '000055 1 710 175 736 199 47.29599\n',
 '000056 1 708 174 736 198 44.606377\n',
 '000056 1 708 174 736 198 44.606377\n',
 '000057 1 705 174 734 200 42.226742\n']

In [133]:
# Remove duplicate records, keeping the first occurrence of each in its
# original position (dict keys preserve insertion order)
unique_records = dict.fromkeys(output, True)

# The de-duplicated records as a list
unique_records_list = list(unique_records)

# Print the unique records
print(unique_records_list[:20])

['000031 1 714 170 732 185 72.384323\n', '000032 1 715 171 733 185 71.229202\n', '000033 1 717 173 736 188 68.798592\n', '000035 1 718 172 737 188 67.661163\n', '000037 1 718 170 738 187 67.228081\n', '000038 1 718 170 738 187 67.228081\n', '000039 1 718 173 738 190 62.383438\n', '000040 1 718 173 738 190 62.383438\n', '000041 1 717 173 738 190 60.6273\n', '000042 1 717 172 738 191 58.356071\n', '000043 1 717 172 738 191 56.143002\n', '000044 1 717 172 738 191 56.143002\n', '000046 1 716 175 739 195 53.314274\n', '000047 1 716 175 739 195 53.314274\n', '000048 1 716 176 739 196 52.394737\n', '000051 1 713 175 738 197 48.514942\n', '000055 1 710 175 736 199 47.29599\n', '000056 1 708 174 736 198 44.606377\n', '000057 1 705 174 734 200 42.226742\n', '000058 1 705 174 734 200 42.226742\n']


In [134]:
# Save the de-duplicated records; every record already ends with a newline
with open(output_path, 'w') as output_f:
    output_f.writelines(unique_records_list)

## 2. Formatting console log(detection box and distance) from SLAM (With YOLACT MASK)
the 6 entries are : 'frame_number', 'mask_left', 'bbox_top', 'bbox_right', 'bbox_bottom', 'distance_SLAM'

In [44]:
import re
import numpy as np

In [63]:
# setting file paths
# input_path: raw SLAM console log to parse; output_path: formatted records file to write

input_path = '/home/brwei01/Dev/SLAM_MASK/COMP0130_22-23_Topic_03/Coursework_03/Results/console_log.txt' 
output_path = '/home/brwei01/Dev/SLAM_MASK/COMP0130_22-23_Topic_03/Coursework_03/Results/console_log_formatted_mask.txt' 

In [64]:
# result collects one dictionary per "min distance to camera:" line;
# output collects the formatted text records built from them
result = []
output = []

with open(input_path, 'r') as input_file:
    # Most recent values seen so far; they carry over from line to line
    frame_number = None
    bounding_box = None
    min_distance = None

    for line in input_file:
        # Text after the last ':' on the line
        value = line.split(":")[-1]

        if line.startswith("Processing Image NO."):
            frame_number = value.strip()
            continue

        if line.startswith("corresponding mask area:"):
            # Whitespace-separated integers describing the mask area
            bounding_box = [int(token) for token in value.strip().split()]
            continue

        if line.startswith("min distance to camera:"):
            min_distance = float(value)
            # A distance line completes a record
            result.append({
                "frame_number": frame_number,
                "bounding_box": bounding_box,
                "distance": min_distance
            })

# Format every record as "frame v0 v1 v2 v3 distance" using the first four mask values
output.extend(
    f"{entry['frame_number']} {entry['bounding_box'][0]} {entry['bounding_box'][1]} {entry['bounding_box'][2]} {entry['bounding_box'][3]} {entry['distance']}\n"
    for entry in result
)


In [65]:
output[:20]

['0000000014 280 158 421 259 4.397029\n',
 '0000000015 280 164 420 259 3.56781\n',
 '0000000016 280 164 420 259 3.56781\n',
 '0000000017 783 169 881 332 12.253211\n',
 '0000000017 284 158 416 256 5.204852\n',
 '0000000018 286 161 427 257 4.98418\n',
 '0000000019 207 195 252 215 39.027664\n',
 '0000000019 285 166 428 263 4.763433\n',
 '0000000020 118 209 159 222 43.042542\n',
 '0000000020 288 177 406 267 4.722566\n',
 '0000000020 219 201 266 216 39.443771\n',
 '0000000021 226 203 275 227 42.798298\n',
 '0000000021 288 178 438 273 4.426696\n',
 '0000000021 289 183 405 270 4.217901\n',
 '0000000022 732 181 833 347 16.328762\n',
 '0000000022 243 202 294 224 34.429169\n',
 '0000000022 297 178 406 266 5.935854\n',
 '0000000023 524 177 790 226 13.669338\n',
 '0000000024 307 175 407 261 4.879364\n',
 '0000000025 307 175 409 260 8.702667\n']

In [66]:
# Remove duplicate records, keeping the first occurrence of each in its
# original position (dict keys preserve insertion order)
unique_records = dict.fromkeys(output, True)

# The de-duplicated records as a list
unique_records_list = list(unique_records)

# Print the unique records
print(unique_records_list[:20])

['0000000014 280 158 421 259 4.397029\n', '0000000015 280 164 420 259 3.56781\n', '0000000016 280 164 420 259 3.56781\n', '0000000017 783 169 881 332 12.253211\n', '0000000017 284 158 416 256 5.204852\n', '0000000018 286 161 427 257 4.98418\n', '0000000019 207 195 252 215 39.027664\n', '0000000019 285 166 428 263 4.763433\n', '0000000020 118 209 159 222 43.042542\n', '0000000020 288 177 406 267 4.722566\n', '0000000020 219 201 266 216 39.443771\n', '0000000021 226 203 275 227 42.798298\n', '0000000021 288 178 438 273 4.426696\n', '0000000021 289 183 405 270 4.217901\n', '0000000022 732 181 833 347 16.328762\n', '0000000022 243 202 294 224 34.429169\n', '0000000022 297 178 406 266 5.935854\n', '0000000023 524 177 790 226 13.669338\n', '0000000024 307 175 407 261 4.879364\n', '0000000025 307 175 409 260 8.702667\n']


In [67]:
# Save the de-duplicated records; every record already ends with a newline
with open(output_path, 'w') as output_f:
    output_f.writelines(unique_records_list)

In [85]:
import os

def rename_files_in_folder(folder_path):
    """Rename the files in ``folder_path`` to ``000000.png``, ``000001.png``, ...

    Files are numbered in sorted-name order. Only regular files are renamed;
    sub-directories are left untouched. The renaming is done in two passes via
    temporary names, so a new name can never overwrite a file that is still
    waiting to be renamed.

    Args:
        folder_path: Directory whose files are renamed in place.

    Returns:
        The number of files that were renamed.
    """
    entries = os.listdir(folder_path)

    # Sort the regular files in their original name order
    files = sorted(
        name for name in entries
        if os.path.isfile(os.path.join(folder_path, name))
    )

    # Pick a temporary-name prefix that no existing entry starts with
    staging_prefix = ".renaming_"
    while any(name.startswith(staging_prefix) for name in entries):
        staging_prefix += "_"

    # Pass 1: move every file out of the way under a temporary name
    staged_paths = []
    for i, filename in enumerate(files):
        staged_path = os.path.join(folder_path, f"{staging_prefix}{i:06d}")
        os.rename(os.path.join(folder_path, filename), staged_path)
        staged_paths.append(staged_path)

    # Pass 2: give each file its final name with leading zeros
    for i, staged_path in enumerate(staged_paths):
        os.rename(staged_path, os.path.join(folder_path, f"{i:06d}.png"))

    print(f"Renamed {len(files)} files in '{folder_path}'.")
    return len(files)


# Example usage:
folder_path = "/home/brwei01/Data/data_tracking/sequences/0000/image_0"
# The function defined above is named rename_files_in_folder; the previous call
# to `rename_files` referred to a name that is not defined in the visible code.
rename_files_in_folder(folder_path)


Renamed 154 files in '/home/brwei01/Data/data_tracking/sequences/0000/image_0'.
