# VO Pipeline
Vision Algorithms for Mobile Robotics | Fall 2025 <br>
David Jensen, Alessandro Pirini, Matteo Rubini

## Imports

### Libraries

In [10]:
import os
from glob import glob

import cv2
import skimage

import numpy as np

import matplotlib as plt

### Data
_Ensure that all datasets have been downloaded and unzipped into their respective folders_

In [16]:
# Define dataset paths
# (Set these variables before running)
kitti_path = "kitti/kitti05/kitti"
malaga_path = "malaga/malaga-urban-dataset-extract-07"
parking_path = "parking/parking"
# own_dataset_path = "/path/to/own_dataset"

# NOTE: DATASET is assigned in the "Parameters" cell further down; run that
# cell before this one.
#
# For the selected dataset this cell sets:
#   img_dir      - folder containing the frames
#   images       - frame paths, sorted by file name (glob itself returns them
#                  in arbitrary order, so images[0] would otherwise not be
#                  guaranteed to be the first frame)
#   last_frame   - index of the last frame
#   K            - 3x3 camera intrinsics matrix
#   ground_truth - two columns of the poses file (KITTI and Parking only)
if DATASET == 0:
    assert 'kitti_path' in locals(), "You must define kitti_path"
    img_dir = os.path.join(kitti_path, '05/image_0')
    images = sorted(glob(os.path.join(img_dir, '*.png')))
    last_frame = 4540
    K = np.array([
        [7.18856e+02, 0, 6.071928e+02],
        [0, 7.18856e+02, 1.852157e+02],
        [0, 0, 1]
    ])
    ground_truth = np.loadtxt(os.path.join(kitti_path, 'poses', '05.txt'))
    # same as MATLAB(:, [end-8 end])
    # presumably the x and z translation of a row-major 3x4 pose - TODO confirm
    ground_truth = ground_truth[:, [-9, -1]]
elif DATASET == 1:
    assert 'malaga_path' in locals(), "You must define malaga_path"
    img_dir = os.path.join(malaga_path, 'malaga-urban-dataset-extract-07_rectified_800x600_Images')
    images = sorted(glob(os.path.join(img_dir, '*.png')))
    last_frame = len(images)
    K = np.array([
        [621.18428, 0, 404.0076],
        [0, 621.18428, 309.05989],
        [0, 0, 1]
    ])
elif DATASET == 2:
    assert 'parking_path' in locals(), "You must define parking_path"
    # Frames of the parking dataset live under parking_path, not kitti_path.
    # NOTE(review): assumes the frames are in the 'images' subfolder - confirm
    # against the unzipped dataset layout.
    img_dir = os.path.join(parking_path, 'images')
    images = sorted(glob(os.path.join(img_dir, '*.png')))
    last_frame = 598
    K = np.loadtxt(os.path.join(parking_path, 'K.txt'), delimiter=",", usecols=(0, 1, 2))
    ground_truth = np.loadtxt(os.path.join(parking_path, 'poses.txt'))
    ground_truth = ground_truth[:, [-9, -1]]
elif DATASET == 3:
    # Own Dataset
    # TODO: define your own dataset and load K obtained from calibration of own camera
    assert 'own_dataset_path' in locals(), "You must define own_dataset_path"

else:
    raise ValueError("Invalid dataset index")

## Parameters

### Parameters for all datasets

In [15]:
# Dataset -> 0: KITTI, 1: Malaga, 2: Parking, 3: Own Dataset
DATASET = 0

# Index of the frame used as the second bootstrapping keyframe
# (the first keyframe is frame 0)
KITTI_BS_KF = 5
MALAGA_BS_KF = 5
PARKING_BS_KF = 5
CUSTOM_BS_KF = 5

# Grid used to split the image into cells for feature detection
KITTI_ST_ROWS = 2
KITTI_ST_COLS = 4
MALAGA_ST_ROWS = 2
MALAGA_ST_COLS = 4
PARKING_ST_ROWS = 2
PARKING_ST_COLS = 4
CUSTOM_ST_ROWS = 2
CUSTOM_ST_COLS = 4

# Parameters for Shi-Tomasi corners (keyword arguments of cv2.goodFeaturesToTrack)
feature_params = {
    "maxCorners": 100,
    "qualityLevel": 0.3,
    "minDistance": 7,
    "blockSize": 7,
}

# Parameters for LKT
_lk_stop_flags = cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT
lk_params = {
    "winSize": (15, 15),
    "maxLevel": 2,
    "criteria": (_lk_stop_flags, 10, 0.03),
}

### Set parameters for specific datasets
_Updates all parameters based on dataset being used_

In [44]:
def get_feature_masks(img_path, rows, cols) -> list[np.ndarray]:
    """Split an image into a rows x cols grid and return one mask per cell.

    Args:
        img_path: Path of the image whose shape defines the masks.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A list of ``rows * cols`` boolean arrays in row-major cell order.
        Each mask has the same shape as the array returned by
        ``cv2.imread(img_path)`` and is True inside its cell, False elsewhere.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``img_path``.

    NOTE(review): OpenCV ``mask=`` arguments (e.g. ``cv2.goodFeaturesToTrack``)
    expect a single-channel uint8 image; these boolean masks presumably need
    converting before being passed there - confirm at the call site.
    """
    # cv2.imread signals failure by returning None instead of raising
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    H, W = img.shape[:2]

    # pixel boundaries of the cells; consecutive entries delimit one cell
    row_boundaries = np.linspace(0, H, rows + 1, dtype=int)
    col_boundaries = np.linspace(0, W, cols + 1, dtype=int)

    masks = []
    for r_s, r_e in zip(row_boundaries[:-1], row_boundaries[1:]):
        for c_s, c_e in zip(col_boundaries[:-1], col_boundaries[1:]):
            mask = np.zeros_like(img, dtype=bool)
            mask[r_s:r_e, c_s:c_e] = True
            masks.append(mask)

    return masks

In [45]:
bs_kf_1: str # path to first keyframe used for bootstrapping dataset
bs_kf_2: str # path to second keyframe used for bootstrapping dataset
feature_masks: list[np.ndarray] # mask image into regions for feature tracking 

# Pick the two bootstrap keyframes (frame 0 and frame <DATASET>_BS_KF) and build
# the grid masks for the selected dataset.
if DATASET == 0:
    assert 'kitti_path' in locals(), "You must define kitti_path"
    # glob order is arbitrary; sort so that the indices below are frame numbers
    images = sorted(images)
    bs_kf_1 = images[0]
    bs_kf_2 = images[KITTI_BS_KF]
    feature_masks = get_feature_masks(bs_kf_1, KITTI_ST_ROWS, KITTI_ST_COLS)

elif DATASET == 1:
    assert 'malaga_path' in locals(), "You must define malaga_path"
    bs_kf_1 = images[0]
    bs_kf_2 = images[MALAGA_BS_KF]
    feature_masks = get_feature_masks(bs_kf_1, MALAGA_ST_ROWS, MALAGA_ST_COLS)

elif DATASET == 2:
    assert 'parking_path' in locals(), "You must define parking_path"
    # Load the parking frames from parking_path, not from the KITTI folder.
    # NOTE(review): assumes the frames are in the 'images' subfolder - confirm
    # against the unzipped dataset layout.
    img_dir = os.path.join(parking_path, 'images')
    images = sorted(glob(os.path.join(img_dir, '*.png')))
    bs_kf_1 = images[0]
    bs_kf_2 = images[PARKING_BS_KF]
    feature_masks = get_feature_masks(bs_kf_1, PARKING_ST_ROWS, PARKING_ST_COLS)

elif DATASET == 3:
    # Own Dataset
    # TODO: define your own dataset and load K obtained from calibration of own camera
    assert 'own_dataset_path' in locals(), "You must define own_dataset_path"

else:
    raise ValueError("Invalid dataset index")

# Initialization
- Select two keyframes with large enough baseline
- Use indirect (feature-based) or direct (KLT) method to establish keypoint correspondences between frames
- Estimate relative pose and triangulate points to bootstrap point cloud (5-pt RANSAC)
- Initialize VO pipeline with inlier keypoints and their associated landmarks

### Corners

In [13]:
# load both bootstrap keyframes as single-channel greyscale images
img_bs_kf_1, img_bs_kf_2 = (
    cv2.imread(kf_path, cv2.IMREAD_GRAYSCALE) for kf_path in (bs_kf_1, bs_kf_2)
)

# Shi-Tomasi corners in the first keyframe, searched over the whole image (no mask)
st_corners_kf_1 = cv2.goodFeaturesToTrack(
    img_bs_kf_1,
    mask=None,
    **feature_params,
)


In [14]:
# Inspect the corners detected in the first bootstrap keyframe
# (the output below shows one [[a, b]] coordinate pair per corner)
st_corners_kf_1

array([[[ 703.,  120.]],

       [[ 917.,  102.]],

       [[ 928.,  109.]],

       [[1086.,   79.]],

       [[1081.,   86.]],

       [[ 534.,  177.]],

       [[ 896.,   91.]],

       [[ 509.,  180.]],

       [[ 929.,   96.]],

       [[ 360.,  176.]],

       [[ 726.,  124.]],

       [[ 850.,   29.]],

       [[ 926.,  137.]],

       [[ 917.,   88.]],

       [[ 467.,  173.]],

       [[ 517.,  186.]],

       [[ 359.,  169.]],

       [[ 852.,  107.]],

       [[ 544.,  169.]],

       [[ 458.,  184.]],

       [[ 479.,  179.]],

       [[ 472.,  196.]],

       [[1042.,   92.]],

       [[ 926.,   81.]],

       [[ 871.,  101.]],

       [[ 935.,   76.]],

       [[ 924.,  102.]],

       [[ 856.,   71.]],

       [[ 782.,  119.]],

       [[ 729.,  142.]],

       [[ 896.,  100.]],

       [[ 287.,  189.]],

       [[ 207.,  220.]],

       [[ 852.,  136.]],

       [[1022.,   59.]],

       [[ 928.,  127.]],

       [[ 461.,  200.]],

       [[ 994.,   60.]],

       [[ 91

### Keypoint correspondences

In [None]:
# KLT here maybe using intermediate frames
# Track the Shi-Tomasi corners of the first bootstrap keyframe directly into the
# second one with pyramidal Lucas-Kanade.
# TODO: track through the intermediate frames instead of jumping straight to
# the second keyframe.
klt_pts_kf_2, klt_status, klt_err = cv2.calcOpticalFlowPyrLK(
    img_bs_kf_1, img_bs_kf_2, st_corners_kf_1, None, **lk_params
)

# status is 1 for every point whose flow was found; keep only those pairs
klt_tracked = klt_status.ravel() == 1
matched_kf_1 = st_corners_kf_1[klt_tracked]
matched_kf_2 = klt_pts_kf_2[klt_tracked]

# Operation
- Match keypoints in current image to existing landmarks
    - Extract keypoints (Harris)
    - Track (KLT)
- Estimate pose
    - Estimate pose and handle outliers (P3P plus RANSAC)
- Add new landmarks as needed by triangulating new features
    - Keep track of candidate landmarks
        - Keypoint itself
        - Observation when first seen
        - Pose when first seen
    - Only add when they have been tracked for long enough and baseline is large enough
    - Discard if track fails
