In [1]:
import cv2 as cv
import numpy as np

In [6]:
class SimpleVisualOdometry:
    def __init__(self, camera_matrix):
        self.K = camera_matrix
        self.prev_frame = None
        self.prev_points = None
        # Identity matrix for initial rotation, zero vector for translation
        self.cur_R = np.eye(3)
        self.cur_t = np.zeros((3, 1))
        
        # Feature detector (ORB is great for toy examples)
        self.detector = cv.FastFeatureDetector_create(threshold=25, nonmaxSuppression=True)

    def process_frame(self, frame, frame_id):
        # 1. Convert to grayscale
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

        if frame_id == 0:
            # First frame: just detect points
            self.prev_points = self.detector.detect(gray)
            self.prev_points = np.array([x.pt for x in self.prev_points], dtype=np.float32)
        else:
            # 2. Track features from previous frame to current
            # Using Lucas-Kanade Optical Flow
            curr_points, status, err = cv.calcOpticalFlowPyrLK(self.prev_frame, gray, self.prev_points, None)
            
            # Filter out points where tracking failed
            good_prev = self.prev_points[status.reshape(-1) == 1]
            good_curr = curr_points[status.reshape(-1) == 1]

            # 3. Estimate Motion (The Core Step)
            # Find Essential Matrix using RANSAC
            E, mask = cv.findEssentialMat(good_curr, good_prev, self.K, method=cv.RANSAC, prob=0.999, threshold=1.0)
            
            # 4. Recover Pose (Decompose E into R and t)
            _, R, t, mask = cv.recoverPose(E, good_curr, good_prev, self.K)

            # 5. Update Trajectory (Simplified: assuming unit scale = 1)
            # In real Monocular VO, you need a way to estimate 'absolute_scale'
            absolute_scale = 1.0 
            self.cur_t = self.cur_t + absolute_scale * self.cur_R.dot(t)
            self.cur_R = R.dot(self.cur_R)

            # Prepare for next frame
            # If tracking points drop too low, re-detect features
            if len(good_curr) < 100:
                new_points = self.detector.detect(gray)
                self.prev_points = np.array([x.pt for x in new_points], dtype=np.float32)
            else:
                self.prev_points = good_curr.reshape(-1, 2)

        self.prev_frame = gray
        return self.cur_t

In [3]:
# Dummy Camera Matrix (Focal length = 718, Optical Center = 607, 185)
K = np.array([[718.8, 0, 607.1],
              [0, 718.8, 185.2],
              [0, 0, 1]])

In [7]:
vo = SimpleVisualOdometry(K)
cap = cv.VideoCapture(r'D:\Python things\middle-ml-cv-roadmap\data\raw\video_2025-12-16_03-45-11.mp4')

frame_id = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret: break
    
    pos = vo.process_frame(frame, frame_id)
    print(f"Frame {frame_id}: x={pos[0][0]:.2f}, y={pos[1][0]:.2f}, z={pos[2][0]:.2f}")
    
    frame_id += 1

Frame 0: x=0.00, y=0.00, z=0.00
Frame 1: x=0.47, y=-0.03, z=0.88
Frame 2: x=0.75, y=-0.37, z=1.78
Frame 3: x=0.99, y=-1.34, z=1.78
Frame 4: x=1.72, y=-1.21, z=2.44
Frame 5: x=2.43, y=-0.56, z=2.73
Frame 6: x=2.30, y=-0.51, z=3.72
Frame 7: x=2.10, y=-0.57, z=2.74
Frame 8: x=2.51, y=-0.93, z=3.58
Frame 9: x=3.07, y=-1.55, z=4.12
Frame 10: x=2.91, y=-1.14, z=3.22
Frame 11: x=3.61, y=-1.75, z=3.59
Frame 12: x=4.57, y=-1.82, z=3.32
Frame 13: x=4.17, y=-1.16, z=3.96
Frame 14: x=4.78, y=-1.82, z=3.51
Frame 15: x=5.40, y=-1.23, z=4.04
Frame 16: x=5.56, y=-0.42, z=4.60
Frame 17: x=5.63, y=-0.34, z=5.59
Frame 18: x=6.26, y=0.39, z=5.86
Frame 19: x=5.91, y=0.74, z=6.73
Frame 20: x=6.81, y=0.72, z=7.17
Frame 21: x=7.01, y=0.73, z=6.20
Frame 22: x=7.27, y=1.19, z=5.35
Frame 23: x=6.97, y=0.94, z=6.27
Frame 24: x=6.25, y=1.55, z=5.95
Frame 25: x=7.12, y=1.37, z=6.41
Frame 26: x=6.32, y=1.59, z=5.85
Frame 27: x=5.44, y=1.95, z=6.15
Frame 28: x=5.51, y=1.08, z=6.63
Frame 29: x=6.07, y=1.87, z=6.89
Fra