In [24]:
# Preview the first five recorded arm poses.
# NOTE(review): each printed row has 8 values -- presumably position (x, y, z),
# an orientation quaternion and one extra reading (gripper?); confirm against
# the code that recorded the data.
print(actual_data['arm_poses'][:5])

[[ 6.01109624e-01 -5.14435349e-04  3.00669074e-01  2.57284450e-03
  -5.09559317e-03 -8.39427055e-04  9.99983311e-01  9.82457428e+01]
 [ 6.01442456e-01  2.02586729e-04  3.02058846e-01  7.20095867e-03
  -7.30906753e-03  4.24735015e-04  9.99947309e-01  9.82419281e+01]
 [ 6.02292597e-01  2.63463310e-03  3.07810068e-01  1.03582898e-02
  -1.24669932e-02  6.23614946e-03  9.99849200e-01  9.82556610e+01]
 [ 6.04836166e-01  6.26991224e-03  3.19224805e-01  7.51527026e-03
  -1.61945317e-02  1.59481838e-02  9.99713421e-01  9.82518539e+01]
 [ 6.07377887e-01  8.21604952e-03  3.25922847e-01  3.92878894e-03
  -1.82425231e-02  2.29245238e-02  9.99563038e-01  9.82549057e+01]]


In [22]:
# Optional: estimate the sampling rate from the spacing between consecutive
# timestamps (1 / dt for the first ten intervals).
# NOTE(review): the values are in Hz only if the timestamps are stored in
# seconds -- confirm against the recorder.
delta = np.diff(actual_data['timestamps'])
print("\nSampling rate (1 / dt), first 10 intervals:", 1/delta[:10])


Data type: [7.23188281 7.60692082 6.66654587 7.33000821 7.11290796 7.05364858
 8.19826078 7.70585393 7.25562729 7.70581146]


In [12]:
import numpy as np
import cv2
import psutil
import tracemalloc
import os
import gc

def monitor_memory():
    """Report the resident set size (RSS) of the current process, in MB."""
    bytes_per_mb = 1024 * 1024
    # Look up this very process by its PID and read its RSS in bytes.
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / bytes_per_mb

def lazy_data_loader(npz_file, video_file):
    """Lazily yield ``(timestamp, arm_pose, frame)`` triples for a recording.

    The ``"arm_poses"`` and ``"timestamps"`` arrays are read from the archive
    when iteration starts; only the video frames are decoded one at a time.

    Args:
        npz_file: Path to an ``.npz`` archive holding the ``"arm_poses"`` and
            ``"timestamps"`` arrays.
        video_file: Path to the video file; one frame is read per pose.

    Yields:
        ``(timesteps[i], arm_poses[i], frame)`` for every index of
        ``arm_poses``.

    Raises:
        ValueError: If the video cannot be opened, or if it runs out of
            frames before every pose has been paired with one.
    """
    # np.load ignores ``mmap_mode`` for .npz archives, so the arrays are read
    # into memory when indexed.  Use the archive as a context manager so its
    # file handle is closed as soon as both arrays have been extracted.
    with np.load(npz_file) as data:
        arm_poses = data["arm_poses"]
        timesteps = data["timestamps"]

    cap = cv2.VideoCapture(video_file)
    # Everything after construction sits inside try/finally so the capture is
    # released on every exit path: open failure, premature end of video,
    # normal exhaustion, or the consumer closing the generator early.
    try:
        if not cap.isOpened():
            raise ValueError("Unable to open video file.")

        for i in range(len(arm_poses)):
            ret, frame = cap.read()
            if not ret:
                raise ValueError("Video ended prematurely.")

            yield timesteps[i], arm_poses[i], frame
    finally:
        cap.release()

# Example usage
def main(npz_file, video_file):
    """Play back a recording frame by frame while logging process memory.

    Each frame is shown in an OpenCV window (press 'q' to stop early).  Memory
    usage is printed every 10 iterations, and every 200 iterations a garbage
    collection is forced and the usage is printed again.

    Args:
        npz_file: Path to the ``.npz`` archive passed to ``lazy_data_loader``.
        video_file: Path to the video file passed to ``lazy_data_loader``.
    """
    datas = []
    loader = lazy_data_loader(npz_file, video_file)
    try:
        for i, (timestep, arm_pose, frame) in enumerate(loader):

            # Example: Process the frame
            cv2.imshow("Frame", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):  # Press 'q' to quit
                break

            # Memory-release demonstration.
            # A generator does not accumulate what it yields: every item is
            # handed straight to the consumer, and it is up to the consumer to
            # store, process and release it.  Items stored in a list stay
            # alive until the slot is overwritten (done here with None) or the
            # list itself is dropped; gc.collect() can then force a collection.
            # Note that the list still grows by one (None) slot per frame.
            datas.append([timestep, arm_pose, frame])
            datas[i] = None
            if i % 10 == 0:  # Check memory every 10 iterations
                print(f"Iteration {i}, Memory Usage: {monitor_memory():.2f} MB")

            if i % 200 == 0:
                gc.collect()  # Force garbage collection
                print(f"Iteration {i}, After Memory Usage: {monitor_memory():.2f} MB")
    finally:
        # Run cleanup even when the loader raises or the user quits early:
        # closing the generator triggers its own cleanup immediately, and the
        # preview window must not be left open.
        loader.close()
        cv2.destroyAllWindows()


# Recording session to inspect -- replace with your actual file paths.
folder_path = '/home/spot/docker/spot_optik_ctrl/ros2_ws/recorded_data/20250103_1735934820440384621_Sequence'

# File names inside the session folder.
actual_pose_name, rgb_video_name, hand_video_name = (
    'actual_poses.npz',
    'rgb_video.avi',
    'rgb_hand_video.avi',
)

# Full paths of the pose archive and the hand-camera video.
npz_file_path, video_file_path = (
    os.path.join(folder_path, file_name)
    for file_name in (actual_pose_name, hand_video_name)
)


Iteration 0, Memory Usage: 161247232.00 MB
before (1018118, 1059054)
after (1016959, 1059054)
Iteration 0, After Memory Usage: 161247232.00 MB
Iteration 10, Memory Usage: 170422272.00 MB
Iteration 20, Memory Usage: 180424704.00 MB
Iteration 30, Memory Usage: 189599744.00 MB
Iteration 40, Memory Usage: 198643712.00 MB
Iteration 50, Memory Usage: 207818752.00 MB
Iteration 60, Memory Usage: 217255936.00 MB
Iteration 70, Memory Usage: 226430976.00 MB
Iteration 80, Memory Usage: 235737088.00 MB
Iteration 90, Memory Usage: 244912128.00 MB
Iteration 100, Memory Usage: 254087168.00 MB
before (93216273, 93258299)
after (93213692, 93258299)
Iteration 100, After Memory Usage: 254087168.00 MB
Iteration 110, Memory Usage: 263262208.00 MB
Iteration 120, Memory Usage: 272568320.00 MB
Iteration 130, Memory Usage: 281743360.00 MB
Iteration 140, Memory Usage: 290918400.00 MB
Iteration 150, Memory Usage: 300224512.00 MB
Iteration 160, Memory Usage: 309399552.00 MB
Iteration 170, Memory Usage: 318574592.0

In [None]:

# Memory-growth experiment: same playback loop as main(), but every
# (timestep, arm_pose, frame) triple is kept in `datas`, and Python-level
# allocations are traced with tracemalloc around each forced collection.
npz_file_path = os.path.join(folder_path, actual_pose_name)
video_file_path = os.path.join(folder_path, hand_video_name)
datas = []

tracemalloc.start()
loader = lazy_data_loader(npz_file_path, video_file_path)
try:
    for i, (timestep, arm_pose, frame) in enumerate(loader):

        # Example: Process the frame
        cv2.imshow("Frame", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):  # Press 'q' to quit
            break

        # A generator does not accumulate what it yields; whatever the
        # consumer stores stays alive until the consumer drops it.  Here the
        # entries are kept (the `datas[i] = None` reset used in main() is
        # switched off), so every frame stays referenced by the list.
        datas.append([timestep, arm_pose, frame])
        # datas[i] = None
        if i % 10 == 0:  # Check memory every 10 iterations
            print(f"Iteration {i}, Memory Usage: {monitor_memory():.2f} MB")

        if i % 100 == 0:
            # get_traced_memory() returns (current, peak) traced sizes in bytes.
            print('before', tracemalloc.get_traced_memory())
            gc.collect()  # Force garbage collection
            print('after', tracemalloc.get_traced_memory())
            print(f"Iteration {i}, After Memory Usage: {monitor_memory():.2f} MB")
finally:
    # Always clean up, even if the loader raises or the cell is interrupted:
    # otherwise the preview window stays open and tracemalloc keeps tracing
    # (and slowing down) every later cell in this kernel.
    loader.close()
    cv2.destroyAllWindows()
    tracemalloc.stop()