In [1]:
import os
os.environ["CHECKPOINTS_PATH"] = "../checkpoints"

import dataclasses
from src.api.services import sam2_service
import src.config
import torch
import gc
from torch.profiler import profile, record_function, ProfilerActivity

# Measure VRAM requirements of models

In [14]:
checkpoints = src.config.Sam2Checkpoints()
for name, path in dataclasses.asdict(checkpoints).items():
    print(f"Loading {name} from {path}")
    with profile(activities=[ProfilerActivity.CUDA], profile_memory=True) as prof:
        with record_function(name):
            predictor = sam2_service.load_predictor(path)
            torch.cuda.synchronize()

    print(prof.key_averages())

    del predictor
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.synchronize()


Loading BASE_PLUS from ../checkpoints/sam2.1_hiera_base_plus.pt




-------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                     Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 [memory]         0.00%       0.000us         0.00%       0.000us       0.000us           0 b           0 b     450.47 Mb     450.47 Mb          3899  
    cudaStreamIsCapturing         0.02%      47.176us         0.02%      47.176us       1.123us           0 b           0 b           0 b           0 b            42  
               cudaMalloc         6.19%      12.943ms         6.19%      12.943ms     308.155us           0 b           0 b           0 b           0 b        



-------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                     Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 [memory]         0.00%       0.000us         0.00%       0.000us       0.000us           0 b           0 b    1000.91 Mb    1000.91 Mb          5339  
    cudaStreamIsCapturing         0.01%      60.686us         0.01%      60.686us       0.934us           0 b           0 b           0 b           0 b            65  
               cudaMalloc         3.93%      17.778ms         3.93%      17.778ms     273.507us           0 b           0 b           0 b           0 b        



-------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                     Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 [memory]         0.00%       0.000us         0.00%       0.000us       0.000us           0 b           0 b     315.29 Mb     315.29 Mb          3419  
    cudaStreamIsCapturing         0.06%      32.377us         0.06%      32.377us       1.116us           0 b           0 b           0 b           0 b            29  
               cudaMalloc        15.73%       8.505ms        15.73%       8.505ms     293.274us           0 b           0 b           0 b           0 b        



-------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                     Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 [memory]         0.00%       0.000us         0.00%       0.000us       0.000us           0 b           0 b     288.71 Mb     288.71 Mb          3179  
    cudaStreamIsCapturing         0.06%      24.917us         0.06%      24.917us       0.923us           0 b           0 b           0 b           0 b            27  
               cudaMalloc        15.24%       6.659ms        15.24%       6.659ms     246.618us           0 b           0 b           0 b           0 b        

# Measure VRAM usage and tracking speed

In [2]:
from src.api.services import labeling_service
from src.api.db import Session, engine
from src.api.repositories import annotations_repo, simrooms_repo
import tempfile
from pathlib import Path
from src.api.models.pydantic import AnnotationDTO
import time

In [3]:
profiling_results = {}

with Session(engine) as session:
    calibration_id = 3
    cal_rec = simrooms_repo.get_calibration_recording(
        db=session,
        calibration_id=calibration_id
    )
    recording_id = cal_rec.recording.id
    frames_path = Path("data/recording_frames") / recording_id
    frame_count = len(list(frames_path.glob("*.jpg")))
    classes = simrooms_repo.get_simroom_classes(
        db=session,
        simroom_id=1 # Controlled Experiment Room
    )
    classes = classes[:3] # We don't need to track all classes for the profiling

    for c in classes:
        class_id = c.id

        annotations = annotations_repo.get_annotations_by_class_id(
            db=session,
            calibration_id=calibration_id,
            class_id=class_id
        )
        annotations = [annotations[0]]

        results_path = Path(tempfile.gettempdir())
        tracking_job = labeling_service.TrackingJob(
            annotations=[AnnotationDTO.from_orm(a) for a in annotations],
            frames_path=frames_path,
            results_path=results_path,
            frame_count=frame_count,
            class_id=class_id,
            remove_previous_results=False
        )

        print(f"frames_path: {frames_path}")
        print(f"results_path: {results_path}")
        print(f"Tracking {class_id} with {len(annotations)} annotations")

        start_time = time.time()
        with profile(activities=[ProfilerActivity.CUDA], profile_memory=True) as prof:
            with record_function(f"Tracking {class_id}"):
                total_frames_tracked = tracking_job.run()
        end_time = time.time()

        profiling_results[class_id] = {}
        event_averages = prof.key_averages()
        found_event = False
        for event_avg in event_averages:
            if event_avg.key == f"Tracking {class_id}":
                found_event = True
                # self_cuda_memory_usage: Net CUDA memory change by this event itself (allocations - deallocations)
                profiling_results[class_id]['self_cuda_memory_usage_bytes'] = event_avg.self_cuda_memory_usage
                profiling_results[class_id]['self_cuda_memory_usage_mb'] = event_avg.self_cuda_memory_usage / (1024 * 1024)

        profiling_results[class_id]['total_time'] = end_time - start_time
        profiling_results[class_id]['total_frames_tracked'] = total_frames_tracked
        
        print(profiling_results[class_id])

frames_path: data/recording_frames/67b71a70-da64-467a-9fb6-91bc29265fd1
results_path: /tmp
Tracking 1 with 1 annotations
../checkpoints/sam2.1_hiera_large.pt sam2.1_hiera_l.yaml


propagate in video: 100%|██████████| 10/10 [00:00<00:00, 11.98it/s]
propagate in video:  17%|█▋        | 349/2055 [00:27<02:14, 12.67it/s]


{'total_time': 36.21030044555664, 'total_frames_tracked': 350}
frames_path: data/recording_frames/67b71a70-da64-467a-9fb6-91bc29265fd1
results_path: /tmp
Tracking 2 with 1 annotations
../checkpoints/sam2.1_hiera_large.pt sam2.1_hiera_l.yaml


propagate in video: 100%|██████████| 189/189 [00:15<00:00, 12.02it/s]
propagate in video:   2%|▏         | 46/1876 [00:03<02:27, 12.40it/s]


KeyboardInterrupt: 

In [None]:
inference_speeds = []
for class_id, result in profiling_results.items():
    total_time = result['total_time']
    total_frames_tracked = result['total_frames_tracked']
    inference_speed = total_frames_tracked / total_time
    inference_speeds.append(inference_speed)

print(f"Average inference speed: {sum(inference_speeds) / len(inference_speeds)} frames per second")
