diff --git a/README.md b/README.md
index ae4b250..4453981 100644
--- a/README.md
+++ b/README.md
@@ -51,15 +51,39 @@ cd viteo
pip install -v -e .
```
-#### Rebuilding
-
+To rebuild after changes:
```bash
-rm -rf dist/
pip install -e . --force-reinstall --no-deps
```
-### Requirements
+## Configuration
+### Logging
+
+You can enable debug logging with the `VITEO_DEBUG` environment variable:
+```bash
+$ VITEO_DEBUG=1 python example.py video_1080p.mp4
+
+[viteo] Closed video resources
+[viteo] Loaded asset from: tests/test-data/video_1080p.mp4
+[viteo] Found 1 video track(s)
+[viteo] Video metadata: 1920x1080 @ 23.976 fps, 267 total frames
+[viteo] Allocated batch buffer for 16 frames
+[viteo] Created track output with hardware acceleration
+[viteo] Reader initialized successfully
+ ...
+```
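+
+The flag is read when the native extractor is constructed, so it can also be set from Python before calling `viteo.open()`. A minimal sketch:
+```python
+import os
+import viteo
+
+# Must be set before the extractor is created
+os.environ["VITEO_DEBUG"] = "1"
+
+with viteo.open("video.mp4") as frames:
+    for frame in frames:
+        ...  # debug messages are printed to stderr
+```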
+
+### Batch size
+
+Internally, `viteo` decodes frames into a reusable batch buffer for performance.
+The default batch size is 8 frames; you can change it with the `batch_size` argument:
+```python
+# Values between 2 and 16 typically work well
+with viteo.open("video.mp4", batch_size=2) as frames:
+ for frame in frames:
+ process(frame)
+```
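+
+Each buffered frame takes `width * height * 4` bytes (BGRA), so larger batches trade memory for fewer decode round trips. A rough estimate for 1080p (adjust for your video's resolution):
+```python
+width, height, batch_size = 1920, 1080, 16
+print(f"{width * height * 4 * batch_size / 1e6:.0f} MB")  # ~133 MB
+```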
+
+## Performance
-- macOS with Apple Silicon (M1/M2/M3/M4)
-- Python 3.8+
-- MLX framework
+
diff --git a/src/native/include/frame_extractor.h b/src/native/include/frame_extractor.h
index a2d324b..c55ba80 100644
--- a/src/native/include/frame_extractor.h
+++ b/src/native/include/frame_extractor.h
@@ -10,7 +10,7 @@ namespace viteo {
/// High-performance video frame extractor for Apple Silicon
class FrameExtractor {
public:
- FrameExtractor();
+ FrameExtractor(size_t batch_size = 8);
~FrameExtractor();
/// Open video file for extraction
diff --git a/src/native/src/bindings.cpp b/src/native/src/bindings.cpp
index 8d2437c..cf8641f 100644
--- a/src/native/src/bindings.cpp
+++ b/src/native/src/bindings.cpp
@@ -30,7 +30,7 @@ NB_MODULE(_viteo, m) {
m.doc() = "Hardware-accelerated video frame extraction for Apple Silicon";
nb::class_<FrameExtractor>(m, "FrameExtractor")
- .def(nb::init<>(), "Create new frame extractor")
+ .def(nb::init<size_t>(), nb::arg("batch_size") = 8, "Create new frame extractor")
.def("open", &FrameExtractor::open, nb::arg("path"),
"Open video file for extraction")
.def("next_frame",
@@ -40,6 +40,7 @@ NB_MODULE(_viteo, m) {
nb::gil_scoped_release release;
frame_data = self.next_frame();
}
+ if (!frame_data) return nb::none();
return create_mlx_array(frame_data, self.height(), self.width());
},
"Get next frame as MLX array (None when done)")
diff --git a/src/native/src/frame_extractor.mm b/src/native/src/frame_extractor.mm
index 1754a5b..44d464f 100644
--- a/src/native/src/frame_extractor.mm
+++ b/src/native/src/frame_extractor.mm
@@ -3,6 +3,14 @@
#import <AVFoundation/AVFoundation.h>
#import <VideoToolbox/VideoToolbox.h>
#include "frame_extractor.h"
+#include <cstdlib>
+#include <iostream>
+
+#define DEBUG_LOG(msg) do { \
+ if (debugLogging) { \
+ std::cerr << "[viteo] " << msg << std::endl; \
+ } \
+} while(0)
namespace viteo {
@@ -21,20 +29,27 @@
int64_t currentFrame = 0;
// Internal batch buffer for performance
- static constexpr size_t BATCH_SIZE = 16;
+ size_t batch_size;
std::vector<uint8_t> batch_buffer;
size_t batch_count = 0;
size_t batch_index = 0;
bool isOpen = false;
+ bool debugLogging = false;
- Impl() {}
+ Impl(size_t batch_size_param) : batch_size(batch_size_param) {
+ if (std::getenv("VITEO_DEBUG")) {
+ debugLogging = true;
+ }
+ DEBUG_LOG("Setting batch size to " << batch_size);
+ }
~Impl() {
close();
// ARC handles cleanup automatically
}
+ /// Releases all resources and resets state
void close() {
@autoreleasepool {
if (reader) {
@@ -47,46 +62,116 @@ void close() {
isOpen = false;
currentFrame = 0;
}
+ DEBUG_LOG("Closed video resources");
}
- bool open(const std::string& path) {
- close();
+ /// Loads asset from file path
+ AVAsset* loadAsset(const std::string& path) {
+ NSString* nsPath = [NSString stringWithUTF8String:path.c_str()];
+ NSURL* url = [NSURL fileURLWithPath:nsPath];
+ AVAsset* loadedAsset = [AVAsset assetWithURL:url];
- @autoreleasepool {
- NSString* nsPath = [NSString stringWithUTF8String:path.c_str()];
- NSURL* url = [NSURL fileURLWithPath:nsPath];
+ if (loadedAsset) {
+ DEBUG_LOG("Loaded asset from: " << path);
+ } else {
+ DEBUG_LOG("Failed to load asset from: " << path);
+ }
- asset = [AVAsset assetWithURL:url];
- if (!asset) return false;
+ return loadedAsset;
+ }
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wdeprecated-declarations"
- NSArray* tracks = [asset tracksWithMediaType:AVMediaTypeVideo];
- #pragma clang diagnostic pop
+ /// Extracts video track from asset
+ AVAssetTrack* extractVideoTrack(AVAsset* videoAsset) {
+ #pragma clang diagnostic push
+ #pragma clang diagnostic ignored "-Wdeprecated-declarations"
+ NSArray* tracks = [videoAsset tracksWithMediaType:AVMediaTypeVideo];
+ #pragma clang diagnostic pop
- if (tracks.count == 0) return false;
+ if (tracks.count == 0) {
+ DEBUG_LOG("No video tracks found");
+ return nil;
+ }
- videoTrack = tracks[0];
+ DEBUG_LOG("Found " << tracks.count << " video track(s)");
+ return tracks[0];
+ }
- CGSize size = [videoTrack naturalSize];
- cachedWidth = static_cast<int>(size.width);
- cachedHeight = static_cast<int>(size.height);
- cachedFPS = [videoTrack nominalFrameRate];
+ /// Caches video metadata from track
+ void cacheMetadata(AVAssetTrack* track, AVAsset* videoAsset) {
+ CGSize size = [track naturalSize];
+ cachedWidth = static_cast<int>(size.width);
+ cachedHeight = static_cast<int>(size.height);
+ cachedFPS = [track nominalFrameRate];
+
+ CMTime duration = [videoAsset duration];
+ cachedTotalFrames = static_cast<int64_t>(
+ CMTimeGetSeconds(duration) * cachedFPS
+ );
+
+ DEBUG_LOG("Video metadata: " << cachedWidth << "x" << cachedHeight
+ << " @ " << cachedFPS << " fps, "
+ << cachedTotalFrames << " total frames");
+ }
- CMTime duration = [asset duration];
- cachedTotalFrames = static_cast<int64_t>(
- CMTimeGetSeconds(duration) * cachedFPS
- );
+ /// Opens video file and initializes extraction
+ bool open(const std::string& path) {
+ close();
+
+ @autoreleasepool {
+ asset = loadAsset(path);
+ if (!asset) return false;
+
+ videoTrack = extractVideoTrack(asset);
+ if (!videoTrack) return false;
+
+ cacheMetadata(videoTrack, asset);
// Allocate batch buffer
size_t frame_size = cachedWidth * cachedHeight * 4;
- batch_buffer.resize(BATCH_SIZE * frame_size);
+ batch_buffer.resize(batch_size * frame_size);
+ DEBUG_LOG("Allocated batch buffer for " << batch_size << " frames");
isOpen = true;
return setupReader(0);
}
}
+ /// Creates output settings dictionary for hardware accelerated decoding
+ NSDictionary* createOutputSettings() {
+ return @{
+ (id)kCVPixelBufferPixelFormatTypeKey: @(kCVPixelFormatType_32BGRA),
+ (id)kCVPixelBufferMetalCompatibilityKey: @YES,
+ (id)kCVPixelBufferIOSurfacePropertiesKey: @{},
+ AVVideoDecompressionPropertiesKey: @{
+ (id)kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder: @YES,
+ (id)kVTDecompressionPropertyKey_PropagatePerFrameHDRDisplayMetadata: @NO,
+ },
+ };
+ }
+
+ /// Configures track output for optimal performance
+ AVAssetReaderTrackOutput* createTrackOutput(AVAssetTrack* track, NSDictionary* settings) {
+ AVAssetReaderTrackOutput* trackOutput = [[AVAssetReaderTrackOutput alloc]
+ initWithTrack:track outputSettings:settings];
+
+ trackOutput.alwaysCopiesSampleData = NO;
+ trackOutput.supportsRandomAccess = YES;
+
+ DEBUG_LOG("Created track output with hardware acceleration");
+ return trackOutput;
+ }
+
+ /// Applies time range for seeking to specific frame
+ void applyTimeRange(AVAssetReader* videoReader, int64_t startFrame) {
+ if (startFrame > 0) {
+ CMTime startTime = CMTimeMake(startFrame, cachedFPS);
+ CMTime duration = CMTimeSubtract([asset duration], startTime);
+ videoReader.timeRange = CMTimeRangeMake(startTime, duration);
+ DEBUG_LOG("Seeking to frame " << startFrame);
+ }
+ }
+
+ /// Initializes reader for frame extraction
bool setupReader(int64_t startFrame) {
@autoreleasepool {
if (reader) {
@@ -97,43 +182,26 @@ bool setupReader(int64_t startFrame) {
NSError* error = nil;
reader = [[AVAssetReader alloc] initWithAsset:asset error:&error];
- if (error || !reader) return false;
-
- // Configure for maximum performance with BGRA output
- NSDictionary* outputSettings = @{
- (id)kCVPixelBufferPixelFormatTypeKey: @(kCVPixelFormatType_32BGRA),
- (id)kCVPixelBufferMetalCompatibilityKey: @YES,
- (id)kCVPixelBufferIOSurfacePropertiesKey: @{},
- // Add VideoToolbox hardware acceleration hints
- AVVideoDecompressionPropertiesKey: @{
- (id)kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder: @YES,
- (id)kVTDecompressionPropertyKey_PropagatePerFrameHDRDisplayMetadata: @NO,
- },
- };
-
- output = [[AVAssetReaderTrackOutput alloc]
- initWithTrack:videoTrack outputSettings:outputSettings];
-
- // Critical performance settings
- output.alwaysCopiesSampleData = NO; // Avoid unnecessary copies
- output.supportsRandomAccess = YES; // Enable seeking
+ if (error || !reader) {
+ DEBUG_LOG("Failed to create reader: " << (error ? [[error localizedDescription] UTF8String] : "unknown error"));
+ return false;
+ }
+
+ NSDictionary* outputSettings = createOutputSettings();
+ output = createTrackOutput(videoTrack, outputSettings);
if (![reader canAddOutput:output]) {
+ DEBUG_LOG("Cannot add output to reader");
reader = nil;
output = nil;
return false;
}
[reader addOutput:output];
-
- // Set time range if seeking
- if (startFrame > 0) {
- CMTime startTime = CMTimeMake(startFrame, cachedFPS);
- CMTime duration = CMTimeSubtract([asset duration], startTime);
- reader.timeRange = CMTimeRangeMake(startTime, duration);
- }
+ applyTimeRange(reader, startFrame);
if (![reader startReading]) {
+ DEBUG_LOG("Failed to start reading");
reader = nil;
output = nil;
return false;
@@ -142,10 +210,46 @@ bool setupReader(int64_t startFrame) {
currentFrame = startFrame;
batch_count = 0;
batch_index = 0;
+ DEBUG_LOG("Reader initialized successfully");
return true;
}
}
+ /// Copies frame from pixel buffer to destination
+ void copyFrameData(CVImageBufferRef imageBuffer, uint8_t* dst) {
+ uint8_t* src = (uint8_t*)CVPixelBufferGetBaseAddress(imageBuffer);
+ size_t bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer);
+ size_t data_width = cachedWidth * 4;
+ size_t data_size = cachedHeight * data_width;
+
+ if (bytesPerRow == data_width) {
+ memcpy(dst, src, data_size);
+ } else {
+ for (int y = 0; y < cachedHeight; y++) {
+ memcpy(dst + y * data_width,
+ src + y * bytesPerRow,
+ data_width);
+ }
+ }
+ }
+
+ /// Processes single sample buffer and adds to batch
+ bool processSampleBuffer(CMSampleBufferRef sampleBuffer, size_t frame_size) {
+ CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
+ if (!imageBuffer) return false;
+
+ CVPixelBufferLockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
+
+ uint8_t* dst = batch_buffer.data() + (batch_count * frame_size);
+ copyFrameData(imageBuffer, dst);
+
+ CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
+ batch_count++;
+ currentFrame++;
+
+ return true;
+ }
+
/// Load next batch of frames into internal buffer
void loadBatch() {
if (!reader || !output || !isOpen) {
@@ -157,51 +261,39 @@ void loadBatch() {
batch_count = 0;
@autoreleasepool {
- while (batch_count < BATCH_SIZE) {
- if (reader.status != AVAssetReaderStatusReading) break;
+ while (batch_count < batch_size) {
+ if (reader.status != AVAssetReaderStatusReading) {
+ DEBUG_LOG("Reader stopped, loaded " << batch_count << " frames");
+ break;
+ }
CMSampleBufferRef sampleBuffer = [output copyNextSampleBuffer];
- if (!sampleBuffer) break;
-
- CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
- if (imageBuffer) {
- CVPixelBufferLockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
-
- uint8_t* src = (uint8_t*)CVPixelBufferGetBaseAddress(imageBuffer);
- size_t bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer);
- uint8_t* dst = batch_buffer.data() + (batch_count * frame_size);
-
- if (bytesPerRow == cachedWidth * 4) {
- memcpy(dst, src, frame_size);
- } else {
- size_t copy_width = cachedWidth * 4;
- for (int y = 0; y < cachedHeight; y++) {
- memcpy(dst + y * copy_width,
- src + y * bytesPerRow,
- copy_width);
- }
- }
-
- CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
- batch_count++;
- currentFrame++;
+ if (!sampleBuffer) {
+ DEBUG_LOG("No more sample buffers, loaded " << batch_count << " frames");
+ break;
}
+ processSampleBuffer(sampleBuffer, frame_size);
CFRelease(sampleBuffer);
}
}
batch_index = 0;
+ if (batch_count > 0) {
+ DEBUG_LOG("Loaded batch of " << batch_count << " frames");
+ }
}
- /// Get pointer to next frame from batch
+ /// Returns pointer to next frame from batch
uint8_t* nextFrame() {
if (!isOpen) return nullptr;
- // Load new batch if needed
if (batch_index >= batch_count) {
loadBatch();
- if (batch_count == 0) return nullptr;
+ if (batch_count == 0) {
+ DEBUG_LOG("No more frames available");
+ return nullptr;
+ }
}
size_t frame_size = cachedWidth * cachedHeight * 4;
@@ -210,14 +302,16 @@ void loadBatch() {
return frame_ptr;
}
+ /// Resets reader to specified frame index
void reset(int64_t frameIndex) {
if (!isOpen) return;
+ DEBUG_LOG("Resetting to frame " << frameIndex);
setupReader(frameIndex);
}
};
// Public interface implementation
-FrameExtractor::FrameExtractor() : impl(new Impl()) {}
+FrameExtractor::FrameExtractor(size_t batch_size_param) : impl(new Impl(batch_size_param)) {}
FrameExtractor::~FrameExtractor() { delete impl; }
bool FrameExtractor::open(const std::string& path) {
diff --git a/src/viteo/__init__.py b/src/viteo/__init__.py
index 85f6c7d..1777c2a 100644
--- a/src/viteo/__init__.py
+++ b/src/viteo/__init__.py
@@ -26,9 +26,16 @@
class FrameExtractor(_FrameExtractor):
"""Hardware-accelerated video frame extractor for Apple Silicon."""
- def __init__(self, path: Optional[str | pathlib.Path] = None):
- """Initialize extractor and optionally open a video file."""
- super().__init__()
+ def __init__(self, path: Optional[str | pathlib.Path] = None, batch_size: int = 8):
+ """
+ Initialize extractor and optionally open a video file.
+
+ Args:
+ path: Optional path to video file
+ batch_size: Number of frames to buffer internally (default: 8)
+ """
+ super().__init__(batch_size)
+ self.batch_size = batch_size
if path:
if not super().open(str(path)):
raise RuntimeError(f"Failed to open video: {path}")
@@ -40,19 +47,20 @@ def __exit__(self, *args):
pass
-def open(path: str | pathlib.Path) -> FrameExtractor:
+def open(path: str | pathlib.Path, batch_size: int = 8) -> FrameExtractor:
"""
Open a video file for frame extraction.
Args:
path: Path to video file
+ batch_size: Number of frames to buffer internally (default: 8)
Returns:
FrameExtractor configured for iteration
Example:
- with viteo.open("video.mp4") as frames:
+ with viteo.open("video.mp4", batch_size=16) as frames:
for frame in frames:
process_frame(frame)
"""
- return FrameExtractor(path)
+ return FrameExtractor(path, batch_size=batch_size)
diff --git a/tests/test_viteo.py b/tests/test_viteo.py
index 92c0b7c..2e98ff6 100644
--- a/tests/test_viteo.py
+++ b/tests/test_viteo.py
@@ -123,6 +123,39 @@ def test_iterator(sample_video):
assert count == 10
+def test_run_to_end(sample_video):
+ """Test running through all frames to the end."""
+ path = sample_video["path"]
+ if not path.exists():
+ pytest.skip(f"Test video not found: {path}")
+
+ with viteo.open(path) as video:
+ frame_count = 0
+ for frame in video:
+ frame_count += 1
+
+ assert frame_count == video.total_frames
+
+
+def test_last_frame_is_none(sample_video):
+ """Test that after all frames are read, the next frame is None and the frame count matches total_frames."""
+ path = sample_video["path"]
+ if not path.exists():
+ pytest.skip(f"Test video not found: {path}")
+
+ i = 0
+ with viteo.open(path) as video:
+ while True:
+ frame = video.next_frame()
+ if not isinstance(frame, mx.array):
+ break
+
+ i += 1
+
+ assert frame is None
+ assert i == video.total_frames
+
+
def test_reset(sample_video):
"""Test reset functionality."""
path = sample_video["path"]