Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

issues # 1689 and # 1697 #1734

Merged
merged 7 commits into from Jan 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -1,4 +1,5 @@

* Fix accuracy and latency issues with `FFmpegFrameGrabber.setVideoFrameNumber()` ([pull #1734](https://github.com/bytedeco/javacv/pull/1734))
* Add new `Frame.pictType` field set to `I`, `P`, `B`, etc by `FFmpegFrameGrabber` ([pull #1730](https://github.com/bytedeco/javacv/pull/1730))
* Set metadata for `AVFrame.opaque` in `FFmpegFrameGrabber` with call to `av_frame_copy_props()` ([issue #1729](https://github.com/bytedeco/javacv/issues/1729))
* Add `charset` property to `FrameGrabber` and `FrameRecorder` to use for metadata from FFmpeg ([pull #1720](https://github.com/bytedeco/javacv/pull/1720))
Expand Down
78 changes: 51 additions & 27 deletions src/main/java/org/bytedeco/javacv/FFmpegFrameGrabber.java
Expand Up @@ -385,6 +385,7 @@ static class SeekCallback extends Seek_Pointer_long_int {
private int samples_channels, samples_format, samples_rate;
private boolean frameGrabbed;
private Frame frame;
private int[] streams;

private volatile boolean started = false;

Expand Down Expand Up @@ -605,23 +606,23 @@ public double getVideoFrameRate() {
/** default override of super.setFrameNumber implies setting
* of a frame close to a video frame having that number */
@Override public void setFrameNumber(int frameNumber) throws Exception {
if (hasVideo()) setTimestamp(Math.round(1000000L * frameNumber / getFrameRate()));
if (hasVideo()) setTimestamp((long)Math.floor(1000000L * frameNumber / getFrameRate()));
else super.frameNumber = frameNumber;
}

/** if there is video stream tries to seek to video frame with corresponding timestamp
* otherwise sets super.frameNumber only because frameRate==0 if there is no video stream */
public void setVideoFrameNumber(int frameNumber) throws Exception {
// best guess, AVSEEK_FLAG_FRAME has not been implemented in FFmpeg...
if (hasVideo()) setVideoTimestamp(Math.round(1000000L * frameNumber / getFrameRate()));
if (hasVideo()) setVideoTimestamp((long)Math.floor(1000000L * frameNumber / getFrameRate()));
else super.frameNumber = frameNumber;
}

/** if there is audio stream tries to seek to audio frame with corresponding timestamp
* ignoring otherwise */
public void setAudioFrameNumber(int frameNumber) throws Exception {
// best guess, AVSEEK_FLAG_FRAME has not been implemented in FFmpeg...
if (hasAudio()) setAudioTimestamp(Math.round(1000000L * frameNumber / getAudioFrameRate()));
if (hasAudio()) setAudioTimestamp((long)Math.floor(1000000L * frameNumber / getAudioFrameRate()));

}

Expand Down Expand Up @@ -755,9 +756,14 @@ else if (frameTypesToSeek.contains(Frame.Type.AUDIO)) {
else if (seekFrame.samples != null && samples_frame != null && getSampleRate() > 0) {
frameDuration = AV_TIME_BASE * samples_frame.nb_samples() / (double)getSampleRate();
}
// if(frameDuration>0.0) {
// maxSeekSteps = (long)(10*(timestamp - initialSeekPosition - frameDuration)/frameDuration);
// if (maxSeekSteps<0) maxSeekSteps = 0;
// }
if(frameDuration>0.0) {
maxSeekSteps = (long)(10*(timestamp - initialSeekPosition - frameDuration)/frameDuration);
if (maxSeekSteps<0) maxSeekSteps = 0;
maxSeekSteps = 0; //no more grab if the distance to the requested timestamp is smaller than frameDuration
if (timestamp - initialSeekPosition + 1 > frameDuration) //allow for a rounding error
maxSeekSteps = (long)(10*(timestamp - initialSeekPosition)/frameDuration);
}
else if (initialSeekPosition < timestamp) maxSeekSteps = 1000;

Expand All @@ -768,7 +774,7 @@ else if (seekFrame.samples != null && samples_frame != null && getSampleRate() >
if (seekFrame == null) return; //is it better to throw NullPointerException?

count++;
double ts=this.timestamp;
double ts=seekFrame.timestamp;
frameDuration = 0.0;
if (seekFrame.image != null && this.getFrameRate() > 0)
frameDuration = AV_TIME_BASE / (double)getFrameRate();
Expand Down Expand Up @@ -933,10 +939,12 @@ public synchronized void startUnsafe(boolean findStreamInfo) throws Exception {
video_st = audio_st = null;
AVCodecParameters video_par = null, audio_par = null;
int nb_streams = oc.nb_streams();
streams = new int[nb_streams];
for (int i = 0; i < nb_streams; i++) {
AVStream st = oc.streams(i);
// Get a pointer to the codec context for the video or audio stream
AVCodecParameters par = st.codecpar();
streams[i] = par.codec_type();
if (video_st == null && par.codec_type() == AVMEDIA_TYPE_VIDEO && (videoStream < 0 || videoStream == i)) {
video_st = st;
video_par = par;
Expand Down Expand Up @@ -1294,7 +1302,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do

if (oc == null || oc.isNull()) {
throw new Exception("Could not grab: No AVFormatContext. (Has start() been called?)");
} else if ((!doVideo || video_st == null) && (!doAudio || audio_st == null)) {
} else if ((!doVideo || video_st == null) && (!doAudio || audio_st == null) && !doData) {
return null;
}
if (!started) {
Expand All @@ -1303,19 +1311,8 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do

boolean videoFrameGrabbed = frameGrabbed && frame.image != null;
boolean audioFrameGrabbed = frameGrabbed && frame.samples != null;
boolean dataFrameGrabbed = frameGrabbed && frame.data != null;
frameGrabbed = false;
frame.keyFrame = false;
frame.imageWidth = 0;
frame.imageHeight = 0;
frame.imageDepth = 0;
frame.imageChannels = 0;
frame.imageStride = 0;
frame.image = null;
frame.sampleRate = 0;
frame.audioChannels = 0;
frame.samples = null;
frame.data = null;
frame.opaque = null;
saudet marked this conversation as resolved.
Show resolved Hide resolved
if (doVideo && videoFrameGrabbed) {
if (doProcessing) {
processImage();
Expand All @@ -1328,7 +1325,24 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
}
frame.keyFrame = samples_frame.key_frame() != 0;
return frame;
} else if (doData && dataFrameGrabbed) {
return frame;
}

frame.keyFrame = false;
frame.imageWidth = 0;
frame.imageHeight = 0;
frame.imageDepth = 0;
frame.imageChannels = 0;
frame.imageStride = 0;
frame.image = null;
frame.sampleRate = 0;
frame.audioChannels = 0;
frame.samples = null;
frame.data = null;
frame.opaque = null;
frame.type = null;

boolean done = false;
boolean readPacket = pkt.stream_index() == -1;
while (!done) {
Expand All @@ -1355,7 +1369,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
frame.streamIndex = pkt.stream_index();

// Is this a packet from the video stream?
if (doVideo && video_st != null && pkt.stream_index() == video_st.index()
if (doVideo && video_st != null && frame.streamIndex == video_st.index()
&& (!keyFrames || pkt.flags() == AV_PKT_FLAG_KEY)) {
// Decode video frame
if (readPacket) {
Expand Down Expand Up @@ -1393,7 +1407,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
AVRational time_base = video_st.time_base();
timestamp = 1000000L * pts * time_base.num() / time_base.den();
// best guess, AVCodecContext.frame_number = number of decoded frames...
frameNumber = (int)Math.round(timestamp * getFrameRate() / 1000000L);
frameNumber = (int)Math.floor(timestamp * getFrameRate() / 1000000L);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is floor() better than round()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While fixing #1697, I decided that in all cases where we round a floating-point result we should simulate the result of integer division, so we should round towards zero, as floor does for non-negative values

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean that when we use round(), we might be getting the next frame by mistake, but when we use floor() this doesn't happen, and in the worst case we might be getting the previous frame instead, which is better?

frame.image = image_buf;
if (doProcessing) {
processImage();
Expand All @@ -1404,9 +1418,10 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
frame.timestamp = timestamp;
frame.keyFrame = picture.key_frame() != 0;
frame.pictType = (char)av_get_picture_type_char(picture.pict_type());
frame.type = Frame.Type.VIDEO;
}
}
} else if (doAudio && audio_st != null && pkt.stream_index() == audio_st.index()) {
} else if (doAudio && audio_st != null && frame.streamIndex == audio_st.index()) {
// Decode audio frame
if (readPacket) {
ret = avcodec_send_packet(audio_c, pkt);
Expand Down Expand Up @@ -1440,15 +1455,24 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
done = true;
frame.timestamp = timestamp;
frame.keyFrame = samples_frame.key_frame() != 0;
frame.type = Frame.Type.AUDIO;
}
} else if (doData) {
if (!readPacket) {
readPacket = true;
continue;
}
} else if (readPacket && doData
&& frame.streamIndex > -1 && frame.streamIndex < streams.length
&& streams[frame.streamIndex] != AVMEDIA_TYPE_VIDEO && streams[frame.streamIndex] != AVMEDIA_TYPE_AUDIO) {
// Export the stream byte data for non audio / video frames
frame.data = pkt.data().position(0).capacity(pkt.size()).asByteBuffer();
frame.opaque = pkt;
done = true;
switch (streams[frame.streamIndex]) {
case AVMEDIA_TYPE_DATA: frame.type = Frame.Type.DATA; break;
case AVMEDIA_TYPE_SUBTITLE: frame.type = Frame.Type.SUBTITLE; break;
case AVMEDIA_TYPE_ATTACHMENT: frame.type = Frame.Type.ATTACHMENT; break;
default: frame.type = null;
}
} else {
// Current packet is not needed (different stream index required)
readPacket = true;
}
}
return frame;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/bytedeco/javacv/FFmpegFrameRecorder.java
Expand Up @@ -1302,7 +1302,7 @@ private boolean record(AVFrame frame) throws Exception {

private void writePacket(int mediaType, AVPacket avPacket) throws Exception {

AVStream avStream = (mediaType == AVMEDIA_TYPE_VIDEO) ? audio_st : (mediaType == AVMEDIA_TYPE_AUDIO) ? video_st : null;
AVStream avStream = (mediaType == AVMEDIA_TYPE_VIDEO) ? video_st : (mediaType == AVMEDIA_TYPE_AUDIO) ? audio_st : null;
String mediaTypeStr = (mediaType == AVMEDIA_TYPE_VIDEO) ? "video" : (mediaType == AVMEDIA_TYPE_AUDIO) ? "audio" : "unsupported media stream type";

synchronized (oc) {
Expand Down
11 changes: 9 additions & 2 deletions src/main/java/org/bytedeco/javacv/Frame.java
Expand Up @@ -75,11 +75,13 @@ public class Frame implements AutoCloseable, Indexable {
DEPTH_FLOAT = 32,
DEPTH_DOUBLE = 64;

/** Constants defining data type in the frame*/
/** Constants defining data type in the frame. */
public static enum Type {
VIDEO,
AUDIO,
DATA
DATA,
SUBTITLE,
ATTACHMENT
}

/** Information associated with the {@link #image} field. */
Expand All @@ -104,6 +106,9 @@ public static enum Type {
/** Stream number the audio|video|other data is associated with. */
public int streamIndex;

/** The type of the stream. */
public Type type;

/** The underlying data object, for example, Pointer, AVFrame, IplImage, or Mat. */
public Object opaque;

Expand Down Expand Up @@ -132,6 +137,7 @@ public Frame(int width, int height, int depth, int channels, int imageStride) {
this.image = new Buffer[1];
this.data = null;
this.streamIndex = -1;
this.type = null;

Pointer pointer = new BytePointer(imageHeight * imageStride * pixelSize(depth));
ByteBuffer buffer = pointer.asByteBuffer();
Expand Down Expand Up @@ -222,6 +228,7 @@ public Frame clone() {
newFrame.keyFrame = keyFrame;
newFrame.pictType = pictType;
newFrame.streamIndex = streamIndex;
newFrame.type = type;
newFrame.opaque = new Pointer[3];
if (image != null) {
newFrame.image = new Buffer[image.length];
Expand Down