Skip to content
Permalink
Browse files

feature(scale_video): scale video use ffmpeg

  • Loading branch information...
numb3r3 committed Aug 23, 2019
1 parent 02f70a0 commit 829d148ca211d6bdee112ea9e22e804ed9e9525a
Showing with 102 additions and 15 deletions.
  1. +2 −3 gnes/preprocessor/io_utils/audio.py
  2. +9 −9 gnes/preprocessor/io_utils/helper.py
  3. +91 −3 gnes/preprocessor/io_utils/video.py
@@ -17,7 +17,6 @@
import re
import ffmpeg
import numpy as np
import subprocess as sp
import soundfile as sf

from typing import List
@@ -45,7 +44,7 @@ def capture_audio(filename: str = 'pipe:',
ac=1,
ar=16000)

stdout, err = stream.run(
stdout, _ = stream.run(
input=video_data, capture_stdout=True, capture_stderr=True)

audio_stream = io.BytesIO(stdout)
@@ -142,7 +141,7 @@ def split_audio(filename: str = 'pipe:',
time = end_time - start_time
stream = ffmpeg.input(filename, ss=start_time, t=time)
stream = stream.output('pipe:', format='wav')
stdout, stderr = stream.run(
stdout, _ = stream.run(
input=video_data, capture_stdout=True, capture_stderr=True)

audio_stream = io.BytesIO(stdout)
@@ -16,15 +16,15 @@
import re


def ffmpeg_probe_pattern():
    """Compile the regular expressions that describe ffmpeg probe output.

    The compiled patterns are not stored or returned; the function only
    runs ``re.compile`` on each expression and implicitly returns ``None``.
    """
    # Container summary line: named groups ``dur``, optional ``start`` and
    # ``bitrate`` (``dur`` and ``bitrate`` may also be the literal "N/A").
    media_line = r"Duration:\s+(?P<dur>(?:(?:\d:?)+[.]?\d*)|N/A)(?:.+start:\s+(?P<start>\d+[.]\d+))?.+bitrate:\s+(?P<bitrate>(?:\d+\s*..[/]s)|N/A)"
    # Video stream line: ``res`` is "<digits>x<digits>", ``fps`` is optional.
    video_stream_line = r"\s*Stream.+:\s+Video:.+\s+(?P<res>\d+x\d+)(?:.*,\s*(?P<fps>\d+[.]?\d*)\sfps)?(?:.+\(default\))?"
    # Audio stream line: matched only for presence, no named groups.
    audio_stream_line = r"\s*Stream.+:\s+Audio:.*"
    # Timestamp of the form h:m:s.fraction.
    timestamp = r"(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)\.(?P<fract>\d+)"

    for expression in (media_line, video_stream_line, audio_stream_line,
                       timestamp):
        re.compile(expression)
# def ffmpeg_probe_pattern():
# mediaprobe_re = re.compile(
# r"Duration:\s+(?P<dur>(?:(?:\d:?)+[.]?\d*)|N/A)(?:.+start:\s+(?P<start>\d+[.]\d+))?.+bitrate:\s+(?P<bitrate>(?:\d+\s*..[/]s)|N/A)"
# )
# streamprobe_re = re.compile(
# r"\s*Stream.+:\s+Video:.+\s+(?P<res>\d+x\d+)(?:.*,\s*(?P<fps>\d+[.]?\d*)\sfps)?(?:.+\(default\))?"
# )
# audioprobe_re = re.compile(r"\s*Stream.+:\s+Audio:.*")
# fftime_re = re.compile(r"(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)\.(?P<fract>\d+)")


def extract_frame_size(ffmpeg_parse_info: str):
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import ffmpeg
import numpy as np

@@ -22,6 +21,94 @@
from .helper import extract_frame_size


def scale_video(input_filename: str = 'pipe:',
                output_filename: str = 'pipe:',
                video_data: bytes = None,
                start_time: float = None,
                end_time: float = None,
                scale: str = None,
                frame_rate: int = 15,
                crf: int = 16,
                vcodec: str = 'libx264',
                format: str = 'mpeg',
                pix_fmt: str = 'yuv420p',
                **kwargs):
    """Re-encode a video with ffmpeg, optionally trimming and resizing it.

    :param input_filename: source passed to ``ffmpeg.input``; the default
        ``'pipe:'`` means the video is fed through stdin from ``video_data``.
    :param output_filename: destination passed to ``output``; the default
        ``'pipe:'`` means the encoded bytes are captured and returned.
    :param video_data: raw bytes written to ffmpeg's stdin; required when
        ``input_filename`` is ``'pipe:'``.
    :param start_time: passed as the ``ss`` input option when given.
    :param end_time: when given, ``end_time - start_time`` is passed as the
        ``t`` input option (``start_time`` counts as 0 when omitted).
    :param scale: value for the ``s`` output option; omitted from the
        command when ``None``.
    :param frame_rate: value for the ``framerate`` output option.
    :param crf: value for the ``crf`` output option.
    :param vcodec: value for the ``vcodec`` output option.
    :param format: output container format; only applied when the output
        goes to ``'pipe:'``.
    :param pix_fmt: value for the ``pix_fmt`` output option.
    :param kwargs: accepted for call compatibility; not used.
    :return: the captured stdout bytes when writing to ``'pipe:'``,
        otherwise ``None``.
    :raises ValueError: if the input is ``'pipe:'`` and ``video_data`` is
        ``None``.
    """
    capture_stdin = (input_filename == 'pipe:')
    if capture_stdin and video_data is None:
        raise ValueError(
            "the buffered video data for stdin should not be empty")

    capture_stdout = (output_filename == 'pipe:')

    input_kwargs = {}
    if start_time is not None:
        input_kwargs['ss'] = start_time
    else:
        # Needed below so the duration can be computed from end_time alone.
        start_time = 0.
    if end_time is not None:
        input_kwargs['t'] = end_time - start_time

    stream = ffmpeg.input(input_filename, **input_kwargs)

    out_kwargs = {
        'vcodec': vcodec,
        'pix_fmt': pix_fmt,
        'crf': crf,
        'framerate': frame_rate
    }
    # A None-valued option is rendered by ffmpeg-python as a bare flag with
    # no value, which would corrupt the command line; only request a resize
    # when a target size was actually supplied.
    if scale is not None:
        out_kwargs['s'] = scale

    if capture_stdout:
        # Writing to a pipe gives ffmpeg no file extension to infer the
        # container from, so it has to be stated explicitly.
        out_kwargs['format'] = format

    stream = stream.output(output_filename, **out_kwargs).overwrite_output()
    stdout, _ = stream.run(
        input=video_data if capture_stdin else None,
        capture_stdout=capture_stdout)
    return stdout if capture_stdout else None


def encode_video(filename: str,
                 images: List['np.ndarray'],
                 frame_rate: int = 15,
                 vcodec: str = 'libx264',
                 **kwargs):
    """Encode a sequence of frames into a video with ffmpeg.

    Frames are cast to ``uint8`` and streamed to ffmpeg as ``rgb24``
    raw video; the frame size is taken from the first image.

    :param filename: output destination; ``'pipe:'`` means the encoded
        bytes are captured and returned instead of written to a file.
    :param images: non-empty sequence of frames; the first frame's shape is
        unpacked as ``(height, width, channels)``.
    :param frame_rate: value for the ``r`` output option.
    :param vcodec: value for the ``vcodec`` output option.
    :param kwargs: accepted for call compatibility; not used.
    :return: the encoded bytes when ``filename`` is ``'pipe:'``, otherwise
        ``None``.
    :raises ValueError: if ``images`` is empty.
    :raises ffmpeg.Error: if the ffmpeg process exits with a non-zero
        status.
    """
    # len() rather than truthiness so an ndarray of frames is accepted too.
    if len(images) == 0:
        raise ValueError("at least one frame is required to encode a video")

    height, width, _ = images[0].shape

    capture_stdout = (filename == 'pipe:')
    process = (
        ffmpeg
        .input('pipe:',
               format='rawvideo',
               pix_fmt='rgb24',
               s='{}x{}'.format(width, height))
        .output(filename, pix_fmt='yuv420p', vcodec=vcodec, r=frame_rate)
        .overwrite_output()
        .run_async(pipe_stdin=True, pipe_stdout=capture_stdout)
    )

    raw_frames = b''.join(
        frame.astype(np.uint8).tobytes() for frame in images)

    # communicate() feeds stdin and drains stdout concurrently. Writing all
    # frames first and reading afterwards can deadlock once ffmpeg fills the
    # stdout pipe buffer while we are still blocked on the stdin write.
    output, _ = process.communicate(input=raw_frames)

    if process.returncode != 0:
        # Mirror what ffmpeg-python's blocking run() does on failure.
        raise ffmpeg.Error('ffmpeg', output, None)

    return output if capture_stdout else None


def capture_frames(filename: str = 'pipe:',
video_data: bytes = None,
pix_fmt: str = 'rgb24',
@@ -37,7 +124,7 @@ def capture_frames(filename: str = 'pipe:',

input_kwargs = {
'err_detect': 'aggressive',
'fflags': 'discardcorrupt' # discard corrupted frames
'fflags': 'discardcorrupt' # discard corrupted frames
}
if start_time is not None:
input_kwargs['ss'] = start_time
@@ -66,5 +153,6 @@ def capture_frames(filename: str = 'pipe:',
if pix_fmt == 'rgba':
depth = 4

frames = np.frombuffer(out, np.uint8).reshape([-1, int(height), int(width), depth])
frames = np.frombuffer(out, np.uint8).reshape(
[-1, int(height), int(width), depth])
return list(frames)

0 comments on commit 829d148

Please sign in to comment.
You can’t perform that action at this time.