-
Notifications
You must be signed in to change notification settings - Fork 3
/
video_preprocess.py
78 lines (68 loc) · 2.47 KB
/
video_preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import subprocess
import soundfile as sf
import noisereduce as nr
from glob import glob
import os
from tqdm import tqdm
from multiprocessing import Pool
import sys
def pipeline_resize(v):
    """Rescale an ``.mp4`` video to 640x360 with ffmpeg.

    Args:
        v: Path of the source ``.mp4`` file.

    Returns:
        Path of the ``*_resize.mp4`` output (also when it was already on
        disk, so callers can keep chaining), or ``None`` when there is
        nothing to produce: ``v`` is itself a resize output or its name
        contains no ``.mp4``.
    """
    if '_resize.mp4' in v:
        return None
    dest = v.replace('.mp4', '_resize.mp4')
    if dest == v:
        # Nothing was substituted; running ffmpeg would overwrite the input.
        return None
    if os.path.exists(dest):
        # Output already produced: return its path instead of None.
        return dest
    # Build argv as a list; splitting a command string on whitespace
    # breaks paths that contain spaces.
    args = ['ffmpeg', '-v', 'quiet', '-i', v,
            '-vf', 'scale=640:360', '-y', dest]
    print(' '.join(args))
    # NOTE: ffmpeg's exit status is not inspected; dest is returned even if
    # the conversion failed.
    subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    return dest
def pipeline_reencode_mov(v):
    """Re-encode a ``.mov`` file as ``.mp4`` (h264 video, aac audio).

    Args:
        v: Path of the source ``.mov`` file.

    Returns:
        Path of the ``.mp4`` output (also when it was already on disk), or
        ``None`` when there is nothing to produce: ``v`` already contains
        ``.mp4`` or contains no ``.mov`` to substitute.
    """
    if '.mp4' in v:
        return None
    dest = v.replace('.mov', '.mp4')
    if dest == v:
        # Nothing was substituted; running ffmpeg would overwrite the input.
        return None
    if os.path.exists(dest):
        # Output already produced: return its path instead of None.
        return dest
    # Build argv as a list; splitting a command string on whitespace
    # breaks paths that contain spaces.
    args = ['ffmpeg', '-v', 'quiet', '-i', v,
            '-vcodec', 'h264', '-acodec', 'aac', '-y', dest]
    print(' '.join(args))
    # NOTE: ffmpeg's exit status is not inspected; dest is returned even if
    # the conversion failed.
    subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    return dest
def pipeline_extract_audio(v):
    """Extract the audio track of an ``.mp4`` as a mono 22050 Hz WAV file.

    Args:
        v: Path of the source ``.mp4`` file.

    Returns:
        Path of the ``.wav`` output (also when it was already on disk), or
        ``None`` when there is nothing to produce: ``v`` already contains
        ``.wav`` or contains no ``.mp4`` to substitute.
    """
    if '.wav' in v:
        return None
    dest = v.replace('.mp4', '.wav')
    if dest == v:
        # Nothing was substituted; running ffmpeg would overwrite the input.
        return None
    if os.path.exists(dest):
        # Output already produced: return its path instead of None.
        return dest
    # Build argv as a list; splitting a command string on whitespace
    # breaks paths that contain spaces.
    args = ['ffmpeg', '-v', 'quiet', '-i', v, '-f', 'wav', '-vn',
            '-ac', '1', '-ab', '16k', '-ar', '22050', '-y', dest]
    # NOTE: ffmpeg's exit status is not inspected; dest is returned even if
    # the extraction failed.
    subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    return dest
def pipeline_resample_video(v):
    """Resample an ``.mp4`` video to 15 frames per second with ffmpeg.

    Args:
        v: Path of the source ``.mp4`` file.

    Returns:
        Path of the ``*_15fps.mp4`` output (also when it was already on
        disk), or ``None`` when there is nothing to produce: ``v`` is itself
        a 15 fps output or its name contains no ``.mp4``.
    """
    if '_15fps.mp4' in v:
        return None
    dest = v.replace('.mp4', '_15fps.mp4')
    if dest == v:
        # Nothing was substituted; running ffmpeg would overwrite the input.
        return None
    if os.path.exists(dest):
        # Output already produced: return its path instead of None.
        return dest
    # Build argv as a list; splitting a command string on whitespace
    # breaks paths that contain spaces.
    args = ['ffmpeg', '-v', 'quiet', '-i', v,
            '-filter:v', 'fps=fps=15', '-y', dest]
    # NOTE: ffmpeg's exit status is not inspected; dest is returned even if
    # the conversion failed.
    subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    return dest
def pipeline_denoise_audio(a):
    """Write a noise-reduced copy of a ``.wav`` file.

    Args:
        a: Path of the source ``.wav`` file.

    Returns:
        Path of the ``*_denoised.wav`` output (also when it was already on
        disk), or ``None`` when there is nothing to produce: ``a`` is itself
        a denoised output or its name contains no ``.wav``.
    """
    if '_denoised.wav' in a:
        return None
    dest = a.replace('.wav', '_denoised.wav')
    if dest == a:
        # Nothing was substituted; writing dest would overwrite the input.
        return None
    if os.path.exists(dest):
        # Output already produced: return its path instead of None.
        return dest
    wav, sr = sf.read(a)
    mono = len(wav.shape) == 1
    # soundfile returns (frames,) or (frames, channels), while noisereduce
    # expects (frames,) or (channels, frames): add a channel axis for mono
    # and transpose multichannel data.
    wav = wav[None, :] if mono else wav.T
    wav_clean = nr.reduce_noise(y=wav, sr=sr, n_fft=1024, hop_length=1024 // 4)
    # Restore the layout soundfile expects when writing.
    wav_clean = wav_clean.squeeze() if mono else wav_clean.T
    sf.write(dest, wav_clean, samplerate=sr)
    return dest
def video_pre_process(v):
    """Run the full preprocessing chain on one video file.

    Order: optional ``.mov`` re-encode, resize, audio extraction, video
    frame-rate resampling, audio denoising.

    A stage returns ``None`` when it skips its input. That ``None`` is
    propagated here instead of being handed to the next stage, which would
    fail when calling ``.replace`` on it.

    Args:
        v: Path of the input video.

    Returns:
        Tuple ``(video_path, audio_path)`` holding the values returned by
        the resample and denoise stages; either element is ``None`` when the
        corresponding stage (or one it depends on) produced nothing.
    """
    if '.mov' in v:
        v = pipeline_reencode_mov(v)
        if v is None:
            return None, None
    resized = pipeline_resize(v)
    if resized is None:
        return None, None
    audio = pipeline_extract_audio(resized)
    video = pipeline_resample_video(resized)
    if audio is not None:
        audio = pipeline_denoise_audio(audio)
    return video, audio
def main():
    """Preprocess every ``.mp4`` directly in, or one level below, argv[1]."""
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} VIDEO_DIR')
    # Define video list for training
    video_path = sys.argv[1]
    video_list = (glob(os.path.join(video_path, '*/*.mp4'))
                  + glob(os.path.join(video_path, '*.mp4')))
    # The pipeline writes its outputs next to the inputs, so a re-run would
    # glob them as well; drop them so generated files are not reprocessed.
    video_list = [v for v in video_list
                  if not v.endswith(('_resize.mp4', '_15fps.mp4'))]
    print(len(video_list))
    with Pool(8) as p:
        p.map(video_pre_process, video_list)


if __name__ == '__main__':
    main()