-
Notifications
You must be signed in to change notification settings - Fork 8
/
audio_utils.py
executable file
·80 lines (60 loc) · 2.68 KB
/
audio_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import soundfile as sf
import numpy as np
import resampy
from math import ceil
import librosa
from scipy.signal import stft
import pyloudnorm as pyln
import math
import warnings
# Suppress the "Possible clipped samples in output." warning globally: the
# samples are peak normalized before being bounced, so the warning is not
# actionable here.
# NOTE(review): this message presumably originates from the pyloudnorm
# loudness normalization call used in this module -- confirm.
warnings.filterwarnings("ignore", message="Possible clipped samples in output.")
def trim_relative_silence_from_audio(audio, sr, frame_duration=0.04,
                                     hop_duration=0.01):
    """Locate the sample range of ``audio`` that is not relative silence.

    A frame is treated as active when its RMS is at least 1% of the RMS of
    the loudest frame. The returned range starts one frame before the first
    active frame and ends one frame after the last active one, clamped to
    the bounds of the signal.

    Args:
        audio: Signal to analyse. ``audio.shape[0]`` is used as the upper
            bound for the end index (assumes a 1-D mono signal -- TODO
            confirm against callers).
        sr: Sample rate, used to convert durations into sample counts.
        frame_duration: Analysis frame length in seconds.
        hop_duration: Hop between frames in seconds; must satisfy
            ``0 < hop_duration <= frame_duration``.

    Returns:
        Tuple ``(start_idx, end_idx)`` of integer sample indices.

    Raises:
        AssertionError: If the hop/frame duration constraint is violated.
    """
    assert 0 < hop_duration <= frame_duration

    samples_per_frame = int(frame_duration * sr)
    samples_per_hop = int(hop_duration * sr)

    # Only the STFT matrix (third return value) is needed.
    spectrum = stft(
        audio,
        nperseg=samples_per_frame,
        noverlap=samples_per_frame - samples_per_hop,
        nfft=samples_per_frame,
        boundary='constant',
        padded=True,
    )[2]
    frame_rms = librosa.feature.rms(
        S=spectrum,
        frame_length=samples_per_frame,
        hop_length=samples_per_hop,
        pad_mode='constant',
    ).flatten()

    # Frames at or above 1% of the peak frame RMS are considered active.
    active_frames = (frame_rms >= 0.01 * frame_rms.max()).nonzero()[0]

    # Widen the active region by one frame on each side, staying in range.
    first_frame = max(active_frames[0] - 1, 0)
    last_frame = min(active_frames[-1] + 1, frame_rms.shape[0])

    # Convert frame indices back to sample indices and clamp to the signal.
    start_idx = max(int(first_frame * hop_duration * sr), 0)
    end_idx = min(int(ceil(last_frame * hop_duration * sr)), audio.shape[0])
    return start_idx, end_idx
def lufs_norm(data, sr, norm=-6):
    """Loudness-normalize ``data`` to a target level and report the gain.

    The integrated loudness of ``data`` is measured first and the signal is
    then normalized to ``norm`` with ``pyln.normalize.loudness``.

    Args:
        data: Audio samples; ``len(data) / sr`` is taken as the duration in
            seconds.
        sr: Sample rate passed to the loudness meter.
        norm: Target loudness handed to ``pyln.normalize.loudness``.

    Returns:
        Tuple ``(norm_data, gain)`` where ``norm_data`` is the normalized
        signal and ``gain`` is the linear factor that maps ``data`` onto
        ``norm_data``.

    Raises:
        AssertionError: If the measured loudness is infinite (e.g. the
            meter could not produce a finite value for this input).
    """
    duration = len(data) / sr
    # Signals shorter than 0.4 s use their whole duration as the block size.
    block_size = 0.4 if duration >= 0.4 else duration

    # measure the loudness first
    meter = pyln.Meter(rate=sr, block_size=block_size)
    loudness = meter.integrated_loudness(data)
    assert not math.isinf(loudness), "Measured loudness is infinite"

    norm_data = pyln.normalize.loudness(data, loudness, norm)

    # The normalization is a pure scaling by the dB difference between the
    # target and the measured loudness. Deriving the gain from that
    # difference avoids dividing the sums of the two signals, which is
    # numerically unstable for (near) zero-mean audio and yields 0.0 when
    # the samples sum to exactly zero.
    gain = 10 ** ((norm - loudness) / 20.)
    return norm_data, gain
def get_lufs(data, sr):
    """Return the integrated loudness of ``data`` as measured by pyloudnorm.

    Args:
        data: Audio samples; ``len(data) / sr`` is taken as the duration in
            seconds.
        sr: Sample rate passed to the loudness meter.

    Returns:
        The value returned by ``Meter.integrated_loudness`` for ``data``.
    """
    duration = len(data) / sr
    # Clips shorter than 0.4 s are measured with a block spanning the clip.
    window = 0.4 if duration >= 0.4 else duration
    # BS.1770 meter configured for this sample rate and block size.
    bs1770_meter = pyln.Meter(rate=sr, block_size=window)
    return bs1770_meter.integrated_loudness(data)
def peak_norm(data, mx):
    """Scale ``data`` so that its largest absolute sample is about ``mx``.

    Args:
        data: Samples to scale.
        mx: Desired peak magnitude after scaling.

    Returns:
        ``data`` multiplied by ``mx / (peak + 1e-10)``, where ``peak`` is
        the largest absolute value in ``data``.
    """
    magnitudes = np.abs(data)
    peak = np.max(magnitudes)
    # The small offset keeps the division defined for all-zero input.
    return data * (mx / (peak + 1e-10))
def gain_to_db(g):
    """Convert a linear amplitude gain to decibels (``20 * log10(g)``).

    Args:
        g: Linear gain value(s).

    Returns:
        The gain expressed in dB.
    """
    log_gain = np.log10(g)
    return 20 * log_gain
def db_to_gain(db):
    """Convert a level in decibels to a linear amplitude gain.

    Args:
        db: Level(s) in dB.

    Returns:
        ``10 ** (db / 20)``.
    """
    exponent = db / 20.
    return 10 ** exponent
def gain_from_combined_db_levels(dbs):
    """Return the linear gain obtained by applying several dB levels in series.

    Args:
        dbs: Iterable of levels in dB.

    Returns:
        The product of the linear gains ``10 ** (db / 20)`` of every level.
    """
    linear_gains = [10 ** (level_db / 20.) for level_db in dbs]
    return np.prod(linear_gains)
def validate_audio(d):
    """Assert that ``d`` contains no NaN or infinite values.

    Args:
        d: Audio data to check.

    Raises:
        AssertionError: On the first failing check, with a message naming
            the kind of invalid value found.
    """
    # Checks run in this order; the first one that fails raises.
    checks = (
        (np.isnan, "Nan value found in mixture"),
        (np.isneginf, "Neg. Inf value found in mixture"),
        (np.isposinf, "Pos. Inf value found in mixture"),
        (np.isinf, "Inf value found in mixture"),
    )
    for is_invalid, message in checks:
        assert not is_invalid(d).any(), message