In [2]:
import numpy as np
from pydub import AudioSegment
import os

In [5]:
def wada_snr(wav):
    """Blindly estimate the SNR (in dB) of a speech signal with the WADA method.

    Paper on WADA SNR:
        http://www.cs.cmu.edu/~robust/Papers/KimSternIS08.pdf

    This function was adapted from this matlab code:
        https://labrosa.ee.columbia.edu/projects/snreval/#9

    Args:
        wav: Array-like of audio samples (integer PCM or float). The signal is
            peak-normalised internally, so the absolute scale does not matter.

    Returns:
        The estimated SNR in dB, limited to the table range [-20, 100].
        NaN is returned when the signal has no usable peak (all zeros or NaN),
        because the statistic is undefined in that case.

    Raises:
        ValueError: If ``wav`` is empty.
    """
    # init
    eps = 1e-10
    # next 2 lines define a fancy curve derived from a gamma distribution -- see paper
    db_vals = np.arange(-20, 101)
    g_vals = np.array([0.40974774, 0.40986926, 0.40998566, 0.40969089, 0.40986186, 0.40999006, 0.41027138, 0.41052627, 0.41101024, 0.41143264, 0.41231718, 0.41337272, 0.41526426, 0.4178192 , 0.42077252, 0.42452799, 0.42918886, 0.43510373, 0.44234195, 0.45161485, 0.46221153, 0.47491647, 0.48883809, 0.50509236, 0.52353709, 0.54372088, 0.56532427, 0.58847532, 0.61346212, 0.63954496, 0.66750818, 0.69583724, 0.72454762, 0.75414799, 0.78323148, 0.81240985, 0.84219775, 0.87166406, 0.90030504, 0.92880418, 0.95655449, 0.9835349 , 1.01047155, 1.0362095 , 1.06136425, 1.08579312, 1.1094819 , 1.13277995, 1.15472826, 1.17627308, 1.19703503, 1.21671694, 1.23535898, 1.25364313, 1.27103891, 1.28718029, 1.30302865, 1.31839527, 1.33294817, 1.34700935, 1.3605727 , 1.37345513, 1.38577122, 1.39733504, 1.40856397, 1.41959619, 1.42983624, 1.43958467, 1.44902176, 1.45804831, 1.46669568, 1.47486938, 1.48269965, 1.49034339, 1.49748214, 1.50435106, 1.51076426, 1.51698915, 1.5229097 , 1.528578  , 1.53389835, 1.5391211 , 1.5439065 , 1.54858517, 1.55310776, 1.55744391, 1.56164927, 1.56566348, 1.56938671, 1.57307767, 1.57654764, 1.57980083, 1.58304129, 1.58602496, 1.58880681, 1.59162477, 1.5941969 , 1.59693155, 1.599446  , 1.60185011, 1.60408668, 1.60627134, 1.60826199, 1.61004547, 1.61192472, 1.61369656, 1.61534074, 1.61688905, 1.61838916, 1.61985374, 1.62135878, 1.62268119, 1.62390423, 1.62513143, 1.62632463, 1.6274027 , 1.62842767, 1.62945532, 1.6303307 , 1.63128026, 1.63204102])

    # Work in float64 from the start: taking abs() of integer PCM overflows for
    # the most negative value (e.g. int16 -32768 stays negative), which would
    # corrupt the peak used for normalisation.
    wav = np.asarray(wav, dtype=np.float64)
    if wav.size == 0:
        raise ValueError("wada_snr requires a non-empty signal")

    # peak normalize, get magnitude, clip lower bound
    peak = np.abs(wav).max()
    if not (np.isfinite(peak) and peak > 0):
        # Silent or invalid signal: dividing by the peak would only yield NaN
        # (plus runtime warnings), so report the undefined result directly.
        return np.float64("nan")
    wav = wav / peak
    abs_wav = np.maximum(np.abs(wav), eps)

    # calculate statistics
    # E[|z|]
    v1 = max(eps, abs_wav.mean())
    # E[log|z|]
    v2 = np.log(abs_wav).mean()
    # log(E[|z|]) - E[log(|z|)]
    v3 = np.log(v1) - v2

    # table interpolation
    # NOTE: the first few g_vals are not monotonic, so np.interp cannot be used;
    # keep the "last table entry below v3" lookup of the reference code.
    below = np.flatnonzero(g_vals < v3)
    if below.size == 0:
        # v3 is under the whole table -> clamp to the lowest SNR
        wav_snr = db_vals[0]
    else:
        wav_snr_idx = below.max()
        if wav_snr_idx == len(db_vals) - 1:
            # v3 is above the whole table -> clamp to the highest SNR
            wav_snr = db_vals[-1]
        else:
            # linear interpolation between the two neighbouring table entries
            g_lo = g_vals[wav_snr_idx]
            g_hi = g_vals[wav_snr_idx + 1]
            db_lo = db_vals[wav_snr_idx]
            db_hi = db_vals[wav_snr_idx + 1]
            wav_snr = db_lo + (v3 - g_lo) / (g_hi - g_lo) * (db_hi - db_lo)

    # Calculate SNR
    # Sum along the sample axis (same result as the builtin sum, but vectorised).
    dEng = np.sum(wav ** 2, axis=0)
    dFactor = 10 ** (wav_snr / 10)
    dNoiseEng = dEng / (1 + dFactor)  # Noise energy
    dSigEng = dEng * dFactor / (1 + dFactor)  # Signal energy
    snr = 10 * np.log10(dSigEng / dNoiseEng)

    return snr

In [10]:
def load_and_calculate_snr(file_path):
    """Decode an audio file and return the WADA SNR estimate of its samples.

    The file at ``file_path`` is opened with ``AudioSegment.from_file``, its
    samples are copied into a NumPy array, and that array is passed to
    ``wada_snr``, whose result is returned unchanged.
    """
    samples = AudioSegment.from_file(file_path).get_array_of_samples()
    return wada_snr(np.array(samples))

In [14]:
# Sanity check: estimate the SNR of a single recording and print the value.
print(load_and_calculate_snr('/Users/got/Documents/retune/t+im/sound/001-pg_EeHgj15S.mp3'))

10.767064381807959


In [3]:
def insert_random_noise(input_path, noise_paths, output_path, snr):
    """Mix background noise into a recording at a requested SNR and export it.

    One excerpt is taken at a random offset from every file in ``noise_paths``;
    the excerpts are overlaid on each other, the resulting noise bed is
    levelled so that its RMS sits ``snr`` dB below the RMS of the input, and
    the bed is overlaid on the input.

    Args:
        input_path: Path of the clean recording.
        noise_paths: Iterable of paths to noise recordings.
        output_path: Where the noisy result is written. The file is always
            encoded as MP3, whatever the extension says.
        snr: Target signal-to-noise ratio in dB (RMS signal vs. RMS noise).

    Notes:
        * Noise files that are not longer than the input are looped to cover
          it instead of raising from ``np.random.randint``.
        * If the input or the noise bed is digitally silent the SNR is
          undefined, so the noise is mixed in at its original level.
    """
    real_audio = AudioSegment.from_file(input_path)
    target_len = len(real_audio)  # pydub segment lengths are in milliseconds

    combined_noise = AudioSegment.silent(duration=target_len)
    for noise_path in noise_paths:
        noise_audio = AudioSegment.from_file(noise_path)
        if len(noise_audio) == 0:
            # Nothing to mix in (and looping an empty clip would never finish).
            continue

        max_start = len(noise_audio) - target_len
        if max_start > 0:
            # randint's upper bound is exclusive; +1 keeps the last offset valid.
            start_time = np.random.randint(0, max_start + 1)
            noise_segment = noise_audio[start_time:start_time + target_len]
            combined_noise = combined_noise.overlay(noise_segment)
        else:
            # Noise clip is too short for a random window: repeat it instead.
            combined_noise = combined_noise.overlay(noise_audio, loop=True)

    # Level the noise bed from RMS levels. dBFS is the RMS level relative to
    # full scale, so it stays comparable even when the signal and the noise
    # differ in sample width, channel count or frame rate -- unlike raw sample
    # sums, which the previous formula used (and which also applied the SNR
    # twice and subtracted the noise level a second time).
    silence = float("-inf")
    signal_dbfs = real_audio.dBFS
    noise_dbfs = combined_noise.dBFS
    if signal_dbfs > silence and noise_dbfs > silence:
        gain_db = (signal_dbfs - snr) - noise_dbfs
        combined_noise = combined_noise.apply_gain(gain_db)

    # Add the noise bed to the real audio
    noisy_audio = real_audio.overlay(combined_noise)

    # Export the result to a new audio file (always MP3)
    noisy_audio.export(output_path, format='mp3')

# Example usage
# insert_random_noise("/Users/got/Documents/retune/t+im/sound/17_66.m4a", ["/Users/got/Documents/retune/t+im/sound/high_suction.m4a","/Users/got/Documents/retune/t+im/sound/Suction_wet.mp3","/Users/got/Documents/retune/t+im/sound/Suction.mp3","/Users/got/Documents/retune/t+im/sound/ultrasonic_scaler.m4a"], "/Users/got/Documents/retune/t+im/sound/17_66_n.mp3", snr=9)

In [22]:
# insert_random_noise("/Users/got/Documents/retune/t+im/sound/Ts3-34.m4a", ["/Users/got/Documents/retune/t+im/sound/high_suction.m4a","/Users/got/Documents/retune/t+im/sound/suction_wet.mp3","/Users/got/Documents/retune/t+im/sound/Suction.mp3","/Users/got/Documents/retune/t+im/sound/ultrasonic_scaler.m4a"], "/Users/got/Documents/retune/t+im/sound/TS3-34_n.mp3", snr=9)


In [4]:
# Batch job: add noise to every recording in `folder_path` and write the
# results, suffixed with "_n", as MP3 files into `output_folder_path`.
folder_path = '/Users/got/Documents/retune/t+im/data_for_AJ'
noise_folder_path = '/Users/got/Documents/retune/t+im/noise'
output_folder_path = '/Users/got/Documents/retune/t+im/data_with_noise'

# Keep regular, non-hidden files only: os.listdir also returns entries such as
# ".DS_Store" and sub-directories, which cannot be decoded as audio.
file_list = sorted(
    name for name in os.listdir(folder_path)
    if not name.startswith('.') and os.path.isfile(os.path.join(folder_path, name))
)
noise_list = [
    os.path.join(noise_folder_path, item)
    for item in sorted(os.listdir(noise_folder_path))
    if not item.startswith('.') and os.path.isfile(os.path.join(noise_folder_path, item))
]
snr = 9

# The export fails if the destination folder is missing.
os.makedirs(output_folder_path, exist_ok=True)

for i, file_name in enumerate(file_list, start=1):
    file_path = os.path.join(folder_path, file_name)
    # splitext strips only the final extension, so "a.b.m4a" becomes "a.b"
    # rather than "a" (which could make different inputs overwrite each other).
    stem = os.path.splitext(file_name)[0]
    output_path = os.path.join(output_folder_path, stem + '_n' + '.mp3')
    insert_random_noise(file_path, noise_list, output_path, snr)
    print(i)  # progress: number of files processed so far

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105


KeyboardInterrupt: 

In [None]:
from pydub import AudioSegment
import os
import numpy as np

def insert_random_noise(input_path, noise_paths, output_path, snr):
    """Mix one randomly chosen noise recording into a file at a requested SNR.

    A noise file is drawn at random from ``noise_paths``, a random excerpt as
    long as the input is cut from it, the excerpt is levelled so that its RMS
    sits ``snr`` dB below the RMS of the input, and it is overlaid on the
    input.

    Args:
        input_path: Path of the clean recording.
        noise_paths: Non-empty sequence of paths to noise recordings.
        output_path: Where the noisy result is written. The file is always
            encoded as MP3, whatever the extension says.
        snr: Target signal-to-noise ratio in dB (RMS signal vs. RMS noise).

    Raises:
        ValueError: If ``noise_paths`` is empty (raised by ``np.random.choice``).

    Notes:
        * A noise file that is not longer than the input is looped to cover it
          instead of raising from ``np.random.randint``.
        * If the input or the noise is digitally silent the SNR is undefined,
          so the noise is mixed in at its original level.
    """
    real_audio = AudioSegment.from_file(input_path)
    target_len = len(real_audio)  # pydub segment lengths are in milliseconds

    # Randomly select a noise file and cut a random window out of it
    selected_noise = np.random.choice(noise_paths)
    noise_audio = AudioSegment.from_file(selected_noise)
    max_start = len(noise_audio) - target_len
    if max_start > 0:
        # randint's upper bound is exclusive; +1 keeps the last offset valid.
        start_time = np.random.randint(0, max_start + 1)
        noise_segment = noise_audio[start_time:start_time + target_len]
        loop_noise = False
    else:
        # Noise clip is too short for a random window: use all of it and let
        # overlay repeat it (never loop an empty clip -- that would not end).
        noise_segment = noise_audio
        loop_noise = len(noise_audio) > 0

    # Level the noise from RMS levels. dBFS is the RMS level relative to full
    # scale, so it stays comparable even when the signal and the noise differ
    # in sample width, channel count or frame rate. The previous formula
    # divided a sum of absolute samples by an RMS value and applied the SNR
    # twice, so the requested SNR was not what ended up in the file.
    silence = float("-inf")
    signal_dbfs = real_audio.dBFS
    noise_dbfs = noise_segment.dBFS
    if signal_dbfs > silence and noise_dbfs > silence:
        gain_db = (signal_dbfs - snr) - noise_dbfs
        noise_segment = noise_segment.apply_gain(gain_db)

    # Add the noise segment to the real audio
    noisy_audio = real_audio.overlay(noise_segment, loop=loop_noise)

    # Export the result to a new audio file (always MP3)
    noisy_audio.export(output_path, format='mp3')

def process_files(folder_path, noise_folder_path, snr):
    """Add noise to every recording in a folder.

    Each regular, non-hidden file in ``folder_path`` is passed to
    ``insert_random_noise`` together with the list of noise files found in
    ``noise_folder_path``. Results are written as ``<stem>_n.mp3`` into a
    ``data_with_noise`` directory next to ``folder_path``; the directory is
    created when missing. A running count is printed after each file.

    Args:
        folder_path: Directory holding the clean recordings.
        noise_folder_path: Directory holding the noise recordings.
        snr: Target signal-to-noise ratio in dB, forwarded unchanged.
    """
    def _audio_candidates(directory):
        # os.listdir also returns entries such as ".DS_Store" and
        # sub-directories, which cannot be decoded as audio.
        return sorted(
            name for name in os.listdir(directory)
            if not name.startswith('.') and os.path.isfile(os.path.join(directory, name))
        )

    file_list = _audio_candidates(folder_path)
    noise_list = [os.path.join(noise_folder_path, item) for item in _audio_candidates(noise_folder_path)]

    # normpath drops a trailing separator; without it dirname("x/data/") is
    # "x/data" and the output would land inside the input folder.
    parent_dir = os.path.dirname(os.path.normpath(folder_path))
    output_dir = os.path.join(parent_dir, "data_with_noise")
    os.makedirs(output_dir, exist_ok=True)  # export fails if the folder is missing

    for i, file_name in enumerate(file_list, start=1):
        file_path = os.path.join(folder_path, file_name)
        # splitext strips only the final extension, so "a.b.m4a" becomes "a.b"
        # rather than "a" (which could make different inputs overwrite each other).
        stem = os.path.splitext(file_name)[0]
        output_path = os.path.join(output_dir, f"{stem}_n.mp3")
        insert_random_noise(file_path, noise_list, output_path, snr)
        print(i)

# Run configuration: where the clean recordings and the noise recordings live,
# and the signal-to-noise ratio to mix at.
folder_path = '/Users/got/Documents/retune/t+im/data_for_AJ'
noise_folder_path = '/Users/got/Documents/retune/t+im/noise'
snr = 9

# Kick off the batch run with the settings above.
process_files(
    folder_path=folder_path,
    noise_folder_path=noise_folder_path,
    snr=snr,
)