In [None]:
#imports
import warnings

import ffmpeg, numpy as np, matplotlib.pyplot as plt
import scipy.optimize
import scipy.signal
warnings.simplefilter("ignore", DeprecationWarning)

In [None]:
def _decode_to_float16(stream):
    """Render an ffmpeg-python stream to PCM and return it as a float16 array.

    The stream is written to stdout as mono (``ac=1``), 48 kHz (``ar='48k'``),
    signed 16-bit little-endian PCM. The captured bytes are reinterpreted as
    int16 samples and then converted to float16.

    Parameters
    ----------
    stream
        An ffmpeg-python stream (e.g. the result of ``ffmpeg.input(...)``,
        optionally followed by filters).

    Returns
    -------
    numpy.ndarray
        One-dimensional float16 array of samples.

    Raises
    ------
    ffmpeg.Error
        Propagated from ffmpeg-python's ``run()``; its stderr is printed first.
    """
    try:
        raw_pcm, _ = (stream
                    .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar='48k')
                    .overwrite_output()
                    .run(capture_stdout=True, capture_stderr=True)
                    )
    except ffmpeg.Error as e:
        # Show ffmpeg's own diagnostics, then let the failure propagate.
        # Swallowing it would only surface later as an unrelated error
        # about a variable that was never assigned.
        print(e.stderr)
        raise
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # The astype() call copies, so the returned array is writable.
    # NOTE(review): float16 carries roughly 11 bits of precision, so int16
    # samples beyond +/-2048 are rounded. float32 would be lossless, but the
    # dtype is kept because downstream cells were written against it.
    return np.frombuffer(raw_pcm, dtype=np.int16).astype(np.float16)


#read in audio from file
def readAudio(filename):
    """Decode ``filename`` with ffmpeg into a mono 48 kHz float16 sample array.

    Parameters
    ----------
    filename : str
        Path of the audio file handed to ``ffmpeg.input``.

    Returns
    -------
    numpy.ndarray
        One-dimensional float16 array of samples.

    Raises
    ------
    ffmpeg.Error
        If ffmpeg-python reports a failure while decoding.
    """
    return _decode_to_float16(ffmpeg.input(filename))


#read in audio from file but add eq
def readAudioWithEQ(filename, freq=1000, width=100, gain=10):
    """Decode ``filename`` like ``readAudio`` but with an ffmpeg ``equalizer`` filter.

    Parameters
    ----------
    filename : str
        Path of the audio file handed to ``ffmpeg.input``.
    freq : number, optional
        Value for the filter's ``f`` option (default 1000).
    width : number, optional
        Value for the filter's ``w`` option; the width type is fixed to
        ``t='q'`` (default 100).
    gain : number, optional
        Value for the filter's ``g`` option (default 10).

    Returns
    -------
    numpy.ndarray
        One-dimensional float16 array of the equalised samples.

    Raises
    ------
    ffmpeg.Error
        If ffmpeg-python reports a failure while decoding.
    """
    equalised = (ffmpeg
                .input(filename)
                .filter("equalizer", f=freq, t='q', w=width, g=gain)
                )
    return _decode_to_float16(equalised)

In [None]:
def compute_distance(audio_1, audio_2, fs=48000):
    """Return the norm of the difference between two signals' spectrograms.

    Both signals are passed through ``scipy.signal.spectrogram`` with its
    default settings, and the difference of the two spectrograms is reduced
    with ``np.linalg.norm``.

    Parameters
    ----------
    audio_1, audio_2 : array_like
        Sample sequences to compare (presumably 1-D mono audio - confirm
        against the callers).
    fs : number, optional
        Sampling frequency forwarded to ``scipy.signal.spectrogram``.
        Defaults to 48000, the rate the audio readers in this notebook ask
        ffmpeg for.

    Returns
    -------
    float
        Non-negative distance; 0.0 when the compared spectrogram regions
        are identical.
    """
    _, _, spec_1 = scipy.signal.spectrogram(audio_1, fs=fs)
    _, _, spec_2 = scipy.signal.spectrogram(audio_2, fs=fs)

    # Signals of different lengths yield different numbers of time frames
    # (the last axis). Subtracting them directly would raise or silently
    # broadcast, so only the frames both spectrograms share are compared.
    n_frames = min(spec_1.shape[-1], spec_2.shape[-1])
    spec_1 = spec_1[..., :n_frames]
    spec_2 = spec_2[..., :n_frames]

    # [:-1] leaves out the final row along axis 0 of each spectrogram (for
    # 1-D input that is the highest frequency bin).
    # NOTE(review): the reason for excluding it is not evident from the
    # notebook; it is preserved so existing distances do not change.
    return np.linalg.norm(spec_1[:-1] - spec_2[:-1])

def apply_effect(params, clean_audio):
    """Run ``clean_audio`` through ffmpeg's ``equalizer`` filter and return the result.

    The samples are cast to int16, piped to ffmpeg as mono 48 kHz s16le PCM,
    filtered with ``equalizer`` (``f=1000``, ``t='q'``, ``w=100``,
    ``g=params[0]``) and read back in the same PCM format.

    Parameters
    ----------
    params : sequence of numbers
        Parameter vector; only ``params[0]`` is used, as the filter's gain.
    clean_audio : array_like
        Samples to process. They are converted with ``astype(np.int16)``.

    Returns
    -------
    numpy.ndarray
        One-dimensional float16 array holding the filtered samples.

    Raises
    ------
    ffmpeg.Error
        If the ffmpeg process exits with a non-zero return code; its stderr
        is printed first.
    """
    # float() keeps NumPy scalar types (such as the values an optimiser
    # passes in) out of the ffmpeg argument list.
    r_gain = float(params[0])
    r_freq = 1000
    r_width = 100
    pcm_in = np.asarray(clean_audio).astype(np.int16).tobytes()

    #define graph
    process_audio = (ffmpeg
        .input('pipe:', format='s16le', acodec='pcm_s16le', ac=1, ar='48k')
        .filter("equalizer", f=r_freq, t='q', w=r_width, g=r_gain)
        .output('pipe:', format='s16le', acodec='pcm_s16le', ac=1, ar='48k')
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
    )

    # communicate() feeds stdin and drains stdout/stderr concurrently, then
    # waits for the process to exit. Writing the whole input before reading
    # any output can block forever once the OS pipe buffers fill up.
    signal, stderr = process_audio.communicate(input=pcm_in)

    # run_async() never raises ffmpeg.Error itself, so a failure has to be
    # detected from the exit status.
    if process_audio.returncode != 0:
        print(stderr)
        raise ffmpeg.Error('ffmpeg', signal, stderr)

    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # The astype() call copies, so the returned array is writable.
    return np.frombuffer(signal, dtype=np.int16).astype(np.float16)

def func(params, target_audio, clean_audio):
    """Objective function: distance between the processed clean audio and the target.

    ``clean_audio`` is run through ``apply_effect`` with ``params`` and the
    result is compared against ``target_audio`` via ``compute_distance``.
    """
    return compute_distance(apply_effect(params, clean_audio), target_audio)

#args = [target_audio, clean_audio]

In [None]:
# Load the EQ'd reference and the untouched recording, keeping only samples
# 96000..143999 of each (48000 samples, i.e. one second at the 48 kHz rate
# the readers request from ffmpeg).
segment = slice(96000, 144000)
target_audio = readAudioWithEQ("../recordings/sample_ffmpeg.wav")[segment]
clean_audio = readAudio("../recordings/sample_ffmpeg.wav")[segment]

# Starting guess for the optimiser; params = [gain]
params = [0.7]
result = scipy.optimize.minimize(
    func,
    params,
    args=(target_audio, clean_audio),
    method='nelder-mead',
    options={'disp': True},
)

In [None]:
# Parameter vector the optimiser settled on; element 0 is the EQ gain
# that func() was minimising over.
result.x

In [None]:
# Baseline: distance between the unprocessed clip and the EQ'd target.
# Both arrays were already cut to the 96000:144000 window when they were
# loaded, so they must not be sliced again here - doing so yields empty
# arrays and a distance that measures nothing.
compute_distance(clean_audio, target_audio)

In [None]:
# Sanity check: number of samples in the clean clip being optimised against.
clean_audio.shape