In [10]:
import sys
import os

# Make the directory two levels above the current working directory importable
parent_dir = os.path.abspath(os.path.join(os.pardir, os.pardir))
already_on_path = parent_dir in sys.path
if not already_on_path:
    sys.path.append(parent_dir)

# Import modules
import ddsp_textures.signal_processors.synthesizers
import ddsp_textures.auxiliar.seeds

# Import extra packages
import numpy as np
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
import torch

# Resynthesis function
def resynthesize(segments, sr, seed, N_filter_bank, param_per_env, N):
    """Resynthesize the first ``N`` segments and display original/resynthesis pairs.

    Each segment is converted to a float32 tensor, standardized to zero mean
    and unit standard deviation, passed through ``TexEnv_param_extractor`` and
    then ``TexEnv``. The standardized original and the resynthesized audio are
    shown as audio players, in that order.

    Args:
        segments: Indexable, sized collection of audio segments; each element
            must be accepted by ``torch.tensor``.
        sr: Sample rate forwarded to the extractor and used for playback.
        seed: Forwarded unchanged as the third argument of ``TexEnv``.
        N_filter_bank: Forwarded to ``TexEnv_param_extractor``.
        param_per_env: Forwarded to ``TexEnv_param_extractor``.
        N: Number of segments to process, counted from the start of
            ``segments``. Values larger than ``len(segments)`` are clamped.

    Returns:
        None. The function only displays audio widgets.
    """
    # Clamp so that asking for more segments than exist does not raise IndexError.
    n_segments = min(N, len(segments))
    if n_segments <= 0:
        return

    synthesizers = ddsp_textures.signal_processors.synthesizers
    for index in range(n_segments):
        segment = torch.tensor(segments[index], dtype=torch.float32)
        # Standardize: zero mean, unit standard deviation.
        segment = (segment - torch.mean(segment)) / torch.std(segment)
        param_real, param_imag = synthesizers.TexEnv_param_extractor(segment, sr, N_filter_bank, param_per_env)
        new_audio = synthesizers.TexEnv(param_real, param_imag, seed)
        display(Audio(data=segment.numpy(), rate=sr))
        display(Audio(data=new_audio.numpy(), rate=sr))

# Resynthesis function that also prints labels and plots both waveforms
def resynthesize_and_display(segments, sr, seed, N_filter_bank, frame_size, param_per_env, label, N):
    """Resynthesize the first ``N`` segments, then play and plot each pair.

    Each segment is converted to a float32 tensor, standardized to zero mean
    and unit standard deviation, passed through ``TexEnv_param_extractor`` and
    then ``TexEnv``. For every segment the function prints a labelled heading
    and shows an audio player for the standardized original and for the
    resynthesis, then draws both waveforms on one figure.

    Args:
        segments: Indexable, sized collection of audio segments; each element
            must be accepted by ``torch.tensor``.
        sr: Sample rate forwarded to the extractor and used for playback.
        seed: Forwarded unchanged as the third argument of ``TexEnv``.
        N_filter_bank: Forwarded to ``TexEnv_param_extractor``.
        frame_size: Unused by this function; kept so existing callers keep
            working.
        param_per_env: Forwarded to ``TexEnv_param_extractor``.
        label: Text prefixed to the printed headings and used in the plot title.
        N: Number of segments to process, counted from the start of
            ``segments``. Values larger than ``len(segments)`` are clamped.

    Returns:
        None. The function only prints, displays audio widgets and shows plots.
    """
    # Clamp so that asking for more segments than exist does not raise IndexError.
    n_segments = min(N, len(segments))
    if n_segments <= 0:
        return

    synthesizers = ddsp_textures.signal_processors.synthesizers
    for index in range(n_segments):
        segment = torch.tensor(segments[index], dtype=torch.float32)
        # Standardize: zero mean, unit standard deviation.
        segment = (segment - torch.mean(segment)) / torch.std(segment)
        param_real, param_imag = synthesizers.TexEnv_param_extractor(segment, sr, N_filter_bank, param_per_env)
        new_audio = synthesizers.TexEnv(param_real, param_imag, seed)

        print(f"{label} original audio")
        display(Audio(data=segment.numpy(), rate=sr))
        print(f"{label} resynthesized audio")
        display(Audio(data=new_audio.numpy(), rate=sr))

        # Overlay both waveforms so they can be compared sample by sample.
        fig, ax = plt.subplots()
        ax.plot(segment.numpy(), label="original")
        ax.plot(new_audio.numpy(), label="resynthesized")
        ax.set(title=f"{label}: original vs resynthesized", xlabel="sample", ylabel="amplitude")
        ax.legend()
        plt.show()
        # Release the figure; otherwise every loop iteration keeps one alive.
        plt.close(fig)

In [11]:
import soundfile as sf

def save_audio(audio_data, output_path, sample_rate=44100):
    """
    Write audio to ``output_path`` with soundfile.

    Floating-point data whose samples all lie within [-1, 1] is written
    unchanged. If the peak absolute value is larger than 1 (for example a
    signal standardized to unit variance), the whole signal is divided by that
    peak first, so that integer PCM output formats are not distorted by
    out-of-range samples. Non-float data is passed through untouched.

    Args:
        audio_data: Array-like of samples (1D expected).
        output_path: Destination file path handed to ``sf.write``.
        sample_rate: Sample rate handed to ``sf.write`` as ``samplerate``.
    """
    samples = np.asarray(audio_data)
    if samples.size and np.issubdtype(samples.dtype, np.floating):
        peak = float(np.max(np.abs(samples)))
        # NaN/inf peaks are left alone: rescaling by them would corrupt the data.
        if np.isfinite(peak) and peak > 1.0:
            samples = samples / peak
    sf.write(output_path, samples, samplerate=sample_rate)

def full_experiment(audio_path, label, sr=44100, multiplier=4,
                    filter_bank_sizes=(16, 24), base_params_per_env=(256, 512)):
    """Resynthesize one frame of a sound for a grid of settings; play and save each.

    The first ``2**15 * multiplier`` samples of the file are standardized to
    zero mean and unit standard deviation, played, and saved as
    ``{label}_original.wav``. Then, for every filter-bank size and every
    parameters-per-envelope setting, the frame is analysed with
    ``TexEnv_param_extractor``, resynthesized with ``TexEnv``, played, and
    saved as ``{label}_resynth_N_F_{N_F}_param_per_env_{P}.wav`` in the current
    working directory.

    Args:
        audio_path: Path of the audio file to load (resampled to ``sr``, mono).
        label: Prefix used in printed headings and output file names.
        sr: Sample rate used for loading, seed creation, analysis and saving.
        multiplier: Scales both the frame length (``2**15 * multiplier``) and
            the number of parameters per envelope (``P * multiplier``).
        filter_bank_sizes: Filter-bank sizes to try, in order.
        base_params_per_env: Parameters-per-envelope values ``P`` (before
            scaling by ``multiplier``) to try for every filter-bank size.

    Returns:
        None. Results are displayed and written to disk.
    """
    synthesizers = ddsp_textures.signal_processors.synthesizers
    audio, _ = librosa.load(audio_path, sr=sr, mono=True)

    # 2**15 samples is about 0.74 s at 44.1 kHz, so the default multiplier of 4
    # gives a frame of roughly 3 seconds.
    frame_size = 2**15 * multiplier
    # NOTE(review): if the file is shorter than frame_size the slice is shorter
    # than the length the seed is built for - confirm TexEnv tolerates that.
    audio_segment = torch.tensor(audio[:frame_size], dtype=torch.float32)
    audio_segment = (audio_segment - torch.mean(audio_segment)) / torch.std(audio_segment)
    print(f"{label} original audio")
    display(Audio(data=audio_segment.numpy(), rate=sr))
    save_audio(audio_segment.numpy(), f"{label}_original.wav", sample_rate=sr)

    for N_filter_bank in filter_bank_sizes:
        # One seed per filter-bank size, shared by all param_per_env settings.
        seed = ddsp_textures.auxiliar.seeds.seed_maker(frame_size, sr, N_filter_bank)
        for base_params in base_params_per_env:
            param_per_env = base_params * multiplier
            param_real, param_imag = synthesizers.TexEnv_param_extractor(audio_segment, sr, N_filter_bank, param_per_env)
            new_audio = synthesizers.TexEnv(param_real, param_imag, seed)
            # Report the unscaled value so headings and file names do not depend on multiplier.
            print(f"{label} resynthesized with N_F=",N_filter_bank," and param_per_env=",param_per_env//multiplier)
            display(Audio(data=new_audio.numpy(), rate=sr))
            output_path = f"{label}_resynth_N_F_{N_filter_bank}_param_per_env_{param_per_env//multiplier}.wav"
            save_audio(new_audio.numpy(), output_path, sample_rate=sr)
    
# Label -> source recording used for each resynthesis experiment
path_dict = dict(
    fire="../sounds/all sounds/fire.wav",
    water="../sounds/all sounds/water.wav",
    wind="../sounds/all sounds/wind_interior.wav",
    bubbles="../sounds/all sounds/bubbles_short.wav",
)

# Run the whole experiment once per recording, in insertion order
for label, audio_path in path_dict.items():
    full_experiment(audio_path, label)

fire original audio


fire resynthesized with N_F= 16  and param_per_env= 256


fire resynthesized with N_F= 16  and param_per_env= 512


fire resynthesized with N_F= 24  and param_per_env= 256


fire resynthesized with N_F= 24  and param_per_env= 512


water original audio


water resynthesized with N_F= 16  and param_per_env= 256


water resynthesized with N_F= 16  and param_per_env= 512


water resynthesized with N_F= 24  and param_per_env= 256


water resynthesized with N_F= 24  and param_per_env= 512


wind original audio


wind resynthesized with N_F= 16  and param_per_env= 256


wind resynthesized with N_F= 16  and param_per_env= 512


wind resynthesized with N_F= 24  and param_per_env= 256


wind resynthesized with N_F= 24  and param_per_env= 512


bubbles original audio


bubbles resynthesized with N_F= 16  and param_per_env= 256


bubbles resynthesized with N_F= 16  and param_per_env= 512


bubbles resynthesized with N_F= 24  and param_per_env= 256


bubbles resynthesized with N_F= 24  and param_per_env= 512
