In [1]:
import sys
import os

# Make the parent of the current working directory importable
# (presumably where the local signal_processors / auxiliar packages live).
parent_dir = os.path.abspath(os.pardir)
if sys.path.count(parent_dir) == 0:
    # Append only once so re-running this cell does not grow sys.path.
    sys.path.append(parent_dir)
    
# Import modules
import signal_processors.synthesizers
import auxiliar.seeds

# Import extra packages
import numpy as np
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
import torch

# Resynthesis function
def resynthesize_and_display(segments, sr, seed, N_filter_bank, frame_size, param_per_env, label, N):
    """Resynthesize N randomly chosen segments and play each next to its original.

    For every draw, one segment is picked at random (with replacement) from
    ``segments``, its parameters are extracted with ``TextEnv_param_extractor``,
    a new signal is synthesized from them with ``TextEnv``, and both the
    original and the resynthesized audio are shown as IPython audio players.

    Args:
        segments: Sequence of audio frames; each one must be convertible with
            ``torch.tensor`` (e.g. NumPy arrays). Must be non-empty when N > 0.
        sr: Sampling rate, passed to the parameter extractor and used as the
            playback rate.
        seed: Forwarded unchanged to ``TextEnv``.
        N_filter_bank: Forwarded unchanged to ``TextEnv_param_extractor``.
        frame_size: Not used by this function; kept so existing calls keep working.
        param_per_env: Forwarded unchanged to ``TextEnv_param_extractor``.
        label: Text placed in front of the printed captions.
        N: Number of random draws to resynthesize.

    Raises:
        ValueError: If N is positive and ``segments`` is empty.
    """
    # Fail with an actionable message instead of NumPy's "high <= 0" error,
    # which is what an empty segment list would otherwise trigger below.
    if N > 0 and len(segments) == 0:
        raise ValueError(
            "segments is empty: nothing to resynthesize "
            "(is the audio shorter than one frame?)"
        )

    for _ in range(N):
        segment = segments[np.random.randint(len(segments))]
        segment = torch.tensor(segment, dtype=torch.float32)
        param_real, param_imag = signal_processors.synthesizers.TextEnv_param_extractor(segment, sr, N_filter_bank, param_per_env)
        new_audio = signal_processors.synthesizers.TextEnv(param_real, param_imag, seed)
        print(f"{label} original audio")
        display(Audio(data=segment.numpy(), rate=sr))
        print(f"{label} resynthesized audio")
        # detach()/cpu() keep playback working if the synthesized tensor tracks
        # gradients or lives on another device (assumes TextEnv returns a
        # torch.Tensor -- TODO confirm).
        display(Audio(data=new_audio.detach().cpu().numpy(), rate=sr))

In [2]:
# FIRE RESYNTHESIS --------------------------------------

# Load audio to resynthesize ----------------------------
fire_path = "../sounds/fire.wav"
sr = 44100
fire_audio, _ = librosa.load(fire_path, sr=sr)  # the returned sampling rate is discarded; sr is used from here on

# Make list of fire segments ----------------------------
frame_size = 2**16 # 2**16 = 65536 samples corresponds to around 1.5 seconds at 44100 Hz (quite long frames)
hop_size = 2**16   # hop == frame size => consecutive, non-overlapping frames

# The "+ 1" keeps the last full frame when the audio length is an exact
# multiple of the frame size (and a clip exactly one frame long yields one
# segment instead of none). Trailing partial frames are still dropped.
fire_segments = [
    fire_audio[start:start + frame_size]
    for start in range(0, len(fire_audio) - frame_size + 1, hop_size)
]

# Seed creation -----------------------------------------
N_filter_bank = 24 # This is a high number of filters (16 is the typical)
seed = auxiliar.seeds.seed_maker(frame_size, sr, N_filter_bank)  # use sr rather than a second hard-coded 44100

# Run the resynthesizer for a bunch of fire segments
param_per_env = 1024  # 1024*24 = 2**13 * 3 => compression of (2**13 * 3)/2**16 = 3/8 = 0.375
resynthesize_and_display(fire_segments,  sr, seed, N_filter_bank, frame_size, param_per_env, "Fire",  3)

Fire original audio


  erb_subbands = torch.tensor(subbands[:, 1:-1], dtype=torch.float32)


Fire resynthesized audio


Fire original audio


Fire resynthesized audio


Fire original audio


Fire resynthesized audio


In [3]:
# WATER RESYNTHESIS --------------------------------------

# Load audio to resynthesize ----------------------------
water_path = "../sounds/water.wav"
sr = 44100
water_audio, _ = librosa.load(water_path, sr=sr)  # the returned sampling rate is discarded; sr is used from here on

# Water segments ----------------------------------------
frame_size = 2**14 # 2**14 = 16384 samples corresponds to around 0.37 seconds at 44100 Hz (sort of short frames)
hop_size = 2**14   # hop == frame size => consecutive, non-overlapping frames

# The "+ 1" keeps the last full frame when the audio length is an exact
# multiple of the frame size (and a clip exactly one frame long yields one
# segment instead of none). Trailing partial frames are still dropped.
water_segments = [
    water_audio[start:start + frame_size]
    for start in range(0, len(water_audio) - frame_size + 1, hop_size)
]

# Seed creation -----------------------------------------
N_filter_bank = 16
seed = auxiliar.seeds.seed_maker(frame_size, sr, N_filter_bank)  # use sr rather than a second hard-coded 44100

# Run the resynthesizer for a bunch of water segments
param_per_env = 256  # 256*16 = 2**12 => compression of 2**12/2**14 = 1/4
resynthesize_and_display(water_segments,  sr, seed, N_filter_bank, frame_size, param_per_env, "Water",  3)

Water original audio


Water resynthesized audio


Water original audio


Water resynthesized audio


Water original audio


Water resynthesized audio
