In [1]:
import sys
import os

# Add the parent directory to the Python path
parent_dir = os.path.abspath('../..')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from ddsp_textures.dataset.makers    import read_wavs_from_folder
from ddsp_textures.auxiliar.features import *

# --- Dataset construction -------------------------------------------------
# Every recording found in `audio_path` is treated as one class. Each recording
# is cut into overlapping frames of `frame_size` samples taken every `hop_size`
# samples, and every frame is stored together with the index of the recording
# it came from.
audio_path    = "../sounds/loss_optimization_dataset"
sampling_rate = 44100
frame_size    = 2**16
hop_size      = 2**15
max_segments_per_audio = 256  # cap so that a long recording cannot dominate the dataset
audios_list   = read_wavs_from_folder(audio_path, sampling_rate)
data          = []  # list of [segment, label] pairs; label is a 0-dim integer tensor

for class_index, audio in enumerate(audios_list):
    size = len(audio)
    # Number of full frames that fit in the recording. The "+ 1" accounts for
    # the frame starting at sample 0; without it the last full frame is dropped
    # and a recording of exactly `frame_size` samples yields no frame at all.
    # Recordings shorter than one frame contribute nothing.
    number_of_segments = max(0, (size - frame_size) // hop_size + 1)
    number_of_segments = min(number_of_segments, max_segments_per_audio)
    for i in range(number_of_segments):
        start = i * hop_size
        segment = audio[start : start + frame_size]
        # NOTE(review): the meaning of the last argument (4) is not visible from
        # this notebook -- confirm against `audio_improver`.
        segment = audio_improver(segment, sampling_rate, 4)
        segment = signal_normalizer(segment)
        data.append([segment, torch.tensor(class_index)])

In [2]:
import torch
from ddsp_textures.loss.functions import *
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

# --- Experiment v1: same-class distance minus different-class distance -----
# Learns a weight vector `alpha` for `statistics_loss` so that segments from the
# same recording are close and segments from different recordings are far apart.
#
# NOTE(review): `statistics_loss`, `ddsp_textures` and `torchaudio` are not
# imported explicitly in this notebook; presumably they come in through the
# wildcard imports -- confirm and import them explicitly.
# NOTE: no random seed is set, so the initial `alpha` and the shuffle order
# differ from run to run.

# Set device to GPU if available, else use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Filter banks and resampler handed to `statistics_loss`
N_filter_bank = 32
M_filter_bank = 24
erb_bank = ddsp_textures.auxiliar.filterbanks.EqualRectangularBandwidth(frame_size, sampling_rate, N_filter_bank, 20, sampling_rate // 2)
new_frame_size = frame_size // 4
new_sampling_rate = sampling_rate // 4
log_bank = ddsp_textures.auxiliar.filterbanks.Logarithmic(new_frame_size, new_sampling_rate, M_filter_bank, 10, new_sampling_rate // 4)
downsampler = torchaudio.transforms.Resample(sampling_rate, new_sampling_rate).to(device)

# Split the [segment, label] pairs built earlier into two parallel lists
signals    = [item[0] for item in data]
categories = [item[1] for item in data]

# Stack into single tensors and move the whole dataset to the device once
signals_tensor = torch.stack(signals).to(device)
categories_tensor = torch.stack(categories).to(device)

# Create a DataLoader for batching
batch_size = 8  # Choose a batch size based on available memory
dataset = TensorDataset(signals_tensor, categories_tensor)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Unconstrained parameters; a softmax turns them into the actual weights.
# NOTE(review): 8 is presumably the number of weights `statistics_loss` expects -- confirm.
alpha = torch.randn(8, requires_grad=True, device=device)

# Optimizer
optimizer = torch.optim.Adam([alpha], lr=0.01)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    total_loss = 0.0

    # Use tqdm to show progress for each epoch
    with tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as pbar:
        for batch_signals, batch_categories in pbar:
            # The batch is already on `device` because the dataset tensors are.
            items_in_batch = len(batch_signals)

            # A batch with fewer than two items has no pairs: the loss would stay
            # a plain Python number and `.backward()` would fail, so skip it.
            if items_in_batch < 2:
                continue

            # Plain Python ints, so comparing labels does not sync with the GPU
            labels = batch_categories.tolist()

            optimizer.zero_grad()

            # Softmax keeps the weights positive and summing to one
            normalized_alpha = torch.softmax(alpha, dim=0)

            # Accumulate the signed distance over every unordered pair in the batch
            batch_loss = 0
            for first in range(items_in_batch):
                for second in range(first + 1, items_in_batch):
                    pair_loss = statistics_loss(
                        batch_signals[first], batch_signals[second],
                        N_filter_bank, M_filter_bank,
                        erb_bank, log_bank, downsampler, normalized_alpha
                    )
                    if labels[first] == labels[second]:
                        # Minimize distance for same class
                        batch_loss = batch_loss + pair_loss
                    else:
                        # Maximize distance for different classes.
                        # NOTE(review): a subtracted distance has no lower bound,
                        # so this objective can keep decreasing -- confirm this
                        # is intended.
                        batch_loss = batch_loss - pair_loss

            # Backpropagate and optimize
            batch_loss.backward()
            optimizer.step()

            # Read the scalar once and reuse it for both reports
            batch_loss_value = batch_loss.item()
            total_loss += batch_loss_value
            pbar.set_postfix(batch_loss=batch_loss_value)

    # Print the total loss at the end of each epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

# Final optimized parameters
final_alpha = torch.softmax(alpha, dim=0).detach()
print("Optimized Parameters:", final_alpha)


Device: cuda


Epoch 1/10: 100%|██████████| 128/128 [09:07<00:00,  4.28s/batch, batch_loss=-88.6]


Epoch 1/10, Loss: -20414.9560


Epoch 2/10: 100%|██████████| 128/128 [09:06<00:00,  4.27s/batch, batch_loss=-1.01e+3]


Epoch 2/10, Loss: -128963.3793


Epoch 3/10: 100%|██████████| 128/128 [09:07<00:00,  4.28s/batch, batch_loss=-93]     


Epoch 3/10, Loss: -161573.2984


Epoch 4/10: 100%|██████████| 128/128 [09:09<00:00,  4.29s/batch, batch_loss=-1.18e+3]


Epoch 4/10, Loss: -163912.9622


Epoch 5/10: 100%|██████████| 128/128 [09:07<00:00,  4.27s/batch, batch_loss=-2.42e+3]


Epoch 5/10, Loss: -180718.0450


Epoch 6/10: 100%|██████████| 128/128 [09:06<00:00,  4.27s/batch, batch_loss=-2.77e+3]


Epoch 6/10, Loss: -174318.3100


Epoch 7/10: 100%|██████████| 128/128 [09:09<00:00,  4.30s/batch, batch_loss=-1.38e+3]


Epoch 7/10, Loss: -174834.5464


Epoch 8/10: 100%|██████████| 128/128 [09:06<00:00,  4.27s/batch, batch_loss=-403]    


Epoch 8/10, Loss: -180187.6891


Epoch 9/10: 100%|██████████| 128/128 [09:09<00:00,  4.29s/batch, batch_loss=-1.51e+3]


Epoch 9/10, Loss: -171309.1865


Epoch 10/10: 100%|██████████| 128/128 [09:08<00:00,  4.28s/batch, batch_loss=-920]    

Epoch 10/10, Loss: -173425.0385
Optimized Parameters: tensor([3.4230e-04, 1.7296e-04, 1.3878e-03, 9.9564e-01, 7.8497e-04, 3.8259e-04,
        1.3160e-04, 1.1537e-03], device='cuda:0')





In [3]:
# Persist the softmax-normalized weights of the most recent training run (v1).
# The tensor is written as-is, on whatever device it currently lives on.
torch.save(obj=final_alpha, f="optimized_parameters_v1.pt")

In [4]:
import torch
from ddsp_textures.loss.functions import *
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

# --- Experiment v2: same-class distance plus reciprocal different-class distance
# Learns a weight vector `alpha` for `statistics_loss` so that segments from the
# same recording are close and segments from different recordings are far apart.
# Different-class pairs contribute 1/distance, which keeps every term positive.
#
# NOTE(review): `statistics_loss`, `ddsp_textures` and `torchaudio` are not
# imported explicitly in this notebook; presumably they come in through the
# wildcard imports -- confirm and import them explicitly.
# NOTE: no random seed is set, so the initial `alpha` and the shuffle order
# differ from run to run.

# Set device to GPU if available, else use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Filter banks and resampler handed to `statistics_loss`
N_filter_bank = 32
M_filter_bank = 24
erb_bank = ddsp_textures.auxiliar.filterbanks.EqualRectangularBandwidth(frame_size, sampling_rate, N_filter_bank, 20, sampling_rate // 2)
new_frame_size = frame_size // 4
new_sampling_rate = sampling_rate // 4
log_bank = ddsp_textures.auxiliar.filterbanks.Logarithmic(new_frame_size, new_sampling_rate, M_filter_bank, 10, new_sampling_rate // 4)
downsampler = torchaudio.transforms.Resample(sampling_rate, new_sampling_rate).to(device)

# Split the [segment, label] pairs built earlier into two parallel lists
signals    = [item[0] for item in data]
categories = [item[1] for item in data]

# Stack into single tensors and move the whole dataset to the device once
signals_tensor = torch.stack(signals).to(device)
categories_tensor = torch.stack(categories).to(device)

# Create a DataLoader for batching
batch_size = 8  # Choose a batch size based on available memory
dataset = TensorDataset(signals_tensor, categories_tensor)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Unconstrained parameters; a softmax turns them into the actual weights.
# NOTE(review): 8 is presumably the number of weights `statistics_loss` expects -- confirm.
alpha = torch.randn(8, requires_grad=True, device=device)

# Optimizer
optimizer = torch.optim.Adam([alpha], lr=0.01)

# Lower bound applied to a distance before taking its reciprocal, so that a
# zero distance cannot produce inf in the loss and NaN in the gradient.
# NOTE(review): assumes `statistics_loss` returns a non-negative tensor -- confirm.
min_distance = 1e-8

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    total_loss = 0.0

    # Use tqdm to show progress for each epoch
    with tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as pbar:
        for batch_signals, batch_categories in pbar:
            # The batch is already on `device` because the dataset tensors are.
            items_in_batch = len(batch_signals)

            # A batch with fewer than two items has no pairs: the loss would stay
            # a plain Python number and `.backward()` would fail, so skip it.
            if items_in_batch < 2:
                continue

            # Plain Python ints, so comparing labels does not sync with the GPU
            labels = batch_categories.tolist()

            optimizer.zero_grad()

            # Softmax keeps the weights positive and summing to one
            normalized_alpha = torch.softmax(alpha, dim=0)

            # Accumulate the contribution of every unordered pair in the batch
            batch_loss = 0
            for first in range(items_in_batch):
                for second in range(first + 1, items_in_batch):
                    pair_loss = statistics_loss(
                        batch_signals[first], batch_signals[second],
                        N_filter_bank, M_filter_bank,
                        erb_bank, log_bank, downsampler, normalized_alpha
                    )
                    if labels[first] == labels[second]:
                        # Minimize distance for same class
                        batch_loss = batch_loss + pair_loss
                    else:
                        # Maximize distance for different classes by minimizing
                        # its reciprocal; identical to 1/pair_loss whenever the
                        # distance is at least `min_distance`.
                        batch_loss = batch_loss + 1 / torch.clamp(pair_loss, min=min_distance)

            # Backpropagate and optimize
            batch_loss.backward()
            optimizer.step()

            # Read the scalar once and reuse it for both reports
            batch_loss_value = batch_loss.item()
            total_loss += batch_loss_value
            pbar.set_postfix(batch_loss=batch_loss_value)

    # Print the total loss at the end of each epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

# Final optimized parameters
final_alpha = torch.softmax(alpha, dim=0).detach()
print("Optimized Parameters:", final_alpha)


Device: cuda


Epoch 1/10: 100%|██████████| 128/128 [09:08<00:00,  4.28s/batch, batch_loss=74.4]


Epoch 1/10, Loss: 9369.0780


Epoch 2/10: 100%|██████████| 128/128 [09:05<00:00,  4.26s/batch, batch_loss=28.1]


Epoch 2/10, Loss: 4631.9188


Epoch 3/10: 100%|██████████| 128/128 [09:05<00:00,  4.26s/batch, batch_loss=47.6]


Epoch 3/10, Loss: 4225.0310


Epoch 4/10: 100%|██████████| 128/128 [09:06<00:00,  4.27s/batch, batch_loss=40.4]


Epoch 4/10, Loss: 4181.0420


Epoch 5/10: 100%|██████████| 128/128 [09:05<00:00,  4.26s/batch, batch_loss=22.7]


Epoch 5/10, Loss: 3826.9369


Epoch 6/10: 100%|██████████| 128/128 [09:06<00:00,  4.27s/batch, batch_loss=24.9]


Epoch 6/10, Loss: 3769.4456


Epoch 7/10: 100%|██████████| 128/128 [09:06<00:00,  4.27s/batch, batch_loss=30]  


Epoch 7/10, Loss: 3595.0089


Epoch 8/10: 100%|██████████| 128/128 [09:04<00:00,  4.26s/batch, batch_loss=29.3]


Epoch 8/10, Loss: 3443.7540


Epoch 9/10: 100%|██████████| 128/128 [09:06<00:00,  4.27s/batch, batch_loss=28.3]


Epoch 9/10, Loss: 3345.5898


Epoch 10/10: 100%|██████████| 128/128 [09:06<00:00,  4.27s/batch, batch_loss=24.5]

Epoch 10/10, Loss: 3428.0847
Optimized Parameters: tensor([0.0070, 0.0035, 0.8993, 0.0049, 0.0431, 0.0265, 0.0067, 0.0089],
       device='cuda:0')





In [5]:
# Persist the softmax-normalized weights of the most recent training run (v2).
# The tensor is written as-is, on whatever device it currently lives on.
torch.save(obj=final_alpha, f="optimized_parameters_v2.pt")