In [1]:
import sys
import os

# Make the directory two levels above the current working directory importable,
# so that the `ddsp_textures` package imported below can be resolved.
parent_dir = os.path.abspath(os.path.join('..', '..'))
if sys.path.count(parent_dir) == 0:
    # Appended (not prepended), so already-listed locations keep priority.
    sys.path.append(parent_dir)
    
# import modules
import ddsp_textures.dataset.makers
import ddsp_textures.auxiliar.features

# Extra packages
import random
import matplotlib.pyplot as plt
from IPython.display import Audio

# For dataset visualization
def random_viewer(dataset_sounds, sampling_rate=44100):
    """Plot and play one randomly chosen element of a dataset.

    Each element is expected to be laid out as
    ``[audio_segment, feature_0, feature_1, ...]``, which is the layout printed
    for the dataset built in this notebook (a waveform tensor first, followed
    by the annotated feature tensors).

    Parameters
    ----------
    dataset_sounds : sequence
        Indexable collection of dataset elements (lists/tuples of tensors).
    sampling_rate : int, optional
        Playback rate in Hz handed to the audio widget. Defaults to 44100,
        the value that used to be hard-coded.

    Raises
    ------
    ValueError
        If ``dataset_sounds`` is empty.
    """
    size = len(dataset_sounds)
    if size == 0:
        # random.randint(0, -1) would also raise ValueError, but with an
        # "empty range" message that hides the real cause.
        raise ValueError("random_viewer needs a non-empty dataset")

    file_number = random.randrange(size)
    element = dataset_sounds[file_number]

    # The waveform is the FIRST entry of an element; everything after it is a
    # feature. (The previous version read the audio from index 1, which plotted
    # and played a feature vector instead of the sound.)
    audio    = element[0].detach().cpu().numpy()
    features = element[1:]

    plt.plot(audio)
    if len(features) > 0:
        title = "feature 0: " + str(features[0].detach().cpu().numpy())
    else:
        title = "no features"
    plt.title(title)
    plt.show()
    display(Audio(data=audio, rate=sampling_rate))

In [2]:
# Fire dataset creation

# --- Dataset parameters -------------------------------------------------------
audio_folder_path = "../sounds/fire_sounds"
frame_size        = 1 << 16   # 65536 samples, roughly 1.5 seconds at 44.1 kHz
hop_size          = 1 << 15   # half a frame: consecutive segments overlap by 50%
sampling_rate     = 44100

# NOTE(review): wildcard import kept as-is; it must stay after the parameters
# above so that name-shadowing order is unchanged.
from ddsp_textures.auxiliar.features import *
# Alternative annotator set, kept for reference:
# features_annotator = [features_freqavg, features_freqavg_freqstd, features_rate, features_energy_bands]
# Annotators applied to each fire segment: energy bands and frequency average.
features_annotator = [features_energy_bands, features_freqavg]
freq_avg_level = 4    # Meant to make the spectral centroid more informative by pushing it a little (unused in this cell)
N_filter_bank = 32    # Number of filters in the filter bank

# --- Dataset creation ---------------------------------------------------------
dataset_raw = ddsp_textures.dataset.makers.DDSP_Dataset(
    audio_folder_path,
    frame_size,
    hop_size,
    sampling_rate,
    N_filter_bank,
    features_annotator,
)
dataset_fire = dataset_raw.compute_dataset()

print("Dataset created with ", len(dataset_fire), " segments")

# Show the first element: its content, its container type and the size of
# every tensor it holds.
print("Example of an element of the dataset:")
print(dataset_fire[0])
print(type(dataset_fire[0]))
for i, entry in enumerate(dataset_fire[0]):
    print(entry.size())

import torch
from torch.utils.data import DataLoader

# Wrap the dataset in a shuffling DataLoader that yields batches of 32 elements
dataloader = DataLoader(dataset_fire, batch_size=32, shuffle=True)

# Inspect a single batch only: how many tensors it holds and their shapes
for batch in dataloader:
    len_batch = len(batch)
    print("Batch features/segments number: ", len_batch)
    for i, batched in enumerate(batch):
        print(batched.shape)
    break

Audio loaded from  ../sounds/fire_sounds
Segments extracted!
Number of segments:  106
Dataset computed!
Dataset created with  106  segments
Example of an element of the dataset:
[tensor([-0.0032, -0.0032, -0.0027,  ..., -0.2776, -0.2997, -0.3020]), tensor([1.8265e+02, 8.6136e+01, 6.3320e+01, 3.9747e+01, 1.8091e+01, 7.6841e+00,
        2.6337e+00, 9.1614e+00, 1.3860e+01, 8.0863e+00, 1.0811e+01, 2.2499e+01,
        2.6003e+01, 1.5420e+01, 1.8092e+01, 1.5955e+01, 8.5298e+00, 7.5192e+00,
        1.0451e+01, 5.0972e+00, 7.4703e+00, 7.4172e+00, 8.1261e+00, 6.6661e+00,
        8.2293e+00, 7.2452e+00, 8.9013e+00, 1.2345e+01, 1.4690e+01, 1.2995e+01,
        3.4151e+00, 1.6314e-01]), tensor(2095.9150)]
<class 'list'>
torch.Size([65536])
torch.Size([32])
torch.Size([])
Batch features/segments number:  3
torch.Size([32, 65536])
torch.Size([32, 32])
torch.Size([32])


In [3]:
# energy_bands_30 = dataset_fire[30][5]
# print("Energy bands:\n", energy_bands_30)
# segment_30 = dataset_fire[30][0]

In [4]:
# # Import modules
# import ddsp_textures.signal_processors.synthesizers

# # Import extra packages
# import numpy as np
# import librosa
# import matplotlib.pyplot as plt
# from IPython.display import Audio
# import torch


# print("std of the segment: ", torch.std(segment_30))
# print("norm of the segment: ", torch.norm(segment_30))

# # Compute the 128 real and imaginary parameters
# param_real, param_imag = ddsp_textures.signal_processors.synthesizers.TexEnv_param_extractor(segment_30, sampling_rate, N_filter_bank, 128)

# amplitudes = np.sqrt(param_real**2 + param_imag**2)

# for i in range(128):
#     print("Amplitude of the "+str(i)+"-th filter: ", amplitudes[i])