In [1]:
import sys

# Directory prepended to the import search path so that the modules imported
# by the following cells can be resolved.
UTILS_DIR = '/tf/utils/'
sys.path.insert(0, UTILS_DIR)

In [2]:
# Notebook-wide configuration.
# Size handed to the Sound loader as its third argument.
base_shape_size = 8192
# Presumably the STFT window size and overlap -- TODO confirm; neither name is
# referenced again in this notebook.
ws, ol = 255, 128
# Shape handed to create_model (the model reports its input as (None, 128, 64, 2)).
input_shape = (128, 64, 2)

In [3]:
from utils import calculate_stft_magnitude_and_phase, reconstruct_signal_from_stft
from sound import Sound
from data_generators import NoisyTargetGenerator
from artigos.PRIDNet import create_model
import tensorflow as tf
# tf.debugging.set_log_device_placement(True)

from tensorflow.keras.optimizers import Adam

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

from IPython.display import Audio
from IPython import display
import librosa

In [4]:
# Locations of the speech and noise recordings, relative to this notebook.
speech_dir = '../../../Dados/Vozes/'
noise_dir = '../../../Dados/Ruido/'

# Load both collections; base_shape_size is forwarded as the third argument.
sound_base = Sound(speech_dir, noise_dir, base_shape_size)

Loading Speech Files: 100%|██████████| 8179/8179 [00:05<00:00, 1513.68it/s]
Loading Noise Files: 100%|██████████| 8137/8137 [00:05<00:00, 1444.38it/s]
  clean_sounds = [sound for sound in clean_sounds if sound != self.TOO_SHORT_ERROR]
  noise_sounds = [sound for sound in noise_sounds if sound != self.TOO_SHORT_ERROR]


In [5]:
# One generator per split; both are given the same noise collection.
data_generator_train, data_generator_val = (
    NoisyTargetGenerator(split, sound_base.noise_sounds)
    for split in (sound_base.train_X, sound_base.val_X)
)

In [6]:
def generate_images(model, test_input, tar, training=False):
    """Plot the log power spectrum of the noisy input, the target and the model output.

    Args:
        model: Callable model; invoked as ``model(test_input, training=training)``.
        test_input: Batch of noisy inputs. Only sample 0, channel 0 is drawn.
        tar: Batch of targets aligned with ``test_input``. Only sample 0,
            channel 0 is drawn.
        training: Forwarded to the model call. Defaults to ``False`` so the
            plotted output is computed in inference mode, consistent with the
            ``model.predict`` call used elsewhere in this notebook.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    prediction = model(test_input, training=training)

    panels = [test_input[0], tar[0], prediction[0]]
    titles = ['Log Power Spectrum - Som ruidoso', 'Log Power Spectrum - Som original', 'Log Power Spectrum - Som filtrado']

    # Lower bound applied to the power before the logarithm (a -100 dB floor).
    # Without it, bins that are exactly zero become -inf, which triggers
    # divide-by-zero warnings and breaks the colour scaling of the image.
    power_floor = 1e-10

    fig, axes = plt.subplots(1, 3, figsize=(22, 7))
    for ax, panel, title in zip(axes, panels, titles):
        # Channel 0 is squared and expressed in dB.
        power = np.square(np.asarray(panel[..., 0]))
        image = ax.imshow(10 * np.log10(np.maximum(power, power_floor)), aspect='auto', cmap='inferno')
        ax.set_title(title)
        fig.colorbar(image, ax=ax, format='%+2.0f dB')
        ax.axis('off')

    plt.show()

In [7]:
# with tf.device('/GPU:0'):
#     # TensorFlow operations that you want to run on the GPU

#     model = create_model(input_shape)

#     print(model.summary())

#     steps_per_epoch_train = len(sound_base.train_X)
#     steps_per_epoch_validation = len(sound_base.val_X)

#     best_models_path = "/tf/Etapa 3/Artigos/PRIDNet/model_checkpoints"
#     callbacks_lst = [
#                      tf.keras.callbacks.ModelCheckpoint(filepath=best_models_path+"best_PRIDNet_blindnoise_128x64.h5", save_freq=100, save_weights_only=False),
#         tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', min_lr=0.0000009, min_delta=0.0001, factor=0.70, patience=3, verbose=1, mode='min'),
#         tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, min_delta=0.0001, patience=10)
#     ]

#     model.compile(loss=tf.keras.losses.MeanSquaredLogarithmicError(), optimizer=Adam(learning_rate=0.0009))

#     batch_size = 32
#     steps_per_epoch = len(sound_base.train_X) // batch_size
    
#     print('Starting training')
    
#     for i, epoch in enumerate(range(20)):
#         print(f"Epoch {epoch + 1}")
        
#         # Gera um novo lote de validação para cada época
#         validation_batch = next(data_generator_val.generate_sample_completo(batch_size=batch_size))
#         x_val, y_val = validation_batch
    
#         model.fit(data_generator_train.generate_sample_completo(batch_size=batch_size, include_clean=True),
#                   steps_per_epoch=steps_per_epoch,
#                   epochs=1,
#                   validation_data=(x_val, y_val),
#                   callbacks=callbacks_lst
#                  )
    
#         indice_aleatorio = np.random.choice(x_val.shape[0])
        
#         # Selecione a amostra correspondente
#         amostra_noisy_module = x_val[indice_aleatorio]
#         amostra_noisy_module = amostra_noisy_module[np.newaxis, ...]
    
#         amostra_original_module = y_val[indice_aleatorio]
#         amostra_original_module = amostra_original_module[np.newaxis, ...]
        
#         generate_images(model, amostra_noisy_module, amostra_original_module)

## Estrutura do modelo para 3 canais

![Diagrama da estrutura do modelo](model_structure.png)

In [8]:
# Build the network for the configured input shape. Explicit GPU pinning
# (`with tf.device('/GPU:0'):`) is left disabled here.
model = create_model(input_shape)

Input = (None, 128, 64, 2)
Conv block = (None, 128, 64, 64)
Channel Attention = (None, 128, 64, 64)
Channel Attention Last CNN = (None, 128, 64, 2)
First phase = (None, 128, 64, 4)

Multi-scale feature extraction = (None, 128, 64, 12)
Kernel Selection Module = (None, 128, 64, 2)


In [9]:
# Print the layer-by-layer table (output shapes, parameter counts, connections).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 128, 64, 2)  0           []                               
                                ]                                                                 
                                                                                                  
 convolutional_block (Convoluti  (None, 128, 64, 64)  112000     ['input_layer[0][0]']            
 onal_block)                                                                                      
                                                                                                  
 channel_attention (Channel_att  (None, 128, 64, 64)  322        ['convolutional_block[0][0]']    
 ention)                                                                                      

In [10]:
# Number of items in the training and validation splits, in that order.
steps_per_epoch_train, steps_per_epoch_validation = (
    len(split) for split in (sound_base.train_X, sound_base.val_X)
)

In [11]:
# Directory that receives the model checkpoints.
best_models_path = "/tf/Etapa 3/Artigos/PRIDNet/model_checkpoints"

# Join directory and file name with an explicit separator. Plain concatenation
# produced ".../model_checkpointsbest_PRIDNet_blindnoise_128x64.h5", i.e. a file
# next to the checkpoint directory instead of inside it.
checkpoint_filepath = f"{best_models_path}/best_PRIDNet_blindnoise_128x64.h5"

callbacks_lst = [
    # Saves the full model (not only the weights); an integer save_freq is
    # counted in batches.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_freq=100,
        save_weights_only=False,
    ),
    # Multiplies the learning rate by 0.70 after 3 epochs without a val_loss
    # improvement of at least 0.0001, never going below 9e-7.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        min_lr=0.0000009,
        min_delta=0.0001,
        factor=0.70,
        patience=3,
        verbose=1,
        mode='min',
    ),
    # Stops training after 10 epochs without a val_loss improvement of at
    # least 0.0001.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        verbose=1,
        min_delta=0.0001,
        patience=10,
    ),
]

In [12]:
# Mean squared logarithmic error, optimised with Adam at a 9e-4 learning rate.
loss_fn = tf.keras.losses.MeanSquaredLogarithmicError()
optimizer = Adam(learning_rate=0.0009)
model.compile(loss=loss_fn, optimizer=optimizer)

In [None]:
# Training configuration.
batch_size = 32
num_epochs = 20
steps_per_epoch = len(sound_base.train_X) // batch_size

print('Starting training')

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}")

    # Draw a validation batch for this epoch from a freshly created generator.
    validation_batch = next(data_generator_val.generate_sample_completo(batch_size=batch_size))
    x_val, y_val = validation_batch

    # Train for a single epoch so that a sample can be plotted between epochs.
    # NOTE(review): callbacks are disabled below. Because fit() is invoked once
    # per epoch, patience-based callbacks would presumably restart their
    # counters on every call -- confirm before re-enabling them.
    model.fit(
        data_generator_train.generate_sample_completo(batch_size=batch_size, include_clean=True),
        steps_per_epoch=steps_per_epoch,
        epochs=1,
        validation_data=(x_val, y_val),
        # callbacks=callbacks_lst
    )

    # Pick one random validation sample and restore its leading batch axis.
    indice_aleatorio = np.random.choice(x_val.shape[0])
    amostra_noisy_module = x_val[indice_aleatorio][np.newaxis, ...]
    amostra_original_module = y_val[indice_aleatorio][np.newaxis, ...]

    # Show noisy input, target and current model output side by side.
    generate_images(model, amostra_noisy_module, amostra_original_module)

Starting training
Epoch 1

In [None]:
# Draw a batch of 8 from the validation generator and keep only its first
# sample, re-adding the leading batch axis so each array is a batch of one.
validation_batch = next(data_generator_val.generate_sample_completo(batch_size=8))
x_test, y_test = (
    batch[0, ...][np.newaxis, ...] for batch in validation_batch
)

In [None]:
# Maximum and minimum of channel 0: first for the noisy input, then the target.
for sample in (x_test, y_test):
    channel0 = sample[:, :, :, 0]
    print(np.max(channel0))
    print(np.min(channel0))

In [None]:
# Run the network on the single-sample batch x_test; F_f is its output.
F_f = model.predict(x_test)

In [None]:
# Maximum, minimum and shape of the model output, one value per line.
print(np.max(F_f), np.min(F_f), F_f.shape, sep="\n")

In [None]:
def _split_channels(batch):
    """Drop the leading axis of a one-sample batch and split its two channels.

    Returns a pair: channel 0 unchanged, and channel 1 shifted by 0.5 and
    scaled by 2*pi (presumably mapping a [0, 1]-normalised phase back to
    radians -- TODO confirm against the data generator).
    """
    sample = batch.reshape(batch.shape[1], batch.shape[2], batch.shape[3])
    first_channel = sample[:, :, 0]
    second_channel = (sample[:, :, 1] - 0.5) * 2 * np.pi
    return first_channel, second_channel


# Target, model output and noisy input, in that order.
A, phi = _split_channels(y_test)
A_f, phi_f = _split_channels(F_f)
A_n, phi_n = _split_channels(x_test)

In [None]:
# Maxima of both planes of the model output first, then their minima.
for reducer in (np.max, np.min):
    print(reducer(A_f))
    print(reducer(phi_f))

In [None]:
# Plot the spectra of the noisy input, the target and the model output for the selected sample.
generate_images(model, x_test, y_test)

In [None]:
# Rebuild the three waveforms: target (s), noisy input (s_n), model output (s_f).
s, s_n, s_f = (
    reconstruct_signal_from_stft(first_plane, second_plane)
    for first_plane, second_plane in ((A, phi), (A_n, phi_n), (A_f, phi_f))
)

In [None]:
# Criando figuras e eixos separados para cada array
fig, (ax1, ax2, ax3) = plt.subplots(3, 1)

# Plotando o primeiro array
ax1.plot(s)
ax1.set_ylabel('Sinal de voz ruidoso')

# Plotando o primeiro array
ax2.plot(s_n)
ax2.set_ylabel('Sinal de voz ruidoso')

ax3.plot(s_f)
ax3.set_ylabel('Sinal de voz filtrado')

# Exibindo os gráficos
plt.show()

In [None]:
# Play the waveform rebuilt from the target (y_test).
# NOTE(review): 8000 is assumed to be the recordings' sample rate -- confirm against the loader.
Audio(data=s, rate=8000)

In [None]:
# Play the waveform rebuilt from the noisy input (x_test).
# NOTE(review): 8000 is assumed to be the recordings' sample rate -- confirm against the loader.
Audio(data=s_n, rate=8000)

In [None]:
# Play the waveform rebuilt from the model output (F_f).
# NOTE(review): 8000 is assumed to be the recordings' sample rate -- confirm against the loader.
Audio(data=s_f, rate=8000)

In [None]:
# Scratch tuple used by the quick calculation in the next cell.
a = 128, 64

In [None]:
# Scratch calculation: a quarter of the second entry of `a`.
a[1]/4

In [None]:
# Scratch calculation: 64 divided by 8*8.
64/ (8*8)