In [1]:
import sys
# Prepend '/tf/utils/' to the module search path (index 0 gives it priority
# over every other entry). Presumably this is where the project-local modules
# imported in the following cells live -- confirm.
sys.path.insert(0, '/tf/utils/')

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pesq import pesq
import pystoi

from utils import performance, calculate_snr, reconstruct_signal_from_stft
from data_generators import NoisyTargetWithMetricsGenerator
from artigos import AttentionResUnet
from sound import Sound
from kalman import kalman
from tqdm import tqdm
import time

from tensorflow.keras.models import Model, load_model
import tensorflow.keras.backend as K

from wiener import wiener_filter

In [3]:
from artigos.PRIDNet import Convolutional_block, Channel_attention, AttResUnetConvLSTM, Avg_pool_Unet_Upsample_msfe, Multi_scale_feature_extraction, Kernel_selecting_module
from keras.utils import custom_object_scope

In [4]:
from artigos.DeepNoiseSuppressionWithNonIntrusivePESQNet import weighted_msle

In [5]:
# Third positional argument given to Sound(...) when the audio corpora are
# loaded; presumably the per-clip length in samples -- TODO confirm.
base_shape_size = 8192
# NOTE(review): `ws` and `ol` look like an STFT window size and overlap, but
# no visible cell reads them -- confirm they are still needed.
ws = 255
ol = 128

In [8]:
# Checkpoint files of the three trained models loaded with load_model().
# The commented-out assignments are alternative Attention Res U-Net checkpoints.
# att_unet_path = '/tf/Etapa 3/Artigos/Attention ResU-Net/AttentionResUnet_checkpoints/attention_resunet_epochs_53_loss_mse_0.0106_vallos_0.0112.h5'
att_unet_path = '/tf/Etapa 3/Artigos/Attention ResU-Net/AttentionResUnet_checkpoints/1_mirror_attention_res_unet_custom-mlse.h5'
# att_unet_path = '/tf/Etapa 3/Artigos/Attention ResU-Net/checkpoint-2024-04-21_04-20-09.h5'
fcrn_path = '/tf/Etapa 3/Artigos/Non Intrusive PESQNet/DNS_checkpoints/best_DNS_128x64.h5'
pridnet_path = '/tf/Etapa 3/Artigos/PRIDNet/PRIDNet_loss-0.0165-epochs-54.h5'

In [9]:
# Attention Res U-Net: this checkpoint is loaded without any custom objects.
unet = load_model(att_unet_path)

# FCRN: `weighted_msle` has to be resolvable by name while the checkpoint is
# being deserialized, so it is registered for the duration of the load.
fcrn_custom_objects = {'weighted_msle': weighted_msle}
with custom_object_scope(fcrn_custom_objects):
    fcrn = load_model(fcrn_path)

# PRIDNet: same idea, with every custom building block registered by name.
pridnet_custom_objects = {
    'Convolutional_block': Convolutional_block,
    'Channel_attention': Channel_attention,
    'AttResUnetConvLSTM': AttResUnetConvLSTM,
    'Avg_pool_Unet_Upsample_msfe': Avg_pool_Unet_Upsample_msfe,
    'Multi_scale_feature_extraction': Multi_scale_feature_extraction,
    'Kernel_selecting_module': Kernel_selecting_module,
}
with custom_object_scope(pridnet_custom_objects):
    pridnet = load_model(pridnet_path)

In [10]:
# Load the speech and noise recordings from the training folders (the cell
# output shows one progress bar per corpus); base_shape_size is forwarded as
# the third argument.
sound_base = Sound('/tf/Dados/Vozes/train/', '/tf/Dados/Ruido/train/', base_shape_size)

Loading Noise Files: 100%|██████████| 5695/5695 [01:30<00:00, 62.74it/s]]
Loading Speech Files: 100%|██████████| 5725/5725 [01:39<00:00, 57.59it/s] 


In [19]:
# Generator built from the loaded clean and noise sounds. The evaluation loop
# calls its generate_sample_completo(batch_size=...) method, which yields
# waveform pairs, STFT pairs and a per-sample metrics DataFrame.
data_generator = NoisyTargetWithMetricsGenerator(sound_base.clean_sounds, sound_base.noise_sounds)

In [40]:
import soundfile as sf
import os

# Save an audio clip, encoding the supplied metrics in the file name
def save_audio_with_metrics(audio, filename, folder, SNR=None, PESQ=None, STOI=None, samplerate=8000):
    """Write `audio` to a WAV file whose name encodes the given metrics.

    The file is named ``<filename>[_SNR_x.xx][_PESQ_x.xx][_STOI_x.xx].wav``;
    a metric left as ``None`` is omitted from the name. The final path is
    printed after the file has been written.

    Args:
        audio: Samples handed unchanged to ``soundfile.write``.
        filename: Base file name, without extension.
        folder: Destination directory; it is created if it does not exist.
        SNR, PESQ, STOI: Optional numeric scores, formatted with two decimals.
        samplerate: Sample rate in Hz written to the file header. Defaults to
            8000, the value that was previously hard-coded.

    Returns:
        None.
    """
    labelled_values = (("SNR", SNR), ("PESQ", PESQ), ("STOI", STOI))
    metrics = [f"{label}_{value:.2f}" for label, value in labelled_values if value is not None]

    metrics_str = "_".join(metrics)
    if metrics_str:
        filename = f"{filename}_{metrics_str}.wav"
    else:
        filename = f"{filename}.wav"

    # Writing into a missing directory would fail; an empty `folder` means the
    # current directory and needs no creation.
    if folder:
        os.makedirs(folder, exist_ok=True)

    file_path = os.path.join(folder, filename)
    sf.write(file_path, audio, samplerate=samplerate)
    print(f"Saved: {file_path}")

In [41]:
# Destination folder passed to save_audio_with_metrics() by the evaluation loop.
base_folder = "/tf/Resultado - Sons Filtrados"

# Create the directory if it does not exist
os.makedirs(base_folder, exist_ok=True)

In [42]:
# Number of batches processed per pass of the evaluation loop
# (it iterates over range(batch_num)).
batch_num = 1

In [75]:
# Evaluation settings. NOTE(review): `window_size` and `order` carry the same
# values (600, 11) that the evaluation loop passes to kalman() -- presumably
# they are the Kalman window length and model order; confirm.
window_size, order = 600, 11
batch_size = 128

# Result table; the evaluation loop appends one block of rows per batch.
df_resultado = pd.DataFrame()

# Execution-time accumulators, one independent list per filter
(
    wiener_execution_times,
    kalman_execution_times,
    att_execution_times,
    fcrn_execution_times,
    pridnet_execution_times,
) = ([] for _ in range(5))

In [76]:
# Constants shared by every filter evaluated in this cell.
SAMPLE_RATE = 8000       # Hz; passed to kalman(), pesq() and pystoi.stoi()
SPEC_SHAPE = (128, 64)   # shape each network output is reshaped to before reconstruction
RECON_LEN = 8064         # clean references are cut to this many samples when scoring network outputs


def _phase_to_radians(phase):
    """Apply (phase - 0.5) * 2*pi to a phase channel.

    Presumably maps a phase stored in [0, 1] onto [-pi, pi] -- confirm against
    the data generator.
    """
    return (phase - 0.5) * 2 * np.pi


def _denoise_with_noisy_phase(model, stft_batch, execution_times):
    """Denoise each sample with a single-channel network, reusing the input phase.

    Channel 0 of every STFT sample is fed to `model`; its prediction is used as
    the amplitude and channel 1 of the (noisy) input as the phase. The duration
    of each `predict` call is appended to `execution_times`.

    Returns:
        List with one reconstructed signal per sample.
    """
    signals = []
    for noisy_stft in stft_batch:
        net_input = noisy_stft[..., 0][np.newaxis, ..., np.newaxis]

        start = time.perf_counter()
        prediction = model.predict(net_input, verbose=False)
        execution_times.append(time.perf_counter() - start)

        amplitude = prediction.reshape(SPEC_SHAPE)
        phase = _phase_to_radians(noisy_stft[..., 1].reshape(SPEC_SHAPE))
        signals.append(reconstruct_signal_from_stft(amplitude, phase))
    return signals


def _denoise_with_predicted_phase(model, stft_batch, execution_times):
    """Denoise each sample with a two-channel network (amplitude and phase).

    The full STFT sample is fed to `model`; channel 0 of the prediction is used
    as the amplitude and channel 1 as the phase. The duration of each `predict`
    call is appended to `execution_times`.

    Returns:
        List with one reconstructed signal per sample.
    """
    signals = []
    for noisy_stft in stft_batch:
        net_input = noisy_stft[np.newaxis, ...]

        start = time.perf_counter()
        prediction = model.predict(net_input, verbose=False)
        execution_times.append(time.perf_counter() - start)

        prediction = prediction.reshape(SPEC_SHAPE + (2,))
        amplitude = prediction[:, :, 0]
        phase = _phase_to_radians(prediction[:, :, 1])
        signals.append(reconstruct_signal_from_stft(amplitude, phase))
    return signals


def _score_batch(clean_batch, filtered_batch, clip_to=None):
    """Compute narrow-band PESQ, STOI and SNR for each clean/filtered pair.

    Args:
        clean_batch: Clean reference signals.
        filtered_batch: Filtered signals, in the same order.
        clip_to: If given, every reference is cut to its first `clip_to` samples.

    Returns:
        Tuple ``(pesq_scores, stoi_scores, snr_scores)`` of per-sample lists.
    """
    references = [clean if clip_to is None else clean[:clip_to] for clean in clean_batch]
    pesq_scores = [pesq(SAMPLE_RATE, ref.reshape(-1), out.reshape(-1), 'nb') for ref, out in zip(references, filtered_batch)]
    stoi_scores = [pystoi.stoi(ref, out, SAMPLE_RATE) for ref, out in zip(references, filtered_batch)]
    snr_scores = [calculate_snr(ref, out) for ref, out in zip(references, filtered_batch)]
    return pesq_scores, stoi_scores, snr_scores


it = 1
while df_resultado.shape[0] < 10:
    try:
        for _ in tqdm(range(batch_num)):
            (x_wave_batch, y_wave_batch), (x_stft_batch, y_stft_batch), metrics_batch_df = next(
                data_generator.generate_sample_completo(batch_size=batch_size)
            )

            ## Filtering

            # Wiener filter; the batch time is averaged per sample
            wiener_start_time = time.perf_counter()
            wiener_batch = [wiener_filter(noisy_signal, snr) for noisy_signal, snr in zip(x_wave_batch, metrics_batch_df['SNR'])]
            wiener_execution_times.append((time.perf_counter() - wiener_start_time) / batch_size)

            # Kalman filter, configured through window_size / order
            kalman_start_time = time.perf_counter()
            kalman_batch = [kalman(noisy_signal, window_size, order, SAMPLE_RATE, snr) for noisy_signal, snr in zip(x_wave_batch, metrics_batch_df['SNR'])]
            # NOTE(review): unlike the Wiener timing, this average is divided by
            # an extra factor of 6 whose origin is not visible here -- confirm.
            kalman_execution_times.append((time.perf_counter() - kalman_start_time) / (batch_size * 6))

            # Neural networks: one predict() call per sample, timed individually
            att_batch = _denoise_with_noisy_phase(unet, x_stft_batch, att_execution_times)
            fcrn_batch = _denoise_with_noisy_phase(fcrn, x_stft_batch, fcrn_execution_times)
            pridnet_batch = _denoise_with_predicted_phase(pridnet, x_stft_batch, pridnet_execution_times)

            ## Metrics

            # (column label, file-name tag, filtered signals, reference clip length)
            filtered_outputs = [
                ('Wiener', 'wiener', wiener_batch, None),
                ('Kalman', 'kalman', kalman_batch, None),
                ('Attention Res U-Net', 'attresunet', att_batch, RECON_LEN),
                ('FCRN', 'fcrn', fcrn_batch, RECON_LEN),
                ('PRIDNet', 'pridnet', pridnet_batch, RECON_LEN),
            ]

            # Score every filter before touching the DataFrame, so a scoring
            # failure discards the whole batch.
            scores = {
                label: _score_batch(y_wave_batch, batch, clip_to)
                for label, _tag, batch, clip_to in filtered_outputs
            }

            ## Collect into the result DataFrame
            for label, _tag, _batch, _clip_to in filtered_outputs:
                pesq_scores, stoi_scores, snr_scores = scores[label]
                metrics_batch_df[f'PESQ ({label})'] = pesq_scores
                metrics_batch_df[f'STOI ({label})'] = stoi_scores
                metrics_batch_df[f'SNR ({label})'] = snr_scores

            df_resultado = pd.concat([df_resultado, metrics_batch_df], ignore_index=True)

            ## Save the first sample of the batch in every variant
            sound_name = "som_" + str(it)

            save_audio_with_metrics(y_wave_batch[0], f"{sound_name}_limpo", base_folder)
            # .iloc[0] selects the first row by position, matching x_wave_batch[0]
            # whatever index the metrics DataFrame carries.
            save_audio_with_metrics(
                x_wave_batch[0], f"{sound_name}_ruidoso", base_folder,
                SNR=metrics_batch_df['SNR'].iloc[0],
                PESQ=metrics_batch_df['PESQ'].iloc[0],
                STOI=metrics_batch_df['STOI'].iloc[0],
            )
            for label, tag, batch, _clip_to in filtered_outputs:
                pesq_scores, stoi_scores, snr_scores = scores[label]
                save_audio_with_metrics(
                    batch[0], f"{sound_name}_{tag}", base_folder,
                    SNR=snr_scores[0], PESQ=pesq_scores[0], STOI=stoi_scores[0],
                )

            it += 1
    except Exception as exc:
        # Best-effort: a failing batch is skipped and a new one is drawn, but
        # the error is reported so a persistent failure is visible.
        # KeyboardInterrupt is not an Exception subclass, so the loop can
        # still be interrupted.
        print(f"Batch skipped ({type(exc).__name__}): {exc}")
        continue

100%|██████████| 1/1 [00:30<00:00, 30.94s/it]

Saved: /tf/Resultado - Sons Filtrados/som_1_limpo.wav
Saved: /tf/Resultado - Sons Filtrados/som_1_ruidoso_SNR_11.76_PESQ_2.29_STOI_0.93.wav
Saved: /tf/Resultado - Sons Filtrados/som_1_wiener_SNR_11.34_PESQ_1.84_STOI_0.89.wav
Saved: /tf/Resultado - Sons Filtrados/som_1_kalman_SNR_12.01_PESQ_2.39_STOI_0.93.wav
Saved: /tf/Resultado - Sons Filtrados/som_1_attresunet_SNR_10.12_PESQ_2.80_STOI_0.94.wav
Saved: /tf/Resultado - Sons Filtrados/som_1_fcrn_SNR_9.97_PESQ_3.08_STOI_0.95.wav
Saved: /tf/Resultado - Sons Filtrados/som_1_pridnet_SNR_9.49_PESQ_3.49_STOI_0.95.wav





In [39]:
# (rows, columns) of the accumulated result table.
df_resultado.shape

(127, 18)

In [18]:
# Turn each per-filter timing list into a NumPy array for the summary cells.
w, k, at, fc, prid = (
    np.array(times)
    for times in (
        wiener_execution_times,
        kalman_execution_times,
        att_execution_times,
        fcrn_execution_times,
        pridnet_execution_times,
    )
)

In [36]:
# Number of timing measurements collected per filter.
for _label, _times in (('Wiener', w), ('Kalman', k), ('Atte', at), ('FCRN', fc), ('PRID', prid)):
    print(f'{_label} {_times.shape}')

Wiener (92,)
Kalman (90,)
Atte (11493,)
FCRN (11493,)
PRID (11493,)


In [40]:
# Mean execution time per filter, scaled by 1e3 (seconds -> milliseconds).
for _label, _times in (('Wiener', w), ('Kalman', k), ('Atte', at), ('FCRN', fc), ('PRID', prid)):
    print(f'{_label} {_times.mean()*1e03}')

Wiener 0.49234325390147127
Kalman 31.995118164491874
Atte 56.09748888957555
FCRN 87.64949629473374
PRID 80.4183102666393


In [14]:
# Show the accumulated per-sample metrics (noisy input plus one column group per filter).
df_resultado

Unnamed: 0,PESQ,STOI,SNR,PESQ (Wiener),STOI (Wiener),SNR (Wiener),PESQ (Kalman),STOI (Kalman),SNR (Kalman),PESQ (Attention Res U-Net),STOI (Attention Res U-Net),SNR (Attention Res U-Net),PESQ (FCRN),STOI (FCRN),SNR (FCRN),PESQ (PRIDNet),STOI (PRIDNet),SNR (PRIDNet)
0,1.296263,0.775160,4.962552,1.125602,0.723287,5.530267,1.360447,0.780339,5.754963,1.394868,0.791704,8.082024,1.803185,0.816958,10.677764,1.947838,0.837218,10.930436
1,1.407392,0.863934,7.951278,1.376512,0.676396,7.785751,1.562695,0.875750,8.259641,2.160428,0.909643,12.341846,2.165346,0.919648,12.170937,2.851363,0.934347,12.391657
2,1.184606,0.673506,1.862845,1.154473,0.490274,1.458280,1.241539,0.682699,2.587217,1.586685,0.692993,7.239708,1.363692,0.724794,7.331686,1.959027,0.744906,8.531525
3,1.719569,0.951005,15.418911,1.788009,0.952284,15.506893,1.826130,0.954020,15.652736,2.584886,0.953096,14.929259,2.538016,0.956434,14.726611,3.179116,0.952863,13.248985
4,2.278465,0.864223,14.771183,2.333027,0.853745,15.254821,2.377969,0.866999,14.973914,2.296513,0.858088,13.100498,2.650498,0.859748,12.805365,3.104145,0.879569,12.070394
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10850,1.750201,0.939401,9.300510,1.888451,0.914632,9.388430,1.983881,0.950928,9.656060,2.315873,0.955524,9.419830,2.350873,0.950698,9.418120,2.731254,0.955346,9.549722
10851,1.861786,0.529188,8.179628,1.788958,0.563420,8.772682,1.914153,0.563147,8.737069,1.690594,0.570096,10.338312,2.319134,0.619370,10.586369,2.179341,0.633971,11.715372
10852,2.738901,0.397229,4.810878,2.078522,0.410867,5.183040,3.004475,0.388723,5.293273,2.537107,0.393211,10.316987,2.994769,0.411846,9.547555,3.015818,0.400261,11.336930
10853,1.511017,0.863345,5.478723,1.523882,0.848525,5.269584,1.744922,0.877106,6.216210,2.381502,0.913785,10.589583,2.146684,0.877629,9.606663,3.083261,0.905983,10.221471


In [15]:
# Short alias for the result table (same object, not a copy).
df = df_resultado

In [16]:
# Column-name suffix of each signal variant in `df`, paired by position with
# the row label that variant gets in the summary table.
column_suffixes = ["", " (Wiener)", " (Kalman)", " (Attention Res U-Net)", " (FCRN)", " (PRIDNet)"]
row_labels = ["Ruidoso", "Wiener", "Kalman", "Attention Res U-Net", "FCRN com PESQNet", "PRIDNet"]

# Mean of every metric for every variant: one column per metric, one row per variant.
final_table = pd.DataFrame(
    {
        metric: [df[metric + suffix].mean() for suffix in column_suffixes]
        for metric in ("SNR", "PESQ", "STOI")
    },
    index=row_labels,
)

In [17]:
# Show the mean SNR / PESQ / STOI per signal variant.
final_table

Unnamed: 0,SNR,PESQ,STOI
Ruidoso,9.039147,2.03647,0.834623
Wiener,9.915601,1.898672,0.810032
Kalman,9.517446,2.134114,0.837591
Attention Res U-Net,10.128784,2.611082,0.87993
FCRN com PESQNet,9.952261,2.581096,0.876949
PRIDNet,8.526181,2.994734,0.887201


In [None]:
# # Criando figuras e eixos separados para cada array
# fig, (ax1, ax2, ax3) = plt.subplots(3, 1)

# # Plotando o primeiro array
# ax1.plot(y_wave_batch[0][-20:])
# ax1.set_ylabel('Sinal de voz ruidoso')

# # Plotando o primeiro array
# ax2.plot(att_batch[0][-20:])
# ax2.set_ylabel('Sinal de voz ruidoso')

# # ax3.plot(s_f)
# # ax3.set_ylabel('Sinal de voz filtrado')

# # Exibindo os gráficos
# plt.show()

In [33]:
from datetime import datetime

# Timestamp taken when this cell runs, rendered as YYYY-MM-DD_HH-MM-SS so it
# can be embedded in a file name.
current_datetime = datetime.now()
datetime_str = format(current_datetime, "%Y-%m-%d_%H-%M-%S")

# Name of the spreadsheet the results are exported to.
file_name = f"compilado-v3_{datetime_str}.xlsx"

In [34]:
# Export every row whose input SNR is not +inf; the row index is left out of the sheet.
# NOTE(review): -inf and NaN SNR values are still exported -- confirm that is intended.
df_resultado.loc[df_resultado['SNR'].ne(np.inf)].to_excel(file_name, index=False)