In [13]:
import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as layers
import matplotlib.pyplot as plt
import pathlib

from model import *

import librosa
import os
import IPython.display as ipd
import pesq

devices=tf.config.list_physical_devices('GPU')
for i in range(len(devices)):
  tf.config.experimental.set_memory_growth(devices[i],True)

In [14]:
g_opt=tf.keras.optimizers.RMSprop(learning_rate=0.0002)
d_opt=tf.keras.optimizers.RMSprop(learning_rate=0.0002)

ATT_model=ATT(512)
ATT_model.compile(g_opt,g_opt)
ATT_model.load_weights('checkpoints/')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f420c4cace0>

In [15]:
time=2
def padding_and_reshape(noisy):
    pad_count=noisy.shape[0]%(time*16000)
    padding=np.zeros([(time*16000)-pad_count])
    noisy_=np.concatenate([noisy,padding])
    return np.reshape(noisy_,(-1,(time*16000)))
        
def enhance_audio(generator,audio):
    audio1=padding_and_reshape(audio)
    noisy_stft=librosa.stft(audio1,n_fft=512,win_length=512,hop_length=256)
    noisy_stft=np.transpose(noisy_stft,axes=[0,2,1])

    amp=np.abs(noisy_stft)
    # amp=np.expand_dims(amp,axis=0)
    angle=np.angle(noisy_stft)

    enhanced=generator.predict(amp)
    # enhanced=np.squeeze(enhanced,axis=0)

    enhanced_stft=np.transpose(enhanced*(np.exp(angle*1j)),axes=[0,2,1])
    enhanced=librosa.istft(enhanced_stft,n_fft=512,win_length=512,hop_length=256)
    enhanced=np.reshape(enhanced,(-1))
    enhanced=enhanced[:audio.shape[0]]
        
    return enhanced

def print_spectogram(audio):
    audio_stft=librosa.stft(audio,n_fft=1024)
    audio_db=librosa.amplitude_to_db(np.abs(audio_stft),ref=np.max)
    return audio_db

In [16]:
directory='/home/skdm/Aniket/Datasets/Voicebank_DEMAND/test/noisy','/home/skdm/Aniket/Datasets/Voicebank_DEMAND/test/clean'
audios=os.listdir(directory[0])
noisy_=np.array([os.path.join(directory[0],audios)for audios in audios])
clean_=np.array([os.path.join(directory[1],audios)for audios in audios])
noisy_clean=np.stack([noisy_,clean_],axis=1)
dataset=tf.data.Dataset.from_tensor_slices(noisy_clean)
it=iter(dataset)

In [17]:
pesq_enhanced_ATT=[]
# pesq_enhanced_transformer=[]
pesq_noise=[]
it=iter(dataset)
# tf.keras.Model.load_weights(ATT_model,'2906_Model_ATT_v3.ckpt')

for i in range(824):
    noisy_data,clean_data=next(it)

    noise,sr=librosa.load(noisy_data.numpy().decode())
    clean,sr2=librosa.load(clean_data.numpy().decode())
    noise=librosa.resample(noise,orig_sr=sr,target_sr=16000)
    clean=librosa.resample(clean,orig_sr=sr2,target_sr=16000)

    enhanced_ATT=enhance_audio(ATT_model.generator,noise)
    # enhanced_transformer=enhance_audio(transformer,noise)



    # Calculate PESQ score
    pesq_score_enhanced1 = pesq.pesq(16000, clean, enhanced_ATT) 
    # pesq_score_enhanced2 = pesq.pesq(16000, clean, enhanced_transformer)  
    pesq_score_noise = pesq.pesq(16000, clean, noise) 

    pesq_enhanced_ATT.append(pesq_score_enhanced1)
    # pesq_enhanced_transformer.append(pesq_score_enhanced2)
    pesq_noise.append(pesq_score_noise)

ATT_pesq=np.array(pesq_enhanced_ATT).mean()
# transformer_pesq=np.array(pesq_enhanced_transformer).mean()
noise_pesq=np.array(pesq_noise).mean()

ATT_pesq,noise_pesq



(2.516441949796908, 1.9673282907715122)

In [18]:
# tf.keras.Model.load_weights(GAN_model,'2705_GAN')
noisy_data,clean_data=next(it)
# import IPython.display as ipd
# import pesq

noise,sr=librosa.load(noisy_data.numpy().decode())
clean,sr2=librosa.load(clean_data.numpy().decode())
# noise,sr=librosa.load('D:\\Mtech\\Project\\Speech Enhancement\\Dataset\\mix\\p236_300.wav')
# clean,sr2=librosa.load('D:\\Mtech\\Project\\Speech Enhancement\\Dataset\\clean\\p236_300.wav')
noise=librosa.resample(noise,orig_sr=sr,target_sr=16000)
clean=librosa.resample(clean,orig_sr=sr2,target_sr=16000)

enhanced_time=enhance_audio(ATT_model.generator,noise)

print("pesq of noisy",pesq.pesq(16000,clean,noise))
print("pesq of Enhanced_time",pesq.pesq(16000,clean,enhanced_time))
print("pesq of clean",pesq.pesq(16000,clean,clean))


# plt.figure(figsize=(15,5))
# plt.subplot(1,3,1)
# librosa.display.specshow(print_spectogram(noise))
# plt.title("Noisy")
# plt.subplot(1,3,2)
# librosa.display.specshow(print_spectogram(enhanced_time))
# plt.title("Enhanced")
# plt.subplot(1,3,3)
# librosa.display.specshow(print_spectogram(clean))
# plt.title("clean")
# plt.show()

StopIteration: 

In [None]:
ipd.Audio(enhanced_time,rate=16000)