# **Audio Denoising Using a Fully-Connected Network**

In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import torchvision
from torch import nn, optim
import torch.nn.functional as F

In [4]:
import librosa

# Clean (target) recording, loaded at its native sampling rate, and its complex STFT.
s, sr = librosa.load(
    '/content/drive/MyDrive/Colab Notebooks/HW1/data/train_clean_male.wav',
    sr=None,
)
S = librosa.stft(s, n_fft=1024, hop_length=512)

# Noisy (input) recording and its complex STFT. Note that `sr` is rebound to
# the sampling rate reported for this second file.
sn, sr = librosa.load(
    '/content/drive/MyDrive/Colab Notebooks/HW1/data/train_dirty_male.wav',
    sr=None,
)
X = librosa.stft(sn, n_fft=1024, hop_length=512)

# Magnitude spectrograms as tensors, laid out as the STFT returns them.
S_abs, X_abs = torch.tensor(np.abs(S)), torch.tensor(np.abs(X))
print("S_abs: ", S_abs.shape)
print("X_abs: ", X_abs.shape)

# Swap the two axes so that every row holds one STFT frame.
S_abs, X_abs = S_abs.transpose(0, 1), X_abs.transpose(0, 1)
print("S_abs (transpose): ", S_abs.shape)
print("X_abs (transpose): ", X_abs.shape)

S_abs:  torch.Size([513, 2459])
X_abs:  torch.Size([513, 2459])
S_abs (transpose):  torch.Size([2459, 513])
X_abs (transpose):  torch.Size([2459, 513])


In [5]:
# Create a dataset and dataloader
import torch.utils.data as utils

# Pair every noisy frame (input) with the clean frame at the same index (target).
dataset = utils.TensorDataset(X_abs, S_abs)

# A dedicated, seeded generator makes the shuffle order reproducible across
# "Restart & Run All" instead of depending on whatever the global RNG state
# happens to be when training starts.
train_loader = utils.DataLoader(
    dataset,
    batch_size=50,
    shuffle=True,
    generator=torch.Generator().manual_seed(0),
)

# Each iteration yields an (inputs, targets) pair with up to 50 frames per
# tensor; the final batch holds whatever frames remain.

In [6]:
class Denoiser(nn.Module):
    """Fully-connected network that maps one spectrum frame to another of the same size.

    The network is five linear layers (``input``, ``fc1``, ``fc2``, ``fc3``,
    ``out``), each followed by a ReLU. Because the last layer is also passed
    through ReLU, every output value is non-negative.

    Args:
        n_bins: number of features per input/output frame (default 513).
        hidden_size: width of every hidden layer (default 1026).
    """

    def __init__(self, n_bins=513, hidden_size=1026):
        super().__init__()
        # Attribute names are kept stable: they define the state-dict keys and
        # the module's printed representation.
        self.input = nn.Linear(n_bins, hidden_size)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, n_bins)

    def forward(self, x):
        """Apply every layer in order, with a ReLU after each one.

        Args:
            x: tensor whose last dimension equals ``n_bins``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``n_bins``; all values are >= 0.
        """
        for layer in (self.input, self.fc1, self.fc2, self.fc3, self.out):
            x = F.relu(layer(x))
        return x

def weights_init(layer):
    """Re-initialise an ``nn.Linear`` layer in place; ignore every other module.

    Weights get Xavier-uniform values and the bias (when the layer has one) is
    zeroed. Intended to be passed to ``Module.apply``.

    Args:
        layer: any module; only ``nn.Linear`` instances are modified.
    """
    if not isinstance(layer, nn.Linear):
        return
    # The nn.init functions modify the parameter in place without tracking
    # gradients, so going through `.data` is unnecessary.
    nn.init.xavier_uniform_(layer.weight)
    # Linear(bias=False) stores bias as None; skip it instead of raising.
    if layer.bias is not None:
        nn.init.zeros_(layer.bias)

In [7]:
# Seed the global RNG before the model is built so that the weight
# initialisation (and anything else that draws from the default generator
# afterwards) is reproducible on a fresh kernel.
torch.manual_seed(0)

model2 = Denoiser()
model2.apply(weights_init)  # Xavier-uniform weights, zero biases
print(model2)

# Smooth L1 loss between predicted and clean frames, optimised with Adam.
err_func = nn.SmoothL1Loss()
optimizer = optim.Adam(model2.parameters(), lr=0.0001)

Denoiser(
  (input): Linear(in_features=513, out_features=1026, bias=True)
  (fc1): Linear(in_features=1026, out_features=1026, bias=True)
  (fc2): Linear(in_features=1026, out_features=1026, bias=True)
  (fc3): Linear(in_features=1026, out_features=1026, bias=True)
  (out): Linear(in_features=1026, out_features=513, bias=True)
)


In [8]:
# Training the network
epochs = 30

for e in range(1, epochs+1):
    # Running sum of the per-batch losses for this epoch.
    train_loss_epoch = 0

    for row, target in train_loader:
        # PyTorch accumulates gradients across backward() calls, so reset them.
        optimizer.zero_grad()

        # Forward pass and loss against the clean target frames.
        outs = model2(row)
        loss = err_func(outs, target)

        # Backward pass followed by one optimizer update.
        loss.backward()
        optimizer.step()

        train_loss_epoch += loss.item()

    # Report the mean batch loss once the whole pass over the loader is done.
    mean_batch_loss = train_loss_epoch/len(train_loader)
    print(f"Overall Training loss for epoch {e}: {round(mean_batch_loss,4)}")

Overall Training loss for epoch 1: 0.0182
Overall Training loss for epoch 2: 0.0123
Overall Training loss for epoch 3: 0.0084
Overall Training loss for epoch 4: 0.0066
Overall Training loss for epoch 5: 0.0057
Overall Training loss for epoch 6: 0.0049
Overall Training loss for epoch 7: 0.0043
Overall Training loss for epoch 8: 0.0041
Overall Training loss for epoch 9: 0.0041
Overall Training loss for epoch 10: 0.0039
Overall Training loss for epoch 11: 0.0034
Overall Training loss for epoch 12: 0.0032
Overall Training loss for epoch 13: 0.0031
Overall Training loss for epoch 14: 0.0029
Overall Training loss for epoch 15: 0.0028
Overall Training loss for epoch 16: 0.0028
Overall Training loss for epoch 17: 0.0028
Overall Training loss for epoch 18: 0.0026
Overall Training loss for epoch 19: 0.0025
Overall Training loss for epoch 20: 0.0024
Overall Training loss for epoch 21: 0.0024
Overall Training loss for epoch 22: 0.0023
Overall Training loss for epoch 23: 0.0023
Overall Training los

In [10]:
# First test recording (native sampling rate) and its complex STFT.
t1, tr1 = librosa.load(
    '/content/drive/MyDrive/Colab Notebooks/HW1/data/test_x_01.wav',
    sr=None,
)
test_01 = librosa.stft(t1, n_fft=1024, hop_length=512)

# Second test recording and its complex STFT.
t2, tr2 = librosa.load(
    '/content/drive/MyDrive/Colab Notebooks/HW1/data/test_x_02.wav',
    sr=None,
)
test_02 = librosa.stft(t2, n_fft=1024, hop_length=512)

# Magnitude spectrograms as tensors, laid out as the STFT returns them.
test_audio_01, test_audio_02 = (
    torch.tensor(np.abs(test_01)),
    torch.tensor(np.abs(test_02)),
)
print("test_audio_01: ", test_audio_01.shape)
print("test_audio_02: ", test_audio_02.shape)

# Swap the two axes so that every row holds one STFT frame.
test_audio_01, test_audio_02 = (
    test_audio_01.transpose(0, 1),
    test_audio_02.transpose(0, 1),
)
print("test_audio_01 (transpose): ", test_audio_01.shape)
print("test_audio_02 (transpose): ", test_audio_02.shape)

test_audio_01:  torch.Size([513, 142])
test_audio_02:  torch.Size([513, 380])
test_audio_01 (transpose):  torch.Size([142, 513])
test_audio_02 (transpose):  torch.Size([380, 513])


In [11]:
def denoise_frames(model, loader):
    """Run ``model`` over every batch of ``loader`` and stack the outputs.

    Args:
        model: an ``nn.Module`` applied to the first element of each batch.
        loader: iterable yielding ``(inputs, _)`` batch pairs; the second
            element is ignored. Batches are processed in iteration order.

    Returns:
        Tensor holding all batch outputs concatenated along dim 0.
    """
    was_training = model.training
    model.eval()  # inference behaviour for any mode-dependent layers
    try:
        with torch.no_grad():
            return torch.cat([model(batch) for batch, _ in loader], dim=0)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)


# TensorDataset is given the same tensor twice, so each item is a pair; only
# the first element of the pair is fed to the model. shuffle=False keeps the
# frames in their original order.
test_01_dataset = utils.TensorDataset(test_audio_01, test_audio_01)
test_01_loader = utils.DataLoader(test_01_dataset, batch_size=50, shuffle=False)
test_01_output = denoise_frames(model2, test_01_loader)

test_02_dataset = utils.TensorDataset(test_audio_02, test_audio_02)
test_02_loader = utils.DataLoader(test_02_dataset, batch_size=50, shuffle=False)
test_02_output = denoise_frames(model2, test_02_loader)

def signal_cleaner(x_org, output):
    """Attach the phase of ``x_org`` to the magnitudes in ``output``.

    Args:
        x_org: complex array (e.g. an STFT matrix) that supplies the phase.
        output: magnitudes with the same shape as ``x_org``; either a torch
            tensor or anything ``np.asarray`` accepts.

    Returns:
        Complex NumPy array equal to ``exp(1j * angle(x_org)) * output``.
    """
    # exp(1j * angle(x)) equals x / |x| wherever |x| > 0, but stays finite
    # (unit value, zero phase) for zero-magnitude bins, where the plain
    # division yields NaN.
    phase = np.exp(1j * np.angle(x_org))
    if hasattr(output, "detach"):
        # Torch tensor: drop autograd tracking and move to host memory first.
        magnitude = output.detach().cpu().numpy()
    else:
        magnitude = np.asarray(output)
    return np.multiply(phase, magnitude)

# Recombine the predicted magnitudes (transposed back to the STFT layout)
# with the phase of the noisy STFT, then invert to a waveform.
clean_test_01 = signal_cleaner(test_01, test_01_output.t())
print("Shape of clean_test_01: ", clean_test_01.shape)
# `length` pads/trims the reconstruction to the input's sample count so the
# denoised audio has exactly the same duration as the recording it came from.
test_01_istft = librosa.istft(clean_test_01, hop_length=512, length=len(t1))

clean_test_02 = signal_cleaner(test_02, test_02_output.t())
print("Shape of clean_test_02: ", clean_test_02.shape)
test_02_istft = librosa.istft(clean_test_02, hop_length=512, length=len(t2))

Shape of clean_test_01:  (513, 142)
Shape of clean_test_02:  (513, 380)


In [13]:
!pip install soundfile
import soundfile

soundfile.write("/content/drive/MyDrive/Colab Notebooks/HW1/result_01.wav", test_01_istft, tr1)
soundfile.write("/content/drive/MyDrive/Colab Notebooks/HW1/result_02.wav", test_02_istft, tr2)

Collecting soundfile
  Downloading https://files.pythonhosted.org/packages/eb/f2/3cbbbf3b96fb9fa91582c438b574cff3f45b29c772f94c400e2c99ef5db9/SoundFile-0.10.3.post1-py2.py3-none-any.whl
Installing collected packages: soundfile
Successfully installed soundfile-0.10.3.post1


In [15]:
# result for test_x_01.wav
# Builds an audio player for the result_01.wav file written earlier; the bare
# last expression is what the notebook renders as the cell output.
import IPython.display as ipd
ipd.Audio('/content/drive/MyDrive/Colab Notebooks/HW1/result_01.wav')

In [16]:
# result for test_x_02.wav
# Builds an audio player for the result_02.wav file written earlier; the bare
# last expression is what the notebook renders as the cell output.
import IPython.display as ipd
ipd.Audio('/content/drive/MyDrive/Colab Notebooks/HW1/result_02.wav')