<a href="https://colab.research.google.com/github/advaitkumar3107/Speech-Denoising-Using-Deep-Learning/blob/master/RCNN_speech_denoising.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torchvision
import torch.nn as nn
import torchvision.datasets as datasets
import os
import math
import glob
import sys
import scipy
import random
import pywt
import time
from PIL import Image
from torch.nn import init
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import cv2
import librosa
import librosa.display
from tqdm import tqdm_notebook
from scipy import signal
from scipy.io.wavfile import read, write
from numpy.fft import fft, ifft
from google.colab import drive
from torch.autograd import Variable
from IPython.display import Audio
# Mount Google Drive inside the Colab VM so the dataset folder becomes reachable.
drive.mount('/content/gdrive')
# Make the dataset folder the working directory; the relative paths used by
# later cells ('clean_dataset', 'noisy_dataset/...', checkpoint files) resolve against it.
%cd /content/gdrive/My\ Drive/sample_audio_dataset

# Seed every random number generator this notebook draws from, so that a
# Restart & Run All reproduces the same file subset, split and initial weights.
SEED = 7
random.seed(SEED)             # Python's `random` drives random.sample in AudioDataset
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)  # silently ignored when CUDA is unavailable
# Hand unused cached GPU memory back to the driver before training starts.
torch.cuda.empty_cache()

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/My Drive/sample_audio_dataset


In [None]:
class AudioDataset(torch.utils.data.Dataset):
  """Paired noisy/clean recordings stored as single-level DWT coefficients.

  A random subset of ``len(ids) // subsample`` file names is drawn with
  ``random.sample``. Every selected file is loaded and transformed once in the
  constructor, so all features are kept in memory.

  Each item is a pair ``(input_, target)``. Both are tensors whose two columns
  hold the approximation (column 0) and detail (column 1) coefficients returned
  by ``pywt.dwt`` (shape ``(num_coeffs, 2)``, assuming ``librosa.load`` returns
  a 1-D mono signal).

  Args:
    ids: list of file names that exist in both ``input_dir`` and ``target_dir``.
    input_dir: directory holding the noisy recordings.
    target_dir: directory holding the clean recordings.
    subsample: keep one file out of every ``subsample`` (must be >= 1).
    wavelet: wavelet name forwarded to ``pywt.dwt``.

  Raises:
    ValueError: if ``subsample`` is smaller than 1.
  """

  def __init__(self, ids, input_dir = 'noisy_dataset/noisy_trainset_56spk_wav/',
               target_dir = 'clean_dataset/', subsample = 16, wavelet = 'db1'):
    if subsample < 1:
      raise ValueError('subsample must be >= 1, got %r' % (subsample,))

    self.ids = ids
    self.inputs = []
    self.targets = []
    self.length = len(self.ids) // subsample

    # The subset depends on the state of Python's `random` module.
    self.random_ids = random.sample(self.ids, self.length)

    for id_ in self.random_ids:
      self.inputs.append(self._wavelet_features(os.path.join(input_dir, id_), wavelet))
      self.targets.append(self._wavelet_features(os.path.join(target_dir, id_), wavelet))

  @staticmethod
  def _wavelet_features(path, wavelet = 'db1'):
    """Load one audio file and return its DWT coefficients as two stacked columns."""
    # NOTE(review): librosa.load is called with its default arguments, so the
    # library's default sample rate / mono handling applies — confirm that is intended.
    y, _ = librosa.load(path)
    ca, cd = pywt.dwt(y, wavelet)
    return torch.stack((torch.from_numpy(ca), torch.from_numpy(cd)), dim = 1)

  def __len__(self):
    """Number of (noisy, clean) pairs that were sampled."""
    return self.length

  def __getitem__(self, index):
    """Return the ``(input_, target)`` feature pair stored at ``index``."""
    return self.inputs[index], self.targets[index]

In [None]:
def dataset_loader(dataset, train_percent, batch_size):
  """Randomly split `dataset` and wrap both parts in DataLoaders.

  Args:
    dataset: any sized torch dataset.
    train_percent: percentage (0-100) of the samples assigned to training;
      the remainder forms the validation part.
    batch_size: batch size used by both loaders.

  Returns:
    Tuple ``(train_dataloader, val_dataloader)``. Only the training loader shuffles.
  """
  total = len(dataset)
  n_train = int(train_percent*total/100)
  split_sizes = [n_train, total - n_train]
  train_subset, val_subset = torch.utils.data.random_split(dataset, split_sizes)

  loaders = (
    torch.utils.data.DataLoader(train_subset, batch_size = batch_size, shuffle = True),
    torch.utils.data.DataLoader(val_subset, batch_size = batch_size, shuffle = False),
  )
  return loaders

In [None]:
class rnn_model(nn.Module):
  """Three bidirectional LSTM stages, each followed by a linear projection.

  Every stage concatenates the two directions (``2*hidden``) and a linear layer
  maps that back to ``hidden``. The initial hidden and cell states of every
  stage are learned parameters of shape ``(2, batch_size, hidden)``.

  Args:
    input_dim: number of features per time step of the input.
    hidden_dim: sequence with the hidden size of each of the three stages.
    batch_size: largest batch size the learned initial states are allocated for.
    batch_first: forwarded to ``nn.LSTM``; selects ``(batch, seq, feature)`` layout.
    dropout: accepted for backward compatibility only. ``nn.LSTM`` applies
      dropout between stacked layers and every LSTM here has a single layer,
      so the value never had an effect; it is no longer forwarded, which
      avoids the warning PyTorch emits for that combination.
  """

  def __init__(self, input_dim, hidden_dim, batch_size, batch_first = True, dropout = 0.5):
    super(rnn_model, self).__init__()
    self.lstm1 = nn.LSTM(input_size = input_dim, hidden_size = hidden_dim[0], num_layers = 1, bias = True, batch_first = batch_first, bidirectional = True)
    self.linear1 = nn.Linear(2*hidden_dim[0], hidden_dim[0])

    self.lstm2 = nn.LSTM(input_size = hidden_dim[0], hidden_size = hidden_dim[1], num_layers = 1, bias = True, batch_first = batch_first, bidirectional = True)
    self.linear2 = nn.Linear(2*hidden_dim[1], hidden_dim[1])

    self.lstm3 = nn.LSTM(input_size = hidden_dim[1], hidden_size = hidden_dim[2], num_layers = 1, bias = True, batch_first = batch_first, bidirectional = True)
    self.linear3 = nn.Linear(2*hidden_dim[2], hidden_dim[2])

    # Learned initial (hidden, cell) states; the leading 2 is one slot per direction.
    self.forward_hidden_state0 = nn.Parameter(torch.randn(2, batch_size, hidden_dim[0], dtype = torch.float32))
    self.forward_cell_state0 = nn.Parameter(torch.randn(2, batch_size, hidden_dim[0], dtype = torch.float32))

    self.forward_hidden_state1 = nn.Parameter(torch.randn(2, batch_size, hidden_dim[1], dtype = torch.float32))
    self.forward_cell_state1 = nn.Parameter(torch.randn(2, batch_size, hidden_dim[1], dtype = torch.float32))

    self.forward_hidden_state2 = nn.Parameter(torch.randn(2, batch_size, hidden_dim[2], dtype = torch.float32))
    self.forward_cell_state2 = nn.Parameter(torch.randn(2, batch_size, hidden_dim[2], dtype = torch.float32))

  @staticmethod
  def _initial_state(hidden, cell, n):
    """Return the learned states trimmed to a batch of ``n`` sequences."""
    stored = hidden.size(1)
    if n == stored:
      return hidden, cell
    if n > stored:
      raise RuntimeError('batch of %d sequences exceeds the %d initial states the model was built with' % (n, stored))
    # A final batch can be smaller than batch_size: use the first n learned states.
    return hidden[:, :n].contiguous(), cell[:, :n].contiguous()

  def forward(self, input_):
    """Run the three LSTM/linear stages and return a tensor with ``hidden_dim[2]`` features per step."""
    # Read the layout from the LSTM itself so models restored from older
    # pickled checkpoints (which lack extra attributes) keep working.
    batch_dim = 0 if self.lstm1.batch_first else 1
    n = input_.size(batch_dim)

    state = self._initial_state(self.forward_hidden_state0, self.forward_cell_state0, n)
    output, _ = self.lstm1(input_, state)
    output = self.linear1(output)

    state = self._initial_state(self.forward_hidden_state1, self.forward_cell_state1, n)
    output, _ = self.lstm2(output, state)
    output = self.linear2(output)

    state = self._initial_state(self.forward_hidden_state2, self.forward_cell_state2, n)
    output, _ = self.lstm3(output, state)
    output = self.linear3(output)

    return output

In [None]:
def train(model, dataloader, optimizer, loss):
  """Run one optimisation pass over `dataloader`.

  Appends a new entry to the module-level list `train_losses` and leaves the
  mean per-batch loss of this pass in it (as a Python float). Batches are moved
  to the GPU with `.cuda()`, so a CUDA device is required.

  Args:
    model: network to optimise; switched to training mode.
    dataloader: iterable yielding `(inputs, targets)` batches.
    optimizer: optimizer bound to the parameters of `model`.
    loss: callable `loss(outputs, targets)` returning a scalar tensor.
  """
  model.train()
  train_losses.append(0)
  progbar = tqdm_notebook(total = len(dataloader), desc = 'Train')

  try:
    for i, (inputs, targets) in enumerate(dataloader):
      optimizer.zero_grad()
      inputs, targets = inputs.cuda(), targets.cuda()
      outputs = model(inputs)
      error = loss(outputs, targets)
      error.backward()
      optimizer.step()

      # .item() stores a plain float instead of keeping a device tensor alive.
      train_losses[-1] = train_losses[-1] + error.item()
      progbar.set_description('Train (loss=%.4f)' % (train_losses[-1]/(i+1)))
      progbar.update(1)
  finally:
    # Always release the progress-bar widget, even if the pass is interrupted.
    progbar.close()

  train_losses[-1] = train_losses[-1]/len(dataloader)

    
def val(model, dataloader, loss):
  """Evaluate `model` on `dataloader` and checkpoint it when it improves.

  Appends the mean per-batch loss (a Python float) to the module-level list
  `val_losses`. When that mean is lower than the module-level `best_loss`,
  `best_loss` is updated to it and the model is saved to
  'rnn_denoising_best.ckpt.t7'. Batches are moved to the GPU with `.cuda()`.

  Args:
    model: network to evaluate; switched to evaluation mode.
    dataloader: iterable yielding `(inputs, targets)` batches.
    loss: callable `loss(outputs, targets)` returning a scalar tensor.
  """
  global best_loss
  model.eval()
  val_losses.append(0)
  progbar = tqdm_notebook(total = len(dataloader), desc = 'Val')

  try:
    # No gradients are needed for evaluation; skipping them saves memory and
    # removes the dependency on a global optimizer that the old code had.
    with torch.no_grad():
      for i, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        outputs = model(inputs)
        error = loss(outputs, targets)

        val_losses[-1] = val_losses[-1] + error.item()
        progbar.set_description('Val (loss=%.4f)' % (val_losses[-1]/(i+1)))
        progbar.update(1)
  finally:
    progbar.close()

  val_losses[-1] = val_losses[-1]/len(dataloader)

  if val_losses[-1] < best_loss:
    # Track the epoch mean that was just compared, not the last batch's loss.
    best_loss = val_losses[-1]
    checkpoints = {'model' : model}
    print("SAVING.......................")
    torch.save(checkpoints, 'rnn_denoising_best.ckpt.t7')

In [None]:
# Model dimensions.
input_dim = 2            # features per time step: the two DWT coefficient columns
hidden_dim = [8, 4, 2]   # hidden size of each of the three LSTM stages

# Data pipeline settings.
batch_size = 1
train_percent = 80       # percentage of samples used for training; the rest is validation

In [None]:
# os.listdir returns entries in arbitrary order; sorting them makes the seeded
# random subset drawn inside AudioDataset identical from run to run.
ids = sorted(os.listdir('clean_dataset'))
dataset = AudioDataset(ids)

OSError: ignored

In [None]:
# Split the dataset into training and validation parts and build one loader for each.
train_loader, val_loader = dataset_loader(
  dataset,
  train_percent = train_percent,
  batch_size = batch_size,
)

In [None]:
# Resume from the rolling checkpoint that the training loop rewrites every epoch.
# NOTE(review): torch.load unpickles a whole model object here, which can execute
# arbitrary code — only load checkpoint files you created yourself.
# To start from scratch instead of resuming, use:
#   model = rnn_model(input_dim, hidden_dim, batch_size).cuda()
#   best_loss = 1e5
checkpoints = torch.load('rnn_denoising_load.ckpt.t7')
model, best_loss = checkpoints['model'], checkpoints['best_loss']

# Epochs left to run out of a budget of 2000, based on the stored epoch value.
epochs = 2000 - checkpoints['epoch']

# Summed (not averaged) absolute error, optimised with Adam.
loss = nn.L1Loss(reduction = 'sum')
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-5)

# Per-epoch loss histories filled in by train() and val().
train_losses, val_losses = [], []

In [None]:
# Epochs finished before this session, as recorded in the checkpoint that was
# resumed from. Without this offset the stored 'epoch' restarts at 0 on every
# resume and the remaining-epoch computation is wrong after a second resume.
epochs_done = checkpoints['epoch']

for epoch in range(epochs):
  train(model, train_loader, optimizer, loss)

  # Rolling resume checkpoint; 'epoch' is the cumulative number of completed epochs.
  # NOTE(review): it is written before validation, so the stored best_loss is the
  # value from before this epoch's val() call.
  checkpoints = {'model' : model, 'best_loss' : best_loss, 'epoch' : epochs_done + epoch + 1}
  torch.save(checkpoints, 'rnn_denoising_load.ckpt.t7')

  val(model, val_loader, loss)

In [None]:
# Sanity check: nn.Linear acts on the last dimension only, so a (1, 3, 10)
# input comes out as (1, 3, 20).
a = torch.ones(1, 3, 10)
linear1 = nn.Linear(in_features = 10, out_features = 20)
output = linear1(a)
output.shape

In [None]:
# NOTE(review): `get_sizes`, `e` and `padded_inputs_sorted` are not defined in the
# part of the notebook visible here, and `d` is only assigned in a later cell, so
# this cell fails on a fresh Restart & Run All.
lengths = get_sizes(d)
sorted_lengths = get_sizes(e)
# Pack the padded batch for an RNN; enforce_sorted = True expects the sequences
# to be ordered by decreasing length.
packed_inputs = nn.utils.rnn.pack_padded_sequence(padded_inputs_sorted, sorted_lengths, enforce_sorted = True)

In [None]:
# Three dummy two-column sequences of different lengths, gathered in a list.
a, b, c = (torch.randn(num_steps, 2) for num_steps in (23433, 52321, 33232))

d = [a, b, c]

In [None]:
# NOTE(review): `packed_train_inputs` is never assigned in the part of the notebook
# visible here, so this inspection cell raises NameError on a fresh kernel.
packed_train_inputs.data.size()