<a href="https://colab.research.google.com/github/jakobatgithub/unreverb/blob/main/PreprocessAndSaveData_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the data paths used in the cells below
# all live under '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import glob

# Dry (reverb-free) speech option 1: the Harvard word list recordings.
# `path` keeps its trailing slash because later cells build file names by
# plain string concatenation.
path = '/content/drive/My Drive/dsr_project/data/HarvardWordList/'
save_path = f'{path}datasets/'

# every wav file directly inside the data folder
audiopaths = glob.glob(f'{path}*.wav')
audiopaths

In [None]:
import glob

# Dry (reverb-free) speech option 2: the BUT-Czas corpus.
# Running this cell overrides `path`, `save_path` and `audiopaths` from the
# Harvard word list cell.
path = '/content/drive/My Drive/dsr_project/data/but-czas_v1.0/'
save_path = f'{path}datasets/'

# wav files whose name starts with 'F', one directory level below wavs/
audiopaths = glob.glob(f'{path}wavs/*/F*.wav')
audiopaths

In [None]:
import glob

# Impulse responses (IRs): every '*M-to-S.wav' file one directory level below
# the samplicity-ir folder.
ir_base_path = '/content/drive/My Drive/dsr_project/data/samplicity-ir/'
#irpaths = glob.glob(ir_base_path + '*Rooms*[0]2*/*M-to-S.wav')
ir_pattern = f'{ir_base_path}*/*M-to-S.wav'
irpaths = glob.glob(ir_pattern)
irpaths

In [None]:
# this is done here:
#   we load the audio data without reverb - the input
#   we load the impulse response (IR) functions
#   we generate the audio data with reverb by convolving all impulse response functions with all audio data without reverb
#   we transform all data to tensorflow datasets
#   one can solve two different tasks:
#     classification: which impulse response function is used
#     regression: remove the reverb from the input
#   correspondingly, we can generate two different datasets here:
#     one with the labels (consecutive numbers of the IRs) as the targets for classification
#     one with the audios without reverb as the targets (commented out)

import numpy as np
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio 
from IPython.core.display import display
from numpy.fft import fft, ifft
import tensorflow as tf
import tensorflow_datasets as tfds
import os, shutil

# sampling rate for resampling (2**14 = 16384 samples per second);
# every audio file and every IR is loaded at this rate
sample_rate = 2**14

# all audio input should have the same length
# we split all audio data into chunks of length n (in samples)
# n = sample_rate corresponds to 1 second (n = 2*sample_rate would be 2 seconds)
# correspondingly, audio without reverb shorter than n samples cannot fill a single chunk
n = sample_rate

#save_path = '/content/drive/My Drive/dsr_project/data/HarvardWordList/datasets/'

# paths for audio without reverb
#audiopaths = ['/content/drive/My Drive/dsr_project/data/HarvardWordList/SIHarvardWordListsFemale.wav',
#              '/content/drive/My Drive/dsr_project/data/HarvardWordList/SIHarvardWordListsMale.wav']

# paths to IR functions
#irpaths = ["/content/drive/My Drive/dsr_project/data/r1-nuclear-reactor-hall/b-format/r1_bformat.wav", 
#           "/content/drive/My Drive/dsr_project/data/arthur-sykes-rymer-auditorium-university-york/b-format/s1r2.wav",
#           "/content/drive/My Drive/dsr_project/data/trollers-gill/b-format/dales_site1_1way_bformat.wav"]

# function to delete all content in a folder
def delete_folder_contents(folder):
  """Remove everything inside `folder`, leaving the folder itself in place.

  Files and symlinks are unlinked, sub-directories are removed recursively.
  Deletion is best-effort: a failure on one entry is printed and the
  remaining entries are still processed.
  """
  for entry in os.listdir(folder):
    target = os.path.join(folder, entry)
    try:
      # check for links before directories so that a symlink to a directory
      # is unlinked rather than followed
      if os.path.isfile(target) or os.path.islink(target):
        os.unlink(target)
        continue
      if os.path.isdir(target):
        shutil.rmtree(target)
    except Exception as e:
      print('Failed to delete %s. Reason: %s' % (target, e))

# function to convolve IRs with audio
def my_conv(signal, kernel):
  """Convolve `signal` with `kernel` by multiplying their FFTs.

  The kernel is zero-padded to the length of the signal, so the result has
  the same length as `signal`. Because the product of two DFTs is a circular
  convolution, the tail of the response wraps around to the start of the
  output.

  `kernel` must not be longer than `signal`; otherwise the zero padding has a
  negative length and numpy raises a ValueError.
  """
  pad_length = len(signal) - len(kernel)
  kernel_padded = np.hstack((kernel, np.zeros(pad_length)))
  spectrum = fft(kernel_padded) * fft(signal)
  # the imaginary part is only numerical noise for real inputs
  return np.real(ifft(spectrum))

# function to deconvolve IRs from audio
def wiener_deconvolution(signal, kernel, lambd=1e-3):
  """Undo a convolution with `kernel` using a regularised inverse filter.

  In the frequency domain the signal spectrum Y is multiplied by
  conj(H) / (|H|^2 + lambd^2), where H is the spectrum of the kernel
  zero-padded to the length of the signal. `lambd` keeps the division stable
  at frequencies where H is close to zero.

  Returns the real part of the inverse FFT, with the same length as `signal`.
  `kernel` must not be longer than `signal`.
  """
  pad_length = len(signal) - len(kernel)
  padded_kernel = np.hstack((kernel, np.zeros(pad_length)))  # same length as the signal
  H = fft(padded_kernel)
  Y = fft(signal)
  H_conj = np.conj(H)
  # variant that additionally weights with the signal power spectrum S:
  #S = np.abs(fft(signal))**2
  #GY = Y*np.conj(H)*S/(H*np.conj(H)*S + lambd**2)
  GY = Y*H_conj/(H*H_conj + lambd**2)
  return np.real(ifft(GY))


# Load every dry (reverb-free) recording and cut it into chunks of n samples.
#   audios       - the full-length recordings, one entry per file; they are
#                  convolved with the IRs in the next cell
#   audio_chunks - all complete chunks of all recordings, stacked into one array
audios = []
audio_chunks = []

# loop over all audiofiles without reverb
for audiopath in audiopaths:
  # load audio without reverb, resampled to the common sampling rate
  audio, audio_sample_rate = librosa.load(audiopath, sr=sample_rate)
  #print(f"audio.shape[0]: {audio.shape[0]}")

  # collect all audio without reverb in a list
  audios.append(audio)

  # split audio into chunks of n samples; the incomplete tail is dropped.
  # reshape is used instead of np.split because np.split cannot produce zero
  # sections: a recording shorter than n would abort the whole loop, whereas
  # here it simply contributes no chunks.
  num_chunks = len(audio) // n
  split_audio = audio[:num_chunks * n].reshape(num_chunks, n)

  # collect all audio chunks in a list
  audio_chunks.extend(split_audio)

# transform list to numpy array
audio_chunks = np.asarray(audio_chunks)


In [None]:
# we convolve all audios with all IRs and save one tensorflow dataset per IR
# each dataset element is (chunk with reverb, IR label, chunk without reverb)

# delete all content in save_path
#delete_folder_contents(save_path)

# check if there are already tf datasets; their names end in '_<label>', so a
# re-run continues numbering after the highest existing label
dataset_filenames = glob.glob(save_path + 'tf_IR_*')
labels = np.asarray([int(filename.split('_')[-1]) for filename in dataset_filenames])

# counter for irpaths
if labels.shape[0] > 0:
  iridx = np.max(labels) + 1
else:
  iridx = 0

# placeholder lists
# reverb_audios is filled IR by IR with one entry per dry audio, so the entry
# for (ir_idx, audio_idx) sits at position ir_idx*len(audios) + audio_idx
irs = []
reverb_audios = []

for irpath in irpaths:

  print(f"iridx: {iridx}")

  # chunks with reverb for the current IR only
  reverb_audio_chunks = []

  # load IR function, resampled to the common sampling rate
  #ir_raw, _ = my_get_ir_sample(irpath, resample=sample_rate)
  ir, IR_sample_rate = librosa.load(irpath, sr=sample_rate)

  # shorten all IR functions to the same length
  #ir = ir[0:int(sample_rate*2.0)]

  # collect IRs in a list
  irs.append(ir)

  # loop over all audios without reverb
  for audio in audios:

    # convolve audio without reverb with the IR to obtain audio with reverb
    reverb_audio = my_conv(audio, ir)
    #print(f"reverb_audio.shape[1]: {reverb_audio.shape[1]}")

    # collect audios with reverb in a list
    reverb_audios.append(reverb_audio)

    # split audio into chunks of n samples; the incomplete tail is dropped.
    # reshape (unlike np.split) also works when the audio is shorter than n:
    # it then contributes no chunks instead of aborting the loop.
    num_chunks = len(reverb_audio) // n
    split_reverb_audio = reverb_audio[:num_chunks * n].reshape(num_chunks, n)

    reverb_audio_chunks.extend(split_reverb_audio)

  # transform list to numpy array
  reverb_audio_chunks = np.asarray(reverb_audio_chunks)

  # my_conv returns as many samples as it was given, so the chunks with reverb
  # line up one-to-one with the chunks without reverb
  features = reverb_audio_chunks
  targets = audio_chunks
  label = iridx
  labels = np.full(features.shape[0], label)
  print(f"features.shape: {features.shape}")
  print(f"targets.shape: {targets.shape}")
  print(f"labels.shape: {labels.shape}")
  dataset = tf.data.Dataset.from_tensor_slices((features, labels, targets))
  #dataset = tf.data.Dataset.from_tensor_slices((features, targets))

  # save the data as tensorflow dataset
  tf.data.experimental.save(dataset, save_path + 'tf_IR_' + str(iridx))

  # keep a copy of the IR next to its dataset, named after the IR that was
  # actually used in this iteration (not after the first entry of irpaths)
  tot_save_path = save_path + 'IR_' + str(iridx) + '_' + irpath.split('/')[-1]
  #!cp "$irpath" "$tot_save_path"
  shutil.copy(irpath, tot_save_path)

  # increase counter for irpaths
  iridx = iridx + 1

In [None]:
# Listen to one example: the dry audio, the same audio with reverb, and the
# reverberated audio after Wiener deconvolution with the matching IR.
audio_idx = 1
ir_idx = 1

# reverb_audios is filled IR by IR, with one entry per dry audio for each IR
reverb_idx = ir_idx*len(audios) + audio_idx
deconvolved = wiener_deconvolution(reverb_audios[reverb_idx], irs[ir_idx])

# play at most the first 10 seconds of each signal
preview = slice(None, 10*sample_rate)
for clip in (audios[audio_idx], reverb_audios[reverb_idx], deconvolved):
  display(Audio(clip[preview], rate=sample_rate))

In [None]:
# Print, for every (audio, IR) pair, the position of the corresponding
# reverberated audio in reverb_audios.
audio_length, ir_length = len(audios), len(irs)
for audio_idx in range(audio_length):
  for ir_idx in range(ir_length):
    reverb_idx = ir_idx*audio_length + audio_idx
    print(f"audio_idx: {audio_idx}, ir_idx: {ir_idx}, reverb_idx: {reverb_idx}")

In [None]:
import glob

# Reload all per-IR datasets saved under save_path and merge them into one.
dataset_filenames = glob.glob(save_path + 'tf_IR_*')
# the label of each dataset is the number after the last '_' in its name
labels = [int(filename.split('_')[-1]) for filename in dataset_filenames]
print(f"labels: {labels}")

target_datasets = []
for dataset_filename in dataset_filenames:
  # load the tensorflow dataset
  target_datasets.append(tf.data.experimental.load(dataset_filename))

# concatenate every dataset after the first one onto the first;
# starting at index 1 so that the second dataset is not skipped
target_dataset = target_datasets[0]
for extra_dataset in target_datasets[1:]:
  target_dataset = target_dataset.concatenate(extra_dataset)

# determine size of the dataset by iterating over it once
dataset_size = sum(1 for _ in target_dataset)
print(f"dataset_size: {dataset_size}")