This notebook contains an audio utility class with methods for preprocessing raw audio files, such as converting raw audio to a spectrogram and vice versa.

In [1]:
import librosa
import numpy as np

In [2]:
class AudioUtils:
    """Helpers for converting audio files to spectrograms and back."""

    def wave_to_spectrogram(self, file_name, n_fft=2048, max_frames=430):
        """
        Load an audio file and compute its log-scaled magnitude spectrogram.

        Function Parameters:
        file_name, str: Name or path to file.
        n_fft, int: Window size for fast fourier transformation.
        max_frames, int or None: Maximum number of STFT frames (columns) to
            keep. Defaults to 430, the crop this method has always applied;
            pass None to keep every frame.
        Return:
        s: Spectrogram matrix, computed as log1p(|STFT|) and cropped to at
            most max_frames columns. Phase information is discarded.
        sampling_rate: Rate of sampling, as returned by librosa.load
        """
        audio_time_series, sampling_rate = librosa.load(file_name)

        # n_fft is passed by keyword: it works on every librosa version,
        # whereas recent releases reject it as a positional argument.
        stft_matrix = librosa.stft(audio_time_series, n_fft=n_fft)

        # Slicing with None as the stop keeps all columns.
        s = np.log1p(np.abs(stft_matrix[:, :max_frames]))

        return s, sampling_rate

    def spectrogram_to_wave(self, a, n_fft, output_file_name, sampling_rate,
                            n_iter=500):
        """
        Reconstruct a waveform from a magnitude spectrogram and write it to
        a WAV file. Phase is estimated iteratively, starting from random
        phase and repeatedly re-deriving it from the STFT of the current
        waveform estimate.

        Function Parameters:
        a: Magnitude spectrogram with 1 + n_fft // 2 rows (frequency bins).
            It is used as-is as the STFT magnitude.
            NOTE(review): wave_to_spectrogram returns log1p-scaled values;
            presumably the caller undoes that (e.g. np.expm1) before calling
            this method -- confirm against the caller.
        n_fft: FFT window size
        output_file_name: name of the file to write to
        sampling_rate: Rate of sampling
        n_iter, int: Number of phase-reconstruction iterations. Defaults to
            500, the count this method has always used.
        Raises:
        ValueError: if n_iter < 1, or if the shape of a does not match n_fft.
        """
        # Local import: librosa.output.write_wav was removed in librosa 0.8.
        # scipy is already a librosa dependency, and the removed helper
        # wrapped this same scipy function for mono signals.
        from scipy.io import wavfile

        if n_iter < 1:
            # With zero iterations no waveform would ever be produced.
            raise ValueError("n_iter must be at least 1")

        magnitude = np.asarray(a)
        expected_bins = 1 + n_fft // 2
        if magnitude.ndim != 2 or magnitude.shape[0] != expected_bins:
            # Otherwise the re-estimated phase below would not line up with
            # the magnitude and fail with an opaque broadcasting error.
            raise ValueError(
                "a must be a 2-D array with %d rows for n_fft=%d, got shape %s"
                % (expected_bins, n_fft, magnitude.shape)
            )

        # Start from uniformly random phase in [-pi, pi).
        phase = 2 * np.pi * np.random.random_sample(magnitude.shape) - np.pi
        for _ in range(n_iter):
            # Combine the fixed magnitude with the current phase estimate,
            # go to the time domain, then read the phase back from its STFT.
            complex_spec = magnitude * np.exp(1j * phase)
            x = librosa.istft(complex_spec)
            phase = np.angle(librosa.stft(x, n_fft=n_fft))

        # Written as 32-bit float samples, which WAV readers handle widely;
        # the rate is cast to int because the WAV header stores an integer.
        wavfile.write(output_file_name, int(sampling_rate),
                      np.asarray(x, dtype=np.float32))