# Short-time Fourier transform of wav files

## Imports and drive mounting

Librosa is a popular audio signal processing library; we use it to load the wav files and to compute their short-time Fourier transforms. NumPy is used to save the results, and `os` is used for file-system path handling.

In [0]:
import librosa
import numpy as np
import os

In [2]:
# Mount Google Drive inside the Colab runtime so that its contents
# (e.g. the wav files) are reachable through the local file system.
from google.colab import drive

driveMountPoint = '/content/gdrive'
drive.mount(driveMountPoint)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


## Reading file names

In [3]:
from os import listdir
from os.path import isfile, join

# The base folder contains the 5 s splits of the audiobooks in .wav format.
baseFolder = "/content/gdrive/My Drive/Nagy házi/audiobooks/utterances_5s"

# Full paths of every regular file directly inside baseFolder (sub-folders are skipped).
# os.listdir returns entries in arbitrary order, so the list is sorted to make the
# processing order identical on every run.
baseFileNames = sorted(
    join(baseFolder, f) for f in listdir(baseFolder) if isfile(join(baseFolder, f))
)
print(len(baseFileNames))

3349


## Computing the short-time Fourier transform


Some of the following explanations are copied from the librosa documentation: 
link: http://librosa.github.io/librosa/generated/librosa.core.stft.html?highlight=stft#librosa.core.stft 

In [0]:
# Name of the output folder that receives the short-time Fourier transforms
# of the 5 s signal splits.
stftFolder = "single_stft"

# Sample rate (in Hz) the audio is loaded with; 22050 Hz is also librosa's default.
sampleRate = 22050

# n_fft: length of the windowed signal after zero-padding. The STFT matrix has
# (1 + n_fft/2) rows, i.e. 257 rows here.
# Note: librosa's default is n_fft=2048 (about 93 ms at 22050 Hz); the 512 samples
# chosen here correspond to about 23 ms at the same sample rate.
nFFT = 512

# hop_length: number of audio samples between adjacent STFT columns
# (half a window, i.e. 50 % overlap between consecutive frames).
hopLength = nFFT // 2

# win_length: each frame is windowed by a window of this length and then
# zero-padded to n_fft. Using n_fft itself means no padding is added.
winLength = nFFT

# Compute the STFT of every wav file in baseFileNames and store each result as
# "<grandparent folder of the wav>/<stftFolder>/<wav name without extension>.npy"
# (np.save appends the ".npy" extension itself).
counter = 0
totalFiles = len(baseFileNames)
print("Processing wav files...")
for bfn in baseFileNames:
  # Derive the output location: the STFT folder is a sibling of the folder
  # that holds the wav file.
  wavPath = os.path.normpath(bfn)
  filename = os.path.splitext(os.path.basename(wavPath))[0]
  outputFolder = os.path.join(os.path.dirname(os.path.dirname(wavPath)), stftFolder)
  # np.save does not create missing directories, so make sure the target exists.
  os.makedirs(outputFolder, exist_ok=True)
  path = os.path.join(outputFolder, filename)

  # Load the audio resampled to sampleRate and transform it.
  y, sr = librosa.load(bfn, sr=sampleRate)
  # http://librosa.github.io/librosa/generated/librosa.core.stft.html?highlight=stft#librosa.core.stft
  stftArray = librosa.stft(y, n_fft=nFFT, hop_length=hopLength, win_length=winLength,
                           window='hann', center=True, dtype=np.complex64, pad_mode='reflect')

  # Save the complex STFT matrix. Pickling is disabled because a plain numeric
  # array does not need it; fix_imports only affects pickled data and is
  # deprecated in recent NumPy releases, so it is no longer passed.
  # https://docs.scipy.org/doc/numpy/reference/generated/numpy.save.html
  np.save(path, stftArray, allow_pickle=False)

  counter += 1
  print(str(counter/totalFiles*100) + "%")

print("Processing finished")