# Imports and Google Drive mount

##Imports

In [1]:
!pip install soundfile



In [0]:
import numpy as np
import librosa
import soundfile as sf
import os

##Google Drive mount

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


##Read files names
Files names are read from the "audiobooks" folder, where the raw wav files are saved.

In [4]:
from os import listdir
from os.path import isfile, join

baseFolder = "/content/gdrive/My Drive/Nagy házi/audiobooks"
baseFileNames = [(baseFolder + os.sep + f) for f in listdir(baseFolder) if isfile(join(baseFolder, f))]
print(baseFileNames)

['/content/gdrive/My Drive/Nagy házi/audiobooks/hardtimes_03_dickens__Rosalind_Wills.wav', '/content/gdrive/My Drive/Nagy házi/audiobooks/hardtimes_04_dickens__Joseph_Ugoretz.wav', '/content/gdrive/My Drive/Nagy házi/audiobooks/hardtimes_13_dickens__Debra_Lynn.wav', '/content/gdrive/My Drive/Nagy házi/audiobooks/hardtimes_02_dickens__Stewart_Wills.wav', '/content/gdrive/My Drive/Nagy házi/audiobooks/hardtimes_14_dickens__Zachary_Brewster-Geisz.wav', '/content/gdrive/My Drive/Nagy házi/audiobooks/hardtimes_15_dickens__Jemma_Blythe.wav', '/content/gdrive/My Drive/Nagy házi/audiobooks/hardtimes_01_dickens__Kristen_Ferreri.wav', '/content/gdrive/My Drive/Nagy házi/audiobooks/hardtimes_19_dickens__Graham_Thomsen.wav', '/content/gdrive/My Drive/Nagy házi/audiobooks/hardtimes_09_dickens__Stuart_Bell.wav', '/content/gdrive/My Drive/Nagy házi/audiobooks/hardtimes_05_dickens__Paul_Hansen.wav']


#Splitting WAV files

In this section we iterate through the file names read above, read the sound files, and split them into 5-second utterances. These files are then saved as WAV files into the "utterances_5s" folder.

In [5]:
# The folder where small sound files are saved
utterancesFolder = "utterances_5s"
# The length of the small sound pieces
utteranceLengthInSeconds = 5
# Sample rate of the files. 22kHz is suitable for human speech.
sampleRate = 22050
# The length of the utterances in frames
utteranceLengthInSamples = int(utteranceLengthInSeconds*sampleRate)

for bfn in baseFileNames:
  # Processing the file paths
  # File pathes are splitted and then the utterancesFolder gets appended, and 
  # the small files are saved here later. The file name is also extracted 
  # from the path.
  path = os.path.normpath(bfn)
  filename = os.path.splitext(path)[0].split(os.sep)[-1]
  tree = path.split(os.sep)[:-1]
  tree.append(utterancesFolder)
  path = os.sep.join(tree)

  # Loading the audio data
  print("Processing",filename,"...")
  y, sr = librosa.load(bfn, sr = sampleRate)
  # Convert to mono
  y = librosa.core.to_mono(y)
  
  # Calculate how many 5 second utterances can be extracted from the file
  numberOfWholeUtterances = int(len(y) / utteranceLengthInSamples)
  # Split the sound data
  yUtterances = np.split(
      y[:numberOfWholeUtterances*utteranceLengthInSamples],
      numberOfWholeUtterances)
  
  #Save the utterances in the specified folder
  for i in range(len(yUtterances)):
    utteranceFilename = path + os.sep + filename + '_' + str(i) + '.wav'
    sf.write(utteranceFilename, yUtterances[i], sr, 'PCM_16')
    
print("Done")

Processing hardtimes_03_dickens__Rosalind_Wills ...
Processing hardtimes_04_dickens__Joseph_Ugoretz ...
Processing hardtimes_13_dickens__Debra_Lynn ...
Processing hardtimes_02_dickens__Stewart_Wills ...
Processing hardtimes_14_dickens__Zachary_Brewster-Geisz ...
Processing hardtimes_15_dickens__Jemma_Blythe ...
Processing hardtimes_01_dickens__Kristen_Ferreri ...
Processing hardtimes_19_dickens__Graham_Thomsen ...
Processing hardtimes_09_dickens__Stuart_Bell ...
Processing hardtimes_05_dickens__Paul_Hansen ...
Done


#Test playback

In [7]:
import IPython.display
IPython.display.Audio(data=yUtterances[1], rate=sr)