In [1]:
"""
This script loads a .wav file, and depending on the script parameters specified below, creates an array of
overlapping audio clips.

Alexander Brown, 2/2022



So we want to load the entire audio clip, split it into overlapping
clips, then save the array of those clips to file

If we use exactly 25min (1500sec) of audio, with 4sec blocks and 1sec hops,
we would get 1497 4sec clips
"""

import numpy as np
import librosa

In [2]:
"""
Specify script parameters
"""
# specify the working directory; note that directory notation must use '/' rather than '\'
folderpath = 'F:/ZaknafeinII_Backup_02-02-22/daea/prepareForCtnTraining/practice/'
filename = 'id10016_merged_cleaned'
extension = '.wav'
savepath = 'F:/ZaknafeinII_Backup_02-02-22/daea/prepareForCtnTraining/practice/'

clip_length = 4 # seconds
hop_length = 0.25 # seconds

# load the .wav file
audio_orig, s_rate = librosa.load(folderpath+filename+extension) 

In [3]:
print('Length of audio file in frames: %i frames' % (len(audio_orig)))
print('Audio sampling rate: %i frames/sec' % (s_rate))

Length of audio file in frames: 33134237 frames
Audio sampling rate: 22050 frames/sec


In [4]:
"""
if the sampling rate is 22050 frames/sec:
    if we want 4 sec clips, then we need clips of 88200 frames
    if we want 1 sec hops, then we need hops of 22050 frames
"""

frames_per_clip = int(clip_length * s_rate)
frames_per_hop = int((hop_length * s_rate)//1)

print('Clip length: %f seconds \nHop length: %f seconds \n'\
      'Frames per clip: %f frames \nFrames per hop: %f frames' 
      % (clip_length, hop_length, frames_per_clip, frames_per_hop))

Clip length: 4.000000 seconds 
Hop length: 0.250000 seconds 
Frames per clip: 88200.000000 frames 
Frames per hop: 5512.000000 frames


In [5]:
"""
So we start at index 0, forming clips of length frames_per_clip (probably 88200 frames) 
and we jump up frames_per_hop (probably 22050 frames) before we take the next clip
"""
clip_starting_frame = 0
curr_clip = None
savenote = None
count = 0

# when we get close to the end, the clips will start pulling from the beginning
while clip_starting_frame <= len(audio_orig):
    
    # determine the ending frame
    clip_ending_frame = clip_starting_frame + frames_per_clip
    
    # append the array slice to the list of audio clips
    if clip_ending_frame < len(audio_orig):
        curr_clip = np.array(audio_orig[clip_starting_frame:clip_ending_frame])
        savenote = clip_starting_frame+'-'+clip_ending_frame
    else:
        leftover = clip_ending_frame - len(audio_orig)
        curr_clip = np.concatenate((audio_orig[clip_starting_frame:], audio_orig[0:leftover]))
        savenote = clip_starting_frame+'-'+leftover
        
    # save the selected clip to file
    librosa.output.write_wav(filename+'_'+savenote+extension, curr_clip, s_rate)
    
    # update the starting frame
    clip_starting_frame = clip_starting_frame + frames_per_hop
    count = count + 1

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [None]:
print('created %i .wav files', count)