In [3]:
"""
This script loads a .wav file, and depending on the script parameters specified below, creates an array of
overlapping audio clips.

Alexander Brown, 2/2022



So we want to load the entire audio clip, split it into overlapping
clips, then save the array of those clips to file

If we use exactly 25min (1500sec) of audio, with 4sec blocks and 1sec hops,
we would get (1500 - 4) / 1 + 1 = 1497 4sec clips
"""

import numpy as np
import librosa

In [2]:
"""
Script parameters and audio loading
"""
# Input / output locations. Directory notation must use '/' rather than '\'.
folderpath = 'F:/ZaknafeinII_Backup_02-02-22/daea/cleanedWaves/'
savepath = 'F:/ZaknafeinII_Backup_02-02-22/daea/cleanedWavesClips/'
filename = 'id10484_merged_cleaned.wav'

# Clip geometry, expressed in time units (converted to frame counts later on)
audio_length_to_keep = 25  # minutes of audio kept, counted from the start of the file
clip_length = 4            # seconds per clip
hop_length = 1             # seconds between the starts of consecutive clips

# Read the .wav file; librosa hands back the audio data and its sampling rate
wav_path = folderpath + filename
audio_orig, s_rate = librosa.load(wav_path)

In [3]:
# Report the size of the loaded audio and the rate it was loaded at
loaded_frame_count = len(audio_orig)
print('Length of audio file in frames: %i frames' % loaded_frame_count)
print('Audio sampling rate: %i frames/sec' % s_rate)

Length of audio file in frames: 34396952 frames
Audio sampling rate: 22050 frames/sec


In [4]:
"""
Convert the time-based parameters (minutes / seconds) into frame counts and truncate the audio.

For example, if the sampling rate is 22050 frames/sec:
    truncating the audio data to 25min (1500sec) means truncating at 33075000 frames
    4 sec clips are clips of 88200 frames
    1 sec hops are hops of 22050 frames
"""

# Frame counts are used as slice indices below, so they must be integers. Rounding and casting
# leaves whole-number parameters unchanged and lets fractional ones (e.g. 2.5 minutes,
# 0.5 second hops) work instead of failing on a float index.
frames_to_keep = int(round(audio_length_to_keep * 60 * s_rate))
audio_trunc = audio_orig[0:frames_to_keep]

frames_per_clip = int(round(clip_length * s_rate))
frames_per_hop = int(round(hop_length * s_rate))

# Count what was really dropped. If the file is shorter than the requested length the slice
# keeps everything, and this is 0 rather than a negative number.
frames_truncated = len(audio_orig) - len(audio_trunc)

# %g prints whole-number parameters exactly as before (25, 4, 1) without chopping fractional ones
print('Audio length to keep: %g minutes \nFrames to keep: %i frames \nFrames truncated: %i frames \n'
      'New frames count: %i frames \n\nClip length: %g seconds \nHop length: %g seconds \n'
      'Frames per clip: %i frames \nFrames per hop: %i frames'
      % (audio_length_to_keep, frames_to_keep, frames_truncated, len(audio_trunc),
         clip_length, hop_length, frames_per_clip, frames_per_hop))

Audio length to keep: 25 minutes 
Frames to keep: 33075000 frames 
Frames truncated: 1321952 frames 
New frames count: 33075000 frames 

Clip length: 4 seconds 
Hop length: 1 secoonds 
Frames per clip: 88200 frames 
Frames per hop: 22050 frames


In [5]:
"""
Split the truncated audio into overlapping clips.

Starting at index 0, each clip is frames_per_clip frames long and each new clip starts
frames_per_hop frames after the previous one. Only full-length clips are kept, so any
leftover frames at the end that cannot fill a whole clip are dropped.

The result, clip_list, is a 2-D array of shape (clip count, frames_per_clip).
"""
# A hop of zero (or less) would never advance the starting frame, so fail fast instead of looping forever
if frames_per_hop <= 0:
    raise ValueError('frames_per_hop must be a positive number of frames, got %r' % (frames_per_hop,))
if frames_per_clip <= 0:
    raise ValueError('frames_per_clip must be a positive number of frames, got %r' % (frames_per_clip,))

# Every starting frame that still leaves room for a full clip.
# This is empty when the audio is shorter than one clip.
clip_starts = range(0, len(audio_trunc) - frames_per_clip + 1, frames_per_hop)

# Allocate the output once and fill it row by row; this avoids holding a Python list of copies
# plus a second full copy when converting that list to an array. With no clips the array still
# has shape (0, frames_per_clip), so code that reads clip_list.shape[1] keeps working.
clip_list = np.empty((len(clip_starts), frames_per_clip), dtype=audio_trunc.dtype)

for clip_index, clip_starting_frame in enumerate(clip_starts):

    # determine the ending frame
    clip_ending_frame = clip_starting_frame + frames_per_clip

    # copy the array slice into its row of the clip array
    clip_list[clip_index] = audio_trunc[clip_starting_frame:clip_ending_frame]

In [6]:
# Summarise the clip array: its full shape first, then the two dimensions spelled out
clip_array_shape = clip_list.shape
print('clip_list shape: ', clip_array_shape)
print('Clip count: %i, frames per clip: %i' % (clip_array_shape[0], clip_array_shape[1]))

clip_list shape:  (1497, 88200)
Clip count: 1497, frames per clip: 88200


In [7]:
# Save the clip array in NumPy's .npy format. The output name is the input filename
# (including its extension) with '_clips.npy' appended.
clips_output_path = savepath + filename + '_clips.npy'
with open(clips_output_path, 'wb') as clips_file:
    np.save(clips_file, clip_list)