In [1]:
"""
This script loads a .wav file, and depending on the script parameters specified below, creates an array of
overlapping audio clips.

Alexander Brown, 2/2022



So we want to load each source audio file in full, split it into overlapping
clips, then save every clip as its own .wav file and record the clip file names in metadata.json

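If we use exactly 25min (1500sec) of audio, with 4sec blocks and 1sec hops,
1497 of the 4sec clips fit entirely inside the audio; clips that start closer to the end
than that are completed with audio taken from the beginning of the file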
"""

import numpy as np
import librosa
import soundfile as sf
import os
import json

In [2]:
# ---------------------------------------------------------------------------
# Script parameters
# ---------------------------------------------------------------------------

# audio settings
s_rate = 22050      # sampling rate, in frames per second
clip_length = 4     # duration of each clip, in seconds
hop_length = 1.0    # offset between the starts of consecutive clips, in seconds

# working directories; write them with '/' rather than '\' and keep the trailing '/',
# because the rest of the notebook builds file paths by plain string concatenation
savepath = 'C:/Users/eholy/Desktop/SD/farfan_files/'           # clip folders and metadata.json go here
sourcepath = 'C:/Users/eholy/Desktop/SD/farfan_files/orig/'    # every entry in this folder is loaded

In [3]:
"""
Convert the clip and hop durations from seconds into frame counts.

if the sampling rate is 22050 frames/sec:
    if we want 4 sec clips, then we need clips of 88200 frames
    if we want 1 sec hops, then we need hops of 22050 frames
"""

# int() drops any fractional frame, so both values are whole frame counts
frames_per_clip = int(clip_length * s_rate)
frames_per_hop = int((hop_length * s_rate)//1)

# clipify() moves its starting frame forward by frames_per_hop on every pass of its loop,
# so a hop of zero (or fewer) frames would keep it writing clips forever; fail here instead
if frames_per_clip < 1 or frames_per_hop < 1:
    raise ValueError('clip_length and hop_length must each cover at least one frame at '
                     's_rate=%s (got %s frames per clip, %s frames per hop)'
                     % (s_rate, frames_per_clip, frames_per_hop))

print('Clip length: %f seconds \nHop length: %f seconds \n'\
      'Frames per clip: %f frames \nFrames per hop: %f frames' 
      % (clip_length, hop_length, frames_per_clip, frames_per_hop))

Clip length: 4.000000 seconds 
Hop length: 1.000000 seconds 
Frames per clip: 88200.000000 frames 
Frames per hop: 22050.000000 frames


In [4]:
def clipify(filename, audio_orig):
    """
    Split one audio signal into overlapping clips and write each clip to its own .wav file.

    Clips are frames_per_clip frames long and start every frames_per_hop frames, beginning
    at frame 0. A clip that would reach or run past the end of the signal is completed with
    frames taken from the beginning of the signal. Every clip is written into the directory
    savepath+filename using the sample rate s_rate.

    Parameters
    ----------
    filename : str
        Name of the output directory (created under savepath) and prefix of every clip file.
    audio_orig : sequence of audio frames
        The signal to split; it is measured with len() and sliced by frame index.
        The caller in this notebook passes the array returned by librosa.load.

    Returns
    -------
    list of str
        The names (not full paths) of the .wav files that were written, in the order written.
        Each name is filename_START-END.wav; for a wrapped clip END is the number of frames
        taken from the beginning of the signal rather than an absolute frame index.

    Notes
    -----
    Reads the notebook-level names savepath, frames_per_clip, frames_per_hop and s_rate.
    frames_per_hop must be positive, otherwise the loop below never advances.
    """
    # make the output directory for the given audio file;
    # exist_ok lets the notebook be re-run without failing on a directory made by an earlier run
    clip_dir = savepath+filename
    os.makedirs(clip_dir, exist_ok=True)

    total_frames = len(audio_orig)
    namelist = []
    clip_starting_frame = 0

    # Strictly less than: a clip starting exactly at total_frames would hold no frames of its
    # own and would just repeat the first clip (and an empty signal would yield an empty file).
    while clip_starting_frame < total_frames:

        # determine the ending frame
        clip_ending_frame = clip_starting_frame + frames_per_clip

        if clip_ending_frame < total_frames:
            # the whole clip lies inside the signal
            curr_clip = np.array(audio_orig[clip_starting_frame:clip_ending_frame])
            savenote = str(clip_starting_frame)+'-'+str(clip_ending_frame)
        else:
            # close to the end: take what is left and fill the rest from the beginning
            leftover = clip_ending_frame - total_frames
            curr_clip = np.concatenate((audio_orig[clip_starting_frame:], audio_orig[0:leftover]))
            savenote = str(clip_starting_frame)+'-'+str(leftover)

        # save the selected clip to file and remember its name
        clip_name = filename+'_'+savenote+'.wav'
        sf.write(clip_dir+'/'+clip_name, curr_clip, s_rate)
        namelist.append(clip_name)

        # update the starting frame
        clip_starting_frame += frames_per_hop

    print('for %s: created %i .wav files' % (filename, len(namelist)))

    return namelist

In [5]:
# stores the generated filenames for all the .wav files,
# keyed by the source file name with its extension removed
metadata_dict = {}

# iterate through each source file, "clipifying" each of them
source_contents = os.listdir(sourcepath)
print('%s files found in %s' % (len(source_contents), sourcepath))
for source_name in source_contents:
    # load at s_rate explicitly, so the frame counts derived from s_rate and the rate
    # clipify() writes the clips with always describe the audio that was actually loaded
    audio, _ = librosa.load(sourcepath+source_name, sr=s_rate)
    # drop the extension whatever its length (a fixed [:-4] only suits 3-letter extensions)
    filename = os.path.splitext(source_name)[0]
    metadata_dict[filename] = clipify(filename, audio)

# output the metadata file as a JSON object;
# the result is not assigned to the name `json`, which would replace the imported module
# and make this cell fail the next time it is run
with open(savepath+'metadata.json', 'w') as f:
    f.write(json.dumps(metadata_dict))

1 files found in C:/Users/eholy/Desktop/SD/farfan_files/orig/


  audio, _ = librosa.load(sourcepath+filename)


for AFarfan: created 5004 .wav files
