In [65]:
import os

from pydub import AudioSegment

def generate_non_silent_segments(file_name, max_segment_length=500, min_segment_length=100,
                                 silence_thresh=float("-inf")):
    """
    Find the non-silent segments of a WAV file.

    The audio is walked chunk by chunk (iterating a pydub AudioSegment yields
    one-millisecond chunks), so every index and length below is in milliseconds.
    A chunk is silent when its dBFS is at or below ``silence_thresh``. A run of
    consecutive non-silent chunks forms one segment; a run longer than
    ``max_segment_length`` is cut into back-to-back pieces of exactly
    ``max_segment_length`` (the last piece may be shorter), with no chunk lost
    between the pieces.

    :param file_name: path of the WAV file to find non-silent segments in
    :param max_segment_length: the maximum length of a non-silent segment, in milliseconds.
                               A longer run of sound is split into several segments.
    :param min_segment_length: segments must be strictly longer than this many
                               milliseconds to be returned; shorter ones are discarded.
    :param silence_thresh: loudness in dBFS at or below which a chunk counts as silent.
                           The default, -inf, treats only exact digital silence as silent.
    :return: a list of (start, end) tuples, in chronological order, where ``end``
             is exclusive so that ``audio[start:end]`` is the segment.
    :raises ValueError: if ``max_segment_length`` is smaller than 1
    """
    if max_segment_length < 1:
        raise ValueError("max_segment_length must be at least 1 millisecond")

    # load audio using AudioSegment
    song = AudioSegment.from_wav(file_name)

    boundaries = list()   # every (start, end) found, before the minimum-length filter
    segment_start = None  # start of the segment currently open, None while in silence

    for cur_index, chunk in enumerate(song):
        if chunk.dBFS <= silence_thresh:
            # Silence closes whatever segment is open; silence after silence is a no-op.
            if segment_start is not None:
                boundaries.append((segment_start, cur_index))
                segment_start = None
        elif segment_start is None:
            # Silence -> sound: a new segment starts here. Its length is measured
            # from this index, so nothing carries over from earlier segments.
            segment_start = cur_index
        elif cur_index - segment_start >= max_segment_length:
            # The open segment has reached the maximum length: close it and open
            # the next one on this same chunk so no audio falls between the two.
            boundaries.append((segment_start, cur_index))
            segment_start = cur_index

    # Sound that continues until the end of the file is closed at the last index.
    if segment_start is not None:
        boundaries.append((segment_start, len(song)))

    # Keep only the segments that lasted more than min_segment_length.
    return [(start, end) for start, end in boundaries if end - start > min_segment_length]

In [66]:
def separate_audio_segments(non_silent_segment, audio=None, out_dir="separated"):
    """
    Export each non-silent segment of an audio clip to its own WAV file.

    The files are named ``0.wav``, ``1.wav``, ... in the order of
    ``non_silent_segment`` and are written into ``out_dir``, which is created
    if it does not exist yet. Nothing is created when there are no segments.

    :param non_silent_segment: a list of (start, end) tuples; each pair is used
                               as the slice bounds ``audio[start:end]``
    :param audio: the clip to slice; it must support slicing and the slices must
                  provide ``export(path, format=...)``, as a pydub AudioSegment does.
                  When omitted, a module-level variable named ``song`` is used
                  (legacy behaviour). Prefer passing the clip explicitly so the
                  result does not depend on leftover kernel state.
    :param out_dir: output directory, joined onto the current working directory
                    (an absolute path is used as is). Defaults to "separated".
    :raises NameError: if ``audio`` is omitted and no module-level ``song`` exists
    """
    if not non_silent_segment:
        return

    if audio is None:
        try:
            audio = song  # legacy fallback: clip left in the notebook namespace
        except NameError:
            raise NameError(
                "no audio to slice: pass audio=... or define a module-level 'song'"
            ) from None

    out_path = os.path.join(os.getcwd(), out_dir)
    # Create the directory up front instead of waiting for the first export to fail.
    os.makedirs(out_path, exist_ok=True)

    for i, (start, end) in enumerate(non_silent_segment):
        export_path = os.path.join(out_path, str(i) + ".wav")
        audio[start:end].export(export_path, format="wav")
        print("Exported : " + str(i) + ".wav")

In [67]:
# Locate the non-silent segments of the sample captcha recording (default length limits).
a = generate_non_silent_segments(
    file_name="./test/captcha_6b11d4ebc70e4eb79614c1acc756f1da.wav"
)

In [68]:
# Write each detected segment to its own numbered WAV file.
separate_audio_segments(non_silent_segment=a)

Exported : 0.wav
Exported : 1.wav
Exported : 2.wav
Exported : 3.wav
Exported : 4.wav
Exported : 5.wav
Exported : 6.wav
