In [None]:
import os
import time
import pydub
import seaborn as sns
from pydub.silence import split_on_silence
from pydub import AudioSegment

#### Establish Sound Processing Criteria

In [None]:
# Silence criteria for splitting: the minimum pause length (ms) and the
# offset (dB) added to each recording's dBFS to form the silence threshold.
silence_length_ms, silence_threshold_delta_dBFS = 4_000, -5

# Only chunks whose length (ms) lies within these inclusive bounds are exported.
min_chunk_length_ms, max_chunk_length_ms = 2_000, 60_000

#### Identify File Paths Specific to Your Machine

In [None]:
# Directory containing the raw MP3 downloads -- edit to match your machine
raw_path = "./mp3_download/"

# List the directory so you can confirm the expected files are present
os.listdir(path=raw_path)

In [None]:
# Directory that receives the exported WAV chunks -- edit to match your machine
output_path = "./wav_output/"

# Create the directory if it is missing so the export step cannot fail on a
# nonexistent folder, then list its current contents for a visual check
os.makedirs(output_path, exist_ok=True)
os.listdir(output_path)

#### Split Archive Feeds into Chunks and Export Chunks that meet Criteria

In [None]:
# Start the overall timer (covers every file processed in this cell)
start_time = time.time()

# Number of chunks exported so far; also used to number the output files
counter = 0
# Total length of the exported audio, in milliseconds
total_length_to_process = 0

for raw_file in os.listdir(raw_path):
    # Only MP3 files are processed; anything else in the folder is skipped
    if not raw_file.endswith('.mp3'):
        continue

    print(f"Examine {raw_file}")

    # Time each file on its own so the per-file message is not cumulative
    file_start_time = time.time()

    # Pull in the file
    audio_to_split = AudioSegment.from_mp3(os.path.join(raw_path, raw_file))

    # Split based on the silence criteria; the threshold is relative to this
    # recording's own dBFS
    chunks = split_on_silence(
        audio_to_split,
        min_silence_len=silence_length_ms,
        silence_thresh=audio_to_split.dBFS + silence_threshold_delta_dBFS,
    )

    # Time spent loading and splitting this file only
    processing_time = round(time.time() - file_start_time, 0)

    # Return informative statement
    print(f'File {raw_file} required {processing_time} seconds to split into {len(chunks)} candidate chunks of audio')

    # Part of the file name before the first dot, used in the output names
    base_name = raw_file.split(".")[0]

    # Export qualifying chunks (length within the inclusive min/max bounds)
    for chunk in chunks:
        sound_length = len(chunk)
        if min_chunk_length_ms <= sound_length <= max_chunk_length_ms:
            total_length_to_process += sound_length
            counter += 1
            file_name = "sample{}-{}.wav".format(counter, base_name)
            chunk.export(os.path.join(output_path, file_name),
                         format="wav")
    print('\n')

print(f'Exporting {counter} qualifying chunks required {time.time()-start_time} seconds')
# total_length_to_process is in milliseconds: 60_000 ms per minute
print(f'Total audio length of {total_length_to_process/60_000} minutes')

#### Confirm Files Exist

In [None]:
# List the output directory to confirm the files written there
os.listdir(path=output_path)