In [1]:
import os
import time
import pydub
import seaborn as sns
from pydub.silence import split_on_silence
from pydub import AudioSegment

#### Establish Sound Processing Criteria

In [8]:
silence_length_ms = 4_000
silence_threshold_delta_dBFS = 0

min_chunk_length_ms = 2_000
max_chunk_length_ms = 60_000

#### Identify File Paths Specific to Your Machine

In [3]:
# File Path to Raw Audio
raw_path = "./mp3_download/"

# Verify Appropriate Files Exist in this Path
os.listdir(raw_path)

['13467-20200501-1252.mp3',
 '13467-20200501-1322.mp3',
 '13467-20200501-1351.mp3']

In [5]:
# File Path for Structured Audio
output_path = "./wav_output/"

# Verify Outpath Path Exists
os.listdir(output_path)

[]

#### Split Archive Feeds into Chunks and Export Chunks that meet Criteria

In [88]:
# Start Timer
start_time = time.time()

# Counter for file names
counter = 0

for raw_file in os.listdir(raw_path):
    
    # Pull in File
    audio_to_split = AudioSegment.from_mp3(raw_path + raw_file)[:200_000]
    
    # Audio Processing - Increase Loudness
    audio_to_split += 40
    
    # Audio Processing - Set Channels to 1
    audio_to_split.set_channels(1)
    
    # Split based on silence critiera
    chunks = split_on_silence(audio_to_split,
                             min_silence_len = silence_length_ms,
                             silence_thresh = audio_to_split.dBFS + silence_threshold_delta_dBFS)
    
    # Calculate Processing Time
    processing_time = round(time.time() - start_time,0)
    
    # Return informative statement
    print(f'File {raw_file} required {processing_time} seconds to split into {len(chunks)} candidate chunks of audio \n')
    
    # Export qualifying chunks
    for chunk in chunks:
        if len(chunk) >= min_chunk_length_ms and len(chunk) <= max_chunk_length_ms:
            counter +=1
            file_name = "sample{}-{}.wav".format(counter,raw_file.split(".")[0])
            chunk.export(output_path + file_name,
                     format = "wav")

    
print(f'Exporting {counter} qualifying chunks required {time.time()-start_time} seconds')    

File 13467-20200501-1252.mp3 required 27.0 seconds to split into 2 candidate chunks of audio 

File 13467-20200501-1322.mp3 required 55.0 seconds to split into 3 candidate chunks of audio 

File 13467-20200501-1351.mp3 required 82.0 seconds to split into 2 candidate chunks of audio 

Exporting 0 qualifying chunks required 81.86603331565857 seconds


#### Confirm Files Exist

In [74]:
os.listdir(output_path)

['sample1-13467-20200501-1252.wav',
 'sample1-13467-20200501-1322.wav',
 'sample2-13467-20200501-1322.wav',
 'sample2-13467-20200501-1351.wav',
 'sample3-13467-20200501-1322.wav',
 'sample4-13467-20200501-1351.wav',
 'sample5-13467-20200501-1351.wav']

In [87]:
chunks[5]