In [27]:
# Import the AudioSegment class for processing audio and the 
# split_on_silence function for separating out silent chunks.
from pydub import AudioSegment
from pydub.silence import split_on_silence
import numpy as np
import librosa
from pydub.utils import make_chunks
import os

In [28]:
#normalize a chunk to a target amplitude.
def match_target_amplitude(aChunk, target_dBFS):
    '''Normalize an audio chunk to a target loudness.

    Args:
        aChunk: Audio object exposing a ``dBFS`` attribute and an
            ``apply_gain(gain_in_dB)`` method (a pydub ``AudioSegment``
            in this notebook).
        target_dBFS: Desired loudness of the chunk, in dBFS.

    Returns:
        The result of ``aChunk.apply_gain`` called with the gain needed to
        move the chunk from its current ``dBFS`` to ``target_dBFS``. A chunk
        whose ``dBFS`` is ``-inf`` is returned unchanged.
    '''
    current_dBFS = aChunk.dBFS
    # A chunk with no signal at all reports -inf dBFS; the gain needed to
    # reach the target would then be +inf, which cannot be applied. There is
    # nothing to amplify, so hand the chunk back as it is.
    if current_dBFS == float('-inf'):
        return aChunk
    return aChunk.apply_gain(target_dBFS - current_dBFS)

#silence removal, normalization and trimming
def remove_silence(path, frame_rate=22050, min_silence_len=3000,
                   silence_thresh=-30, target_dBFS=-20.0, max_length_ms=60000):
    '''Load an audio file, drop long silences, normalize and trim it.

    Args:
        path: Location of the audio file, passed to ``AudioSegment.from_file``.
        frame_rate: Frame rate (Hz) the audio is converted to after loading.
        min_silence_len: Minimum length, in milliseconds, a quiet stretch
            must have to be treated as silence and used as a split point.
        silence_thresh: Level in dBFS below which audio counts as silent.
            (You may want to adjust this per dataset.)
        target_dBFS: Loudness every non-silent chunk is normalized to.
        max_length_ms: Number of milliseconds kept from the start of the
            re-joined audio.

    Returns:
        An ``AudioSegment`` made of the normalized non-silent chunks joined
        in order and cut to at most ``max_length_ms`` milliseconds.
    '''
    # Load the audio and bring it to a common frame rate.
    song = AudioSegment.from_file(path).set_frame_rate(frame_rate)

    # Split the track wherever it stays below `silence_thresh` for at least
    # `min_silence_len` ms; the remaining options keep pydub's defaults.
    chunks = split_on_silence(
        song,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
    )

    # Normalize each non-silent chunk and join them back together in order.
    final_chunk = AudioSegment.empty()
    for chunk in chunks:
        final_chunk += match_target_amplitude(chunk, target_dBFS)

    # Keep only the first `max_length_ms` milliseconds.
    return final_chunk[:max_length_ms]

In [29]:
# Source audio directory and the output directories for train/test chunks.
location = '/media/abhiroopd/New Volume/audio/'
train_dest = '/media/abhiroopd/New Volume/dataset/training/'
test_dest = '/media/abhiroopd/New Volume/dataset/testing/'

# Next chunk number for each singer. Keeping one counter per singer (instead
# of resetting a single counter when it reaches a fixed value) keeps the
# numbering correct when a song yields fewer than six chunks or a singer has
# a different number of songs, and avoids overwriting earlier exports.
chunk_counters = {}

# final_dataset.txt lists one audio file name per line; the part before the
# first '_' is used as the singer id.
with open("final_dataset.txt", "r") as training_file:
    for path in training_file:
        # Remove leading and trailing whitespace, including the newline.
        path = path.strip()
        if not path:
            # A blank line would otherwise make us try to load `location` itself.
            continue

        singer_id = path.split('_')[0]
        song = remove_silence(location + path)

        counter = chunk_counters.get(singer_id, 0)
        # Cut the cleaned song into 10-second pieces: the first four go to
        # the training set, any further ones to the testing set.
        for i, chunk in enumerate(make_chunks(song, 10000)):
            counter += 1
            dest = test_dest if i > 3 else train_dest
            chunk.export(dest + singer_id + '_' + str(counter), bitrate = "192k", format = "wav")
        chunk_counters[singer_id] = counter

KeyboardInterrupt: 

In [None]:
# List the exported chunks; os.listdir returns names in arbitrary order, so
# sort them to make the generated listings reproducible.
training_files = sorted(os.listdir(train_dest))
testing_files = sorted(os.listdir(test_dest))

# Write one file name per line. The files are opened in "w" mode so that
# re-running this cell replaces the listing instead of appending duplicates,
# and the names are joined with '\n' so there is no trailing newline to
# remove by hand afterwards.
with open("train.txt", "w") as listing:
    listing.write('\n'.join(training_files))

with open("test.txt", "w") as listing:
    listing.write('\n'.join(testing_files))