# Import Statements

In [101]:
%matplotlib inline
from pydub import AudioSegment
from pydub.playback import play
from IPython.display import Audio 
from IPython.core.display import display
import os
import re
import shutil
import matplotlib.pyplot as plt

# Strip Spaces from Filename

In [102]:
def strip_space(dest_dir):
    files = os.listdir(dest_dir)
    print(f'{len(files)} files found in the directory.')
    
    for filename in files:
        name, ext = os.path.splitext(filename)
        newname = name.strip() + '.wav'
        os.rename(os.path.join(dest_dir, filename), os.path.join(dest_dir, newname))

In [98]:
dest_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs"

strip_space(dest_dir)

12534 found in the directory.


# Resample .wav Files

In [103]:
def set_sr(src_dir, dest_dir, sr=22050):
    files = os.listdir(src_dir)
    print(f'{len(files)} found in the directory.')
    
    for file in files:
        audio = AudioSegment.from_file(os.path.join(src_dir, file))
        audio = audio.set_frame_rate(sr)
        audio.export(os.path.join(dest_dir, file), format="wav")

In [100]:
src_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs"
dest_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs_22.05k"
target_sr = 22050
set_sr(src_dir, dest_dir, target_sr)

12534 found in the directory.


# Metadata
Prepare a `metadata.csv` file similar to the LJSpeech dataset. The entries of this file should be in the following format:
```
id|text|normalized-text
```
If the text is already normalized, then the 2nd column may be left empty.

In [104]:
# a list containing (idx, text) touples
# populate it from the dataset-specific section below
samples = []

In [105]:
def create_metadata(src_dir):
    with open(os.path.join(src_dir, 'metadata.csv'), 'w') as f:
        with open(os.path.join(src_dir, 'exclude.csv'), 'w') as ex:
            for (idx, text) in sorted(samples):
                if idx in exclude:
                    ex.write(idx + "| |" + text + "\n")
                else:
                    f.write(idx + "| |" + text + "\n")

In [130]:
src_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female"

create_metadata(src_dir)

## Parsing CMU Data

In [75]:
src = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/cmu_indic_ben_rm/etc/txt.done.data"

samples.clear()
with open(src) as f:
    total = 0
    for line in f:
        line = line.replace("(", "").replace(")", "").strip()
        words = line.split()
        idx = words[0]
        text = " ".join(words[1:]).strip('"')
        
        #print(f'{idx} -> {text}')
        samples.append((idx, text))
        
        total += 1
#         if total == 5: 
#             break

print(f'Total samples: {len(samples)}')        

Total samples: 548


## Parsing SUST Female Data

In [108]:
src = '/media/arif/Resource/Etc/Notebooks/bn_speech_db_new/sust_tts_2_female.txt' 

samples.clear()
with open(src) as f:
    total = 0
    for line in f:
        idx, text = line.strip().split('\t')
        
        if text[-1] not in "।?!;,.":
            text += "।"
        
        #print(f"{idx} -> {text}")
        samples.append((idx, text))
        
        total += 1
        #if total == 5: break
        
print(f"Total samples: {len(samples)}")

Total samples: 12537


# Trim Silences from Start and End

In [109]:
#from pydub import AudioSegment

def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=10):
    '''
    sound is a pydub.AudioSegment
    silence_threshold in dB
    chunk_size in ms

    iterate over chunks until you find the first one with sound
    '''
    trim_ms = 0 # ms

    assert (chunk_size > 0) # to avoid infinite loop
    while sound[trim_ms:trim_ms+chunk_size].dBFS < silence_threshold and trim_ms < len(sound):
        trim_ms += chunk_size

    if trim_ms > chunk_size: trim_ms -= chunk_size
    if trim_ms > chunk_size: trim_ms -= chunk_size
    return trim_ms


def remove_silence(src_dir, dest_dir, verbose=False):
    files = os.listdir(src_dir)
    for file in files:
        sound = AudioSegment.from_file(os.path.join(src_dir, file), format="wav")

        start_trim = detect_leading_silence(sound)
        end_trim = detect_leading_silence(sound.reverse())

        duration = len(sound)    
        trimmed_sound = sound[start_trim:duration-end_trim]
        
        if verbose:
            play(sound)
            play(trimmed_sound)

        trimmed_sound.export(os.path.join(dest_dir, file), format="wav")

In [111]:
src_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs_22.05k"
dest_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs_22.05k_trimmed"

remove_silence(src_dir, dest_dir, False)

# Detect Noisy Files
Detect SNRs of the audio files from `TTS/notebooks/dataset_analysis/CheckDatasetSNR.ipynb` and extract a list of the most noisy files.

In [112]:
# cmu dataset
too_noisy = ['bn_00400', 'bn_00273', 'bn_00434', 'bn_00229', 'bn_00012', 'bn_00405', 'bn_00561', 'bn_00321', 'bn_00398', 'bn_00476', 'bn_00442', 'bn_00232', 'bn_00450', 'bn_00412', 'bn_00389', 'bn_00506', 'bn_00431', 'bn_00502', 'bn_00530', 'bn_00421'] 

# sust female
too_noisy = ['17601', '14702', '08781', '09541', '11585', '07214', '10840', '11150', '12204', '13035', '13726', '06692', '17125', '11043', '14007', '06770', '14041', '13205', '12356', '11066', '13589', '13090', '10210']

len(too_noisy)

23

# Detect Too Long and Too Short Files

In [113]:
def detect_short_long_samples(src_dir):
    '''
    Detects samples that are too short (e.g. <0.38 sec)
    or too long (e.g. >15 sec)
    '''

    too_short = []
    too_long = []
    files = os.listdir(src_dir)
    for file in files:
        audio = AudioSegment.from_file(os.path.join(src_dir, file))

        duration = audio.duration_seconds

        # remove samples less than 0.38 sec
        if duration <  0.38:
            print(f'{file} -> {duration:.2f} sec')
            too_short.append(file[:-4])

        if duration > 17:
            print(f'{file} -> {duration:.2f} sec')
            too_long.append(file[:-4])
    
    print(f'{len(too_short)} short and {len(too_long)} long samples detected.')
    return (too_short, too_long)

In [114]:
src_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs_22.05k_trimmed"

too_short, too_long = detect_short_long_samples(src_dir)

01127.wav -> 19.38 sec
01167.wav -> 18.14 sec
07010.wav -> 18.82 sec
0 short and 3 long samples detected.


# Normalize Amplitude

In [120]:
def normalize_samples(src_dir, dest_dir, gain=-20):
    '''
    Normalizes all audio samples from `src_dir` and 
    stores them in `dest_dir`.
    '''
    files = os.listdir(src_dir)
    
    #from random import shuffle
    #shuffle(files)

    total = 0
    for file in files:
        total += 1
        #if total < 1100: continue
        #if total == 4: break

        filename = os.path.join(src_dir,file)

        clip = AudioSegment.from_file(filename)

        #print(f'Before: {clip.dBFS}')
        #play(clip)
        #display(Audio(clip.get_array_of_samples(), rate=clip.frame_rate))

        #clip = effects.normalize(clip) # or
        clip = clip.apply_gain(gain - clip.dBFS)

        print(f'After: {clip.dBFS}')
        #play(clip)
        #display(Audio(clip.get_array_of_samples(), rate=clip.frame_rate))

        clip.export(os.path.join(dest_dir, file), format="wav")



    print(f'{total} files updated.')

In [122]:
src_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs_22.05k_trimmed"
dest_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs_22.05k_trimmed_normalized"

normalize_samples(src_dir, dest_dir)

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.996819714504753
After: -19.996819714504753
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.996819714504753
After: -19.99946987154684
After: -19.996819714504753
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19

After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.0

After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.996819714504753
After: -19.99946987154684
After: -19.996819714504753
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.996819714504753
After: -19.99946987154684
After: -19.996819714504753
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.0

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.996819714504753
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.

After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.9

After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.996819714504753
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.

After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.0

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.9

After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -19.996819714504753
After: -19.99946987154684
After: -19.996819714504753
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.996819714504753
After: -19.996819714504753
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.996819714504753
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.996819714504753
After: -20.00212083742674
After: -19.996819714504753
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.996819714504753
After: -19.99946987154684
After: -20.00212083742674
After: -19.996819714504753
After: -19.99946987154684
After: -19.99946987154684
Aft

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.996819714504753
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.996819714504753
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.996819714504753
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -1

After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.0

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.0

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.996819714504753
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.

After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.9

After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.9

After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.0

After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.9

After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.0

After: -19.99946987154684
After: -19.996819714504753
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.

After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.0

After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -20.0

After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -20.00212083742674
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -19.99946987154684
After: -20.00212083742674
After: -19.99946987154684
After: -19.9

# Stereo to Mono

In [125]:
def convert_to_mono(src_dir, dest_dir):
    '''
    Conerts all samples from stereo to mono (if required) from `src_dir`
    and stores them in `dest_dir`    
    '''
    files = os.listdir(src_dir)

    total = 0
    for file in files:
        filename = os.path.join(src_dir,file)

        clip = AudioSegment.from_file(filename)

        if clip.channels == 2:
            clip = clip.set_channels(1)
            total += 1
            print(file)

        clip.export(os.path.join(dest_dir, file.strip()), format="wav")



    print(f'{total} files converted from stereo to mono.')

In [126]:
src_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs_22.05k_trimmed_normalized"

convert_to_mono(src_dir, src_dir)

03136.wav
03059.wav
03060.wav
03061.wav
03062.wav
03063.wav
03064.wav
03065.wav
03066.wav
03067.wav
03068.wav
03069.wav
03070.wav
03071.wav
03072.wav
03073.wav
03075.wav
03076.wav
03077.wav
03078.wav
03079.wav
03080.wav
03081.wav
03082.wav
03083.wav
03084.wav
03085.wav
03086.wav
03087.wav
03088.wav
03089.wav
03090.wav
03091.wav
03092.wav
03093.wav
03094.wav
03096.wav
03097.wav
03098.wav
03099.wav
03100.wav
03101.wav
03102.wav
03103.wav
03104.wav
03105.wav
03106.wav
03107.wav
03108.wav
03109.wav
03110.wav
03111.wav
03112.wav
03113.wav
03114.wav
03115.wav
03117.wav
03118.wav
03119.wav
03120.wav
03121.wav
03122.wav
03123.wav
03124.wav
03125.wav
03126.wav
03127.wav
03128.wav
03129.wav
03130.wav
03131.wav
03132.wav
03133.wav
03134.wav
03135.wav
03074.wav
03095.wav
03116.wav
03137.wav
03138.wav
03139.wav
03140.wav
03141.wav
83 files converted from stereo to mono.


# Move Cleaned Files
Move all audio files to a new folder excluding noisy, short and long samples.

In [127]:
exclude = sorted(too_noisy + too_short + too_long)
print(f'Total {len(exclude)} files to be excluded.')
print(f'Excluded Files: {exclude}')

Total 26 files to be excluded.
Excluded Files: ['01127', '01167', '06692', '06770', '07010', '07214', '08781', '09541', '10210', '10840', '11043', '11066', '11150', '11585', '12204', '12356', '13035', '13090', '13205', '13589', '13726', '14007', '14041', '14702', '17125', '17601']


In [128]:
def move_files(src_dir, dest_dir):
    '''
    Moves all audio files from `src_dir` to `dest_dir`
    '''
    files = os.listdir(src_dir)
    i = 0
    for file in files:
        if file[:-4] in exclude:
            print(f'{file} to be excluded.')
            continue
        
        shutil.move(os.path.join(src_dir, file), os.path.join(dest_dir, file))
        
        i += 1
        #if i == 3: break
        
    print(f'Total: {i}')

In [129]:
src_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs_22.05k_trimmed_normalized"
dest_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs_22.05k_trimmed_normalized_cleaned"

move_files(src_dir, dest_dir)

01127.wav to be excluded.
01167.wav to be excluded.
06692.wav to be excluded.
06770.wav to be excluded.
07010.wav to be excluded.
07214.wav to be excluded.
09541.wav to be excluded.
11043.wav to be excluded.
11066.wav to be excluded.
11150.wav to be excluded.
11585.wav to be excluded.
10210.wav to be excluded.
10840.wav to be excluded.
13090.wav to be excluded.
13205.wav to be excluded.
13589.wav to be excluded.
13726.wav to be excluded.
14007.wav to be excluded.
14041.wav to be excluded.
12204.wav to be excluded.
12356.wav to be excluded.
13035.wav to be excluded.
17125.wav to be excluded.
Total: 12511


# Validate

In [96]:
metafile = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/cmu_indic_ben_rm/metadata.csv"
wav_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/cmu_indic_ben_rm/wavs_22.05k_trimmed_normalized_cleaned"

samples = []
with open(metafile) as f:
    for line in f:
        idx, _, text = line.strip().split("|")
        samples.append((idx, text))
     
    
files = os.listdir(wav_dir)
assert(len(samples) == len(files))

file_idx = [file[:-4] for file in files]
for sample in samples:
    assert(sample[0] in file_idx)

i = 0
for file in files:
    audio = AudioSegment.from_wav(os.path.join(wav_dir, file))
    
    # mono
    assert(audio.channels == 1)
    
    # sampling rate
    assert(audio.frame_rate == 22050)
    
    # loudness
    print(audio.rms)
    #assert(int(audio.rms) in [3276, 3277, 3278])
    
    i += 1
    #if i == 3: break
    

3277
3276
3277
3277
3277
3277
3277
3277
3276
3277
3277
3276
3277
3277
3277
3277
3277
3277
3276
3277
3277
3277
3277
3277
3277
3277
3277
3277
3276
3276
3277
3277
3277
3276
3276
3276
3276
3276
3277
3277
3277
3277
3277
3277
3277
3276
3277
3276
3277
3276
3277
3277
3276
3276
3277
3277
3277
3277
3277
3277
3276
3277
3277
3277
3277
3277
3277
3277
3277
3276
3277
3277
3277
3277
3277
3277
3277
3277
3276
3277
3277
3277
3277
3276
3277
3277
3277
3277
3276
3277
3276
3277
3277
3278
3277
3277
3277
3277
3277
3277
3277
3277
3277
3276
3276
3277
3277
3277
3276
3277
3277
3277
3277
3276
3277
3277
3277
3277
3277
3277
3277
3277
3277
3277
3276
3277
3277
3277
3277
3277
3277
3277
3277
3277
3276
3277
3277
3276
3277
3277
3277
3277
3277
3277
3277
3276
3277
3277
3277
3276
3277
3277
3277
3277
3276
3277
3277
3277
3276
3276
3277
3277
3277
3277
3277
3277
3277
3277
3277
3276
3277
3277
3277
3276
3277
3277
3277
3277
3277
3276
3277
3277
3278
3277
3277
3277
3277
3277
3277
3277
3277
3276
3277
3278
3275


AssertionError: 

# Statistics

In [131]:
def duration_stats(src_dir):
    total_duration = 0
    max_duration = 0
    min_duration = 100000000
    files = os.listdir(src_dir)
    #print(len(files))
    for file in files:
        assert(file.endswith(".wav"))
        audio = AudioSegment.from_wav(os.path.join(src_dir, file))

        duration = audio.duration_seconds
        total_duration += duration
        max_duration = max(max_duration, duration)
        min_duration = min(min_duration, duration)

    avg_duration = total_duration / len(files)
    total_duration /= 60 # sec. to min.
    duration_hours = total_duration // 60
    duration_mins = total_duration - (duration_hours * 60)

    print(f"Total duration: {int(duration_hours)} hours {duration_mins:.2f} minutes")
    print(f'Maximum duration: {max_duration:.2f} seconds')
    print(f'Minimum duration: {min_duration:.2f} seconds')
    print(f'Average duration: {avg_duration:.2f} seconds')    

In [132]:
src_dir = "/media/arif/Resource/My PC/Tutorial/PhD (Offline)/Resources/Sound Corpus/Bangla Speech Database 2018/Female/wavs_22.05k_trimmed_normalized_cleaned"

duration_stats(src_dir)

Total duration: 17 hours 45.31 minutes
Maximum duration: 16.99 seconds
Minimum duration: 0.67 seconds
Average duration: 5.11 seconds
