In [17]:
import soundfile as sf 
import librosa
import numpy as np 
import pandas as pd 
import os
from tqdm import tqdm

### Segmenting Audio Files

Core files are separated into three classes: Dog, Speech, and Impact.

Each file is segmented into 200 ms chunks with 100 ms overlap (i.e. a 100 ms hop between consecutive chunks).

Each chunk is saved under a file name of the form "class_azimuth_idx.wav".

In [23]:
# Segment every raw recording of each class into fixed-length, overlapping
# frames and write each frame to its own wav file named
# "<class>_<azimuth>_<idx>.wav" (idx is 1-based) in the cleaned data directory.
class_types = ['Dog', 'Impact' , 'Speech']

# Manual settings (identical for every class, so they are defined once here
# rather than being recomputed on every iteration of the class loop)
fs = 48000
frame_len = int(0.2*fs) # 200ms
hop_len = int(0.1*fs)   # 100ms

for ct in class_types:

    # raw data directory
    full_data_dir = '../dataset/data/Dataset_concatenated_tracks/{}/'.format(ct)
    # cleaned, output data directory
    output_data_dir = '../dataset/cleaned_data/{}'.format(ct.lower())

    # create dirs; exist_ok tolerates re-running the cell, while genuine
    # failures (e.g. permission errors) still surface instead of being swallowed
    os.makedirs(output_data_dir, exist_ok=True)

    # loop through raw data dir (sorted so the processing order is reproducible)
    for file in sorted(os.listdir(full_data_dir)):
        if not file.endswith('.wav'):
            continue
        fullfn = os.path.join(full_data_dir, file)

        # extract azimuth from gt: third underscore-separated field of the name
        # NOTE(review): assumes every raw file name has at least three
        # '_'-separated fields and that the azimuth is unique per class;
        # otherwise this raises IndexError / output files overwrite each other.
        name_parts = file.split('_')
        azimuth = name_parts[2]

        # load audio at the working sample rate, keeping all channels
        audio , _ = librosa.load(fullfn, sr=fs, mono=False, dtype=np.float32)

        # Segment the audio input into overlapping frames
        frames = librosa.util.frame(audio, frame_length=frame_len, hop_length=hop_len)

        # Transpose into (n_segments, timebins, channels)
        # (presumably the input is multi-channel; a mono file would give
        # (n_segments, timebins) instead -- TODO confirm)
        frames = frames.T
        for idx, frame in enumerate(tqdm(frames)):
            final_fn = "{}_{}_{}.wav".format(ct.lower(), azimuth, idx+1)
            final_fp = os.path.join(output_data_dir, final_fn)
            # write with the same rate the audio was loaded at
            sf.write(final_fp, frame, samplerate=fs)

100%|██████████| 5983/5983 [00:35<00:00, 167.06it/s]
100%|██████████| 5983/5983 [00:38<00:00, 157.13it/s]
100%|██████████| 5983/5983 [00:38<00:00, 153.80it/s]
100%|██████████| 5983/5983 [00:35<00:00, 167.40it/s]
100%|██████████| 5983/5983 [00:37<00:00, 160.85it/s]
100%|██████████| 5983/5983 [00:37<00:00, 158.72it/s]
100%|██████████| 6679/6679 [00:31<00:00, 213.54it/s]
100%|██████████| 6679/6679 [00:33<00:00, 198.13it/s]
100%|██████████| 6679/6679 [00:32<00:00, 208.29it/s]
100%|██████████| 6679/6679 [00:32<00:00, 206.43it/s]
100%|██████████| 6679/6679 [00:32<00:00, 204.60it/s]
100%|██████████| 6679/6679 [00:32<00:00, 207.50it/s]
100%|██████████| 6013/6013 [00:28<00:00, 209.55it/s]
100%|██████████| 6013/6013 [00:30<00:00, 197.49it/s]
100%|██████████| 6013/6013 [00:29<00:00, 200.95it/s]
100%|██████████| 6013/6013 [00:29<00:00, 203.91it/s]
100%|██████████| 6013/6013 [00:29<00:00, 203.57it/s]
100%|██████████| 6013/6013 [00:28<00:00, 211.71it/s]
