# Feature Extraction for Testing
Here we will be extracting MFCC features from the testing audio files of the VoxConverse dataset. To find the speech regions, we use the VAD (voice activity detector) from the file "VAD_library.ipynb". Each detected speech region is partitioned into 1-second segments, and the MFCC features of every segment are collected.

In [1]:
import os      # Importing Libraries
import sys
import librosa
import numpy as np
from VAD_library import vad

In [7]:
# Length of each analysis window cut from detected speech; passed to
# librosa.load as `duration`, so the unit is seconds.
audio_len = 1.0
# Folder containing the test audio files to process.
testing_folder = '../Data/Audio_Dataset/Testing/'
# Folder containing the .rttm transcripts, matched to audio files by base name.
transcripts_folder = '../Data/Audio_Dataset/Transcripts/'

In [8]:
def print_progress(done, total):
    """Draw a 50-character text progress bar on stdout.

    The line ends with a carriage return (no newline), so the next call
    overwrites it in place.

    Args:
        done: Number of items processed so far.
        total: Total number of items. May be 0 (empty workload), in which
            case the bar is drawn as complete instead of dividing by zero.
    """
    bar_width = 50
    if total > 0:
        filled = int(done * 50.0 / total)
        # Clamp so an out-of-range `done` cannot overflow or underflow the bar.
        filled = max(0, min(bar_width, filled))
    else:
        # Nothing to process: there is no meaningful ratio, show a full bar.
        filled = bar_width
    sys.stdout.write(
        '[' + '=' * filled + '>' + '-' * (bar_width - filled) + ']  '
        + str(done) + '/' + str(total) + '\r'
    )
    sys.stdout.flush()

def progress(entity):
    """Yield the items of `entity` while redrawing a progress bar on stdout.

    `entity` must support `len()`. An empty bar is drawn before the first
    item, the bar is refreshed after the consumer finishes with each item,
    and a newline is written once the iteration is exhausted.
    """
    print_progress(0, len(entity))
    for count, item in enumerate(entity, start=1):
        yield item
        # Control returns here only after the caller has handled `item`.
        print_progress(count, len(entity))
    # Move off the carriage-return line so later output does not overwrite the bar.
    sys.stdout.write("\n")
    sys.stdout.flush()

In [17]:
def _collect_speech_mfccs(audio_path, segments, audio_len):
    """Return MFCCs and [start, end] windows for the speech parts of one file.

    Every segment flagged as speech is cut into consecutive windows of
    `audio_len` seconds; a trailing remainder shorter than a full window is
    dropped.
    """
    mfcc_store = []
    est_timestamps = []
    for segment in progress(segments):
        if not segment['is_speech']:
            continue
        start = segment['start']
        end = segment['finish']
        while start + audio_len <= end:
            # NOTE: the file is re-read for every window; librosa seeks to
            # `offset` and decodes only `duration` seconds.
            audio, sr = librosa.load(audio_path, sr=16000, offset=start, duration=audio_len)
            mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)  # Getting MFCC
            mfcc_store.append(mfcc.T)  # stored transposed
            est_timestamps.append([start, start + audio_len])
            start += audio_len
    return mfcc_store, est_timestamps


def _read_rttm(transcript_path):
    """Parse an RTTM transcript into [start, end] turns and integer speaker labels.

    Speakers are numbered 1, 2, ... in order of first appearance in the file.
    """
    true_timestamps = []
    true_labels = []
    speaker_ids = {}
    # The context manager guarantees the handle is closed, even on a parse error.
    with open(transcript_path, 'r') as log:
        for line in log:
            words = line.split()
            if not words:
                # Blank (e.g. trailing) lines carry no speaker turn.
                continue
            # Whitespace-separated fields: index 3 = onset, 4 = duration, 7 = speaker name.
            label = speaker_ids.setdefault(words[7], len(speaker_ids) + 1)
            start = float(words[3])
            end = start + float(words[4])
            true_labels.append(label)
            true_timestamps.append([start, end])
    return true_timestamps, true_labels


def get_mfcc_test(audio_folder, transcript_folder, audio_len,
                  output_folder='../Data/MFCC_Features/Testing/'):
    """Extract windowed MFCC features and reference labels for every test file.

    For each entry in `audio_folder` the VAD is run, MFCCs are computed for
    each `audio_len`-second window inside the detected speech, and the
    matching `<base name>.rttm` transcript is parsed. Four arrays are saved
    per file with `np.save`, under these sub-folders of `output_folder`:
    'Testing MFCC', 'Testing Est Timestamps', 'Testing True Timestamps' and
    'Testing True Labels'. The sub-folders must already exist.

    Args:
        audio_folder: Folder with the audio files to process.
        transcript_folder: Folder with one .rttm transcript per audio file.
        audio_len: Window length in seconds.
        output_folder: Root folder for the saved arrays; the default keeps
            the original hard-coded location.

    Returns:
        None. Results are written to disk.
    """
    files = os.listdir(audio_folder)
    for file_number, file in enumerate(files, start=1):
        print("Processing File: "+str(file_number)+"/"+str(len(files))+"  ("+file+")")

        # os.path.join works whether or not the folder argument ends in a slash.
        audio_path = os.path.join(audio_folder, file)
        _, segments, _ = vad(audio_path)
        mfcc_store, est_timestamps = _collect_speech_mfccs(audio_path, segments, audio_len)

        # splitext strips only the final extension, so base names that
        # themselves contain dots still map to the right transcript.
        transcript_file = os.path.join(transcript_folder, os.path.splitext(file)[0] + '.rttm')
        true_timestamps, true_labels = _read_rttm(transcript_file)

        # The full audio file name (extension included) stays in the output
        # names; np.save appends '.npy'.
        np.save(os.path.join(output_folder, 'Testing MFCC', 'test_mfcc_' + file), mfcc_store)
        np.save(os.path.join(output_folder, 'Testing Est Timestamps', 'est_timestamps_' + file), est_timestamps)
        np.save(os.path.join(output_folder, 'Testing True Timestamps', 'true_timestamps_' + file), true_timestamps)
        np.save(os.path.join(output_folder, 'Testing True Labels', 'true_labels_' + file), true_labels)

In [18]:
# Run the extraction over every file in the testing folder; the resulting
# arrays are written to disk rather than returned.
get_mfcc_test(testing_folder,transcripts_folder,audio_len)

Processing File: 1/36  (xmfzh.wav)
Processing File: 2/36  (xypdm.wav)
Processing File: 3/36  (wmori.wav)
Processing File: 4/36  (whmpa.wav)
Processing File: 5/36  (zvmyn.wav)
Processing File: 6/36  (ycxxe.wav)
Processing File: 7/36  (xvllq.wav)
Processing File: 8/36  (vbjlx.wav)
Processing File: 9/36  (zyffh.wav)
Processing File: 10/36  (ysgbf.wav)
Processing File: 11/36  (zrlyl.wav)
Processing File: 12/36  (wdjyj.wav)
Processing File: 13/36  (zfkap.wav)
Processing File: 14/36  (zajzs.wav)
Processing File: 15/36  (wspbh.wav)
Processing File: 16/36  (ylnza.wav)
Processing File: 17/36  (wewoz.wav)
Processing File: 18/36  (wjhgf.wav)
Processing File: 19/36  (willh.wav)
Processing File: 20/36  (ypwjd.wav)
Processing File: 21/36  (ydlfw.wav)
Processing File: 22/36  (wbqza.wav)
Processing File: 23/36  (yrsve.wav)
Processing File: 24/36  (vmbga.wav)
Processing File: 25/36  (yfcmz.wav)
Processing File: 26/36  (zcdsd.wav)
Processing File: 27/36  (zmndm.wav)
Processing File: 28/36  (vysqj.wav)
P

In [22]:
# Load one saved true-labels array back from disk (presumably a sanity check
# of the extraction output).
a = np.load('../Data/MFCC_Features/Testing/Testing True Labels/true_labels_ywcwr.wav.npy')