In [1]:
import os, sys
from os import path
import wave
import contextlib
import webrtcvad

In [2]:
def write_wave(path, frames, sample_rate):
    """Write PCM frames to a mono, 16-bit, uncompressed .wav file.

    Args:
        path: Destination passed straight to ``wave.open``.
        frames: Iterable of ``bytes`` objects holding the PCM data; the
            pieces are concatenated in order.
        sample_rate: Sampling rate in Hz stored in the WAV header.
    """
    nchannels = 1
    sampwidth = 2
    pcm_data = b''.join(frames)
    # The header counts sample frames, not the number of byte chunks passed in.
    nframes = len(pcm_data) // (nchannels * sampwidth)
    # closing() guarantees the file is closed even if writing fails.
    with contextlib.closing(wave.open(path, "wb")) as wav_file:
        wav_file.setparams(
            (nchannels, sampwidth, sample_rate, nframes, "NONE", "not compressed")
        )
        wav_file.writeframes(pcm_data)

In [3]:
def read_wave(path):
    """Read a mono, 16-bit PCM .wav file.

    Args:
        path: Source passed straight to ``wave.open``.

    Returns:
        A tuple ``(pcm_data, sample_rate, duration)``: the raw PCM bytes,
        the sample rate in Hz, and the length in seconds (frame count
        divided by sample rate).

    Raises:
        AssertionError: If the file is not mono, not 16-bit, or its sample
            rate is not 8000, 16000 or 32000 Hz.
    """
    with contextlib.closing(wave.open(path, 'rb')) as wf:
        num_channels = wf.getnchannels()
        assert num_channels == 1, 'expected mono audio, got %d channels' % num_channels
        sample_width = wf.getsampwidth()
        assert sample_width == 2, 'expected 16-bit samples, got %d bytes per sample' % sample_width
        sample_rate = wf.getframerate()
        assert sample_rate in (8000, 16000, 32000), 'unsupported sample rate: %d' % sample_rate
        frames = wf.getnframes()
        pcm_data = wf.readframes(frames)
        duration = frames / sample_rate
        return pcm_data, sample_rate, duration


In [4]:
class Frame:
    """A single fixed-length slice of audio.

    Attributes:
        bytes: The value passed as ``bytes`` (the frame's audio data).
        timestamp: The value passed as ``timestamp``.
        duration: The value passed as ``duration``.
    """

    def __init__(self, bytes, timestamp, duration):
        # Plain value holder: store the three constructor arguments as given.
        self.bytes, self.timestamp, self.duration = bytes, timestamp, duration
class Snippet(object):
    """Represents a snippet of the audio file post performing vad.

    Attributes:
        path: Path of the chunk file this snippet refers to.
        from_time: Start of the snippet as given to the constructor.
        to_time: End of the snippet as given to the constructor.
        responses: Transcription responses; empty until ``add_transcription``.
        signals: Signals collected through ``add_signal``.
        speaker: Speaker set through ``set_speaker``; ``None`` until then.
    """

    def __init__(self, path, from_time, to_time):
        self.path = path
        self.from_time = from_time
        self.to_time = to_time
        self.responses = []
        self.signals = []
        # Defined up front so reading it before set_speaker() does not raise
        # AttributeError.
        self.speaker = None

    def add_signal(self, signal):
        """Append one signal to ``signals``."""
        self.signals.append(signal)

    def add_transcription(self, responses):
        """Replace ``responses`` with the given value (it does not append)."""
        self.responses = responses

    def set_speaker(self, speaker):
        """Record the speaker for this snippet."""
        self.speaker = speaker

In [10]:
def perform_vad_sil(file_path, chunk_folder_path, frame_duration_ms=30, min_silence=0.06, min_voice=3):
    """Split a WAV file into chunk files at pauses found by WebRTC VAD.

    The audio is walked in fixed-size frames. Frames are accumulated until a
    run of consecutive non-speech frames lasts longer than ``min_silence``.
    The accumulated audio is then written as one chunk if it is longer than
    ``min_voice``, otherwise it is discarded. Audio still pending when the
    file ends is handled the same way.

    Args:
        file_path: Path of the input .wav file (read through ``read_wave``).
        chunk_folder_path: Directory for the chunk files; created if missing.
        frame_duration_ms: Length of one analysis frame in milliseconds.
            NOTE(review): webrtcvad documents 10, 20 and 30 ms as the
            accepted frame lengths.
        min_silence: Seconds of consecutive non-speech that must be exceeded
            before the current chunk is closed.
        min_voice: Seconds a chunk must exceed to be written to disk.

    Returns:
        A list of ``Snippet`` objects, one per chunk file written, in file
        order. Chunk files are named ``<input base name>_NNN.wav``.
    """
    print('Processing '+ file_path+' for voice activity detection...')
    if not os.path.isdir(chunk_folder_path):
        print('Creating chunk folder at: '+chunk_folder_path)
        os.makedirs(chunk_folder_path)

    aggressiveness = 2
    vad = webrtcvad.Vad(aggressiveness)
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    audio, sample_rate, audio_length = read_wave(file_path)

    # 16-bit mono PCM: two bytes per sample.
    frame_size = int(sample_rate * (frame_duration_ms / 1000.0) * 2)
    # Thresholds are compared in milliseconds so the running totals stay in
    # the same unit as frame_duration_ms.
    min_silence_ms = min_silence * 1000.0
    min_voice_ms = min_voice * 1000.0
    snippets = []

    def seconds_at(byte_offset):
        # Map a byte position in `audio` to a time position in the file.
        return byte_offset / len(audio) * audio_length

    def emit_chunk(frames, start, end):
        # Write `frames` as the next chunk file, unless the chunk is too short.
        if len(frames) * frame_duration_ms <= min_voice_ms:
            return
        chunk_file_path = os.path.join(
            chunk_folder_path, base_name + "_{:03}".format(len(snippets)) + '.wav'
        )
        write_wave(chunk_file_path, frames, sample_rate)
        snippets.append(Snippet(chunk_file_path, start, end))
        print('Creating chunk from: '+ str(start)  +'to: '+ str(end) +': '+ chunk_file_path)

    pending_frames = []
    chunk_from = 0.0
    chunk_to = 0.0
    silence_ms = 0
    offset = 0
    # `<=` so that a final frame ending exactly at the end of the data is kept.
    while offset + frame_size <= len(audio):
        frame_bytes = audio[offset:offset + frame_size]
        if not pending_frames:
            chunk_from = seconds_at(offset)
        pending_frames.append(frame_bytes)
        chunk_to = seconds_at(offset + frame_size)

        if vad.is_speech(frame_bytes, sample_rate):
            silence_ms = 0
        else:
            silence_ms += frame_duration_ms
            if silence_ms > min_silence_ms:
                emit_chunk(pending_frames, chunk_from, chunk_to)
                # The silence counter is deliberately not reset here: while the
                # pause continues, each further silent frame is dropped at once.
                pending_frames = []
        offset += frame_size

    # Do not lose speech that runs up to the end of the file.
    if pending_frames:
        emit_chunk(pending_frames, chunk_from, chunk_to)
    return snippets

In [5]:
def read_file_lines(file_path):
    """Return every line of a text file as a list, line endings included.

    Args:
        file_path: Path of the file to read in text mode.

    Returns:
        The list produced by ``readlines()``.
    """
    # The context manager closes the handle, which the original never did.
    with open(file_path, "r") as handle:
        return handle.readlines()

In [None]:
def split_by_srt(srt_path, delta, chunk_path):
    """Placeholder for splitting by an SRT file; not implemented yet.

    The original cell read the lines of ``srt_path`` and stopped at a ``for``
    loop with no body, which is a syntax error. Until the logic is written
    this fails loudly rather than silently doing nothing.

    Args:
        srt_path: Path of the .srt file to read.
        delta: NOTE(review): meaning not shown by the existing code - confirm.
        chunk_path: NOTE(review): presumably where chunks are written - confirm.

    Raises:
        NotImplementedError: Always.
    """
    # TODO: load the lines with read_file_lines(srt_path) and process each one.
    raise NotImplementedError('split_by_srt is not implemented yet')

In [None]:
if __name__=='__main__':
        
def read_subs(sub_path):
    with open(...) as f:
    for line in f:
        if len(line.strip())>0: