In [10]:
import os
from pydub import AudioSegment
from textgrid import TextGrid, IntervalTier, Interval
import glob

In [11]:
def _export_segment(audio, tier, start_ms, end_ms, stem, index, output_dir):
    """Write one audio slice and its matching annotation slice to ``output_dir``.

    Produces ``{stem}_{index}.wav`` and ``{stem}_{index}.TextGrid``. The
    TextGrid holds a single tier (named like ``tier``) whose intervals are the
    ones overlapping ``[start_ms, end_ms)``, clipped to that window and shifted
    so the window starts at 0 seconds.

    Args:
        audio: pydub ``AudioSegment`` to slice (indexed in milliseconds).
        tier: source ``IntervalTier``; its interval times are in seconds.
        start_ms: window start in milliseconds.
        end_ms: window end in milliseconds (must be greater than ``start_ms``).
        stem: file-name stem shared by both output files.
        index: running segment number appended to the stem.
        output_dir: existing directory that receives the files.
    """
    audio[start_ms:end_ms].export(
        os.path.join(output_dir, f"{stem}_{index}.wav"), format="wav"
    )

    span_ms = end_ms - start_ms
    clipped_tier = IntervalTier(name=tier.name, minTime=0, maxTime=span_ms / 1000.0)
    for intv in tier.intervals:
        # Tier times are seconds; the window is milliseconds.
        intv_start = intv.minTime * 1000
        intv_end = intv.maxTime * 1000
        if intv_end > start_ms and intv_start < end_ms:
            # Clip to the window, then convert back to seconds relative to it.
            adjusted_start = max(intv_start - start_ms, 0) / 1000.0
            adjusted_end = min(intv_end - start_ms, span_ms) / 1000.0
            clipped_tier.addInterval(Interval(adjusted_start, adjusted_end, intv.mark))

    tg_segment = TextGrid()
    tg_segment.append(clipped_tier)
    tg_segment.write(os.path.join(output_dir, f"{stem}_{index}.TextGrid"))


def extract_segments(wav_path, tg_path, output_dir, pause_threshold=1000, buffer_time=500,
                     tier_index=1, pause_mark="#"):
    """Split a recording at long pauses into numbered wav/TextGrid pairs.

    Walks one tier of the TextGrid. Every interval labelled ``pause_mark`` that
    lasts longer than ``pause_threshold`` closes a segment running from the end
    of the previous long pause to the start of this one, padded on both sides
    by ``buffer_time`` and clamped to the audio. Whatever follows the last long
    pause is exported as a final segment, so a recording without any long pause
    yields a single segment covering the annotated material.

    Output files are named ``{stem}_{n}.wav`` / ``{stem}_{n}.TextGrid``, where
    ``stem`` is the wav file name without directory or extension and ``n``
    counts from 0.

    Args:
        wav_path: path of the input wav file.
        tg_path: path of the TextGrid annotating that wav file.
        output_dir: directory for the results; created if missing.
        pause_threshold: minimum pause length, in milliseconds, that splits.
        buffer_time: padding, in milliseconds, kept around each segment.
        tier_index: index of the tier to read pauses and labels from.
        pause_mark: interval label that denotes a pause.

    Returns:
        None. Results are written to ``output_dir``.
    """
    audio = AudioSegment.from_wav(wav_path)
    tg = TextGrid.fromFile(tg_path)
    tier = tg[tier_index]

    # Name outputs after the file itself; splitting on the first '.' or
    # stripping one fixed directory prefix breaks on paths such as './a.wav',
    # 'spk.01.wav' or inputs stored outside 'data_segmented/'.
    stem = os.path.splitext(os.path.basename(wav_path))[0]

    total_duration = len(audio)  # pydub reports length in milliseconds
    os.makedirs(output_dir, exist_ok=True)

    last_end = 0  # end (seconds) of the most recent long pause
    segment_index = 0

    for interval in tier:
        if interval.mark != pause_mark or interval.duration() <= pause_threshold / 1000:
            continue

        start_time = max(0, int((last_end * 1000) - buffer_time))
        end_time = min(total_duration, int((interval.minTime * 1000) + buffer_time))
        last_end = interval.maxTime

        # A zero-length window (e.g. buffer_time=0 with a pause at the very
        # start, or annotations running past the audio) has nothing to export.
        if end_time <= start_time:
            continue

        _export_segment(audio, tier, start_time, end_time, stem, segment_index, output_dir)
        segment_index += 1

    # Material after the last long pause is not closed by any pause interval;
    # export it explicitly, but only when it holds something other than pause.
    has_trailing_content = any(
        intv.mark != pause_mark and intv.maxTime > last_end for intv in tier
    )
    if has_trailing_content:
        start_time = max(0, int((last_end * 1000) - buffer_time))
        if start_time < total_duration:
            _export_segment(
                audio, tier, start_time, total_duration, stem, segment_index, output_dir
            )

In [12]:
# Segment every annotated recording found in data_segmented/.
output_directory = "data_out/"
for tg_file in glob.glob("data_segmented/*.textgrid"):
    # The wav file sits next to the TextGrid with the same stem. splitext only
    # strips the final extension, so stems containing dots stay intact.
    wav_file = os.path.splitext(tg_file)[0] + '.wav'
    extract_segments(wav_file, tg_file, output_directory, pause_threshold=1000, buffer_time=250)