In [None]:
import os
import glob
import numpy as np
import soundfile as sf

In [None]:
# --- Windowing configuration: edit these values to tune a run ---

# Folder that holds the per-question WAV recordings to be windowed.
SRC_FOLDER = '../data/cropped_data/cropped_interview_data/per_question'

# Duration of every window, in milliseconds.
WINDOW_SIZE_MS = 2000

# Ratio applied to the window length (0.25 of 2000 ms = 500 ms).
OVERLAP = 0.25

# Output folder; its name records the window size and OVERLAP * window size,
# both expressed in milliseconds (e.g. "windowed_2000_500").
DEST_FOLDER = f'../data/cropped_data/cropped_interview_data/per_question/windowed_{WINDOW_SIZE_MS}_{int(OVERLAP*WINDOW_SIZE_MS)}'

In [None]:
# Collect every WAV file that sits directly inside the source folder
# (the pattern is not recursive, so sub-folders are ignored).
wav_files = glob.glob(os.path.join(SRC_FOLDER, '*.wav'))

# Ensure the output folder is present; an already-existing folder is fine.
os.makedirs(DEST_FOLDER, exist_ok=True)

In [None]:
def _split_into_windows(audio, window_length, hop_length):
    """Slice ``audio`` into consecutive full-length windows.

    Args:
        audio: Sequence of samples, sliced along its first axis.
        window_length: Number of samples in every returned window.
        hop_length: Number of samples between the starts of two
            consecutive windows.

    Returns:
        A list of slices of ``audio``, each exactly ``window_length`` samples
        long. A trailing remainder shorter than one window is dropped, and
        the list is empty when ``audio`` is shorter than one window.

    Raises:
        ValueError: If ``window_length`` or ``hop_length`` is not positive
            (a non-positive hop would never advance through the signal).
    """
    if window_length <= 0:
        raise ValueError(f"window_length must be positive, got {window_length}")
    if hop_length <= 0:
        raise ValueError(f"hop_length must be positive, got {hop_length}")

    # Last index at which a complete window still fits; negative when the
    # signal is shorter than one window, which makes the range below empty.
    last_start = len(audio) - window_length
    return [
        audio[start:start + window_length]
        for start in range(0, last_start + 1, hop_length)
    ]


# OVERLAP is the fraction of a window shared with the next one, so it must
# leave a strictly positive step between windows.
if not 0 <= OVERLAP < 1:
    raise ValueError(f"OVERLAP must satisfy 0 <= OVERLAP < 1, got {OVERLAP}")

# NOTE: segment files written by an earlier run are not removed, so stale
# "<name>_<i>.wav" files may remain in DEST_FOLDER if fewer segments are
# produced this time.
for wav_file in wav_files:
    # Read the audio file
    audio, sample_rate = sf.read(wav_file)

    # Window and overlap sizes in samples, derived from this file's sample rate
    window_length = int(WINDOW_SIZE_MS / 1000 * sample_rate)
    overlap_length = int(window_length * OVERLAP)

    # Consecutive windows share `overlap_length` samples, so each window
    # starts `window_length - overlap_length` samples after the previous one.
    hop_length = window_length - overlap_length

    # Generate the windowed segments
    segments = _split_into_windows(audio, window_length, hop_length)

    # Save the windowed segments as individual files, numbered from 1
    base_name = os.path.splitext(os.path.basename(wav_file))[0]
    for i, segment in enumerate(segments, start=1):
        new_file_name = f"{base_name}_{i}.wav"
        new_file_path = os.path.join(DEST_FOLDER, new_file_name)
        sf.write(new_file_path, segment, sample_rate)
    print(f'Windowing finished for {base_name}')

print("Windowing complete. Windowed segments saved in the destination folder.")