#### Standard imports with Pydub

Pydub is a library for slicing, concatenating, and otherwise manipulating audio files.

In [1]:
import os

import pandas as pd
import numpy as np

from pydub import AudioSegment

#### Storing paths

In [2]:
from dotenv import load_dotenv
import os

# Read the project's .env file so the path settings below are available
load_dotenv()

# Each setting is looked up by name; a variable that is not set yields None.

# Source audio locations
train_ad = os.getenv("TRAIN_AD")
train_cn = os.getenv("TRAIN_CN")
test = os.getenv("TEST_AUDIO")

# Segmentation CSV locations
segmentation_ad = os.getenv("SEGMENTATION_AD")
segmentation_cn = os.getenv("SEGMENTATION_CN")
segmentation_test = os.getenv("SEGMENTATION_TEST")

# Output locations for the final segmented audio
final_segmented_train_ad = os.getenv("FINAL_SEGMENTED_TRAIN_AD")
final_segmented_train_cn = os.getenv("FINAL_SEGMENTED_TRAIN_CN")
final_segmented_test = os.getenv("FINAL_SEGMENTED_TEST")

#### Segmenting training audios

Define a function that reads the segmentation CSV file belonging to a given audio file.

In [3]:
def load_segmentation_data(segmentation_file):
    """Read a segmentation CSV into a DataFrame.

    Parameters
    ----------
    segmentation_file : str, path object, or file-like
        The CSV to read; handed to ``pandas.read_csv`` unchanged.

    Returns
    -------
    pandas.DataFrame
        The CSV contents, parsed with pandas' default options.
    """
    segmentation_frame = pd.read_csv(segmentation_file)
    return segmentation_frame

Define a function that extracts only the patient's speech by removing the `INV` segments from the audio.

In [13]:
def extract_patient_speech(audio_file, segmentation_data, output_path):
    """Export a copy of ``audio_file`` with every ``INV`` segment cut out.

    Everything that is not covered by a row whose ``speaker`` is ``'INV'`` is
    kept, in its original order.

    Parameters
    ----------
    audio_file : str
        Path of the recording to process.
    segmentation_data : pandas.DataFrame
        One row per segment, with columns ``speaker``, ``begin`` and ``end``.
        ``begin``/``end`` are converted to ``int`` and used directly as slice
        positions on the pydub segment (pydub slices in milliseconds).
    output_path : str
        Directory that receives the result; the file keeps its base name.

    Returns
    -------
    str
        Path of the exported WAV file.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_file)

    # Only the INV rows decide what gets removed. Sorting them by start time
    # keeps the sweep below correct even if the CSV rows are out of order.
    inv_rows = segmentation_data[segmentation_data['speaker'] == 'INV']
    inv_rows = inv_rows.sort_values('begin')

    # Start from a truly empty segment. AudioSegment.silent() is NOT empty:
    # it defaults to 1000 ms of silence, which would end up at the front of
    # every exported file.
    final_audio_segment = AudioSegment.empty()

    # End position of the INV audio removed so far
    prev_end = 0

    for begin, end in zip(inv_rows['begin'], inv_rows['end']):
        start_time = int(begin)
        end_time = int(end)

        # Keep the audio between the previous INV segment and this one;
        # back-to-back or overlapping INV segments leave nothing to keep.
        if start_time > prev_end:
            final_audio_segment += audio[prev_end:start_time]

        # max() stops a nested/overlapping INV segment from moving the
        # pointer backwards and re-admitting audio that was already removed.
        prev_end = max(prev_end, end_time)

    # Append the audio after the last INV segment (or the whole recording
    # when the segmentation has no INV segments at all)
    final_audio_segment += audio[prev_end:]

    # Save the final audio segment to a new file
    output_file = os.path.join(output_path, os.path.basename(audio_file))
    final_audio_segment.export(output_file, format="wav")

    return output_file

In [14]:
output_directory = final_segmented_train_ad

# sorted() gives a reproducible processing order; os.listdir() order is arbitrary
for audio_file in sorted(os.listdir(train_ad)):
    # os.path.join works whether or not the configured directory ends in a separator
    audio_file_path = os.path.join(train_ad, audio_file)

    # The segmentation CSV shares the audio file's base name;
    # splitext strips the extension regardless of its length
    segmentation_path = os.path.join(segmentation_ad, os.path.splitext(audio_file)[0] + ".csv")

    segmentation_data = load_segmentation_data(segmentation_path)
    output_file_path = extract_patient_speech(audio_file_path, segmentation_data, output_directory)
    print(f"Final segmented audio file saved at: {output_file_path}")

Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/ad/adrso024.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/ad/adrso025.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/ad/adrso027.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/ad/adrso028.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/ad/adrso031.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/ad/adrso032.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/ad/adrso033.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/ad/adrso035.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/ad/adrso

In [15]:
output_directory = final_segmented_train_cn

# sorted() gives a reproducible processing order; os.listdir() order is arbitrary
for audio_file in sorted(os.listdir(train_cn)):
    # os.path.join works whether or not the configured directory ends in a separator
    audio_file_path = os.path.join(train_cn, audio_file)

    # The segmentation CSV shares the audio file's base name;
    # splitext strips the extension regardless of its length
    segmentation_path = os.path.join(segmentation_cn, os.path.splitext(audio_file)[0] + ".csv")

    segmentation_data = load_segmentation_data(segmentation_path)
    output_file_path = extract_patient_speech(audio_file_path, segmentation_data, output_directory)
    print(f"Final segmented audio file saved at: {output_file_path}")

Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/cn/adrso002.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/cn/adrso003.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/cn/adrso005.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/cn/adrso007.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/cn/adrso008.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/cn/adrso010.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/cn/adrso012.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/cn/adrso014.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-train/train/final_segmented_audio/cn/adrso

In [16]:
output_directory = final_segmented_test

# sorted() gives a reproducible processing order; os.listdir() order is arbitrary
for audio_file in sorted(os.listdir(test)):
    # os.path.join works whether or not the configured directory ends in a separator
    audio_file_path = os.path.join(test, audio_file)

    # The segmentation CSV shares the audio file's base name;
    # splitext strips the extension regardless of its length
    segmentation_path = os.path.join(segmentation_test, os.path.splitext(audio_file)[0] + ".csv")

    segmentation_data = load_segmentation_data(segmentation_path)
    output_file_path = extract_patient_speech(audio_file_path, segmentation_data, output_directory)
    print(f"Final segmented audio file saved at: {output_file_path}")

Final segmented audio file saved at: Data/ADReSSo21-diagnosis-test/test-dist/final_segmented_audio/adrsdt1.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-test/test-dist/final_segmented_audio/adrsdt10.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-test/test-dist/final_segmented_audio/adrsdt11.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-test/test-dist/final_segmented_audio/adrsdt12.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-test/test-dist/final_segmented_audio/adrsdt13.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-test/test-dist/final_segmented_audio/adrsdt14.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-test/test-dist/final_segmented_audio/adrsdt15.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-test/test-dist/final_segmented_audio/adrsdt16.wav
Final segmented audio file saved at: Data/ADReSSo21-diagnosis-test/test-dist/final_segmented_audio/adrsdt