In [None]:
import logging
import os
from datetime import datetime

# Name of this notebook; used for the log folder and for the output data folder
current_file_name = "6_2_Remove_Pauses"

# One log file per run, named after the start timestamp
dt_string = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = f"logs/{current_file_name}/{dt_string}.log"

# basicConfig opens the log file right away and raises FileNotFoundError
# when its parent folder is missing, so create the folder first
os.makedirs(os.path.dirname(log_file), exist_ok=True)
logging.basicConfig(level=logging.INFO, filename=log_file,filemode="w", format="%(asctime)s %(levelname)s %(message)s")

# https://blog.sentry.io/logging-in-python-a-developers-guide/

In [None]:
from pydub import AudioSegment
from pydub.silence import detect_silence
import pandas as pd
import os

In [None]:
from helpers.pages import *
from helpers.constants import *
from helpers.utils import *

In [None]:
# Show up to 500 columns when a DataFrame is displayed
pd.options.display.max_columns = 500

In [None]:
def get_dict_of_paths(root_path):
    """Map each folder under ``root_path`` to the ``.wav`` files it directly contains.

    The tree is expected to hold one sub-folder per respondent, each with that
    respondent's recordings.

    Args:
        root_path: Directory that is walked recursively.

    Returns:
        dict: ``{folder_name: [wav_path, ...]}``. ``folder_name`` is the last
        component of the folder's path; every ``wav_path`` is the folder path
        (as yielded by ``os.walk``) joined with the file name. The paths of a
        folder are sorted, and folders without any ``.wav`` file are omitted.
        An empty dict is returned when ``root_path`` does not exist or holds
        no ``.wav`` files.

    Note:
        Folders that share a name in different branches of the tree share a
        key; the one visited last wins.
    """
    dict_of_paths = {}
    for root, _dirs, files in os.walk(root_path):
        # Keep only the audio files, as full paths, in a deterministic order
        wav_files = sorted(os.path.join(root, f) for f in files if f.endswith('.wav'))
        if not wav_files:
            continue

        # basename/normpath instead of splitting on "\\": the key is the
        # folder name on every OS, not only with Windows separators
        folder_name = os.path.basename(os.path.normpath(root))
        dict_of_paths[folder_name] = wav_files
    return dict_of_paths

In [None]:
# Root folders of the extracted recordings. Built with os.path.join so the
# separator matches the host OS (identical to the old literals on Windows).
extracted_recordings_fg_path = os.path.join("data", "6_Elaborations_Extraction", "FG")
extracted_recordings_h_path = os.path.join("data", "6_Elaborations_Extraction", "H")

In [None]:
# Collect the .wav recordings of each folder, once per root (FG first, then H)
fg_paths, h_paths = (
    get_dict_of_paths(recordings_root)
    for recordings_root in (extracted_recordings_fg_path, extracted_recordings_h_path)
)

In [None]:
def _iter_non_silent_chunks(audio, silent_ranges, padding):
    """Yield ``(segment, start, end)`` for each stretch of ``audio`` outside the padded silent ranges."""
    total_len = len(audio)
    if not silent_ranges:
        # Nothing to cut: the whole recording is the only candidate chunk
        yield audio, 0, total_len
        return

    previous_end = 0
    for silence_start, silence_end in silent_ranges:
        # Widen the silence by `padding` on both sides, clamped to the recording
        silence_start = max(0, silence_start - padding)
        silence_end = min(total_len, silence_end + padding)
        yield audio[previous_end:silence_start], previous_end, silence_start
        previous_end = silence_end

    # Whatever follows the last silent range
    yield audio[previous_end:], previous_end, total_len


@timer
def remove_silence_and_save_chunks(input_wav, output_dir, silence_thresh=-50, min_silence_len=2000, padding=100, min_chunk_len=500):
    """Split a WAV recording at its silent ranges and save the parts in between.

    Silent ranges are located with pydub's ``detect_silence``. Each range is
    widened by ``padding`` on both sides, and the audio between consecutive
    ranges is exported to ``output_dir`` as ``<file_name>_chunk_<i>.wav``,
    where ``<file_name>`` is the input's base name without extension. A
    ``<file_name>_removed_silence_timestamps.csv`` listing the exported chunks
    is always written, including when no silence is detected (that case used
    to return before the CSV was saved).

    Args:
        input_wav: Path of the WAV file to split.
        output_dir: Folder for the chunks and the CSV; created if missing.
        silence_thresh: Passed to ``detect_silence`` as ``silence_thresh``.
        min_silence_len: Passed to ``detect_silence`` as ``min_silence_len``.
        padding: Amount by which every detected silent range is widened on
            each side before cutting (same unit as the detected positions).
        min_chunk_len: Chunks whose ``len()`` is below this are not saved.

    Returns:
        pandas.DataFrame: Columns ``chunk_name`` (path of the exported file),
        ``start`` and ``end`` (position of the chunk in the original
        recording), one row per exported chunk. Empty when nothing qualified.

    NOTE(review): because ``padding`` widens the *silent* ranges, every chunk
    is shortened by up to ``padding`` at each edge that touches a silence.
    Confirm this is intended rather than keeping ``padding`` of context.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Load the audio file
    audio = AudioSegment.from_wav(input_wav)

    # Detect silent ranges [(start, end), (start, end), ...]
    silent_ranges = detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)

    # Get file name without extension
    file_name = os.path.splitext(os.path.basename(input_wav))[0]

    # Collect the rows first and build the DataFrame once at the end
    records = []
    for chunk, start, end in _iter_non_silent_chunks(audio, silent_ranges, padding):
        if len(chunk) < min_chunk_len:
            continue
        # Chunks are numbered in export order, so skipped ones leave no gaps
        chunk_name = os.path.join(output_dir, f"{file_name}_chunk_{len(records)}.wav")
        chunk.export(chunk_name, format="wav")
        records.append((chunk_name, start, end))

    df = pd.DataFrame(records, columns=["chunk_name", "start", "end"])

    # Save the DataFrame with timestamps
    df.to_csv(os.path.join(output_dir, f"{file_name}_removed_silence_timestamps.csv"), index=False)

    return df

In [None]:
@timer
def process_recordings(dict_of_paths):
    """Remove the pauses from every recording listed in ``dict_of_paths``.

    Each recording gets its own output folder: the recording's path without
    its file extension, with ``6_Elaborations_Extraction`` replaced by
    ``current_file_name``. The splitting itself is delegated to
    ``remove_silence_and_save_chunks``; progress is printed and logged per
    recording.

    Args:
        dict_of_paths: Mapping of folder name to a list of audio file paths,
            as returned by ``get_dict_of_paths``. Only the path lists are used.
    """
    for paths in dict_of_paths.values():
        for path in paths:
            print(f"Processing {path}")
            # splitext drops the extension whatever its length, instead of
            # assuming a 4-character ".wav" suffix
            path_without_ext = os.path.splitext(path)[0]
            output_dir = path_without_ext.replace("6_Elaborations_Extraction", current_file_name)
            os.makedirs(output_dir, exist_ok=True)
            remove_silence_and_save_chunks(path, output_dir)
            logging.info(f"Removed silence from {path} and saved the chunks in {output_dir}")

In [None]:
# Split every recording found under the "FG" root into non-silent chunks;
# the chunk files go to the matching folder under "6_2_Remove_Pauses"
process_recordings(fg_paths)

In [None]:
# Same processing for every recording found under the "H" root
process_recordings(h_paths)