In [4]:
import pandas as pd
import wespeaker
from pathlib import Path

In [5]:
# Model language handed to wespeaker.load_model further down.
LANG = "english"
# Root of the DailyTalk dataset, relative to the notebook's working directory.
DAILYTALK_DIR = Path("..") / "data" / "dailytalk"

In [6]:
# Load the wespeaker model selected by LANG; the same object is reused below
# for speaker diarization (model.diarize).
# NOTE(review): this presumably fetches pretrained weights on first use — confirm.
model = wespeaker.load_model(LANG)



In [18]:
import wave
import audioop

def merge_wav_files(file_list, output_file):
    """
    Merges multiple .wav files into a single .wav file, converting them to match the
    parameters of the first file if they differ.

    The channel count, sample width and frame rate of the first file define the
    output format. Every later file whose format differs is converted before its
    frames are appended: sample width first, then mono <-> stereo, then frame rate.

    Parameters:
    - file_list: List of strings (os.PathLike objects are accepted too), paths to
      .wav files to be merged.
    - output_file: String (or os.PathLike), path where the merged output .wav file
      will be saved.

    Raises:
    - ValueError: if file_list is empty, or if a file's channel count cannot be
      converted to the target (only mono <-> stereo is supported).
    """
    import os  # local import so the function does not rely on other cells

    if not file_list:
        raise ValueError("The file list is empty")

    def _as_wave_arg(target):
        # wave.open treats anything that is not a str as a file object, so
        # path-like objects must be turned into plain strings first.
        return os.fspath(target) if isinstance(target, os.PathLike) else target

    sources = [_as_wave_arg(entry) for entry in file_list]

    # Open the first file to get the target parameters
    with wave.open(sources[0], 'rb') as wave_read:
        params = wave_read.getparams()

    # Create the output file
    with wave.open(_as_wave_arg(output_file), 'wb') as wave_write:
        wave_write.setparams(params)

        # Read data from each file and append to the output file
        for source in sources:
            with wave.open(source, 'rb') as wave_read:
                src_params = wave_read.getparams()
                data = wave_read.readframes(src_params.nframes)

            # Compare only the fields that describe the sample layout; the full
            # params tuple also contains nframes, which differs for almost every file.
            if _pcm_format(src_params) != _pcm_format(params):
                data = _convert_frames(data, src_params, params)

            wave_write.writeframes(data)

    print(f"All files have been merged into {output_file}")


def _pcm_format(params):
    """Return the (nchannels, sampwidth, framerate) triple of a wave params tuple."""
    return (params.nchannels, params.sampwidth, params.framerate)


def _convert_frames(data, src, dst):
    """Convert raw PCM frames from the format described by src to that of dst."""
    # Imported lazily: merging files that already share a format never reaches
    # this helper, so it keeps working where audioop is unavailable
    # (the module is deprecated since Python 3.11 and removed in 3.13).
    import audioop

    width = src.sampwidth

    # 8-bit WAV samples are unsigned while audioop works on signed samples;
    # adding 128 with wrap-around flips between the two representations.
    if width == 1:
        data = audioop.bias(data, 1, 128)

    # Bring the samples to the target width first so that every later step
    # operates on the output sample size.
    if width != dst.sampwidth:
        data = audioop.lin2lin(data, width, dst.sampwidth)
        width = dst.sampwidth

    if (src.nchannels, dst.nchannels) == (2, 1):
        # Average the two channels; summing them at full weight would clip
        # and make the converted file louder than the others.
        data = audioop.tomono(data, width, 0.5, 0.5)
    elif (src.nchannels, dst.nchannels) == (1, 2):
        data = audioop.tostereo(data, width, 1, 1)
    elif src.nchannels != dst.nchannels:
        raise ValueError(
            f"Cannot convert {src.nchannels} channel(s) to {dst.nchannels}: "
            "only mono <-> stereo conversion is supported"
        )

    # Resample if sample rate differs
    if src.framerate != dst.framerate:
        data, _ = audioop.ratecv(data, width, dst.nchannels,
                                 src.framerate, dst.framerate, None)

    # Back to the unsigned representation expected in 8-bit WAV files.
    if width == 1:
        data = audioop.bias(data, 1, 128)

    return data

# Example usage:
# merge_wav_files(['file1.wav', 'file2.wav', 'file3.wav'], 'output_merged.wav')


In [19]:
import re

def natural_sort_key(path):
    """
    Obtain a sort key that is a mix of integers and non-integer substrings.

    The string form of the path is split on runs of digits, so "10_1.wav"
    sorts after "2_1.wav" instead of before it.

    Parameters:
    - path: Path (or string), the file path to be sorted naturally.

    Returns:
    - A list alternating between strings and integers, starting with a
      (possibly empty) string, suitable for sorting.
    """
    parts = re.split(r'(\d+)', str(path))
    # With a single capturing group, re.split puts the text between matches at
    # even positions and the matched digit runs at odd positions. Using the
    # position rather than str.isdigit() matters: isdigit() is also true for
    # characters such as superscript digits that \d does not match and that
    # int() cannot parse.
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]

def sort_paths(paths):
    """
    Sorts a list of paths using natural sort order, keeping only .wav files.

    Parameters:
    - paths: Iterable of paths, given either as Path objects or as strings.

    Returns:
    - List of Path objects whose suffix is '.wav', in natural sort order.
    """
    # Convert to Path objects *before* filtering: plain strings have no
    # .suffix attribute, so the check must run on the converted value.
    wav_paths = [candidate for candidate in map(Path, paths) if candidate.suffix == '.wav']

    # Sort paths using the natural sort key
    return sorted(wav_paths, key=natural_sort_key)

In [20]:
# Select one dialogue and collect its .wav files in natural (numeric-aware) order.
dialog_id = 0
dialog_dir = DAILYTALK_DIR.joinpath("data", str(dialog_id))
dialog_wav_fpaths = sort_paths([*dialog_dir.glob("*.wav")])
dialog_wav_fpaths

[PosixPath('../data/dailytalk/data/0/0_1_d0.wav'),
 PosixPath('../data/dailytalk/data/0/1_0_d0.wav'),
 PosixPath('../data/dailytalk/data/0/2_1_d0.wav'),
 PosixPath('../data/dailytalk/data/0/3_0_d0.wav'),
 PosixPath('../data/dailytalk/data/0/4_1_d0.wav'),
 PosixPath('../data/dailytalk/data/0/5_0_d0.wav'),
 PosixPath('../data/dailytalk/data/0/6_1_d0.wav'),
 PosixPath('../data/dailytalk/data/0/7_0_d0.wav'),
 PosixPath('../data/dailytalk/data/0/8_1_d0.wav'),
 PosixPath('../data/dailytalk/data/0/9_0_d0.wav'),
 PosixPath('../data/dailytalk/data/0/10_1_d0.wav'),
 PosixPath('../data/dailytalk/data/0/11_0_d0.wav')]

In [21]:
# Concatenate the dialogue's utterances into one file; paths are passed as plain strings.
merge_wav_files([str(fpath) for fpath in dialog_wav_fpaths], f'{dialog_id}_merged.wav')

All files have been merged into 0_merged.wav


In [22]:
# Run speaker diarization on the merged dialogue recording.
diar_result = model.diarize(f'{dialog_id}_merged.wav')
# Judging by the output below, the result is a flat list of
# (utterance id, start, end, speaker label) tuples.
# NOTE(review): start/end are presumably seconds into the merged file — confirm
# against the wespeaker documentation.
diar_result

100%|██████████| 1/1 [00:19<00:00, 19.37s/it]


[('unk', 0.194, 1.374, 0),
 ('unk', 1.698, 3.068, 1),
 ('unk', 3.714, 4.314, 1),
 ('unk', 4.546, 5.146, 0),
 ('unk', 5.506, 8.636, 1),
 ('unk', 9.25, 10.78, 0),
 ('unk', 11.074, 12.984, 1),
 ('unk', 13.506, 15.166, 0),
 ('unk', 15.362, 16.542, 1),
 ('unk', 16.994, 17.494, 0),
 ('unk', 17.858, 18.968, 1),
 ('unk', 19.362, 21.432, 0),
 ('unk', 21.698, 24.378, 1)]