## 處理 Knowledge Dataset 的資料

In [None]:
import pandas as pd

# Convert every worksheet of the knowledge workbook into its own CSV file.
excel_file = "dataset/knowledgeDataset.xlsx"
csv_file = "dataset/knowledgeDataset.csv"  # reassigned for each sheet inside the loop below

# Open the workbook a single time and reuse the handle for every sheet; the
# context manager closes the underlying file once all sheets are written.
with pd.ExcelFile(excel_file, engine="openpyxl") as workbook:
    # Display the sheet names in the Excel file
    excel_sheets = workbook.sheet_names
    print(f"Excel 文件中的工作表: {excel_sheets}")

    for sheet in excel_sheets:
        print(f"正在處理工作表: {sheet}")
        # The "Training wav" sheet is written as transcripts.csv; every other
        # sheet is written to a CSV named after the sheet itself.
        if sheet != "Training wav":
            csv_file = f"dataset/{sheet}.csv"
        else:
            csv_file = "dataset/transcripts.csv"
        # Reading from the open ExcelFile avoids re-parsing the .xlsx per sheet.
        df = pd.read_excel(workbook, sheet_name=sheet)
        df.to_csv(csv_file, index=False, encoding="utf-8")
        print(f"Excel 轉換為 CSV 完成，存為 {csv_file}")

## 處理 Training wav

In [1]:
from pyannote.audio import Pipeline
from pydub import AudioSegment
from dotenv import load_dotenv
import os

# Pull variables from a local .env file into the process environment so that
# os.getenv("HUGGINGFACE_TOKEN") below can see the token.
load_dotenv()

DATASET_ADUIO_NAME = "dataset/training.wav"

# Load the recording for slicing, then run the pretrained diarization
# pipeline on the same file.
audio = AudioSegment.from_wav(DATASET_ADUIO_NAME)
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.getenv("HUGGINGFACE_TOKEN"),
)
diarization = pipeline(DATASET_ADUIO_NAME)

# One (speaker, start_ms, end_ms) tuple per diarized turn, in iteration order.
times = []

# 1-based running index embedded in each exported file name.
count = 1

os.makedirs("tmp", exist_ok=True)
for turn, _, speaker in diarization.itertracks(yield_label=True):
    # Turn boundaries are multiplied by 1000 and truncated to int so they can
    # be used as millisecond slice bounds on the pydub AudioSegment.
    start_ms, end_ms = (int(bound * 1000) for bound in (turn.start, turn.end))
    segment = audio[start_ms:end_ms]
    filename = f"tmp/s_{count}_{speaker}_{start_ms}_{end_ms}.wav"
    count += 1
    segment.export(filename, format="wav")
    times.append((speaker, start_ms, end_ms))

  from .autonotebook import tqdm as notebook_tqdm
INFO:speechbrain.utils.quirks:Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32]
INFO:speechbrain.utils.quirks:Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): []
  std = sequences.std(dim=-1, correction=1)


In [None]:
import os
from pydub import AudioSegment

# Merge consecutive per-turn clips from the same speaker into one clip each,
# export the merged clips, and print the resulting time sequence.
audio_dir = "./tmp"

# File names are expected to split on "_" into six fields; field 1 is the
# numeric running index, which defines the processing order.
file_list = [f for f in os.listdir(audio_dir) if f.endswith(".wav")]
file_list.sort(key=lambda x: int(x.split("_")[1]))

# Keys are the speaker label with its "_" removed (the two name fields are
# re-joined below); values are the names used in output file names.
speaker_map = {
    "SPEAKER00": "Martin",
    "SPEAKER01": "Ivan",
    "SPEAKER02": "Daisy",
    "SPEAKER03": "Lisa",
}

merged_segments = []
current_speaker = None  # raw label of the run that is currently being extended

for file_name in file_list:
    _, _, speaker, idx, start_ms, end_ms = file_name.replace(".wav", "").split("_")
    speaker += idx
    start_ms, end_ms = int(start_ms), int(end_ms)
    clip = AudioSegment.from_file(os.path.join(audio_dir, file_name))

    if speaker == current_speaker:
        # Same speaker as the previous clip: extend the open run in place.
        merged_segments[-1]["end_ms"] = end_ms
        merged_segments[-1]["audio"] += clip
    else:
        # New speaker: open a new run. The name is resolved here, in one place,
        # so every entry (including the last one) carries the mapped name.
        # Labels missing from speaker_map fall back to the raw label instead
        # of raising KeyError.
        merged_segments.append({
            "speaker": speaker_map.get(speaker, speaker),
            "start_ms": start_ms,
            "end_ms": end_ms,
            "audio": clip,
        })
        current_speaker = speaker

time_sequence = []
output_dir = "dataset/train_split_audio"
os.makedirs(output_dir, exist_ok=True)

# Export each merged run and record where it was written.
for i, segment in enumerate(merged_segments, 1):
    output_file = os.path.join(output_dir, f"{i}_{segment['speaker']}_{segment['start_ms']}_{segment['end_ms']}.wav")
    segment["audio"].export(output_file, format="wav")
    time_sequence.append({
        "speaker": segment["speaker"],
        "start": segment["start_ms"],
        "end": segment["end_ms"],
        "file": output_file
    })

print("合併後的時間序列：")
for seq in time_sequence:
    print(f"Speaker: {seq['speaker']}, Start: {seq['start']} ms, End: {seq['end']} ms, File: {seq['file']}")

合併後的時間序列：
Speaker: Daisy, Start: 891 ms, End: 11877 ms, File: merged_audio/1_Daisy_891_11877.wav
Speaker: Martin, Start: 13412 ms, End: 20854 ms, File: merged_audio/2_Martin_13412_20854.wav
Speaker: Lisa, Start: 23369 ms, End: 37814 ms, File: merged_audio/3_Lisa_23369_37814.wav
Speaker: Daisy, Start: 39788 ms, End: 48141 ms, File: merged_audio/4_Daisy_39788_48141.wav
Speaker: Ivan, Start: 49626 ms, End: 51702 ms, File: merged_audio/5_Ivan_49626_51702.wav
Speaker: Daisy, Start: 53187 ms, End: 64628 ms, File: merged_audio/6_Daisy_53187_64628.wav
Speaker: Ivan, Start: 67294 ms, End: 80423 ms, File: merged_audio/7_Ivan_67294_80423.wav
Speaker: Daisy, Start: 82245 ms, End: 91240 ms, File: merged_audio/8_Daisy_82245_91240.wav
Speaker: Martin, Start: 92742 ms, End: 95847 ms, File: merged_audio/9_Martin_92742_95847.wav
Speaker: Daisy, Start: 97214 ms, End: 99947 ms, File: merged_audio/10_Daisy_97214_99947.wav
Speaker: Martin, Start: 100234 ms, End: 100605 ms, File: merged_audio/11_Martin_10023