In [4]:
import csv
import os
import random
import subprocess
from pathlib import Path
from piper import PiperVoice
import wave

In [15]:
# Directory that holds both the Piper voice models and the input CSV splits.
INPUT_DIR = r"C:/Users/Admin/Downloads/Posts.csv"

# Piper ONNX voice models, all stored next to the CSV splits in INPUT_DIR.
MODEL_GOSIA = os.path.join(INPUT_DIR, "pl_PL-gosia-medium.onnx")
MODEL_DARK = os.path.join(INPUT_DIR, "pl_PL-darkman-medium.onnx")
MODEL_MC = os.path.join(INPUT_DIR, "pl_PL-mc_speech-medium.onnx")

# Pipe-delimited utterance lists, keyed by split name.
CSV_FILES = {
    "train": os.path.join(INPUT_DIR, "train.csv"),
    "dev":   os.path.join(INPUT_DIR, "dev.csv"),
    "test":  os.path.join(INPUT_DIR, "test.csv"),
}

# Root output directory: one sub-directory per split plus the manifests.
OUT_BASE = r"C:/Users/Admin/Downloads/piper_out/Posts.csv"
os.makedirs(OUT_BASE, exist_ok=True)

# Augmentation settings: per-utterance tempo factor is drawn uniformly from
# [SPEED_MIN, SPEED_MAX]; the echo filter is applied with probability REVERB_PROB.
SPEED_MIN = 0.90
SPEED_MAX = 1.12
REVERB_PROB = 0.18

# Seed the global RNG so voice choice, speed and reverb decisions are
# reproducible on a fresh "Restart Kernel -> Run All".
SEED = 42
random.seed(SEED)

In [None]:
def ffmpeg_change_speed_and_reverb(in_wav, out_wav, speed=1.0, add_reverb=False):
    """Run ffmpeg on ``in_wav`` and write ``out_wav``, optionally altering tempo and adding an echo.

    Parameters
    ----------
    in_wav
        Input file handed to ffmpeg via ``-i``.
    out_wav
        Output file; ``-y`` makes ffmpeg overwrite it without prompting.
    speed
        Factor for the ``atempo`` filter. The filter is omitted when the
        factor is within 0.001 of 1.0.
    add_reverb
        When truthy, the fixed ``aecho=0.8:0.9:1000:0.3`` filter is appended.

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status (``check=True``).
    """
    tempo_part = [f"atempo={speed:.3f}"] if abs(speed - 1.0) > 0.001 else []
    echo_part = ["aecho=0.8:0.9:1000:0.3"] if add_reverb else []
    # Tempo first, echo second; an empty chain means "no -af option at all".
    chain = ",".join(tempo_part + echo_part)

    command = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-i", in_wav]
    if chain:
        command.extend(["-af", chain])
    command.append(out_wav)
    subprocess.run(command, check=True)

In [20]:
def synthesize_with_piper(voice_obj, text, tmp_path):
    """Synthesize ``text`` into a WAV file at ``tmp_path``.

    Any existing file at ``tmp_path`` is deleted first. The file is then
    opened for writing with the ``wave`` module and handed to
    ``voice_obj.synthesize_wav(text, wav_file)``, which is expected to set the
    audio parameters and write the frames.
    """
    stale_file_present = os.path.exists(tmp_path)
    if stale_file_present:
        os.remove(tmp_path)

    wav_writer = wave.open(tmp_path, "wb")
    try:
        voice_obj.synthesize_wav(text, wav_writer)
    finally:
        # Closing finalizes the WAV header, mirroring the context-manager exit.
        wav_writer.close()

In [None]:
def process_split(split_name, csv_path, out_split_dir,
                  voice_gosia, voice_dark, voice_mc):
    """Synthesize every utterance of one split and write its manifest.

    Reads the pipe-delimited CSV at ``csv_path`` (columns ``utt_id``, ``text``
    and optionally ``speaker``). For each row it synthesizes the text to a
    temporary WAV, then applies a random tempo change (uniform in
    ``[SPEED_MIN, SPEED_MAX]``) and, with probability ``REVERB_PROB``, the
    echo filter. The result lands in ``out_split_dir/<utt_id>.wav``.

    Parameters
    ----------
    split_name
        Name used for the manifest file and the progress message.
    csv_path
        Input CSV path.
    out_split_dir
        Directory receiving the WAV files; created if missing.
    voice_gosia
        Voice used for every speaker value other than ``"male"``.
    voice_dark, voice_mc
        Voices picked at random (equal chance) for ``"male"`` rows.

    Returns
    -------
    str
        Path of ``<OUT_BASE>/<split_name>_manifest.csv``, a pipe-delimited
        file with columns ``utt_id``, ``wav_path``, ``speaker``, ``text``.

    Raises
    ------
    KeyError
        If a row lacks the ``utt_id`` or ``text`` column.
    """
    os.makedirs(out_split_dir, exist_ok=True)
    out_rows = []
    # newline="" lets the csv module do its own newline handling, as its
    # documentation requires for file objects passed to a reader.
    with open(csv_path, encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f, delimiter="|")
        for row in reader:
            utt_id = row["utt_id"]
            # DictReader fills columns missing from a short row with None, so
            # a plain .get(..., "male") default is not enough to avoid
            # calling .lower() on None.
            raw_speaker = row.get("speaker")
            speaker = "male" if raw_speaker is None else raw_speaker.lower()
            text = row["text"]

            if speaker == "male":
                voice_obj = random.choice([voice_dark, voice_mc])
            else:
                voice_obj = voice_gosia

            tmp_wav = os.path.join(out_split_dir, f"{utt_id}.tmp.wav")
            final_wav = os.path.join(out_split_dir, f"{utt_id}.wav")

            try:
                synthesize_with_piper(voice_obj, text, tmp_wav)

                speed = random.uniform(SPEED_MIN, SPEED_MAX)
                add_reverb = (random.random() < REVERB_PROB)

                if abs(speed - 1.0) < 0.001 and not add_reverb:
                    # Nothing to apply: promote the raw synthesis as-is.
                    os.replace(tmp_wav, final_wav)
                else:
                    ffmpeg_change_speed_and_reverb(
                        tmp_wav, final_wav, speed=speed, add_reverb=add_reverb
                    )
            finally:
                # Never leave a stray *.tmp.wav behind, including when
                # synthesis or ffmpeg raised.
                if os.path.exists(tmp_wav):
                    os.remove(tmp_wav)

            out_rows.append({
                "utt_id": utt_id,
                "wav_path": final_wav,
                "speaker": speaker,
                "text": text
            })

    manifest_path = os.path.join(OUT_BASE, f"{split_name}_manifest.csv")
    with open(manifest_path, "w", encoding="utf-8", newline="") as outf:
        writer = csv.DictWriter(outf, fieldnames=["utt_id","wav_path","speaker","text"], delimiter="|")
        writer.writeheader()
        writer.writerows(out_rows)
    print(f"[{split_name}] Zapisano {len(out_rows)} plików. Manifest: {manifest_path}")
    return manifest_path



In [22]:
# Load each Piper voice once up front; the same three objects are reused for every split.
voice_gosia, voice_dark, voice_mc = (
    PiperVoice.load(model_path)
    for model_path in (MODEL_GOSIA, MODEL_DARK, MODEL_MC)
)

# Synthesize every configured split into its own sub-directory of OUT_BASE.
for split in CSV_FILES:
    csvpath = CSV_FILES[split]
    out_dir = os.path.join(OUT_BASE, split)
    process_split(split, csvpath, out_dir, voice_gosia, voice_dark, voice_mc)

[train] Zapisano 1168 plików. Manifest: C:/Users/Admin/Downloads/piper_out/Posts.csv\train_manifest.csv
[dev] Zapisano 146 plików. Manifest: C:/Users/Admin/Downloads/piper_out/Posts.csv\dev_manifest.csv
[test] Zapisano 146 plików. Manifest: C:/Users/Admin/Downloads/piper_out/Posts.csv\test_manifest.csv
