In [None]:
%pip install torch torchaudio silero

In [None]:
import os
import torch
import torchaudio
import pandas as pd
from silero import silero_tts

# ===== SETTINGS =====
CSV_PATH = "./data/khakas_merged.csv"     # final corpus with a 'text' column
OUT_DIR = "./data/khakas/audio"
os.makedirs(OUT_DIR, exist_ok=True)

# Device-type strings are case-sensitive in torch: "CPU" is rejected with a
# RuntimeError, so the fallback must be spelled "cpu".
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)
language = 'kjh'                # or 'ru' if 'kjh' is not accepted
model_id = 'v5_cis_base_nostress'
speaker = 'kjh_karina'          # or 'kjh_sibday'
sample_rate = 24000             # used both for synthesis and for writing the wav files

# ===== MODEL LOADING =====
# The loader returns a (model, example text) pair. Its `speaker` keyword
# receives the model id here; the actual voice is passed later to apply_tts.
loaded = silero_tts(language=language, speaker=model_id)
model, example_text = loaded
model.to(device)

# ===== CORPUS LOADING =====
# Read the CSV, drop rows whose 'text' is NaN, normalise the column to
# stripped strings, and discard rows that end up empty.
df = (
    pd.read_csv(CSV_PATH)
    .dropna(subset=["text"])
    .assign(text=lambda frame: frame["text"].astype(str).str.strip())
    .loc[lambda frame: frame["text"].ne("")]
)

# if exactly 9000 rows are needed:
# df = df.head(9000)

# ===== LOOP OVER CSV ROWS =====
# Synthesize every sentence in df["text"] and write it to OUT_DIR as "<idx>.wav",
# where idx is the 1-based position of the sentence in the iteration order.
for idx, text in enumerate(df["text"], start=1):
    audio = model.apply_tts(text=text,
                            speaker=speaker,
                            sample_rate=sample_rate)

    # torch.as_tensor reuses an existing tensor instead of copy-constructing it
    # (torch.tensor(<Tensor>) emits a warning). detach()/cpu() make the data
    # safe to hand to the file writer regardless of the device it lives on.
    # Resulting layout is [1, T], assuming apply_tts returns a 1-D waveform.
    audio_tensor = torch.as_tensor(audio).detach().cpu().unsqueeze(0)
    out_path = os.path.join(OUT_DIR, f"{idx}.wav")
    torchaudio.save(out_path, audio_tensor, sample_rate)

    # Progress report every 100 files.
    if idx % 100 == 0:
        print(f"Сгенерировано {idx} файлов")

print("done")
