In [1]:
%matplotlib inline

# uncomment if ModuleNotFoundError is thrown
# import sys
# sys.path.append("/home/arnas/inovoice/repos/vits")

import os
from datetime import datetime
from pathlib import Path
from typing import List

from scipy.io.wavfile import write
from tqdm import tqdm

from src.audio.concat import concat_2d_array_audios_with_silence
from src.file import read_book
from src.model.config import Speaker, get_inference_configs
from src.model.synthesizer import Synthesizer
from src.srt import generate_and_save_audiobook_srt
from src.text.split import split_lines_to_sentences
from src.model.config import InferenceConfig


In [2]:
def synthesize_book(config: InferenceConfig, chapters, out_dir: Path,
                    sentence_silence=None, paragraph_silence=None):
    """Synthesize every chapter of a book and save one WAV and one SRT per chapter.

    Chapters are numbered from 1 in the order they appear in ``chapters``. Each
    chapter is split with ``split_lines_to_sentences``, synthesized with
    ``synthesize_chapter``, joined with ``concat_2d_array_audios_with_silence``
    and written to ``{out_dir}/{idx}.wav``; the matching subtitles are written
    to ``{out_dir}/{idx}.srt`` by ``generate_and_save_audiobook_srt``.

    Args:
        config: Inference config. ``config.synthesizer`` is used for synthesis
            and ``config.synthesizer.sample_rate`` for writing the audio.
        chapters: Sized sequence of chapters (``len()`` is used for the
            progress message); each item is passed to
            ``split_lines_to_sentences``.
        out_dir: Directory that receives the files. This function does not
            create it.
        sentence_silence: Silence inserted between sentences (passed as
            ``silence_between_dim1``; presumably seconds - confirm against
            ``concat_2d_array_audios_with_silence``). When omitted, the
            notebook-level ``silence_between_sentences`` is used.
        paragraph_silence: Silence inserted between paragraphs (passed as
            ``silence_between_dim2``). When omitted, the notebook-level
            ``silence_between_paragraphs`` is used.
    """
    # Fall back to the notebook globals so existing three-argument calls keep
    # working, while letting callers pass the pauses explicitly instead of
    # relying on cell execution order.
    if sentence_silence is None:
        sentence_silence = silence_between_sentences
    if paragraph_silence is None:
        paragraph_silence = silence_between_paragraphs

    for idx, chapter in enumerate(chapters, start=1):
        chapter_sentences = split_lines_to_sentences(chapter)

        print(f"Synthesizing chapter {idx}/{len(chapters)}")
        synthesizer = config.synthesizer
        audios = synthesize_chapter(chapter_sentences, synthesizer)
        sample_rate = synthesizer.sample_rate

        audiobook = concat_2d_array_audios_with_silence(
            audios,
            silence_between_dim1=sentence_silence,
            silence_between_dim2=paragraph_silence,
            sr=sample_rate,
        )
        write(f"{out_dir}/{idx}.wav", sample_rate, audiobook)

        # NOTE(review): synthesize_chapter skips empty sentences, but the
        # unfiltered chapter_sentences is passed here - confirm that the SRT
        # generator keeps audios and sentences aligned in that case.
        generate_and_save_audiobook_srt(
            audios,
            chapter_sentences,
            sample_rate,
            sentence_silence,
            paragraph_silence,
            Path(f"{out_dir}/{idx}.srt"),
        )

def synthesize_chapter(chapter_sentences, synthesizer: Synthesizer):
    """Synthesize one chapter and return its audio grouped like the input.

    Args:
        chapter_sentences: Iterable of sentence groups (one group per item
            produced by ``split_lines_to_sentences``); a progress bar is shown
            over the groups.
        synthesizer: Object whose ``synthesize(sentence)`` result exposes the
            generated audio as ``.audio``.

    Returns:
        A list with one inner list per group, holding the ``.audio`` of every
        non-empty sentence in order. Falsy sentences (e.g. ``""``) are skipped,
        so an inner list can be shorter than its source group.
    """
    chapter_audios = []
    for group in tqdm(chapter_sentences):
        group_audios = []
        for text in group:
            if not text:
                # Nothing to synthesize for an empty sentence.
                continue
            group_audios.append(synthesizer.synthesize(text).audio)
        chapter_audios.append(group_audios)
    return chapter_audios

def load_book_chapters(input_dir: Path) -> List[List[str]]:
    """Read every chapter file found directly inside ``input_dir``.

    Files are read in natural (human) name order, so ``2.txt`` comes before
    ``10.txt``. The order matters because callers number the chapters by their
    position in the returned list, and ``os.listdir`` alone returns entries in
    arbitrary order. Sub-directories (for example ``.ipynb_checkpoints``) are
    skipped instead of being handed to ``read_book``.

    Args:
        input_dir: Directory containing one file per chapter.

    Returns:
        One ``read_book`` result per chapter file, in natural name order.
    """
    import re  # local import: only needed for the natural-sort key

    def natural_key(name: str):
        # re.split with a capturing group alternates text / digit tokens, so
        # odd positions are always digit runs and compare as integers.
        tokens = re.split(r"(\d+)", name)
        parts = [int(tok) if pos % 2 else tok.lower() for pos, tok in enumerate(tokens)]
        return parts, name  # raw name breaks ties deterministically

    base_dir = Path(input_dir)
    chapter_names = sorted(
        (name for name in os.listdir(base_dir) if (base_dir / name).is_file()),
        key=natural_key,
    )
    return [read_book(base_dir / name) for name in chapter_names]

In [None]:
# Build one inference config per studio speaker. The result is iterated with
# `.items()` further down, yielding (speaker, config) pairs.
# NOTE(review): device=0 presumably selects the first GPU - confirm against
# get_inference_configs.
speakers = [
    Speaker.GIEDRIUS_STUDIO_44,
    Speaker.AURIMAS_STUDIO_44,
    Speaker.MILDA_STUDIO_44,
]
configs = get_inference_configs(speakers=speakers, device=0, audiobook_synthesis=True)

In [4]:
# Pause lengths used when joining synthesized audio and when generating the
# subtitles (presumably seconds - confirm against
# concat_2d_array_audios_with_silence).
silence_between_sentences: float = 0.5   # gap between consecutive sentences
silence_between_paragraphs: float = 1.5  # gap between consecutive paragraphs

In [5]:
# Book to synthesize; its chapters are read from "<book_name>-chapters" under
# the input base directory.
# Alternative book: "teka-upe-pro-sali"
book_name = "monologas-savam-kieme"

# NOTE(review): machine-specific absolute paths - adjust for your environment.
input_base_dir = Path("/home/arnas/inovoice/data/text/audiobooks")
output_base_dir = Path("/home/arnas/inovoice/repos/vits/files/audio/audiobooks")

chapters_dir = input_base_dir / f"{book_name}-chapters"
chapters = load_book_chapters(input_dir=chapters_dir)

In [None]:
# Synthesize the whole book once per configured speaker. Each run gets its own
# folder: <output_base_dir>/<speaker>/<checkpoint_step>/<book_name>_<timestamp>.
for speaker, config in configs.items():
    print(f"Using `{speaker.value}` synthesizer")

    # Year-month-day ordering so run folders sort chronologically by name
    # (the previous "%Y-%d-%m" pattern put the day before the month).
    run_stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out_book_name = f"{book_name}_{run_stamp}"

    output_dir = output_base_dir / speaker.value / str(config.checkpoint_step) / out_book_name
    output_dir.mkdir(parents=True, exist_ok=True)

    synthesize_book(config, chapters, output_dir)