In [1]:
%matplotlib inline

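# Puts the local vits checkout on sys.path so the `src.*` imports below resolve
# (avoids ModuleNotFoundError); adjust the path to match your machine.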
import sys
sys.path.append("/home/arnas/inovoice/repos/vits")

import os
import time
from datetime import datetime
from pathlib import Path

from scipy.io.wavfile import write
from tqdm import tqdm

from src.audio.concat import concat_2d_array_audios_with_silence
from src.file import read_book
from src.model.config import Speaker, get_inference_configs
from src.model.synthesizer import Synthesizer
from src.srt import generate_and_save_audiobook_srt
from src.text.split import split_lines_to_sentences
from src.model.config import InferenceConfig

In [2]:
# Per-speaker pause lengths, stored as (between sentences, between paragraphs).
# `synthesize_book_using_configs` indexes this table by speaker, so a speaker
# without an entry here raises KeyError there.
# Values are presumably seconds (they are forwarded together with the sample
# rate to the concatenation/SRT helpers) — TODO confirm.
pauses_by_speaker = {
    Speaker.GIEDRIUS_BASE_44: (0.2, 0.7),

    Speaker.AURIMAS_AUDIOBOOK_22: (0.0, 0.5),
    Speaker.AURIMAS_AUDIOBOOK_44: (0.0, 0.5),

    # NOTE(review): the 22 and 44 variants differ here (0.9 vs 0.8), unlike
    # every other 22/44 pair in this table — confirm this is intentional.
    Speaker.AURIMAS_ALTORIU_SESELY_22: (0.9, 1.8),
    Speaker.AURIMAS_ALTORIU_SESELY_44: (0.8, 1.8),

    Speaker.MILDA_STUDIO_22: (0.5, 1.5),
    Speaker.MILDA_STUDIO_44: (0.5, 1.5),
    Speaker.MILDA_TEKA_UPE_PRO_SALI_22: (1, 1.7),
    Speaker.MILDA_TEKA_UPE_PRO_SALI_44: (1, 1.7),
}

def synthesize_book_using_configs(in_dir, out_dir, synthesizer_configs, book_name):
    """Synthesize one book once for every configured speaker.

    Chapters are loaded from ``in_dir / f"{book_name}-chapters"``. For each
    ``speaker -> config`` entry the audio and subtitles are written to
    ``out_dir / <speaker> / <checkpoint_step> / <book_name>_<timestamp>``,
    where the timestamp is ``YYYY-MM-DD_HH-MM-SS`` so that run folders sort
    chronologically by name.

    Args:
        in_dir: ``Path`` of the directory that contains the
            ``<book_name>-chapters`` folder.
        out_dir: ``Path`` of the base output directory; missing parents are
            created.
        synthesizer_configs: Mapping of speaker to its inference config. Each
            config must expose ``checkpoint_step`` and whatever
            ``synthesize_book`` reads from it.
        book_name: Book identifier used for the input folder and output names.

    Raises:
        KeyError: If any speaker has no entry in ``pauses_by_speaker``. This
            is checked before synthesis starts so a long run does not fail
            part-way through.
    """
    chapters = load_book_chapters(input_dir=in_dir / f"{book_name}-chapters")

    # Validate all speakers up front: synthesis is slow, so a missing pause
    # entry should surface immediately rather than after earlier speakers ran.
    missing = [speaker for speaker in synthesizer_configs if speaker not in pauses_by_speaker]
    if missing:
        raise KeyError(f"No pause settings in `pauses_by_speaker` for: {missing}")

    for speaker, config in synthesizer_configs.items():
        # Enum members are shown by value; any other key falls back to str().
        speaker_value = speaker.value if isinstance(speaker, Speaker) else str(speaker)
        silence_between_sentences, silence_between_paragraphs = pauses_by_speaker[speaker]
        print(f"Using `{speaker_value}` synthesizer")
        print(f"Synthesizing {book_name}")

        # Year-month-day order keeps run directories sortable and unambiguous.
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        out_book_name = f"{book_name}_{timestamp}"
        output_dir = out_dir / speaker_value / str(config.checkpoint_step) / out_book_name
        output_dir.mkdir(parents=True, exist_ok=True)

        synthesize_book(config, chapters, output_dir, silence_between_sentences, silence_between_paragraphs)


def load_book_chapters(input_dir: Path) -> dict:
    """Read every chapter file in ``input_dir`` into a dict keyed by file stem.

    Only regular files are read; sub-directories (for example the
    ``.ipynb_checkpoints`` folder Jupyter creates) are skipped instead of
    being handed to ``read_book``. Files are visited in sorted path order so
    the chapter order is the same on every run and every filesystem. The sort
    is lexicographic, so zero-pad chapter numbers if numeric order matters.

    Args:
        input_dir: Directory holding one file per chapter.

    Returns:
        Mapping of ``file stem -> read_book(file)`` in sorted path order. If
        two files share a stem, the later one in sort order wins.

    Raises:
        FileNotFoundError: If ``input_dir`` does not exist.
    """
    chapter_files = sorted(path for path in Path(input_dir).iterdir() if path.is_file())

    return {path.stem: read_book(path) for path in chapter_files}


def synthesize_book(config: InferenceConfig, chapters: dict, out_dir: Path,
                    silence_between_sentences, silence_between_paragraphs):
    """Synthesize every chapter and write a ``.wav`` and ``.srt`` file for each.

    For each ``chapter_name -> chapter`` entry the chapter is split into
    sentences, synthesized with ``config.synthesizer``, joined with the given
    silences, and saved in ``out_dir`` as ``<model_name>_<chapter_name>.wav``
    with a matching ``.srt`` next to it. The total elapsed time is printed
    once all chapters are done.

    Args:
        config: Inference config; ``config.synthesizer`` must provide
            ``sample_rate`` and ``model_name``.
        chapters: Mapping of chapter name to chapter content as accepted by
            ``split_lines_to_sentences``.
        out_dir: Existing directory that receives the output files.
        silence_between_sentences: Pause inserted between sentences.
        silence_between_paragraphs: Pause inserted between paragraphs.
    """
    started_at = time.time()

    position = 0
    for chapter_name, chapter in chapters.items():
        position += 1
        print(f"Synthesizing chapter `{chapter_name}` ({position}/{len(chapters)})")

        sentences_by_paragraph = split_lines_to_sentences(chapter)
        synthesizer = config.synthesizer
        audios = synthesize_chapter(sentences_by_paragraph, synthesizer)

        sample_rate = synthesizer.sample_rate
        chapter_audio = concat_2d_array_audios_with_silence(
            audios,
            silence_between_dim1=silence_between_sentences,
            silence_between_dim2=silence_between_paragraphs,
            sr=sample_rate,
        )

        # Suffixes are appended as text (not via with_suffix) so dots inside
        # the model or chapter name are left untouched.
        target_stem = out_dir / f"{synthesizer.model_name}_{chapter_name}"
        write(f"{target_stem}.wav", sample_rate, chapter_audio)

        srt_path = Path(f"{target_stem}.srt")
        generate_and_save_audiobook_srt(
            audios,
            sentences_by_paragraph,
            sample_rate,
            silence_between_sentences,
            silence_between_paragraphs,
            srt_path,
        )

    elapsed = time.time() - started_at
    print(f"Synthesis took {elapsed} seconds")


def synthesize_chapter(chapter_sentences, synthesizer: Synthesizer):
    """Synthesize audio for each non-empty sentence, keeping the nesting.

    Args:
        chapter_sentences: Iterable of sentence groups (one group per inner
            iterable); progress is reported per group via ``tqdm``.
        synthesizer: Object whose ``synthesize(sentence)`` result exposes
            ``.audio``.

    Returns:
        A list with one inner list per group, holding the ``.audio`` of every
        truthy sentence in that group. Groups with no truthy sentence yield an
        empty inner list.
    """
    chapter_audio = []
    for sentences in tqdm(chapter_sentences):
        clips = []
        for sentence in sentences:
            # Falsy (e.g. empty) sentences are skipped, not synthesized.
            # NOTE(review): the caller passes the unfiltered sentences to the
            # SRT writer; confirm skipped entries cannot misalign subtitles.
            if not sentence:
                continue
            clips.append(synthesizer.synthesize(sentence).audio)
        chapter_audio.append(clips)
    return chapter_audio

In [3]:
# Input/output roots. The defaults are machine-specific absolute paths; set
# AUDIOBOOK_INPUT_DIR / AUDIOBOOK_OUTPUT_DIR in the environment to run the
# notebook elsewhere without editing this cell.
input_base_dir = Path(os.environ.get(
    "AUDIOBOOK_INPUT_DIR",
    "/home/arnas/inovoice/tdi-files/antanukas-in-progress/segment",
))
output_base_dir = Path(os.environ.get(
    "AUDIOBOOK_OUTPUT_DIR",
    "/home/arnas/inovoice/repos/vits/files/audio/audiobooks",
))

In [None]:
# Build the speaker -> inference config mapping consumed by
# `synthesize_book_using_configs`. Every speaker listed here also needs an
# entry in `pauses_by_speaker`.
configs = get_inference_configs(
    speakers=[Speaker.AURIMAS_ALTORIU_SESELY_22],
    audiobook_synthesis=True,
    speed_multiplier=0.85,
    device='gpu',
    cuda_device=1,
)

In [None]:
# Run the full synthesis for the selected book with the configs built above.
synthesize_book_using_configs(
    in_dir=input_base_dir,
    out_dir=output_base_dir,
    synthesizer_configs=configs,
    book_name="laiskai-lucilijui",
)