In [1]:
%matplotlib inline
from pathlib import Path

from scipy.io.wavfile import write

import utils
from src.audio import concatenate_book_audios_with_silence
from src.file import read_book
from src.srt import generate_and_save_audiobook_srt
from src.synthesize import synthesize_book
from src.text import split_book_lines_to_sentences
from models import SynthesizerTrn
from text.symbols import stressed_symbols


In [2]:
# --- Voice / checkpoint selection ---------------------------------------------
speaker = 'aurimas_nausedas'
checkpoint_step = '150000'

# Sample rate passed to the SRT generator and the WAV writer in later cells.
# NOTE(review): presumably has to equal the rate the model was trained at -- confirm against the config.
sample_rate = 22050

# --- Pauses inserted when the audio is stitched together (seconds) ------------
# TODO (optional? would complicate the generation of SRTs): pick the sentence
# silence duration from a distribution (e.g. mean 1.0s, lower bound 0.5s,
# upper bound 1.5s).
silence_between_paragraphs = 1.0
silence_between_sentences = 0.0

# --- Inputs -------------------------------------------------------------------
book_path = "/media/arnas/SSD Disk/inovoice/text_data/synthesis-audiobooks/processed/Kur_vasara_amžina_stressed.txt"

checkpoint_filepath = f"/media/arnas/SSD Disk/inovoice/models/text-to-speech/vits/{speaker}/G_{checkpoint_step}.pth"
# Alternative checkpoint location (under logs/), kept for reference:
# checkpoint_filepath = f"/media/arnas/SSD Disk/inovoice/models/text-to-speech/vits/{speaker}/logs/G_{checkpoint_step}.pth"

# --- Outputs ------------------------------------------------------------------
# Both the .srt and the .wav are written as <output_dir>/<output_filename>.<ext>.
output_filename = f'kur_vasara_amzina_{speaker}_{checkpoint_step}'
output_dir = f"/home/arnas/Desktop/tdi/bitbucket/vits/files/audio/audiobooks/{speaker}/{checkpoint_step}"
Path(output_dir).mkdir(parents=True, exist_ok=True)  # create the whole tree; no error if it already exists

# --- Model hyperparameters ----------------------------------------------------
hps = utils.get_hparams_from_file("./configs/mif_stressed.json")

In [3]:
# Build the generator from the loaded hyperparameters.
# NOTE(review): the positional arguments look like (symbol count, spectrogram
# bins for an FFT of size filter_length, training segment length in frames) --
# confirm against the SynthesizerTrn signature.
net_g = SynthesizerTrn(
    len(stressed_symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    **hps.model,
)
net_g = net_g.cuda()

# Switch to evaluation mode before loading the weights for inference.
net_g.eval()

# Assign to `_` so the return value is not echoed as the cell's output.
_ = utils.load_checkpoint_for_inference(checkpoint_filepath, net_g)

[2022-06-08 17:10:30,593] INFO:PID-35393:MainThread:utils: Loading checkpoint at /media/arnas/SSD Disk/inovoice/models/text-to-speech/vits/aurimas_nausedas/G_150000.pth


[INFO] Loading checkpoint at /media/arnas/SSD Disk/inovoice/models/text-to-speech/vits/aurimas_nausedas/G_150000.pth


[2022-06-08 17:10:32,302] INFO:PID-35393:MainThread:utils: Loaded checkpoint '/media/arnas/SSD Disk/inovoice/models/text-to-speech/vits/aurimas_nausedas/G_150000.pth'


[INFO] Loaded checkpoint '/media/arnas/SSD Disk/inovoice/models/text-to-speech/vits/aurimas_nausedas/G_150000.pth'


In [4]:
# Load the book from `book_path`; the file format and the returned structure are
# defined by src.file.read_book.
book = read_book(book_path)

In [5]:
# Split the loaded book into sentences. The result is later iterated as a
# sequence of groups, each holding the individual sentences.
# NOTE(review): presumably one group per book line/paragraph, with sentences as
# strings -- confirm against split_book_lines_to_sentences.
book_sentences = split_book_lines_to_sentences(book)

In [6]:
# Synthesize audio for the book using the loaded generator and its hyperparameters.
# This is the slow step: the recorded run shows 1323 progress-bar steps taking
# roughly five minutes.
# NOTE(review): the structure of `audios` presumably mirrors `book_sentences` -- confirm.
audios = synthesize_book(book_sentences, net_g, hps)

100%|██████████| 1323/1323 [05:01<00:00,  4.38it/s]


In [7]:
# Join the synthesized audio into a single track, inserting the configured
# pauses between sentences and between paragraphs.
audiobook = concatenate_book_audios_with_silence(
    audios,
    sentence_silence=silence_between_sentences,
    paragraph_silence=silence_between_paragraphs,
)

In [8]:
# Generate the subtitle file for the audiobook and save it as
# <output_dir>/<output_filename>.srt. The same silence settings used for the
# concatenation are passed in.
# NOTE(review): presumably so that subtitle timings line up with the audio -- confirm.
entries = generate_and_save_audiobook_srt(
    audios,
    book_sentences,
    sample_rate,
    silence_between_sentences,
    silence_between_paragraphs,
    Path(f"{output_dir}/{output_filename}.srt"),
)

In [9]:
# Save the concatenated audiobook as <output_dir>/<output_filename>.wav (same
# stem as the .srt) at the configured sample rate.
# NOTE(review): scipy's wavfile.write picks the sample format from the array
# dtype -- confirm `audiobook` has the intended dtype.
write(f"{output_dir}/{output_filename}.wav", sample_rate, audiobook)


In [10]:
# Debug lookup: print the index of each group in `book_sentences` that holds a
# sentence containing the fragment below (printed once per matching sentence).
for i, l in enumerate(book_sentences):
    for s in l:
        if "Òlga nuskrìdo į̃ Per̃mę, paskuĩ važiãvo tráukiniu " not in s:
            continue
        print(i)

125
