In [None]:
import os
import torch
import transformers
from TTS.api import TTS # only in v0.22
from TTS.tts.models.vits import Vits
from TTS.tts.models.xtts import Xtts

from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.configs.vits_config import VitsConfig

from TTS.tts.utils.synthesis import synthesis
from TTS.utils.audio import AudioProcessor
from TTS.tts.utils.text.tokenizer import TTSTokenizer

from neon_tts_plugin_coqui import CoquiTTS as neonTTS
from IPython.display import Audio
from scipy.io import wavfile
import numpy as np

from pydub import AudioSegment

In [None]:
# Irish-language song text used as the synthesis input for the VITS and Neon
# Coqui cells further down (they pass it with language code "ga").
# The string starts and ends with a newline because of the triple-quote layout.
irish_lyrics = """
Bhí loch ag mo sheanmháthair,
Áit ina raibh na lachain ag snámh,
Le héadain bhána geal,
Is cluimhreach chomh bog le scamall.

Ó, a lachain álainn, a sheod,
Le do ghlór binn i gconaí ag glaoch,
I do loch ghlé geal,
Agus tú chomh saor le gaoth.

Sa mhaidin chiúin go moch,
Bhí an lacha ag éirí as a suan,
Le heireabaill ag crith,
Is a sciatháin ag sracadh an uisce.

Ó, a lachain álainn, a sheod,
Le do ghlór binn i gconaí ag glaoch,
I do loch ghlé geal,
Agus tú chomh saor le gaoth.

Nuair a tháinig an tráthnóna,
Bhí na lachain fós ann,
Le spraoi is súgradh leo,
Agus an ghrian ag dul faoi chiúin.

Ó, a lachain álainn, a sheod,
Le do ghlór binn i gconaí ag glaoch,
I do loch ghlé geal,
Agus tú chomh saor le gaoth.

Anois tá cuimhne agam ort,
A lachain mo sheanmháthar,
Áit álainn ar domhan,
Nach n-imeoidh uaim go bráth.

Ó, a lachain álainn, a sheod,
Le do ghlór binn i gconaí ag glaoch,
I do loch ghlé geal,
Agus tú chomh saor le gaoth
"""

# Dutch-language verse used as the synthesis input for the XTTS cell below
# (passed to tts_to_file with language="nl").
dutch_lyrics = """
Zooals ik eenmaal beminde,
Zoo minde er op aarde nooit een,
Maar 'k vond, tot wien ik mij wendde,
Slechts harten van ijs en van steen.

Toen stierf mijn geloof aan de vriendschap,
Mijn hoop en mijn liefde verdween,
En zooals mijn hart toen haatte,
Zoo haatte er op aarde nooit een.

En sombere, bittere liederen
Zijn aan mijn lippen ontgleên;
Zoo somber en bitter als ik zong,
Zoo zong er op aarde nooit een.

Verveeld heeft mij eindlijk dat haten,
Dat eeuwig gezang en geween,
Ik zweeg, en zooals ik nu zwijg,
Zoo zweeg er op aarde nooit een.
"""

# XTTS via the Coqui TTS API (v0.22.0)

In [None]:
# Local directory holding the XTTS-v2 checkpoint; its config.json sits inside it.
# NOTE(review): this path is under /media/koekiemonster while the VITS cell
# below uses /media/bramiozo — confirm which mount point is correct.
MODEL_PATH = "/media/koekiemonster/DATA-FAST/TTS/tts_models/multilingual/multi-dataset/XTTS-v2"
CONFIG_PATH = os.path.join(MODEL_PATH, "config.json")

# "cuda" is the PyTorch device string for NVIDIA GPUs; "gpu" is not a valid
# device type and makes `.to(...)` raise. Fall back to CPU when CUDA is absent
# so the cell still runs (slowly) on a machine without a GPU.
XTTS_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

_tts = TTS(progress_bar=True,
           model_path=MODEL_PATH,
           config_path=CONFIG_PATH)
_tts.to(XTTS_DEVICE)

# Synthesize the Dutch text, using the reference recording as the speaker
# sample, and write the result to the artifacts folder.
_tts.tts_to_file(text=dutch_lyrics,
                 language="nl",
                 speaker_wav="../assets/english_bram.wav",
                 file_path="../artifacts/test_dutch.wav")

## Direct VITS inference (Irish `tts-vits-cv-ga` checkpoint)

In [None]:
# Location of the Irish VITS checkpoint directory and of its JSON config.
MODEL_PATH = "/media/bramiozo/DATA-FAST/TTS/tts_models/multilingual/multi-dataset/tts-vits-cv-ga"
CONFIG_PATH = "/media/bramiozo/DATA-FAST/TTS/tts_models/multilingual/multi-dataset/tts-vits-cv-ga/config.json"

# Start from a default VITS config and overwrite it with the checkpoint's settings.
config = VitsConfig()
config.load_json(CONFIG_PATH)

# Build the audio processor and tokenizer from that config. The tokenizer
# factory also returns a config object, which replaces `config` from here on.
ap = AudioProcessor.init_from_config(config)
tokenizer, config = TTSTokenizer.init_from_config(config)

# Instantiate the model and load the weights in eval mode with non-strict key
# matching and no caching.
model = Vits.init_from_config(config)
checkpoint_file = os.path.join(MODEL_PATH, "model_file.pth.tar")
model.load_checkpoint(config, checkpoint_path=checkpoint_file, eval=True, strict=False, cache=False)

# Attach the processor and tokenizer built above, then move the model to the GPU.
model.ap, model.tokenizer = ap, tokenizer
model.cuda()


In [None]:
# Run the loaded VITS model over the Irish lyrics on the GPU.
synthesis_outputs = synthesis(
    model,
    irish_lyrics,
    config,
    style_wav="../assets/english_bram.wav",
    use_cuda=True,
)

# The returned mapping is unpacked positionally: the first two values are the
# waveform and the alignment, the remaining two are discarded.
wav, alignment, _, _ = synthesis_outputs.values()

# Persist the synthesized waveform through the audio processor.
ap.save_wav(wav, "../artifacts/test_irish.wav")

In [3]:
# Create the Neon Coqui TTS plugin for language code "ga" with an empty config.
# The recorded output of this cell shows OVOS logging that the G2P plugin could
# not be loaded ("there will be no mouth movements"); the later cells still use
# the resulting object.
_neonTTS = neonTTS(lang="ga", config={})

2024-05-26 18:02:47.529 - OVOS - ovos_plugin_manager.g2p:create:142 - ERROR - The selected G2P plugin could not be loaded.
Traceback (most recent call last):
  File "/media/bramiozo/Storage1/bramiozo/VIRTUALENVS/Python/seanos-bFLQpzeS-py3.10/lib/python3.10/site-packages/ovos_plugin_manager/g2p.py", line 139, in create
    g2p = clazz(g2p_config)
TypeError: 'NoneType' object is not callable
2024-05-26 18:02:47.549 - OVOS - ovos_plugin_manager.templates.tts:__init__:205 - ERROR - G2P plugin not loaded, there will be no mouth movements
Traceback (most recent call last):
  File "/media/bramiozo/Storage1/bramiozo/VIRTUALENVS/Python/seanos-bFLQpzeS-py3.10/lib/python3.10/site-packages/ovos_plugin_manager/templates/tts.py", line 203, in __init__
    self.g2p = OVOSG2PFactory.create(cfg)
  File "/media/bramiozo/Storage1/bramiozo/VIRTUALENVS/Python/seanos-bFLQpzeS-py3.10/lib/python3.10/site-packages/ovos_plugin_manager/g2p.py", line 139, in create
    g2p = clazz(g2p_config)
TypeError: 'NoneType

  dtype = storage_type.dtype


In [None]:
# Synthesize the Irish lyrics with the Neon plugin, passing the target WAV path
# and a speaker dict that selects language "ga".
# NOTE(review): this is the same output path the VITS cell above writes to, so
# running both overwrites the earlier file.
result = _neonTTS.get_tts(
    irish_lyrics,
    "../artifacts/test_irish.wav",
    speaker={"language": "ga"},
)

In [4]:
# Synthesize the Irish lyrics in memory. With audio_format="ipython" the result
# is indexed below by the keys 'data' (samples) and 'rate' (sample rate).
wavresult = _neonTTS.get_audio(irish_lyrics,  audio_format="ipython")

 > Text splitted to sentences.
['Bhí loch ag mo sheanmháthair,', 'Áit ina raibh na lachain ag snámh,', 'Le héadain bhána geal,', 'Is cluimhreach chomh bog le scamall.', 'Ó, a lachain álainn, a sheod,', 'Le do ghlór binn i gconaí ag glaoch,', 'I do loch ghlé geal,', 'Agus tú chomh saor le gaoth.', 'Sa mhaidin chiúin go moch,', 'Bhí an lacha ag éirí as a suan,', 'Le heireabaill ag crith,', 'Is a sciatháin ag sracadh an uisce.', 'Ó, a lachain álainn, a sheod,', 'Le do ghlór binn i gconaí ag glaoch,', 'I do loch ghlé geal,', 'Agus tú chomh saor le gaoth.', 'Nuair a tháinig an tráthnóna,', 'Bhí na lachain fós ann,', 'Le spraoi is súgradh leo,', 'Agus an ghrian ag dul faoi chiúin.', 'Ó, a lachain álainn, a sheod,', 'Le do ghlór binn i gconaí ag glaoch,', 'I do loch ghlé geal,', 'Agus tú chomh saor le gaoth.', 'Anois tá cuimhne agam ort,', 'A lachain mo sheanmháthar,', 'Áit álainn ar domhan,', 'Nach n-imeoidh uaim go bráth.', 'Ó, a lachain álainn, a sheod,', 'Le do ghlór binn i gconaí ag glao

In [39]:
# Write the in-memory Neon result to disk as a WAV file.
# NOTE(review): the sample format of the file follows the dtype numpy infers
# from wavresult['data'] — presumably floating point; confirm that downstream
# tools can read it (a separately re-encoded copy is loaded further down).
wavfile.write("../artifacts/test_irish.wav", rate=wavresult['rate'], data=np.array(wavresult['data']))

In [40]:
 # Useful to know if we want to apply our custom fine-tuned model:
 #model_path = _neonTTS._download_huggingface('neongeckocom/tts-vits-cv-ga')

In [41]:
# Play the synthesized samples inline, starting playback automatically.
# normalize=False asks IPython not to rescale the samples — assumes the data is
# already within [-1, 1]; TODO confirm.
Audio(np.array(wavresult['data']), rate=wavresult['rate'], normalize=False, autoplay=True)

In [42]:
# Load the audio to post-process as a pydub AudioSegment.
# NOTE(review): no cell visible in this notebook writes
# test_irish_reencoded.wav — presumably it is test_irish.wav re-encoded outside
# the notebook; confirm and document that step so Run All works on a fresh kernel.
song_to_edit = AudioSegment.from_file('../artifacts/test_irish_reencoded.wav', format='wav')


In [43]:
def add_reverb(sound, decay_factor=0.1, delays=(50, 100, 150, 200, 400, 800, 1600, 3200)):
    """Overlay progressively quieter, delayed copies of ``sound`` onto itself.

    Each echo is attenuated relative to the previous one by ``decay_factor``
    (a linear amplitude ratio): the k-th echo is the dry signal scaled by
    ``decay_factor ** k``. Attenuating by ``3 * delay`` dB instead would put
    even the shortest echo 150 dB down, which is inaudible.

    Args:
        sound: Audio object that supports ``sound - dB`` (attenuate by that
            many decibels) and ``sound.overlay(other, position=...)``. The
            notebook passes a pydub ``AudioSegment``.
        decay_factor: Amplitude ratio between successive echoes, in ``(0, 1]``.
            The default of 0.1 corresponds to 20 dB of attenuation per echo.
        delays: Iterable of echo offsets, forwarded as ``position`` to
            ``overlay`` (milliseconds for a pydub ``AudioSegment``).

    Returns:
        A new audio object with every echo mixed onto the original signal.
        pydub's ``overlay`` keeps the length of the base segment, so echoes
        that run past the end of ``sound`` are truncated.

    Raises:
        ValueError: If ``decay_factor`` is not in ``(0, 1]``.
    """
    if not 0 < decay_factor <= 1:
        raise ValueError("decay_factor must be in the interval (0, 1]")

    # Convert the linear amplitude ratio to a positive per-echo attenuation in dB.
    attenuation_step_db = -20.0 * float(np.log10(decay_factor))

    output = sound
    for echo_index, delay in enumerate(delays, start=1):
        # Every echo is derived from the dry signal, so attenuation compounds
        # only through echo_index and not through earlier overlays.
        echo = sound - (attenuation_step_db * echo_index)
        output = output.overlay(echo, position=delay)
    return output


In [44]:
# Apply the reverb effect with its default settings.
song_edited = add_reverb(song_to_edit)

# export() hands back the output file object still open (the recorded cell
# output shows <_io.BufferedRandom ...>). Close it so the handle is not leaked
# and the cell does not end on a file-object repr.
song_edited.export("../artifacts/test_reverb.wav", format="wav").close()

<_io.BufferedRandom name='../artifacts/test_reverb.wav'>