In [1]:
## Install Coqui TTS (uncomment on first run; `%pip` installs into the active kernel's environment)
# %pip install -U pip
# %pip install TTS

In [1]:
from pathlib import Path
import torch
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

import librosa
from scipy.io.wavfile import write

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_model(model_path, config_path):
    """Build an XTTS model from a JSON config and load its checkpoint.

    Args:
        model_path: Directory passed to ``load_checkpoint`` as ``checkpoint_dir``.
        config_path: Path of the JSON file read into a fresh ``XttsConfig``.

    Returns:
        A ``(model, config)`` tuple: the ``Xtts`` instance with the checkpoint
        loaded using ``eval=True``, and the populated ``XttsConfig``.
    """
    xtts_config = XttsConfig()
    xtts_config.load_json(config_path)

    xtts_model = Xtts.init_from_config(xtts_config)
    xtts_model.load_checkpoint(
        xtts_config,
        checkpoint_dir=model_path,
        eval=True,
    )
    return xtts_model, xtts_config

In [3]:

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [4]:
# Print the notebook's working directory; a later cell stores it as `main_dir`
# and builds every data/model path relative to it.
!pwd

/srv/storage/talc2@talc-data2.nancy/multispeech/calcul/users/sogun/AfriSpeech-TTS/notebooks


In [5]:
def generate_data(models_dict, data, dst_folder, 
                  sentences, noise_scale=0.667, 
                  noise_scale_w=0.8):
    """Synthesize one utterance per row of ``data`` with every model in ``models_dict``.

    For each model, a metadata file ``<dst_folder>/<model_name>.txt`` is
    (re)written with one line per row in the form
    ``wav_path|transcript|country|accent|speaker_id|sentence_idx``, and the
    audio is written to ``<dst_folder>/<model_name>/<stem>.wav`` at 16 kHz.
    Rows whose wav file already exists are not re-synthesized, but they are
    still listed in the metadata file.

    Args:
        models_dict: Mapping ``model_name -> (model_path, config_path)``; each
            pair is forwarded to ``load_model``.
        data: DataFrame iterated with ``iterrows()``. Each row must expose
            ``audio_paths``, ``user_ids_num``, ``country`` and ``accent``.
            The row's index label selects the sentence
            (``index % len(sentences)``), so the index must be integer-valued.
        dst_folder: Root output directory.
        sentences: Sequence of texts to synthesize; must hold at least two items.
        noise_scale: Unused; kept so existing callers keep working.
        noise_scale_w: Unused; kept so existing callers keep working.

    Notes:
        Reads the notebook-level globals ``device`` (target device for the
        model) and ``main_dir`` (root used to locate the speaker reference
        audio), plus the notebook's imports ``Path``, ``torch``, ``librosa``
        and ``write``.
    """
    # Imported locally so the function does not depend on a later cell having
    # run `import os` first.
    import os

    assert len(sentences) > 1

    # Iterate the argument, not the notebook-level `models` variable.
    for model_name in models_dict:
        model_path, config_path = models_dict[model_name]

        metadata_full_path = os.path.join(dst_folder, f"{model_name}.txt")
        os.makedirs(os.path.dirname(metadata_full_path), exist_ok=True)

        # Every wav of this model lands in the same directory: create it once.
        model_out_dir = os.path.join(dst_folder, model_name)
        os.makedirs(model_out_dir, exist_ok=True)

        with open(metadata_full_path, "w", encoding="utf-8") as f1:
            model, config = load_model(model_path, config_path)
            model = model.to(device)

            for i, item in data.iterrows():
                audio_path = Path(item.audio_paths).stem
                dst_path = os.path.join(model_out_dir, audio_path + ".wav")

                # Cycle through the sentences using the row's index label.
                sentence_idx = i % len(sentences)
                transcript = sentences[sentence_idx]
                speaker_id = int(item.user_ids_num)

                sav_path = f"{dst_path}|{transcript}|{item.country}|{item.accent}|{str(speaker_id)}|{str(sentence_idx)}"

                # Only synthesize when the wav is missing, so interrupted runs
                # can be resumed without redoing finished items.
                if not os.path.exists(dst_path):
                    with torch.no_grad():
                        # Drop the leading character of the stored path and map
                        # the "-D" dataset folder name onto the local one.
                        fap = item.audio_paths[1:].replace("AfriSpeech-TTS-D",  "AfriSpeech-TTS")
                        speaker_wav = os.path.join(main_dir, "../afrispeech_16k_trimmed", fap)

                        audio = model.synthesize(
                            transcript,
                            config,
                            speaker_wav=speaker_wav,
                            gpt_cond_len=3,
                            language="en",
                        )["wav"]

                        # The model output is treated as 24 kHz and stored at 16 kHz.
                        audio = librosa.resample(audio, orig_sr=24000, target_sr=16000)

                        write(dst_path, 16000, audio)

                print(sav_path, file=f1)

    print("Finished.")

In [6]:
import os

# Display the kernel's current working directory; the next cell stores the
# same value as `main_dir`.
os.getcwd()

'/srv/storage/talc2@talc-data2.nancy/multispeech/calcul/users/sogun/AfriSpeech-TTS/notebooks'

In [7]:
import pandas as pd

# All relative paths below are anchored at the kernel's working directory.
main_dir = os.getcwd()

# Rows to synthesize for (one output utterance per row).
test_seen = pd.read_csv(
    os.path.join(main_dir, "..", "data/afritts-test-seen-clean.csv")
)

# General-domain text, to be spoken with speakers from the AfriSpeech-200 dataset.
text_data = pd.read_csv(
    os.path.join(main_dir, "..", "data/intron-dev-public-tts-eval.csv")
)
sentences = list(text_data["transcript_norm"].values)

In [8]:
# # run on cpu if it fails due to memory
models = {
    # "xtts": (os.path.join(main_dir, "../", "src/vits/AfriSpeech-Models/xtts"), 
    #          os.path.join(main_dir, "../", "src/vits/AfriSpeech-Models/xtts_ft/config.json")
    #         ),
    "xtts_ft": (os.path.join(main_dir, "../", "src/vits/AfriSpeech-Models/xtts_ft"), 
             os.path.join(main_dir, "../", "src/vits/AfriSpeech-Models/xtts_ft/config.json")
            ), # iteration 90951
    }

dst_folder = os.path.join(main_dir, "../", "src/vits/afritts_test_seen")

generate_data(models, test_seen, dst_folder, sentences,)

Finished.
