# Tortoise! 🐢🐢🐢🐢


Made by [Artem Konevskikh](https://aiculedssul.net/)

Based on [Tortoise TTS](https://github.com/neonbjb/tortoise-tts)

In [None]:
#@title GPU Check
#@markdown You can check which GPU you got. V100 is perfect, P100 and T4 is good, and the K80 is the weakest one
!nvidia-smi -L

In [None]:
#@title Install
# the scipy version packaged with colab is not tolerant of misformated WAV files.
# install the latest version.
!pip3 install -U scipy
!git clone https://github.com/jnordberg/tortoise-tts.git
%cd tortoise-tts
!pip3 install transformers==4.19.0
!pip3 install -r requirements.txt
!pip3 install einops==0.5.0
!pip3 install rotary_embedding_torch==0.1.5
!pip3 install unidecode==1.3.5
!python3 setup.py install

In [None]:
#@title Load
# Imports used through the rest of the notebook.
import torch
import torchaudio
# NOTE(review): `nn` and `F` are not referenced by any cell visible in this
# notebook; they are kept in case other code relies on them.
import torch.nn as nn
import torch.nn.functional as F

# Used to render an audio player for each generated file.
import IPython

from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_audio, load_voice, load_voices

# This will download all the models used by Tortoise from the HuggingFace hub.
# The resulting `tts` object is created once here and reused by every
# generation cell in this notebook.
tts = TextToSpeech()

In [None]:
#@title Mount Google Drive
#@markdown Mount Google Drive to load custom voices and to save the results.

# google.colab is only importable inside a Colab runtime, so this cell is
# Colab-specific.
from google.colab import drive
# Mount the Drive under /content/drive. The default input folder of the
# "Custom voice" cell (/content/drive/MyDrive/voice) lives under this path.
drive.mount('/content/drive')

In [None]:
# Text to synthesize; every generation cell reads this variable.
# The value begins and ends with a newline character, with one line of
# lyrics per adjacent string literal.
text = (
    "\n"
    "We're no strangers to love\n"
    "You know the rules, and so do I\n"
    "A full commitment is what I'm thinking of\n"
    "You wouldn't get this from any other guy\n"
)

In [None]:
#@title Generate
#@markdown Output wav file
output_file = '/content/generated.wav' #@param {type:"string"}
#@markdown Pick a "preset mode" to determine quality.
preset = "fast" #@param ["ultra_fast", "fast", "standard", "high_quality"]
#@markdown Select voice
voice = 'myself' #@param ["angie", "applejack", "cond_latent_example", "daniel", "deniro", "emma", "freeman", "geralt", "halle", "jlaw", "lj", "mol", "myself", "pat", "pat2", "rainbow", "snakes", "tim_reynolds", "tom", "train_atkins", "train_daws", "train_dotrice", "train_dreams", "train_empire", "train_grace", "train_kennard", "train_lescault", "train_mouse", "weaver", "william"]
#@markdown Or generate random voice
gen_random = False #@param {type:"boolean"}

# Only the conditioning inputs differ between the two modes, so pick them here
# and run the synthesis/save steps once instead of duplicating them per branch.
if gen_random:
  # Random voice: pass no samples and no latents to tts_with_preset.
  voice_samples, conditioning_latents = None, None
else:
  # Named voice: load its reference samples and/or precomputed latents.
  voice_samples, conditioning_latents = load_voice(voice)

gen = tts.tts_with_preset(text, voice_samples=voice_samples,
                          conditioning_latents=conditioning_latents,
                          preset=preset)
# Drop the leading dimension and move the tensor to the CPU before writing;
# 24000 is the sample rate (Hz) handed to torchaudio.save.
torchaudio.save(output_file, gen.squeeze(0).cpu(), 24000)

# Last expression of the cell: renders an audio player for the saved file.
IPython.display.Audio(output_file)

In [None]:
#@title Mix voices
#@markdown Output wav file.
output_file = '/content/generated2.wav' #@param {type:"string"}
#@markdown Pick a "preset mode" to determine quality.
preset = "fast" #@param ["ultra_fast", "fast", "standard", "high_quality"]
#@markdown Type comma-separated list of voices.
voices = 'angie,  myself,  deniro' #@param {type:"string"}

# Split the form value into voice names. Blank entries (from a trailing or
# doubled comma) are dropped so that an empty name is never passed to
# load_voices.
voices = [name.strip() for name in voices.split(',') if name.strip()]
if not voices:
  raise ValueError("Enter at least one voice name in the 'voices' field.")
print(voices)

# Load all listed voices together and synthesize the text with the result.
voice_samples, conditioning_latents = load_voices(voices)
gen = tts.tts_with_preset(text, voice_samples=voice_samples,
                          conditioning_latents=conditioning_latents,
                          preset=preset)
# 24000 is the sample rate (Hz) handed to torchaudio.save.
torchaudio.save(output_file, gen.squeeze(0).cpu(), 24000)

# Last expression of the cell: renders an audio player for the saved file.
IPython.display.Audio(output_file)

In [None]:
#@title Custom voice (not tested yet!)
#@markdown Optionally, you can use your own voice. Upload at least 2 audio clips. Each clip must be a WAV file, 6-10 seconds long.

#@markdown Custom voice name
CUSTOM_VOICE_NAME = "custom" #@param {type:"string"}
#@markdown Folder with the wav files
input_folder = '/content/drive/MyDrive/voice' #@param {type:"string"}
#@markdown Output wav file.
# NOTE(review): this default is the same path as the "Mix voices" cell's
# output, so running both with defaults overwrites the earlier result.
output_file = '/content/generated2.wav' #@param {type:"string"}
#@markdown Pick a "preset mode" to determine quality.
preset = "fast" #@param ["ultra_fast", "fast", "standard", "high_quality"]

# Standard-library helpers used only by this cell.
import glob
import os
import shutil

input_folder_mask = f"{input_folder}/*.wav"
# Relative path: assumes the working directory is still the tortoise-tts
# checkout entered by the Install cell.
custom_voice_folder = f"tortoise/voices/{CUSTOM_VOICE_NAME}/"

# Fail early with a clear message instead of letting voice loading fail later
# on an empty folder.
wav_files = sorted(glob.glob(input_folder_mask))
if not wav_files:
  raise FileNotFoundError(f"No .wav files found in {input_folder!r}")

# Copy the clips in Python rather than via `!mkdir` / `!cp`: this tolerates
# re-runs (the folder may already exist) and paths containing spaces.
os.makedirs(custom_voice_folder, exist_ok=True)
for wav_file in wav_files:
  shutil.copy(wav_file, custom_voice_folder)


# Generate speech with the custom voice.
voice_samples, conditioning_latents = load_voice(CUSTOM_VOICE_NAME)
gen = tts.tts_with_preset(text, voice_samples=voice_samples,
                          conditioning_latents=conditioning_latents,
                          preset=preset)
# 24000 is the sample rate (Hz) handed to torchaudio.save.
torchaudio.save(output_file, gen.squeeze(0).cpu(), 24000)
# Last expression of the cell: renders an audio player for the saved file.
IPython.display.Audio(output_file)