<a href="https://colab.research.google.com/github/edrihan/audioAnalysis/blob/main/tortoise_tts_chess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#<font face="Trebuchet MS" size="6">Tortoise TTS<font color="#999" size="4">&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;</font><font color="#999" size="4">Text to spoken word audio</font><font color="#999" size="4">&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;</font><a href="https://github.com/olaviinha/NeuralTextToAudio" target="_blank"><font color="#999" size="4">Github</font></a>

- All file and directory paths should be relative to your Google Drive root (_My Drive_). E.g. `voice_audio` value should be `Audio/test-voice.wav`, if you have a directory called _Audio_ in your drive, and you want to use _test-voice.wav_ from that directory. Paths are case-sensitive.
- This notebook will attempt to prepare a coherent voice dataset from `voice_audio` input, but optimal `voice_audio` for coherent output should be a path to a WAV file of about 1 minute in duration, or a directory containing a total of about 1 minute of WAV files.
- In case `voice_audio` contents exceeds 1 minute considerably, random clips (from random file, or files depending on contents, if directory given) will be picked for voice cloning.

In [None]:
#@title #Setup
#@markdown This cell needs to be run only once. It will mount your Google Drive and setup prerequisites.<br>
#@markdown <small>Mounting Drive will enable this notebook to save outputs directly to your Drive. Otherwise you will need to copy/download them manually from this notebook.</small>

force_setup = False
repositories = ['https://github.com/neonbjb/tortoise-tts.git']
pip_packages = 'scipy transformers==4.19.0'
apt_packages = 'sox'
mount_drive = False #@param {type:"boolean"}
skip_setup = False #@ param {type:"boolean"}

# Download the repo from Github
import os
from google.colab import output
import warnings
warnings.filterwarnings('ignore')
%cd /content/

# inhagcutils
if not os.path.isfile('/content/inhagcutils.ipynb') and force_setup == False:
  !pip -q install import-ipynb {pip_packages}
  if apt_packages != '':
    !apt-get update && apt-get install {apt_packages}
  !curl -s -O https://raw.githubusercontent.com/olaviinha/inhagcutils/master/inhagcutils.ipynb
import import_ipynb
from inhagcutils import *

# Mount Drive
if mount_drive == True:
  if not os.path.isdir('/content/drive'):
    from google.colab import drive
    drive.mount('/content/drive')
    drive_root = '/content/drive/My Drive'
  if not os.path.isdir('/content/mydrive'):
    os.symlink('/content/drive/My Drive', '/content/mydrive')
    drive_root = '/content/mydrive/'
  drive_root_set = True
else:
  create_dirs(['/content/faux_drive'])
  drive_root = '/content/faux_drive/'

if len(repositories) > 0 and skip_setup == False:
  for repo in repositories:
    %cd /content/
    install_dir = fix_path('/content/'+path_leaf(repo).replace('.git', ''))
    repo = repo if '.git' in repo else repo+'.git'
    !git clone {repo}
    if os.path.isfile(install_dir+'requirements.txt'):
      !pip install -r {install_dir}/requirements.txt
    if os.path.isfile(install_dir+'setup.py') or os.path.isfile(install_dir+'setup.cfg'):
      !pip install -e {install_dir}

if len(repositories) == 1:
  %cd {install_dir}

dir_tmp = '/content/tmp/'
dir_tmp_corpus = '/content/tmp/corpus/'
dir_tmp_slices = '/content/tmp/slices/'
dir_tmp_clips = '/content/tmp/clips/'
dir_tmp_processed = '/content/tmp/processed/'
create_dirs([dir_tmp, dir_tmp_corpus, dir_tmp_slices, dir_tmp_clips, dir_tmp_processed])

import time, sys
from datetime import timedelta
import math

# Imports used through the rest of the notebook.
import torch
import torchaudio
import torch.nn as nn
import torch.nn.functional as F

import IPython
import librosa
import soundfile as sf

from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_audio, load_voice, load_voices

def slice_to_frames(audio_data, slice_duration, fade_in=0, fade_out=0, sr=44100):
  a_duration = librosa.get_duration(audio_data, sr=sr)
  clips = math.ceil(a_duration/slice_duration)
  frames = []
  for i in range(clips-1):
    if i > 0 and i < clips:
      start = i*slice_duration
      audio_clip = clip_audio(audio_data, start, slice_duration)
      if fade_in > 0 or fade_out > 0:
        audio_clip = fade_audio(audio_clip, fade_in, fade_out, sr=sr)
      frames.append(audio_clip)
  return frames

def clip_audio(audio_data, start, duration, sr=44100):
  xstart = librosa.time_to_samples(start, sr=sr)
  xduration = librosa.time_to_samples(start+duration, sr=sr)
  audio_data = audio_data[:, xstart:xduration]
  return audio_data

def fade_audio(audio_data, fade_in=0.05, fade_out=0.05, sr=44100):
  a_duration = librosa.get_duration(audio_data, sr=sr)
  if fade_in > 0:
    fade_in_to = librosa.time_to_samples(fade_in, sr=sr)
    in_y = audio_data[:, 0:fade_in_to]
    fade_ins = []
    for channel in in_y:
      fade = [ i/len(channel)*smp for i, smp in enumerate(channel) ]
      fade_ins.append(fade)
    fade_ins = np.array(fade_ins)
    tail_start = fade_in_to+1
    tail = audio_data[:, tail_start:]
    audio_data = np.concatenate([fade_ins, tail], axis=1)
  if fade_out > 0:
    fade_out_start = librosa.time_to_samples(a_duration-fade_out, sr=sr)
    out_y = audio_data[:, fade_out_start:]
    fade_outs = []
    for channel in out_y:
      fade = [ smp-(i/len(channel)*smp) for i, smp in enumerate(channel) ]
      fade_outs.append(fade)
    fade_outs = np.array(fade_outs)
    head_start = fade_out_start-1
    head = audio_data[:, :head_start]
    audio_data = np.concatenate([head, fade_outs], axis=1)
  return audio_data

def remove_silence(audio, window_size=0.2, threshold=0.1, save_as='', sr=44100):
  if type(audio) != np.ndarray:
    y, sr = librosa.load(audio, sr=None, mono=False)
  else:
    y = audio
  audio_slices = slice_to_frames(y, window_size, sr=sr)
  silence_removed_list = []
  for audio_slice in audio_slices:
    if max(audio_slice[0]) > threshold or max(audio_slice[1]) < -abs(threshold):
      silence_removed_list.append(audio_slice)
  silence_removed = np.concatenate(silence_removed_list, axis=1)
  if save_as != '':
    sf.write(save_as, silence_removed.T, sr)
    return save_as
  return silence_removed

import locale
def getpreferredencoding(do_setlocale = True):
    return "UTF-8"
locale.getpreferredencoding = getpreferredencoding

def get_audio_duration(file):
  y, sr = librosa.load(voice_file, sr=None, mono=True)
  return librosa.get_duration(y, sr=sr)

def get_dir_size(dir_path='.'):
  total_size = 0
  for dirpath, dirnames, filenames in os.walk(dir_path):
    for f in filenames:
      fp = os.path.join(dirpath, f)
      if not os.path.islink(fp):
        total_size += os.path.getsize(fp)
  return total_size

def chop_to_sentences(text):
  delimiter = '.'
  temp = [e+delimiter for e in text.split(delimiter) if e]
  sentences = []
  for sentence in temp:
    delimiter = '?'
    if delimiter in sentence:
      wtf = sentence.split(delimiter)
      for f in wtf:
        if f[-1] != '.' and f[-1] != '?' and f[-1] != '?':
          f = f+'?'
        if f != '':
          sentences.append(f.strip())
    elif sentence.strip() != '' and len(sentence.strip()) > 1:
      sentences.append(sentence.strip())
  return sentences

# This will download all the models used by Tortoise from the HuggingFace hub.
tts = TextToSpeech()

output.clear()
# !nvidia-smi
op(c.ok, 'Setup finished.', time=True)

[90m2023-08-28 01:27:41 [92mSetup finished.[0m


In [None]:
#!pip install --upgrade youtube-dl
!pip uninstall -y youtube_dl
!pip install git+https://github.com/ytdl-org/youtube-dl.git@master#egg=youtube_dl
import os
!pip install sox
#!pip install -U youtube-dl

Found existing installation: youtube-dl 2021.12.17
Uninstalling youtube-dl-2021.12.17:
  Successfully uninstalled youtube-dl-2021.12.17
Collecting youtube_dl
  Cloning https://github.com/ytdl-org/youtube-dl.git (to revision master) to /tmp/pip-install-t8diular/youtube-dl_789907be5d4a40a0ba4e452efd278dbc
  Running command git clone --filter=blob:none --quiet https://github.com/ytdl-org/youtube-dl.git /tmp/pip-install-t8diular/youtube-dl_789907be5d4a40a0ba4e452efd278dbc
  Resolved https://github.com/ytdl-org/youtube-dl.git to commit 86e3cf5e5849aefcc540c19bb5fa5ab7f470d1c1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: youtube_dl
  Building wheel for youtube_dl (setup.py) ... [?25l[?25hdone
  Created wheel for youtube_dl: filename=youtube_dl-2021.12.17-py2.py3-none-any.whl size=1939858 sha256=7968193dfee28bd0785757489a3dab9a74f382c756f1f791eadeab581852f6c5
  Stored in directory: /tmp/pip-ephem-wheel-cache-804nlnne/wheels/b8/03/62/9c414b89a2

In [None]:

voices_settings = {
    "sagan": [
        {
            'link': 'https://www.youtube.com/watch?v=wupToqz1e2g',
            'start_sec': 0,
            'end_sec': 17
         },
        {
            'link':'https://www.youtube.com/watch?v=nGanLUnjoPI',
            'start_sec': 60,
            'end_sec': 60 + 14,
        },
                {
            'link':'https://www.youtube.com/watch?v=UnURElCzGc0',
            'start_sec': 3,
            'end_sec': 17,
        },
        {
            'link':'https://www.youtube.com/watch?v=UnURElCzGc0',
            'start_sec': 3 * 60 + 26,
            'end_sec': 3 * 60 + 42,
        },
       #         {
       #     'link':'https://www.loc.gov/item/cosmos000110/',
       #     'start_sec': 0,
      #     'end_sec': 20,
      #  },




    ]


}

for voice, refs in voices_settings.items():
  for r, ref in enumerate(refs):

    #voice_path = '/content/tortoise-tts/tortoise/voices'
    voices_path = os.path.join(drive_root,"voices")
    voices_path = os.path.join(drive_root)
    voice_path = os.path.join(voices_path,voice)
    filename =  voice + '.mp4'
    filepath = os.path.join(voice_path,filename)
    chunkpath =  os.path.join(voice_path,str(r) + '.wav')
    command = f'mkdir {voices_path}; cd {voices_path} ; mkdir {voice} ; cd {voice_path};' + \
              f'youtube-dl -x --audio-format wav {ref["link"]} --output "{str(r)+"_complete"}.%(ext)s"'
    print('running command:')
    print(command)
    !{command}
    #Trim

    !rm -rf {chunkpath}
    command = f"sox {os.path.join(voice_path,str(r)+'_complete.wav')} {chunkpath} trim {ref['start_sec']} {ref['end_sec'] - ref['start_sec']}"
    print('running command:')
    print(command)
    !{command}
    !rm -rf {os.path.join(voice_path,str(r)+'_complete.wav')}




#!cd /content/faux_drive && youtube-dl --extract-audio --audio-format wav https://lotushelix.bandcamp.com/track/stranger-in-the-street --output "stranger.wav"
#!cd /content/faux_drive && youtube-dl --extract-audio --audio-format wav https://lotushelix.bandcamp.com/track/live-life-in-love --output "love.wav"
#!cd /content/faux_drive && youtube-dl --extract-audio --audio-format wav https://youtu.be/3u3JSEqNtlg --output "technique.wav"
#!cd /content/faux_drive && youtube-dl --extract-audio --audio-format wav https://www.youtube.com/watch?v=wupToqz1e2g --output "sagan.wav"


running command:
mkdir /content/faux_drive/; cd /content/faux_drive/ ; mkdir sagan ; cd /content/faux_drive/sagan;youtube-dl -x --audio-format wav https://www.youtube.com/watch?v=wupToqz1e2g --output "0_complete.%(ext)s"
mkdir: cannot create directory ‘/content/faux_drive/’: File exists
mkdir: cannot create directory ‘sagan’: File exists
[youtube] wupToqz1e2g: Downloading webpage
[dashsegments] Total fragments: 1
[download] Destination: 0_complete.m4a
[K[download] 100% of 3.26MiB in 00:00
[ffmpeg] Correcting container in "0_complete.m4a"
[ffmpeg] Destination: 0_complete.wav
Deleting original file 0_complete.m4a (pass -k to keep)
running command:
sox /content/faux_drive/sagan/0_complete.wav /content/faux_drive/sagan/0.wav trim 0 17
running command:
mkdir /content/faux_drive/; cd /content/faux_drive/ ; mkdir sagan ; cd /content/faux_drive/sagan;youtube-dl -x --audio-format wav https://www.youtube.com/watch?v=nGanLUnjoPI --output "1_complete.%(ext)s"
mkdir: cannot create directory ‘/cont

In [None]:
#@title # Generate spoken word audio
text = "pawn to ay 1. pawn to ay 2. pawn to ay 3. pawn to ay 4. pawn to ay 5. pawn to ay 6. pawn to ay 7. pawn to ay 8. pawn to bee 1. pawn to bee 2. pawn to bee 3. pawn to bee 4. pawn to bee 5. pawn to bee 6. pawn to bee 7. pawn to bee 8. pawn to sea 1. pawn to sea 2. pawn to sea 3. pawn to sea 4. pawn to sea 5. pawn to sea 6. pawn to sea 7. pawn to sea 8. pawn to dee 1. pawn to dee 2. pawn to dee 3. pawn to dee 4. pawn to dee 5. pawn to dee 6. pawn to dee 7. pawn to dee 8. pawn to ee 1. pawn to ee 2. pawn to ee 3. pawn to ee 4. pawn to ee 5. pawn to ee 6. pawn to ee 7. pawn to ee 8. pawn to eff 1. pawn to eff 2. pawn to eff 3. pawn to eff 4. pawn to eff 5. pawn to eff 6. pawn to eff 7. pawn to eff 8. pawn to jee 1. pawn to jee 2. pawn to jee 3. pawn to jee 4. pawn to jee 5. pawn to jee 6. pawn to jee 7. pawn to jee 8. pawn to aych 1. pawn to aych 2. pawn to aych 3. pawn to aych 4. pawn to aych 5. pawn to aych 6. pawn to aych 7. pawn to aych 8. rook to ay 1. rook to ay 2. rook to ay 3. rook to ay 4. rook to ay 5. rook to ay 6. rook to ay 7. rook to ay 8. rook to bee 1. rook to bee 2. rook to bee 3. rook to bee 4. rook to bee 5. rook to bee 6. rook to bee 7. rook to bee 8. rook to sea 1. rook to sea 2. rook to sea 3. rook to sea 4. rook to sea 5. rook to sea 6. rook to sea 7. rook to sea 8. rook to dee 1. rook to dee 2. rook to dee 3. rook to dee 4. rook to dee 5. rook to dee 6. rook to dee 7. rook to dee 8. rook to ee 1. rook to ee 2. rook to ee 3. rook to ee 4. rook to ee 5. rook to ee 6. rook to ee 7. rook to ee 8. rook to eff 1. rook to eff 2. rook to eff 3. rook to eff 4. rook to eff 5. rook to eff 6. rook to eff 7. rook to eff 8. rook to jee 1. rook to jee 2. rook to jee 3. rook to jee 4. rook to jee 5. rook to jee 6. rook to jee 7. rook to jee 8. rook to aych 1. rook to aych 2. rook to aych 3. rook to aych 4. rook to aych 5. rook to aych 6. rook to aych 7. rook to aych 8. knight to ay 1. knight to ay 2. knight to ay 3. knight to ay 4. knight to ay 5. knight to ay 6. knight to ay 7. knight to ay 8. knight to bee 1. knight to bee 2. knight to bee 3. knight to bee 4. knight to bee 5. knight to bee 6. knight to bee 7. knight to bee 8. knight to sea 1. knight to sea 2. knight to sea 3. knight to sea 4. knight to sea 5. knight to sea 6. knight to sea 7. knight to sea 8. knight to dee 1. knight to dee 2. knight to dee 3. knight to dee 4. knight to dee 5. knight to dee 6. knight to dee 7. knight to dee 8. knight to ee 1. knight to ee 2. knight to ee 3. knight to ee 4. knight to ee 5. knight to ee 6. knight to ee 7. knight to ee 8. knight to eff 1. knight to eff 2. knight to eff 3. knight to eff 4. knight to eff 5. knight to eff 6. knight to eff 7. knight to eff 8. knight to jee 1. knight to jee 2. knight to jee 3. knight to jee 4. knight to jee 5. knight to jee 6. knight to jee 7. knight to jee 8. knight to aych 1. knight to aych 2. knight to aych 3. knight to aych 4. knight to aych 5. knight to aych 6. knight to aych 7. knight to aych 8. bishop to ay 1. bishop to ay 2. bishop to ay 3. bishop to ay 4. bishop to ay 5. bishop to ay 6. bishop to ay 7. bishop to ay 8. bishop to bee 1. bishop to bee 2. bishop to bee 3. bishop to bee 4. bishop to bee 5. bishop to bee 6. bishop to bee 7. bishop to bee 8. bishop to sea 1. bishop to sea 2. bishop to sea 3. bishop to sea 4. bishop to sea 5. bishop to sea 6. bishop to sea 7. bishop to sea 8. bishop to dee 1. bishop to dee 2. bishop to dee 3. bishop to dee 4. bishop to dee 5. bishop to dee 6. bishop to dee 7. bishop to dee 8. bishop to ee 1. bishop to ee 2. bishop to ee 3. bishop to ee 4. bishop to ee 5. bishop to ee 6. bishop to ee 7. bishop to ee 8. bishop to eff 1. bishop to eff 2. bishop to eff 3. bishop to eff 4. bishop to eff 5. bishop to eff 6. bishop to eff 7. bishop to eff 8. bishop to jee 1. bishop to jee 2. bishop to jee 3. bishop to jee 4. bishop to jee 5. bishop to jee 6. bishop to jee 7. bishop to jee 8. bishop to aych 1. bishop to aych 2. bishop to aych 3. bishop to aych 4. bishop to aych 5. bishop to aych 6. bishop to aych 7. bishop to aych 8. king to ay 1. king to ay 2. king to ay 3. king to ay 4. king to ay 5. king to ay 6. king to ay 7. king to ay 8. king to bee 1. king to bee 2. king to bee 3. king to bee 4. king to bee 5. king to bee 6. king to bee 7. king to bee 8. king to sea 1. king to sea 2. king to sea 3. king to sea 4. king to sea 5. king to sea 6. king to sea 7. king to sea 8. king to dee 1. king to dee 2. king to dee 3. king to dee 4. king to dee 5. king to dee 6. king to dee 7. king to dee 8. king to ee 1. king to ee 2. king to ee 3. king to ee 4. king to ee 5. king to ee 6. king to ee 7. king to ee 8. king to eff 1. king to eff 2. king to eff 3. king to eff 4. king to eff 5. king to eff 6. king to eff 7. king to eff 8. king to jee 1. king to jee 2. king to jee 3. king to jee 4. king to jee 5. king to jee 6. king to jee 7. king to jee 8. king to aych 1. king to aych 2. king to aych 3. king to aych 4. king to aych 5. king to aych 6. king to aych 7. king to aych 8. queen to ay 1. queen to ay 2. queen to ay 3. queen to ay 4. queen to ay 5. queen to ay 6. queen to ay 7. queen to ay 8. queen to bee 1. queen to bee 2. queen to bee 3. queen to bee 4. queen to bee 5. queen to bee 6. queen to bee 7. queen to bee 8. queen to sea 1. queen to sea 2. queen to sea 3. queen to sea 4. queen to sea 5. queen to sea 6. queen to sea 7. queen to sea 8. queen to dee 1. queen to dee 2. queen to dee 3. queen to dee 4. queen to dee 5. queen to dee 6. queen to dee 7. queen to dee 8. queen to ee 1. queen to ee 2. queen to ee 3. queen to ee 4. queen to ee 5. queen to ee 6. queen to ee 7. queen to ee 8. queen to eff 1. queen to eff 2. queen to eff 3. queen to eff 4. queen to eff 5. queen to eff 6. queen to eff 7. queen to eff 8. queen to jee 1. queen to jee 2. queen to jee 3. queen to jee 4. queen to jee 5. queen to jee 6. queen to jee 7. queen to jee 8. queen to aych 1. queen to aych 2. queen to aych 3. queen to aych 4. queen to aych 5. queen to aych 6. queen to aych 7. queen to aych 8" #@param {type:"string"}
voice_audio = "sagan" #@param {type:"string"}
combo_voice = False #@ param {type:"boolean"}
preset = "high_quality" #@param ["standard", "fast", "ultra_fast", "high_quality"]
output_dir = "fullset" #@param {type:"string"}
end_session_when_done = False #@ param {type: "boolean"}

save_txt = True
timer_start = time.time()
uniq_id = gen_id()


slice_length = 12 # seconds per slice
use_slices = 5 # slices to use
optimal_samples_duration = slice_length * use_slices # total duration
sample_rate = 24000
#process this many sentences in one go
# @markdown try lowering this if you run out of VRAM:
chunk_sentences = 10 #@param {type:"integer", description:"If you run out of (v)RAM try lowering this"}
dir_byte_limit = 48000000
merge_sentences = True

op(c.title, 'Run ID:', uniq_id, time=True)
print()

voice_corpus = voice_audio
prompts = chop_to_sentences(text)

if chunk_sentences > 1:
  prompts = [''.join(prompts[i:i+chunk_sentences]) for i in range(0, len(prompts), chunk_sentences)]

clean_dirs([dir_tmp_corpus, dir_tmp_slices, dir_tmp_clips, dir_tmp_processed])

if os.path.isfile(drive_root+voice_corpus):
  clean_dirs([dir_tmp_corpus])
  shutil.copy(drive_root+voice_corpus, dir_tmp_corpus)
  voice_dirs = [dir_tmp_corpus]
else:
  if voice_corpus == 'voice_list':
    voice_dirs = [drive_root+x for x in voice_list]
  elif ',' in voice_corpus:
    voice_dirs = [drive_root+fix_path(x.strip()) for x in voice_corpus.split(',')]
  elif ';' in voice_corpus:
    voice_dirs = [drive_root+fix_path(x.strip()) for x in voice_corpus.split(';')]
  else:
    voice_dirs = [drive_root+fix_path(voice_corpus)]

# Output
if output_dir == '':
  if mount_drive == True:
    dir_out = dir_tmp
  else:
    dir_out = drive_root
else:
  if not os.path.isdir(drive_root+output_dir):
    os.mkdir(drive_root+output_dir)
  dir_out = drive_root+fix_path(output_dir)

total = len(voice_dirs * len(prompts))
use_voices = []

txt_file = dir_out+uniq_id+'.txt'
if save_txt: append_txt(txt_file, timestamp(human_readable=True)+' '+uniq_id+'\n\n'+text+'\n\n'+'combo_voice: '+str(combo_voice)+'\n'+'preset: '+preset+'\n'+'dir_out: '+dir_out+'\n\n')

for i, voice_dir in enumerate(voice_dirs, 1):
  if voice_dir == dir_tmp_corpus:
    voice_name = basename(voice_corpus)
  else:
    voice_name = path_leaf(voice_dir)

  use_voices.append(voice_name)
  new_voice_dir = '/content/tortoise-tts/tortoise/voices/'+voice_name+'/'
  if not os.path.isdir(new_voice_dir):
    os.mkdir(new_voice_dir)
  else:
    clean_dirs([new_voice_dir])
  voice_files = list_audio(voice_dir)

  random.shuffle(voice_files)

  if save_txt: append_txt(txt_file, voice_name+'\n'+'In: '+voice_dir)

  if len(voice_files) == 0:
    print()
    op(c.fail, 'Skipping '+voice_name+' - Reason: WAV files not found in dir:', voice_dir.replace(drive_root, ''), time=True)
    if save_txt: append_txt(txt_file, 'Out: - (no wav found, SKIP)\n')
  else:
    op(c.okb, 'Processing voice files...', time=True)
    bytes_collected = 0
    for voice_file in voice_files:
      voice_file = remove_silence(voice_file, window_size=2, threshold=0.1, save_as=dir_tmp_processed+path_leaf(voice_file))
      file_duration = get_audio_duration(voice_file)
      slice_file = dir_tmp_slices+path_leaf(voice_file)

      if file_duration > slice_length:
        !sox {sox_q} "{voice_file}" -r 22050 {slice_file} trim 0 {slice_length} : newfile : restart
      else:
        !sox {sox_q} "{voice_file}" -r 22050 {slice_file}

      clips = list_audio(dir_tmp_slices)

      short_clips = []
      long_clips = []
      for clip in clips:
        clip_duration = get_audio_duration(clip)
        if clip_duration >= slice_length:
          long_clips.append(clip)
        else:
          short_clips.append(clip)
        if (len(long_clips)*slice_length >= optimal_samples_duration):
          break

      if len(long_clips) >= use_slices:
        selected_clips = random.sample(long_clips, use_slices)
      else:
        selected_clips = clips

      if save_txt: append_txt(txt_file, 'Selected clips:')
      for clip in selected_clips:
        if save_txt: append_txt(txt_file, path_leaf(clip)+'\n')
        shutil.copy(clip, new_voice_dir)

      file_size = os.path.getsize(voice_file)
      bytes_collected += file_size
      if bytes_collected > dir_byte_limit:
        break

    merge_list = []
    for ii, text in enumerate(prompts, 1):

      ndx_info = str(i*ii)+'/'+str(total)+' '

      voice_samples = None
      conditioning_latents = None
      gen = None

      print()
      op(c.title, ndx_info+'Processing', voice_name, time=True)

      if combo_voice == False:
        op(c.title, ndx_info+'Synthesizing', text+'...', time=True)

        file_out = dir_out+uniq_id+'__'+voice_name+'_'+str(ii).zfill(3)+'_'+slug(text[:60])+'.wav'
        if save_txt: append_txt(txt_file, 'Out: '+file_out+'\n')
        voice_samples, conditioning_latents = load_voice(voice_name)
        gen = tts.tts_with_preset(text, voice_samples=voice_samples, conditioning_latents=conditioning_latents, preset=preset)
        torchaudio.save(file_out, gen.squeeze(0).cpu(), sample_rate)
        if os.path.isfile(file_out):
          op(c.ok, 'Saved', file_out.replace(drive_root, ''), time=True)
          merge_list.append(file_out)
        else:
          op(c.fail, 'Error saving', file_out.replace(drive_root, ''), time=True)

        del voice_samples
        del conditioning_latents
        del gen

      torch.cuda.empty_cache()
      import gc
      gc.collect()

    if merge_sentences == True:
      sox_input_list = ' '.join(merge_list)
      sox_merge_out = dir_out+uniq_id+'__'+voice_name+'_FULL.wav'
      !sox {sox_q} {sox_input_list} {sox_merge_out}

# if combo_voice == True:
#   for text in prompts:
#     print()
#     op(c.title, 'Synthesizing', text[:40]+'...', time=True)
#     file_out = dir_out+uniq_id+'__'+voice_name+'_'+slug(text[:60])+'.wav'
#     if save_txt == True:
#       append_txt(txt_file, 'Out: '+file_out+'\n')
#     voice_samples, conditioning_latents = load_voices(use_voices)
#     gen = tts.tts_with_preset(text, voice_samples=voice_samples, conditioning_latents=conditioning_latents, preset=preset)
#     torchaudio.save(file_out, gen.squeeze(0).cpu(), sample_rate)
#     # IPython.display.Audio(file_out)

#     del voice_samples
#     del conditioning_latents
#     del gen
#     # del tts
#     torch.cuda.empty_cache()
#     import gc
#     gc.collect()


timer_end = time.time()

print()

if save_txt: append_txt(txt_file, str(timedelta(seconds=timer_end-timer_start)) )
if save_txt: append_txt(txt_file, 'Finished at '+timestamp(human_readable=True))

op(c.okb, 'Elapsed', timedelta(seconds=timer_end-timer_start), time=True)
op(c.ok, 'FIN.')

if end_session_when_done is True: end_session()

[90m2023-08-28 04:04:31 [96mRun ID:[0m galfaf

[90m2023-08-28 04:04:31 [94mProcessing voice files...[0m
sox WARN trim: Last 1 position(s) not reached (audio shorter than expected).
sox WARN trim: Last 1 position(s) not reached (audio shorter than expected).

[90m2023-08-28 04:04:32 [96m1/39 Processing[0m sagan
[90m2023-08-28 04:04:32 [96m1/39 Synthesizing[0m pawn to ay 1.pawn to ay 2.pawn to ay 3.pawn to ay 4.pawn to ay 5.pawn to ay 6.pawn to ay 7.pawn to ay 8.pawn to bee 1.pawn to bee 2....
Generating autoregressive samples..


  6%|▋         | 1/16 [10:45<2:41:22, 645.52s/it]