<a href="https://colab.research.google.com/github/edrihan/chessvid/blob/main/tortoise_tts_chess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#<font face="Trebuchet MS" size="6">Tortoise TTS<font color="#999" size="4">&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;</font><font color="#999" size="4">Text to spoken word audio</font><font color="#999" size="4">&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;</font><a href="https://github.com/olaviinha/NeuralTextToAudio" target="_blank"><font color="#999" size="4">Github</font></a>

- All file and directory paths should be relative to your Google Drive root (_My Drive_). E.g. `voice_audio` value should be `Audio/test-voice.wav`, if you have a directory called _Audio_ in your drive, and you want to use _test-voice.wav_ from that directory. Paths are case-sensitive.
- This notebook will attempt to prepare a coherent voice dataset from `voice_audio` input, but optimal `voice_audio` for coherent output should be a path to a WAV file of about 1 minute in duration, or a directory containing a total of about 1 minute of WAV files.
- If the contents of `voice_audio` considerably exceed 1 minute, random clips (taken from a random file or, if a directory is given, from several files depending on its contents) will be picked for voice cloning.

In [1]:
#@title #Setup
#@markdown This cell needs to be run only once. It will mount your Google Drive and setup prerequisites.<br>
#@markdown <small>Mounting Drive will enable this notebook to save outputs directly to your Drive. Otherwise you will need to copy/download them manually from this notebook.</small>

# Switches and package lists consumed by the install steps further down this cell.
force_setup = False
# Git repositories that the install loop below clones into /content/.
repositories = ['https://github.com/neonbjb/tortoise-tts.git']
# Extra pip packages installed together with import-ipynb.
pip_packages = 'scipy transformers==4.19.0'
# apt packages to install; an empty string skips the apt step.
apt_packages = 'sox'
mount_drive = False #@param {type:"boolean"}
# NOTE(review): "#@ param" (with a space) is presumably not rendered as a Colab form field — confirm this is intentional.
skip_setup = False #@ param {type:"boolean"}

# Download the repo from Github
import os
from google.colab import output
import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
%cd /content/

# inhagcutils
if not os.path.isfile('/content/inhagcutils.ipynb') and force_setup == False:
  !pip -q install import-ipynb {pip_packages}
  if apt_packages != '':
    !apt-get update && apt-get install {apt_packages}
  !curl -s -O https://raw.githubusercontent.com/olaviinha/inhagcutils/master/inhagcutils.ipynb
import import_ipynb
from inhagcutils import *

# Mount Drive
# drive_root always ends with a slash because later code builds paths by plain
# string concatenation (drive_root + name).
if mount_drive == True:
  if not os.path.isdir('/content/drive'):
    from google.colab import drive
    drive.mount('/content/drive')
  # Space-free alias for "My Drive" so paths can be used in shell commands.
  if not os.path.isdir('/content/mydrive'):
    os.symlink('/content/drive/My Drive', '/content/mydrive')
  # Set unconditionally: previously drive_root stayed undefined (or lacked the
  # trailing slash) when the mount or the symlink already existed, e.g. on re-run.
  drive_root = '/content/mydrive/'
  drive_root_set = True
else:
  create_dirs(['/content/faux_drive'])
  drive_root = '/content/faux_drive/'

if len(repositories) > 0 and skip_setup == False:
  for repo in repositories:
    %cd /content/
    install_dir = fix_path('/content/'+path_leaf(repo).replace('.git', ''))
    repo = repo if '.git' in repo else repo+'.git'
    !git clone {repo}
    if os.path.isfile(install_dir+'requirements.txt'):
      !pip install -r {install_dir}/requirements.txt
    if os.path.isfile(install_dir+'setup.py') or os.path.isfile(install_dir+'setup.cfg'):
      !pip install -e {install_dir}

if len(repositories) == 1:
  %cd {install_dir}

# Scratch directories used by the generation cell. All paths end with a slash
# because file names are appended to them by string concatenation.
dir_tmp = '/content/tmp/'
dir_tmp_corpus = '/content/tmp/corpus/'        # receives a single voice file copied from the drive
dir_tmp_slices = '/content/tmp/slices/'        # sox writes the fixed-length slices here
dir_tmp_clips = '/content/tmp/clips/'
dir_tmp_processed = '/content/tmp/processed/'  # silence-removed copies of the voice files
create_dirs([dir_tmp, dir_tmp_corpus, dir_tmp_slices, dir_tmp_clips, dir_tmp_processed])

import time, sys
from datetime import timedelta
import math

# Imports used through the rest of the notebook.
import torch
import torchaudio
import torch.nn as nn
import torch.nn.functional as F

import IPython
import librosa
import soundfile as sf

from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_audio, load_voice, load_voices

def slice_to_frames(audio_data, slice_duration, fade_in=0, fade_out=0, sr=44100):
  """Cut audio into consecutive windows of slice_duration seconds.

  Args:
    audio_data: audio array; it is sliced as audio_data[:, a:b], so it is
      expected to be 2-D with samples on the last axis.
    slice_duration: window length in seconds.
    fade_in: fade-in length in seconds applied to each window (0 disables).
    fade_out: fade-out length in seconds applied to each window (0 disables).
    sr: sample rate of audio_data.

  Returns:
    List of windows. The first window and the last (possibly partial) window
    are not included.
  """
  # Keyword `y=` works on both old and new librosa (positional is rejected by >= 0.10).
  a_duration = librosa.get_duration(y=audio_data, sr=sr)
  clips = math.ceil(a_duration/slice_duration)
  frames = []
  # NOTE(review): window 0 is skipped on purpose to keep existing behaviour —
  # confirm that dropping the first window is really intended.
  for i in range(1, clips-1):
    # Forward sr: clip_audio otherwise converts seconds using its 44100 default,
    # which cuts at the wrong positions for any other sample rate.
    audio_clip = clip_audio(audio_data, i*slice_duration, slice_duration, sr=sr)
    if fade_in > 0 or fade_out > 0:
      audio_clip = fade_audio(audio_clip, fade_in, fade_out, sr=sr)
    frames.append(audio_clip)
  return frames

def clip_audio(audio_data, start, duration, sr=44100):
  """Return the part of audio_data from start to start+duration seconds.

  Seconds are converted to sample indices with librosa.time_to_samples at
  sample rate sr; the cut is taken along the second axis of audio_data.
  """
  first_sample, end_sample = (
    librosa.time_to_samples(moment, sr=sr) for moment in (start, start+duration)
  )
  return audio_data[:, first_sample:end_sample]

def fade_audio(audio_data, fade_in=0.05, fade_out=0.05, sr=44100):
  """Apply a linear fade-in and/or fade-out to an audio array.

  Args:
    audio_data: 2-D array indexed as [channel, sample].
    fade_in: fade-in length in seconds; values <= 0 disable it.
    fade_out: fade-out length in seconds; values <= 0 disable it.
    sr: sample rate used to convert seconds to samples.

  Returns:
    A float array with the same shape as audio_data (the input is not
    modified). When both fades are disabled the input is returned as is.
    Fades longer than the audio are clamped to its length.
  """
  if fade_in <= 0 and fade_out <= 0:
    return audio_data

  faded = np.array(audio_data, dtype=float)
  total = faded.shape[-1]

  if fade_in > 0:
    n_in = min(int(fade_in*sr), total)
    if n_in > 0:
      # Gain ramps 0 -> (n-1)/n over the first n_in samples. Scaling in place
      # keeps every sample; the previous splice dropped one at the boundary.
      faded[:, :n_in] *= np.arange(n_in)/n_in

  if fade_out > 0:
    n_out = min(int(fade_out*sr), total)
    if n_out > 0:
      # Gain ramps 1 -> 1/n over the last n_out samples.
      faded[:, total-n_out:] *= 1 - np.arange(n_out)/n_out

  return faded

def remove_silence(audio, window_size=0.2, threshold=0.1, save_as='', sr=44100):
  """Drop quiet windows from an audio signal.

  Args:
    audio: a numpy array of samples, or anything librosa.load accepts
      (in which case the file's own sample rate replaces sr).
    window_size: analysis window length in seconds.
    threshold: a window is kept when its peak absolute amplitude, over all
      channels, exceeds abs(threshold).
    save_as: optional output path; when given, the result is written there.
    sr: sample rate of audio when it is passed as an array.

  Returns:
    save_as when a path was given, otherwise the concatenated array of kept
    windows indexed as [channel, sample].

  Raises:
    ValueError: if no window exceeds the threshold.
  """
  if isinstance(audio, np.ndarray):
    y = audio
  else:
    y, sr = librosa.load(audio, sr=None, mono=False)
  # Mono input arrives 1-D; the slicing helpers index [channel, sample].
  y = np.atleast_2d(y)

  limit = abs(threshold)
  # Peak detection is symmetric and covers every channel. The previous test
  # looked only at positive peaks of channel 0 and required a second channel.
  loud_slices = [
    audio_slice
    for audio_slice in slice_to_frames(y, window_size, sr=sr)
    if audio_slice.size and np.max(np.abs(audio_slice)) > limit
  ]
  if not loud_slices:
    raise ValueError(
      'remove_silence: no audio window exceeds threshold '+str(threshold)
      +' (input too short or too quiet)'
    )

  silence_removed = np.concatenate(loud_slices, axis=1)
  if save_as != '':
    # soundfile expects [sample, channel], hence the transpose.
    sf.write(save_as, silence_removed.T, sr)
    return save_as
  return silence_removed

import locale
def getpreferredencoding(do_setlocale = True):
    """Report UTF-8 as the preferred encoding, ignoring do_setlocale."""
    encoding = "UTF-8"
    return encoding
# Replace the stdlib lookup so every later caller sees UTF-8.
locale.getpreferredencoding = getpreferredencoding

def get_audio_duration(file):
  """Return the duration in seconds of the audio file at path `file`.

  The file is loaded as mono at its native sample rate.
  """
  # Load the argument itself; the previous body read the unrelated global
  # `voice_file`, so the result did not depend on `file` at all.
  y, sr = librosa.load(file, sr=None, mono=True)
  # Keyword `y=` works on both old and new librosa (positional is rejected by >= 0.10).
  return librosa.get_duration(y=y, sr=sr)

def get_dir_size(dir_path='.'):
  """Return the total size in bytes of all files below dir_path.

  The tree is walked recursively; entries that are symbolic links are not
  counted.
  """
  return sum(
    os.path.getsize(candidate)
    for parent, _subdirs, names in os.walk(dir_path)
    for candidate in (os.path.join(parent, name) for name in names)
    if not os.path.islink(candidate)
  )

def chop_to_sentences(text):
  """Split text into sentences on '.' and '?'.

  Every returned sentence is stripped of surrounding whitespace and ends with
  '.' or '?'. Empty fragments and fragments consisting only of the closing
  period are skipped, so inputs such as "Really??" or "What?" no longer raise
  IndexError or produce a stray "." sentence.

  Args:
    text: the text to split.

  Returns:
    List of sentence strings in their original order.
  """
  sentences = []
  for chunk in text.split('.'):
    if not chunk:
      continue
    sentence = chunk+'.'
    if '?' in sentence:
      for fragment in sentence.split('?'):
        fragment = fragment.strip()
        # '' comes from consecutive or leading '?'; '.' is the period that was
        # re-attached after a sentence already closed by '?'.
        if not fragment or fragment == '.':
          continue
        if fragment[-1] not in '.?':
          fragment += '?'
        sentences.append(fragment)
    elif len(sentence.strip()) > 1:
      sentences.append(sentence.strip())
  return sentences

# This will download all the models used by Tortoise from the HuggingFace hub.
# The resulting `tts` object is the one the generation cell calls tts_with_preset on.
tts = TextToSpeech()

# Wipe the output printed so far by this cell before reporting completion.
output.clear()
# !nvidia-smi
op(c.ok, 'Setup finished.', time=True)

[90m2023-09-05 07:59:14 [92mSetup finished.[0m


In [2]:
import os

In [3]:
# Disabled cell: the code below is wrapped in a string literal so it does not
# run; the cell merely evaluates to that string.
'''
import zipfile
import soundfile as sf
from tqdm import tqdm
from google.colab import files
folder_path = "/content/faux_drive/fullset/"
zip_file = zipfile.ZipFile("/content/faux_drive/fullset/fimdom.zip", 'w')
for root, dirs, filez in os.walk(folder_path):
    for file in tqdm(filez,desc="zipping files"):
        file_path = os.path.join(root, file)
        extension = file_path.split('.')[-1]
        files.download(file_path)
        if extension in ('wav','mp3','flac'):

          # Specify the paths for the input WAV file and output FLAC file
          input_file = file_path
          output_flac = file_path.replace('.'+extension,'.flac')
          # Read the WAV file
          data, samplerate = sf.read(input_file)

          # Write the audio data to a FLAC file
          sf.write(output_flac, data, samplerate, format='flac')
          zip_file.write(output_flac, os.path.relpath(file_path, folder_path))
          !rm -rf {file_path}
        else:
          zip_file.write(file_path, os.path.relpath(file_path, folder_path))
          !rm -rf {file_path}
'''


'\nimport zipfile\nimport soundfile as sf\nfrom tqdm import tqdm\nfrom google.colab import files\nfolder_path = "/content/faux_drive/fullset/"\nzip_file = zipfile.ZipFile("/content/faux_drive/fullset/fimdom.zip", \'w\')\nfor root, dirs, filez in os.walk(folder_path):\n    for file in tqdm(filez,desc="zipping files"):\n        file_path = os.path.join(root, file)\n        extension = file_path.split(\'.\')[-1]\n        files.download(file_path)\n        if extension in (\'wav\',\'mp3\',\'flac\'):\n\n          # Specify the paths for the input WAV file and output FLAC file\n          input_file = file_path\n          output_flac = file_path.replace(\'.\'+extension,\'.flac\')\n          # Read the WAV file\n          data, samplerate = sf.read(input_file)\n\n          # Write the audio data to a FLAC file\n          sf.write(output_flac, data, samplerate, format=\'flac\')\n          zip_file.write(output_flac, os.path.relpath(file_path, folder_path))\n          !rm -rf {file_path}\n      

In [4]:
#@title #Audio tool installation

#!pip uninstall -y youtube_dl
#!pip install --upgrade youtube-dl
#!pip install {r'git+https://github.com/ytdl-org/youtube-dl.git@master#egg=youtube_dl'}

!pip install sox
!pip install pydub
from pydub import AudioSegment
print("youtube-dl --version:",end=' ')
!youtube-dl --version
print("yt-dlp --version:",end=' ')
!pip install https://files.pythonhosted.org/packages/5c/da/ef08140cea3392288a8f6cd60f6f12510a4c5776eead53e90151f139af19/yt_dlp-2023.7.6-py2.py3-none-any.whl
!yt-dlp --version

Collecting sox
  Downloading sox-1.4.1-py2.py3-none-any.whl (39 kB)
Installing collected packages: sox
Successfully installed sox-1.4.1
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
youtube-dl --version: /bin/bash: line 1: youtube-dl: command not found
yt-dlp --version: Collecting yt-dlp==2023.7.6
  Downloading yt_dlp-2023.7.6-py2.py3-none-any.whl (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mutagen (from yt-dlp==2023.7.6)
  Downloading mutagen-1.47.0-py3-none-any.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.4/194.4 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pycryptodomex (from yt-dlp==2023.7.6)
  Downloading pycryptodomex-3.18.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [5]:
#@title #Download audio and process
# Reference clips per voice name. Each entry gives a source 'link' plus the
# 'start_sec'/'end_sec' window (in seconds) that the loop below trims out of
# the downloaded audio with sox. Commented-out entries are disabled references.
voices_settings = {
    "sarl": [
        {
            'link': 'https://www.youtube.com/watch?v=wupToqz1e2g',
            'start_sec': 0.21,
            'end_sec': 9.29
         },
        {
            'link':'https://www.youtube.com/watch?v=nGanLUnjoPI',
            'start_sec': 60.56,
            'end_sec': 75.56,
        #},
        #       {
        #    'link':'https://www.youtube.com/watch?v=UnURElCzGc0',
        #    'start_sec': 3.372,
        #    'end_sec': 17,
        },
        #{
        #    'link':'https://www.youtube.com/watch?v=UnURElCzGc0',
        #    'start_sec': 3 * 60 + 26,
        #    'end_sec': 3 * 60 + 42,
        #},
       #         {
       #     'link':'https://www.loc.gov/item/cosmos000110/',
       #     'start_sec': 0,
      #     'end_sec': 20,
      #  },




    ]


}

for voice, refs in voices_settings.items():
  for r, ref in enumerate(refs):

    #voice_path = '/content/tortoise-tts/tortoise/voices'
    voices_path = os.path.join(drive_root,"voices")
    voices_path = os.path.join(drive_root)
    voice_path = os.path.join(voices_path,voice)
    filename =  voice + '.mp4'
    filepath = os.path.join(voice_path,filename)
    chunkpath =  os.path.join(voice_path,str(r) + '.wav')
    inputs_dir = os.path.join(drive_root,'voice_inputs')
    input_name = ref["link"].split("=")[-1]
    input_path = os.path.join(inputs_dir,input_name + '.wav')
    if not os.path.isfile(input_path):
      print(f'downloading {ref["link"]} because {input_path} is not a file')
      #command = f'mkdir {voices_path}; cd {voices_path} ; mkdir {voice} ; cd {voice_path};' + \
      #          f'youtube-dl -x --audio-format wav {ref["link"]} --output "{str(r)+"_complete"}.%(ext)s"'

      !mkdir {inputs_dir}

      command = f"cd {inputs_dir} && " + \
                f'yt-dlp -x --audio-format wav {ref["link"]} --output "{input_name}.%(ext)s"'

      print('running command:')
      print(command)

      !{command}

    else:
      print(f'skipped downloading {ref["link"]} because {input_path} exists')


    !mkdir {os.path.dirname(chunkpath)}

    #Trim
    !rm -rf {chunkpath}
    command = f"sox {input_path} {chunkpath} trim {ref['start_sec']} {ref['end_sec'] - ref['start_sec']}"
    print('running command:')
    print(command)
    !{command}

    #Normalize
    try:
      audio = AudioSegment.from_file(chunkpath)
    except FileNotFoundError as e:
        raise FileNotFoundError(str(e) + f'\nProbably failed to download files to {input_path}.\nIs your downloader working?')
    normalized_audio = audio.normalize()
    compressed_audio = normalized_audio.compress_dynamic_range(threshold=-5,ratio=2,attack=5,release=50)
    renormalized_audio = compressed_audio.normalize()
    renormalized_audio.export(chunkpath, format="wav")





#!cd /content/faux_drive && youtube-dl --extract-audio --audio-format wav https://lotushelix.bandcamp.com/track/stranger-in-the-street --output "stranger.wav"
#!cd /content/faux_drive && youtube-dl --extract-audio --audio-format wav https://lotushelix.bandcamp.com/track/live-life-in-love --output "love.wav"
#!cd /content/faux_drive && youtube-dl --extract-audio --audio-format wav https://youtu.be/3u3JSEqNtlg --output "technique.wav"
#!cd /content/faux_drive && youtube-dl --extract-audio --audio-format wav https://www.youtube.com/watch?v=wupToqz1e2g --output "sagan.wav"


downloading https://www.youtube.com/watch?v=wupToqz1e2g because /content/faux_drive/voice_inputs/wupToqz1e2g.wav is not a file
running command:
cd /content/faux_drive/voice_inputs && yt-dlp -x --audio-format wav https://www.youtube.com/watch?v=wupToqz1e2g --output "wupToqz1e2g.%(ext)s"
[youtube] Extracting URL: https://www.youtube.com/watch?v=wupToqz1e2g
[youtube] wupToqz1e2g: Downloading webpage
[youtube] wupToqz1e2g: Downloading ios player API JSON
[youtube] wupToqz1e2g: Downloading android player API JSON
[youtube] wupToqz1e2g: Downloading iframe API JS
[youtube] wupToqz1e2g: Downloading player 0a835141
[youtube] wupToqz1e2g: Downloading web player API JSON
[youtube] wupToqz1e2g: Downloading m3u8 information
[youtube] wupToqz1e2g: Downloading initial data API JSON
[info] wupToqz1e2g: Downloading 1 format(s): 251
[download] Destination: wupToqz1e2g.webm
[K[download] 100% of    2.78MiB in [1;37m00:00:00[0m at [0;32m51.86MiB/s[0m
[ExtractAudio] Destination: wupToqz1e2g.wav
Deletin

In [None]:
from google.colab import files
import zipfile
import time
from datetime import datetime
#@title # Generate spoken word audio
text = "bishop takes dee 2. bishop takes eff 4. bishop takes dee 8. bishop takes jee 5. bishop takes ee 3. bishop takes sea 7. bishop takes ee 7. bishop takes jee 8. bishop takes bee 1. king takes dee 6. bishop takes eff 8. bishop takes dee 4. king takes ay 8. king takes bee 6. bishop takes jee 3. bishop takes ee 4" #@param {type:"string"}
voice_audio = "sarl" #@param {type:"string"}
combo_voice = False #@param {type:"boolean"}
preset = "ultra_fast" #@param ["standard", "fast", "ultra_fast", "high_quality"]
output_dir = "fullset" #@param {type:"string"}
end_session_when_done = False #@param {type: "boolean"}
save_sentences_as_they_render = False #@param {type: "boolean"}
zip_sentences_and_download_all = True #@param {type: "boolean"}
# @markdown `download_partial_zip_after_minutes <= 0` will disable this behavior
download_partial_zip_after_minutes = 0 #@param {type:"number"}
# Negative values mean "disabled", same as 0.
if download_partial_zip_after_minutes < 0:
  download_partial_zip_after_minutes = 0

save_txt = True
timer_start = time.time()
# gen_id comes from the setup cell's helper import; if it is missing the
# setup cell has not been run in this session.
try:
  uniq_id = gen_id()
except NameError as err:
  raise Exception('Restart and run all') from err

# Base directory used for zip member names and for the partial zip files.
folder_path = os.path.join("/content/faux_drive","fullset")
slice_length = 12 # seconds per slice
use_slices = 5 # slices to use
optimal_samples_duration = slice_length * use_slices # total duration
sample_rate = 24000
#process this many sentences in one go
# @markdown try lowering `chunk_sentences` if you run out of VRAM. It worked with 8 with a high-RAM environment:
chunk_sentences = 1 #@param {type:"integer", description:"If you run out of (v)RAM try lowering this"}
# Stop collecting voice files once this many bytes have been processed.
dir_byte_limit = 48000000
merge_sentences = False #@param {type:"boolean"}


start_time = datetime.now()
if zip_sentences_and_download_all:
  # The archive is named after the run id and created in the current working directory.
  zip_name = uniq_id
  zip_file = zipfile.ZipFile(f'{zip_name}.zip','w')


if download_partial_zip_after_minutes:

  def _open_zip_part(part_num):
    """Create the archive for part `part_num`; return (name, path, ZipFile)."""
    name = f'{uniq_id}_{part_num}'
    path = os.path.join(folder_path, name + '.zip')
    # The output directory may not exist yet when the first part is opened.
    os.makedirs(folder_path, exist_ok=True)
    return name, path, zipfile.ZipFile(path, 'w')

  partial_zip_num = 0
  partial_zip_start_time = datetime.now()
  partial_zip_name, partial_zip_path, partial_zip_file = _open_zip_part(partial_zip_num)

  def close_zip_part():
    """Finalize the current partial archive and start its browser download."""
    print(f"closing {partial_zip_path}")
    # Finish last one
    partial_zip_file.close()
    print(f"trying to start download of {partial_zip_path}")
    files.download(partial_zip_path)

  def new_zip_part():
    """Close and download the current part, then open the next numbered part.

    Also resets partial_zip_start_time so the elapsed-minutes check starts over.
    """
    global partial_zip_num
    global partial_zip_start_time, partial_zip_name
    global partial_zip_path, partial_zip_file
    close_zip_part()
    #start next one
    partial_zip_num += 1
    partial_zip_start_time = datetime.now()
    partial_zip_name, partial_zip_path, partial_zip_file = _open_zip_part(partial_zip_num)
    print(f'created {partial_zip_path}')







op(c.title, 'Run ID:', uniq_id, time=True)
print()

voice_corpus = voice_audio
prompts = chop_to_sentences(text)

if chunk_sentences > 1:
  # Join with a space so neighbouring sentences are not glued together
  # ("one.two.") in the text handed to the synthesizer.
  prompts = [' '.join(prompts[i:i+chunk_sentences]) for i in range(0, len(prompts), chunk_sentences)]

clean_dirs([dir_tmp_corpus, dir_tmp_slices, dir_tmp_clips, dir_tmp_processed])

# Resolve voice_audio into a list of directories holding reference audio.
if os.path.isfile(drive_root+voice_corpus):
  # A single file: copy it into the (just cleaned) corpus directory.
  shutil.copy(drive_root+voice_corpus, dir_tmp_corpus)
  voice_dirs = [dir_tmp_corpus]
else:
  if voice_corpus == 'voice_list':
    # NOTE(review): voice_list is not defined in the visible cells — confirm where it comes from.
    voice_dirs = [drive_root+x for x in voice_list]
  elif ',' in voice_corpus:
    voice_dirs = [drive_root+fix_path(x.strip()) for x in voice_corpus.split(',')]
  elif ';' in voice_corpus:
    voice_dirs = [drive_root+fix_path(x.strip()) for x in voice_corpus.split(';')]
  else:
    voice_dirs = [drive_root+fix_path(voice_corpus)]

# Output
if output_dir == '':
  if mount_drive == True:
    dir_out = dir_tmp
  else:
    dir_out = drive_root
else:
  # makedirs also handles nested output paths and an already existing directory.
  os.makedirs(drive_root+output_dir, exist_ok=True)
  dir_out = drive_root+fix_path(output_dir)

# Number of (voice, prompt) pairs, used for progress messages.
total = len(voice_dirs) * len(prompts)
use_voices = []

# Run log: written next to the audio output and named after the run id.
txt_file = dir_out+uniq_id+'.txt'
if save_txt: append_txt(txt_file, timestamp(human_readable=True)+' '+uniq_id+'\n\n'+text+'\n\n'+'combo_voice: '+str(combo_voice)+'\n'+'preset: '+preset+'\n'+'dir_out: '+dir_out+'\n\n')

for i, voice_dir in enumerate(voice_dirs, 1):
  if voice_dir == dir_tmp_corpus:
    voice_name = basename(voice_corpus)
  else:
    voice_name = path_leaf(voice_dir)

  use_voices.append(voice_name)
  new_voice_dir = '/content/tortoise-tts/tortoise/voices/'+voice_name+'/'
  if not os.path.isdir(new_voice_dir):
    os.mkdir(new_voice_dir)
  else:
    clean_dirs([new_voice_dir])
  voice_files = list_audio(voice_dir)

  random.shuffle(voice_files)

  if save_txt: append_txt(txt_file, voice_name+'\n'+'In: '+voice_dir)

  if len(voice_files) == 0:
    print()
    op(c.fail, 'Skipping '+voice_name+' - Reason: WAV files not found in dir:', voice_dir.replace(drive_root, ''), time=True)
    if save_txt: append_txt(txt_file, 'Out: - (no wav found, SKIP)\n')
  else:
    op(c.okb, 'Processing voice files...', time=True)
    bytes_collected = 0
    for voice_file in voice_files:
      voice_file = remove_silence(voice_file, window_size=2, threshold=0.1, save_as=dir_tmp_processed+path_leaf(voice_file))
      file_duration = get_audio_duration(voice_file)
      slice_file = dir_tmp_slices+path_leaf(voice_file)

      if file_duration > slice_length:
        !sox {sox_q} "{voice_file}" -r 22050 {slice_file} trim 0 {slice_length} : newfile : restart
      else:
        !sox {sox_q} "{voice_file}" -r 22050 {slice_file}

      clips = list_audio(dir_tmp_slices)

      short_clips = []
      long_clips = []
      for clip in clips:
        clip_duration = get_audio_duration(clip)
        if clip_duration >= slice_length:
          long_clips.append(clip)
        else:
          short_clips.append(clip)
        if (len(long_clips)*slice_length >= optimal_samples_duration):
          break

      if len(long_clips) >= use_slices:
        selected_clips = random.sample(long_clips, use_slices)
      else:
        selected_clips = clips

      if save_txt: append_txt(txt_file, 'Selected clips:')
      for clip in selected_clips:
        if save_txt: append_txt(txt_file, path_leaf(clip)+'\n')
        shutil.copy(clip, new_voice_dir)

      file_size = os.path.getsize(voice_file)
      bytes_collected += file_size
      if bytes_collected > dir_byte_limit:
        break

    merge_list = []
    for ii, text in enumerate(prompts, 1):

      ndx_info = str(i*ii)+'/'+str(total)+' '

      voice_samples = None
      conditioning_latents = None
      gen = None

      print()
      op(c.title, ndx_info+'Processing', voice_name, time=True)

      if combo_voice == False:
        op(c.title, ndx_info+'Synthesizing', text+'...', time=True)

        file_out = dir_out+uniq_id+'__'+voice_name+'_'+str(ii).zfill(3)+'_'+slug(text[:60])+'.wav'
        if save_txt: append_txt(txt_file, 'Out: '+file_out+'\n')
        voice_samples, conditioning_latents = load_voice(voice_name)
        gen = tts.tts_with_preset(text, voice_samples=voice_samples, conditioning_latents=conditioning_latents, preset=preset)
        torchaudio.save(file_out, gen.squeeze(0).cpu(), sample_rate)
        if os.path.isfile(file_out):
          op(c.ok, 'Saved', file_out.replace(drive_root, ''), time=True)
          merge_list.append(file_out)
          if save_sentences_as_they_render:
            files.download(file_out)
          if zip_sentences_and_download_all:
            print(f'adding {file_out} to {zip_name}.zip')
            #zip_file.write(file_out,
            #  os.path.relpath(partial_zip_path, folder_path))
            print(file_out, folder_path,
                  )
            #input('on line 223')
            zip_file.write(file_out,os.path.relpath(file_out,
                                  folder_path))
          if bool(download_partial_zip_after_minutes):
            print(f'adding {file_out} to {partial_zip_file}.zip')
            partial_zip_file.write(file_out,
              os.path.relpath(folder_path, file_out)) #HERE
            elapsed = datetime.now() -partial_zip_start_time
            minutes = elapsed.total_seconds() / 60
            if ii == (len(prompts) - 1):
              close_zip_part()
            elif minutes > download_partial_zip_after_minutes:
              new_zip_part()

        else:
          op(c.fail, 'Error saving', file_out.replace(drive_root, ''), time=True)

        del voice_samples
        del conditioning_latents
        del gen

      torch.cuda.empty_cache()
      import gc
      gc.collect()

    if merge_sentences == True:
      sox_input_list = ' '.join(merge_list)
      sox_merge_out = dir_out+uniq_id+'__'+voice_name+'_FULL.wav'
      !sox {sox_q} {sox_input_list} {sox_merge_out}

# Combined-voice mode: every prompt is synthesized once, conditioned on all
# voices collected in use_voices.
if combo_voice == True:
  import gc
  for text in prompts:
    print()
    op(c.title, 'Synthesizing', text[:40]+'...', time=True)
    # NOTE(review): the file name carries the last processed voice_name and no
    # prompt index, so prompts sharing their first 60 characters overwrite
    # each other — confirm whether that is acceptable.
    file_out = dir_out+uniq_id+'__'+voice_name+'_'+slug(text[:60])+'.wav'
    if save_txt == True:
      append_txt(txt_file, 'Out: '+file_out+'\n')
    voice_samples, conditioning_latents = load_voices(use_voices)
    gen = tts.tts_with_preset(text, voice_samples=voice_samples, conditioning_latents=conditioning_latents, preset=preset)
    torchaudio.save(file_out, gen.squeeze(0).cpu(), sample_rate)
    if save_sentences_as_they_render:
      files.download(file_out)
    # Same as the single-voice path: rendered sentences belong in the run's zip.
    if zip_sentences_and_download_all:
      print(f'adding {file_out} to {zip_name}.zip')
      zip_file.write(file_out, os.path.relpath(file_out, folder_path))

    # IPython.display.Audio(file_out)

    # `tts` is deliberately kept: deleting it inside the loop made the second
    # prompt (and any re-run of this cell) fail with a NameError.
    del voice_samples
    del conditioning_latents
    del gen
    torch.cuda.empty_cache()
    gc.collect()


timer_end = time.time()

print()

if save_txt: append_txt(txt_file, str(timedelta(seconds=timer_end-timer_start)) )
if save_txt: append_txt(txt_file, 'Finished at '+timestamp(human_readable=True))
if zip_sentences_and_download_all:
  # The run log only exists when something was appended to it.
  if os.path.isfile(txt_file):
    zip_file.write(txt_file,os.path.relpath(txt_file,folder_path))
  # Closing writes the archive's central directory; without it the
  # downloaded zip is truncated and cannot be opened.
  zip_file.close()
  # zip_name is only defined when zipping is enabled, so the download lives
  # inside this branch.
  files.download(f'{zip_name}.zip')

op(c.okb, 'Elapsed', timedelta(seconds=timer_end-timer_start), time=True)
op(c.ok, 'FIN.')

if end_session_when_done is True: end_session()

[90m2023-09-05 08:17:06 [96mRun ID:[0m lubten

[90m2023-09-05 08:17:07 [94mProcessing voice files...[0m

[90m2023-09-05 08:17:07 [96m1/16 Processing[0m sarl
[90m2023-09-05 08:17:07 [96m1/16 Synthesizing[0m bishop takes dee 2....
Generating autoregressive samples..


  0%|          | 0/1 [00:00<?, ?it/s]