In [1]:
# Mount Google Drive into the Colab VM; every later cell reads from and writes to paths below this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point, force_remount=True)

Mounted at /content/drive


In [2]:
!pip install gdown

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
# '%cd' was occasionally not working properly on Colab, the following solution is from here: https://github.com/googlecolab/colabtools/issues/40

path = "/content/drive/MyDrive/assignment7_programming/" # ensure the 'assignment7_programming' directory is located under MyDrive (this is also needed for train.py to work!!)
!echo $path
%cd $path

/content/drive/MyDrive/assignment7_programming/
/content/drive/MyDrive/assignment7_programming


In [4]:
# Obtain the open-source corpus via gdown, from: https://github.com/Edresson/TTS-Portuguese-Corpus
# (this could also be done via wget, but since it's a Google Drive file, gdown is recommended).
import os

import gdown

# Named 'corpus_file_id' rather than 'id' so the Python builtin id() is not shadowed for the rest of the session.
corpus_file_id = "1ujlfIl7iN-0HJ2vAtbZGFbP43u-NBFav"

# 'path' ends with '/', so gdown stores the file inside that directory under its remote name.
# That name is the one the unzip step refers to.
corpus_zip_path = os.path.join(path, "TTS-Portuguese-Corpus_22khz.zip")

# The archive is 2.91 GB: skip the download when a previous run already fetched it.
if os.path.exists(corpus_zip_path):
    print(f"Archive already present, skipping download: {corpus_zip_path}")
    downloaded_path = corpus_zip_path
else:
    downloaded_path = gdown.download(id=corpus_file_id, output=path, quiet=False)

# Show where the archive lives (same value the bare gdown.download(...) call used to display).
downloaded_path

Downloading...
From: https://drive.google.com/uc?id=1ujlfIl7iN-0HJ2vAtbZGFbP43u-NBFav
To: /content/drive/MyDrive/assignment7_programming/TTS-Portuguese-Corpus_22khz.zip
100%|██████████| 2.91G/2.91G [00:29<00:00, 98.0MB/s]


'/content/drive/MyDrive/assignment7_programming/TTS-Portuguese-Corpus_22khz.zip'

In [5]:
# unzip the data (please note: this will take a while and you will be downloading 22khz worth of data onto your drive)
!unzip /content/drive/MyDrive/assignment7_programming/TTS-Portuguese-Corpus_22khz.zip
print("\n\n DONE UNZIPPING FILES!")

Archive:  /content/drive/MyDrive/assignment7_programming/TTS-Portuguese-Corpus_22khz.zip
   creating: TTS-Portuguese-Corpus_22khz/
   creating: TTS-Portuguese-Corpus_22khz/wavs/
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-6386.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-3298.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-4772.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-739.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-5654.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-2416.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-5264.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-3645.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-226.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-150.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-4701.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-5712.wav  
  inflating: TTS-Portuguese-Corpus_22khz/wavs/sample-2341

In [6]:
# Restructure the training metadata csv file in accordance with the settings in train.py:
# the result is a header-less, '|'-separated file with the columns text|wavfile.
import pandas as pd

path_dir_train = "/content/drive/MyDrive/assignment7_programming/TTS-Portuguese-Corpus_22khz/"
path_file_train = path_dir_train + "train_TTS-Portuguese_Corpus_metadata.csv"

# header=0 combined with names= replaces the file's first line with the column names given here.
train_df = pd.read_csv(path_file_train, sep="|", header=0, names=["wavfile", "filename", "text", "abbrev", "speaker"])

# Strip the "wavs/" directory prefix. regex=False makes this a literal replacement
# regardless of the pandas version's default for the 'regex' argument.
train_df["wavfile"] = train_df["wavfile"].str.replace("wavs/", "", regex=False)

# Ensure only wav files are in there. ".wav" is matched literally (as a regular expression the "."
# would match any character), and missing values count as non-matches.
is_wav = train_df["wavfile"].str.contains(".wav", regex=False, na=False)

# Keep only the two columns that are written out, in the order text, wavfile.
columns_titles = ["text", "wavfile"]
train_df = train_df.loc[is_wav, columns_titles]

# Get rid of the faulty row (index label 2905 of the file as read above).
# errors="ignore" keeps the cell runnable when that row has already been removed by the filter.
train_df = train_df.drop(index=[2905], errors="ignore")

# Write out new file; this is the metadata file used in train.py.
new_path_file_train = path_dir_train + "train_metadata.csv"
train_df.to_csv(new_path_file_train, index=False, header=False, sep="|")

train_df.head()

Unnamed: 0,text,wavfile
0,"Depois que foi atropelado, só atravessa na fai...",sample-631.wav
1,A cidade também tem uma instituição de ensino ...,sample-2757.wav
2,Também os astronautas depressa se juntaram às...,sample-5578.wav
3,"Nessa idade, começou a praticar balé.",sample-3712.wav
4,Um exemplo de conhecimento de terceiro tipo é...,sample-3434.wav


In [7]:
# create empty output directory for model output later on (filepath specified in train.py)
# it can take a while for the directory to show up in Drive
!mkdir output

In [8]:
!mkdir TTSdir

ttsdirpath = "/content/drive/MyDrive/assignment7_programming/TTSdir/"
!echo $ttsdirpath
%cd $ttsdirpath

/content/drive/MyDrive/assignment7_programming/TTSdir/
/content/drive/MyDrive/assignment7_programming/TTSdir


In [9]:
# clone TTS model into TTSdir
!git clone https://github.com/coqui-ai/TTS.git

Cloning into 'TTS'...
remote: Enumerating objects: 28016, done.[K
remote: Counting objects: 100% (267/267), done.[K
remote: Compressing objects: 100% (191/191), done.[K
remote: Total 28016 (delta 144), reused 168 (delta 75), pack-reused 27749[K
Receiving objects: 100% (28016/28016), 129.33 MiB | 13.25 MiB/s, done.
Resolving deltas: 100% (20371/20371), done.
Checking out files: 100% (510/510), done.


In [10]:
ttspath = "/content/drive/MyDrive/assignment7_programming/TTSdir/TTS/"
!echo $ttspath
%cd $ttspath

/content/drive/MyDrive/assignment7_programming/TTSdir/TTS/
/content/drive/MyDrive/assignment7_programming/TTSdir/TTS


In [11]:
!pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Ignoring numpy: markers 'python_version == "3.10"' don't match your environment
Ignoring numba: markers 'python_version == "3.10"' don't match your environment
Collecting cython==0.29.28
  Downloading Cython-0.29.28-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 34.3 MB/s 
Collecting librosa==0.8.0
  Downloading librosa-0.8.0.tar.gz (183 kB)
[K     |████████████████████████████████| 183 kB 67.2 MB/s 
[?25hCollecting numba==0.55.1
  Downloading numba-0.55.1-1-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 66.2 MB/s 
[?25hCollecting inflect==5.6.0
  Downloading inflect-5.6.0-py3-none-any.whl (33 kB)
Collecting anyascii
  Downloading anyascii-0.3.1-py3-none-any.whl (287 kB)
[K     |████████████████████████████████| 287 kB 65.6 M

In [12]:
!python ./setup.py develop

Compiling TTS/tts/utils/monotonic_align/core.pyx because it changed.
[1/1] Cythonizing TTS/tts/utils/monotonic_align/core.pyx
  % (opt, underscore_opt))
  % (opt, underscore_opt))
running develop
running egg_info
creating TTS.egg-info
writing TTS.egg-info/PKG-INFO
writing dependency_links to TTS.egg-info/dependency_links.txt
writing entry points to TTS.egg-info/entry_points.txt
writing requirements to TTS.egg-info/requires.txt
writing top-level names to TTS.egg-info/top_level.txt
writing manifest file 'TTS.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
no previously-included directories found matching 'tests*'
adding license file 'LICENSE.txt'
writing manifest file 'TTS.egg-info/SOURCES.txt'
running build_ext
building 'TTS.tts.utils.monotonic_align.core' extension
creating build
creating build/temp.linux-x86_64-3.7
creating build/temp.linux-x86_64-3.7/TTS
creating build/temp.linux-x86_64-3.7/TTS/tts
creating build/temp.linux-x86_64-3.7/TTS/tts/utils
creating build/temp.l

In [None]:
# Start training the model by running train.py as a separate Python process.
# NOTE: it only runs for a SINGLE EPOCH for demonstration purposes, as that takes less time
# (the config settings can be changed in train.py).
!python /content/drive/MyDrive/assignment7_programming/train.py

 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log10
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:True
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:None
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:20.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:45
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:10
 | > hop_length:256
 | > win_length:1024
 | > Found 3023 files in /content/drive/MyDrive/assignment7_programming/TTS-Portuguese-Corpus_22khz
 > Training Environment:
 | > Num. of CPUs: 2
 | > Num. of Torch Threads: 1
 | > Torch seed: 54321
 | > Torch CUDNN: True
 | > Torch CUDNN deterministic: Fals