In [None]:
!pip install librosa

In [2]:
import librosa

In [None]:
# Install the pinned dependency set, one package per pip invocation.
# NOTE(review): because each install runs on its own, a later package's
# requirements may replace an earlier pin (e.g. numpy) — verify the final
# versions with `pip freeze` and restart the runtime before importing.
!pip install numpy==1.13.3
!pip install inflect==0.2.5
!pip install librosa==0.6.0
!pip install scipy==1.0.0
!pip install Unidecode==1.0.22
!pip install tensorflow==1.15.2
!pip install tensorboardX==1.1

In [4]:
# Clone NVIDIA's Tacotron 2 repository into ./tacotron2 (default branch, no
# commit pinned).
!git clone https://github.com/NVIDIA/tacotron2.git

Cloning into 'tacotron2'...
remote: Enumerating objects: 406, done.
remote: Total 406 (delta 0), reused 0 (delta 0), pack-reused 406
Receiving objects: 100% (406/406), 2.69 MiB | 4.94 MiB/s, done.
Resolving deltas: 100% (205/205), done.


In [5]:
# Work from inside the cloned repository: later cells use paths relative to
# it ('./waveglow', './models/...').
%cd tacotron2

/content/tacotron2


In [6]:
!git submodule init; git submodule update

Submodule 'waveglow' (https://github.com/NVIDIA/waveglow) registered for path 'waveglow'
Cloning into '/content/tacotron2/waveglow'...
Submodule path 'waveglow': checked out '5bc2a53e20b3b533362f974cfa1ea0267ae1c2b1'


In [1]:
import sys

# Make the waveglow submodule importable. The path is relative, so it is
# resolved against the current working directory (expected to be the
# tacotron2 checkout). Guarded so that re-running the cell does not keep
# appending duplicate entries to sys.path.
if './waveglow' not in sys.path:
    sys.path.append('./waveglow')

In [2]:
from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT, STFT

from audio_processing import griffin_lim
from train import load_model
from text import text_to_sequence
from denoiser import Denoiser


In [3]:
def plot_data(data, figsize=(16, 4)):
    """Draw every item of ``data`` as an image, side by side in a single row.

    Args:
        data: Sized, indexable collection of image-like arrays; one subplot
            is created per element and the element is passed to ``imshow``.
        figsize: ``(width, height)`` forwarded to ``plt.subplots``.

    Returns:
        None. An empty ``data`` is a no-op (no figure is created).

    Note:
        ``plt`` (matplotlib's pyplot interface) must already be available in
        the notebook namespace; no visible cell imports it.
    """
    if len(data) == 0:
        # Nothing to draw; a zero-column subplot grid is not meaningful.
        return

    # squeeze=False keeps `axes` two-dimensional (1 x N) even when N == 1,
    # so a single panel can be indexed the same way as several.
    fig, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)
    for ax, image in zip(axes[0], data):
        # 'lower' places row 0 at the bottom of the plot; the previously used
        # 'bottom' is not one of imshow's accepted origin values.
        ax.imshow(image, aspect='auto', origin='lower',
                  interpolation='none')

**Set up hparams**

In [4]:
# Build the hyper-parameter object (no overrides passed) and set the sampling
# rate that the playback cells later hand to ipd.Audio as `rate`.
hparams = create_hparams()
hparams.sampling_rate = 22050

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



  import pandas.util.testing as tm


**Load model from checkpoint**

In [None]:
# Colab-only: mount the user's Google Drive at /content/drive.
# NOTE(review): the checkpoint cells below read from ./models rather than from
# the Drive mount — confirm how the model files are expected to get there.
from google.colab import drive
drive.mount('/content/drive')

In [6]:
import torch

In [7]:
# Restore the Tacotron 2 weights from the state-dict checkpoint, then prepare
# the network for half-precision inference on the GPU.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
checkpoint_path = "./models/tacotron2_statedict.pt"
model = load_model(hparams)
model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
model.cuda()   # move parameters to the GPU (in place)
model.eval()   # switch to evaluation mode
_ = model.half()  # cast to float16; bound to `_` so the cell prints nothing

In [None]:
# Display the loaded model's repr (bare last expression of the cell).
model

**Load WaveGlow for mel2audio synthesis and denoiser**

In [8]:
# Load the pickled WaveGlow vocoder (stored under the 'model' key) and put it
# on the GPU in evaluation mode and half precision.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
waveglow_path = './models/waveglow_256channels_universal_v5.pt'
waveglow = torch.load(waveglow_path)['model']
waveglow.cuda()
waveglow.eval()
waveglow.half()
# The modules in `waveglow.convinv` are cast back to float32 after the global
# half() cast (presumably for numerical stability — confirm).
for inv_conv in waveglow.convinv:
    inv_conv.float()
# Denoiser built from the loaded vocoder.
denoiser = Denoiser(waveglow)



**Prepare text input**

In [9]:
import numpy as np

In [10]:
# Encode the input text with the 'english_cleaners' pipeline and add a leading
# batch axis ([None, :]) so the model receives a batch of one sequence.
text = "Once upon a time there was a dear little girl who was loved by every one who looked at her, but most of all by her grandmother, and there was nothing that she would not have given to the child. Once she gave her a little cap of red velvet, which suited her so well that she would never wear anything else. So she was always called Little Red Riding Hood."
sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
# Tensors are used directly: torch.autograd.Variable is a deprecated wrapper
# that just returns the tensor. Move to the GPU and cast to int64.
sequence = torch.from_numpy(sequence).cuda().long()

In [None]:
# Display the encoded input tensor (bare last expression of the cell).
sequence

In [12]:
# Inference only: run under no_grad so no autograd graph is recorded and the
# intermediate activations are not kept alive on the GPU.
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)



**Synthesize audio from spectrogram using WaveGlow**

In [13]:
import IPython.display as ipd

In [14]:
# Longer passage; the cells below split it on "." and synthesize it one
# sentence at a time.
data = "Once upon a time there was a dear little girl who was loved by every one who looked at her, but most of all by her grandmother, and there was nothing that she would not have given to the child. Once she gave her a little cap of red velvet, which suited her so well that she would never wear anything else. So she was always called Little Red Riding Hood.One day her mother said to her, Come, Little Red Riding Hood, here is a piece of cake and a bottle of wine. Take them to your grandmother, she is ill and weak, and they will do her good."

In [15]:
# Display the full passage (bare last expression of the cell).
data

'Once upon a time there was a dear little girl who was loved by every one who looked at her, but most of all by her grandmother, and there was nothing that she would not have given to the child. Once she gave her a little cap of red velvet, which suited her so well that she would never wear anything else. So she was always called Little Red Riding Hood.One day her mother said to her, Come, Little Red Riding Hood, here is a piece of cake and a bottle of wine. Take them to your grandmother, she is ill and weak, and they will do her good.'

In [16]:
# Split the passage into sentences on ".". The separator is removed from each
# piece, and because `data` ends with "." the last element is an empty string:
# 5 sentences + 1 trailing '' = 6 items. Only sen[0]..sen[4] are used below.
sen = data.split(".")
len(sen)

6

In [17]:
# Synthesize and play sentence 1 of 5.
text = sen[0] + "."  # split(".") removed the period; put it back
print(text)
# Encode the text and add a leading batch axis; then move to the GPU as int64.
# (torch.autograd.Variable is deprecated — tensors are used directly.)
sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
sequence = torch.from_numpy(sequence).cuda().long()
# Inference only: run both Tacotron 2 and WaveGlow under no_grad so no
# autograd graph is recorded for the mel prediction either.
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
    audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)
# Last expression: renders an audio player for the first item of the batch.
ipd.Audio(audio[0].data.cpu().numpy(), rate=hparams.sampling_rate)

Once upon a time there was a dear little girl who was loved by every one who looked at her, but most of all by her grandmother, and there was nothing that she would not have given to the child.


In [18]:
# Synthesize and play sentence 2 of 5.
text = sen[1] + "."  # split(".") removed the period; put it back
print(text)
# Encode the text and add a leading batch axis; then move to the GPU as int64.
# (torch.autograd.Variable is deprecated — tensors are used directly.)
sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
sequence = torch.from_numpy(sequence).cuda().long()
# Inference only: run both Tacotron 2 and WaveGlow under no_grad so no
# autograd graph is recorded for the mel prediction either.
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
    audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)
# Last expression: renders an audio player for the first item of the batch.
ipd.Audio(audio[0].data.cpu().numpy(), rate=hparams.sampling_rate)

 Once she gave her a little cap of red velvet, which suited her so well that she would never wear anything else.


In [19]:
# Synthesize and play sentence 3 of 5.
text = sen[2] + "."  # split(".") removed the period; put it back
print(text)
# Encode the text and add a leading batch axis; then move to the GPU as int64.
# (torch.autograd.Variable is deprecated — tensors are used directly.)
sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
sequence = torch.from_numpy(sequence).cuda().long()
# Inference only: run both Tacotron 2 and WaveGlow under no_grad so no
# autograd graph is recorded for the mel prediction either.
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
    audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)
# Last expression: renders an audio player for the first item of the batch.
ipd.Audio(audio[0].data.cpu().numpy(), rate=hparams.sampling_rate)

 So she was always called Little Red Riding Hood.


In [20]:
# Synthesize and play sentence 4 of 5.
text = sen[3] + "."  # split(".") removed the period; put it back
print(text)
# Encode the text and add a leading batch axis; then move to the GPU as int64.
# (torch.autograd.Variable is deprecated — tensors are used directly.)
sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
sequence = torch.from_numpy(sequence).cuda().long()
# Inference only: run both Tacotron 2 and WaveGlow under no_grad so no
# autograd graph is recorded for the mel prediction either.
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
    audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)
# Last expression: renders an audio player for the first item of the batch.
ipd.Audio(audio[0].data.cpu().numpy(), rate=hparams.sampling_rate)

One day her mother said to her, Come, Little Red Riding Hood, here is a piece of cake and a bottle of wine.


In [21]:
# Synthesize and play sentence 5 of 5.
text = sen[4] + "."  # split(".") removed the period; put it back
print(text)
# Encode the text and add a leading batch axis; then move to the GPU as int64.
# (torch.autograd.Variable is deprecated — tensors are used directly.)
sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
sequence = torch.from_numpy(sequence).cuda().long()
# Inference only: run both Tacotron 2 and WaveGlow under no_grad so no
# autograd graph is recorded for the mel prediction either.
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
    audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)
# Last expression: renders an audio player for the first item of the batch.
ipd.Audio(audio[0].data.cpu().numpy(), rate=hparams.sampling_rate)

 Take them to your grandmother, she is ill and weak, and they will do her good.
