In [None]:
import torch
import numpy as np
from scipy.io.wavfile import write

# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU. Both names stay at module level for the rest of the notebook.
cuda_is_available = torch.cuda.is_available()
DEVICE = torch.device("cuda" if cuda_is_available else "cpu")

def load_waveglow():
    """Load the ``nvidia_waveglow`` torch.hub model and prepare it for inference.

    The model is passed through its own ``remove_weightnorm`` method, moved to
    ``DEVICE`` and switched to eval mode.

    Returns:
        The prepared model object.
    """
    # NOTE(review): unlike load_tacotron2, no map_location is forwarded to the
    # hub entry point here -- confirm whether CPU-only hosts need it.
    vocoder = torch.hub.load("nvidia/DeepLearningExamples:torchhub", "nvidia_waveglow")
    # remove_weightnorm takes the model as an argument and its return value
    # replaces the model from here on.
    vocoder = vocoder.remove_weightnorm(vocoder).to(DEVICE)
    vocoder.eval()
    return vocoder


def load_tacotron2():
    """Load the ``nvidia_tacotron2`` torch.hub model and prepare it for inference.

    ``map_location=DEVICE`` is forwarded to the hub entry point; the model is
    then moved to ``DEVICE`` and switched to eval mode.

    Returns:
        The prepared model object.
    """
    synthesizer = torch.hub.load(
        "nvidia/DeepLearningExamples:torchhub",
        "nvidia_tacotron2",
        map_location=DEVICE,
    )
    synthesizer = synthesizer.to(DEVICE)
    synthesizer.eval()
    return synthesizer

# Build both models once at module level; the synthesis cell below reuses
# these names. Each loader goes through torch.hub.load.
tacotron2 = load_tacotron2()
waveglow = load_waveglow()

In [None]:
text = "hello world, I missed you"
rate = 22050  # sample rate (samples/sec) written into the WAV header

# Preprocessing: convert the text with the model's text_to_sequence helper,
# add a leading axis via [None, :], and move the result to DEVICE as int64.
# NOTE(review): this cell relies on tacotron2.text_to_sequence and on infer()
# returning four values -- confirm both against the hub revision in use.
sequence = torch.from_numpy(
    np.array(tacotron2.text_to_sequence(text, ["english_cleaners"]))[None, :]
).to(device=DEVICE, dtype=torch.int64)

# Inference: Tacotron2 yields the mel output (second of four return values),
# which WaveGlow turns into audio. Gradients are disabled for both calls.
with torch.no_grad():
    _, mel, _, _ = tacotron2.infer(sequence)
    audio = waveglow.infer(mel)

# Take the first item of the result and bring it to host memory as a NumPy array.
audio_numpy = audio[0].detach().cpu().numpy()

write("audio.wav", rate, audio_numpy)

# To listen inline in a notebook instead of opening the file:
# from IPython.display import Audio
# Audio(audio_numpy, rate=rate)
