## Tacotron 2 inference code 
Edit the variables **checkpoint_path**, **waveglow_path** and **text** to match yours, then run the entire notebook to generate plots of mel outputs and alignments, and to synthesize audio from the generated mel-spectrogram using WaveGlow.

#### Import libraries and set up matplotlib

In [29]:
# Install the pinned matplotlib release; %pip installs into the environment of the running kernel.
%pip install matplotlib==2.1.0

Collecting matplotlib==2.1.0
  Using cached matplotlib-2.1.0.tar.gz (35.7 MB)
  Preparing metadata (setup.py) ... [?25l- \ | / - \ done
Building wheels for collected packages: matplotlib
  Building wheel for matplotlib (setup.py) ... [?25l- \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | done
[?25h  Created wheel for matplotlib: filename=matplotlib-2.1.0-cp38-cp38-linux_x86_64.whl size=11715348 sha256=9a86892194e9866f2340fce14db0e7cc2b707c6b687e6b3a9fb15f87186d390a
  Stored in directory: /home/azureuser/.cache/pip/wheels/da/64/57/cc4c142c177dc7f39299b031c4a8990be71029ad9665ccc586
Successfully built matplotlib
Installing collected packages: matplot

In [61]:
import matplotlib
%matplotlib inline
import matplotlib.pylab as plt

import IPython.display as ipd

import sys
sys.path.append('waveglow/')
import numpy as np
import torch

from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT, STFT
from audio_processing import griffin_lim
from train import load_model
from text import text_to_sequence
from denoiser import Denoiser

In [62]:
def plot_data(data, figsize=(16, 4)):
    """Show every array in ``data`` as an image, side by side in a single row.

    Args:
        data: Sequence of 2-D array-likes accepted by ``Axes.imshow``; one
            subplot is created per element.
        figsize: Figure size in inches, forwarded to ``plt.subplots``.
    """
    # squeeze=False always returns a 2-D array of Axes, so a one-element
    # ``data`` works too (plt.subplots would otherwise hand back a bare Axes
    # that cannot be indexed).
    fig, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)
    for ax, image in zip(axes[0], data):
        # 'lower' is the documented value for putting row 0 at the bottom;
        # the former 'bottom' spelling is rejected by newer matplotlib.
        ax.imshow(image, aspect='auto', origin='lower',
                  interpolation='none')

#### Set up hparams

In [63]:
# Build the hyperparameter object with the project's create_hparams().
hparams = create_hparams()
# Set the sampling rate explicitly; the playback cells pass hparams.sampling_rate as the audio rate.
hparams.sampling_rate = 22050

#### Load model from checkpoint

In [64]:
# Path to the trained Tacotron 2 checkpoint -- edit this to point at your own file.
checkpoint_path = "checkpoint_final"
# Create the model with the project's load_model(hparams).
model = load_model(hparams)
# Restore the weights stored under the checkpoint's 'state_dict' key.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
# Move the model to the GPU and switch to eval mode; assigning to `_` keeps
# the cell from displaying the returned value.
_ = model.cuda().eval()

#### Load WaveGlow for mel2audio synthesis and denoiser

In [65]:
# Path to the WaveGlow checkpoint file -- edit this to match your setup.
waveglow_path = 'waveglow_256channels_universal_v5.pt'
# The checkpoint is read with torch.load and the object stored under its 'model' key is used as the vocoder.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
waveglow = torch.load(waveglow_path)['model']
# Move WaveGlow to the GPU and put it in eval mode.
waveglow.cuda().eval()
# Call .float() on each entry of waveglow.convinv (casts it to float32).
for k in waveglow.convinv:
    k.float()
# Denoiser built from the loaded WaveGlow model; it is applied to the synthesized audio in the last cell.
denoiser = Denoiser(waveglow)

#### Prepare text input

In [66]:
# Sentence to synthesize -- edit this to your own text.
text = "dastanda Koroğlunun kim olduğundan və onun etdiyi mübarizələrdən bəhs edir."
# Encode the text with the 'basic_cleaners' pipeline and add a leading axis
# via [None, :] so the array is 2-D.
sequence = np.array(text_to_sequence(text, ['basic_cleaners']))[None, :]
# Move the ids to the GPU as int64. The deprecated torch.autograd.Variable
# wrapper is dropped: tensors work with autograd directly, so wrapping is a no-op.
sequence = torch.from_numpy(sequence).cuda().long()

#### Decode text input and plot results

In [67]:
# Inference only: eval() does not turn off autograd, so run under no_grad to
# avoid building a gradient graph (same as the WaveGlow synthesis cell).
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)



#### Synthesize audio from spectrogram using WaveGlow

In [68]:
# Run the vocoder on the post-net mel output without gradient tracking.
with torch.no_grad():
    # sigma is forwarded to waveglow.infer unchanged.
    # NOTE(review): presumably the sampling noise scale -- confirm against the WaveGlow code.
    audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)
# Play the first item of the result, moved to the CPU as a NumPy array, at the configured sampling rate.
ipd.Audio(audio[0].data.cpu().numpy(), rate=hparams.sampling_rate)

#### (Optional) Remove WaveGlow bias

In [45]:
# Pass the synthesized audio through the denoiser with strength 0.01 and keep
# index 0 along the second axis of the result.
audio_denoised = denoiser(audio, strength=0.01)[:, 0]
# Play the denoised audio at the configured sampling rate.
ipd.Audio(audio_denoised.cpu().numpy(), rate=hparams.sampling_rate) 