# Preprocessing and notebook setup

**Cloning the repository**

In [1]:
!git clone https://github.com/NVIDIA/tacotron2.git
%cd tacotron2

Cloning into 'tacotron2'...
remote: Enumerating objects: 406, done.[K
remote: Total 406 (delta 0), reused 0 (delta 0), pack-reused 406[K
Receiving objects: 100% (406/406), 2.69 MiB | 32.46 MiB/s, done.
Resolving deltas: 100% (205/205), done.
/content/tacotron2


**Downloading and extracting the LJSpeech dataset**

In [2]:
!wget https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2
!mkdir /content/tacotron2/ljspeech
!tar -xf /content/tacotron2/LJSpeech-1.1.tar.bz2 -C /content/tacotron2/ljspeech

--2020-12-07 08:35:50--  https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2
Resolving data.keithito.com (data.keithito.com)... 174.138.79.61
Connecting to data.keithito.com (data.keithito.com)|174.138.79.61|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2748572632 (2.6G) [application/octet-stream]
Saving to: ‘LJSpeech-1.1.tar.bz2’


2020-12-07 08:36:28 (69.3 MB/s) - ‘LJSpeech-1.1.tar.bz2’ saved [2748572632/2748572632]



**Installing requirements and downgrading to Tensorflow 1.x**

In [3]:
%tensorflow_version 1.x
!pip install unidecode

TensorFlow 1.x selected.
Collecting unidecode
[?25l  Downloading https://files.pythonhosted.org/packages/d0/42/d9edfed04228bacea2d824904cae367ee9efd05e6cce7ceaaedd0b0ad964/Unidecode-1.1.1-py2.py3-none-any.whl (238kB)
[K     |████████████████████████████████| 245kB 11.8MB/s 
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.1.1


**Importing the required libraries**

In [4]:
import matplotlib
%matplotlib inline
import matplotlib.pylab as plt

import IPython.display as ipd

import sys
sys.path.append('waveglow/')
import numpy as np
import torch

from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT, STFT
from audio_processing import griffin_lim
from train import load_model
from text import text_to_sequence

# Downloading and setting up model 

**Setting up plotting prerequisites**

In [15]:
def plot_data(data, figsize=(16, 4)):
    fig, axes = plt.subplots(1, len(data), figsize=figsize)
    for i in range(len(data)):
        axes[i].imshow(data[i], aspect='auto', origin='bottom', 
                       interpolation='none')
    
    plt.show()

**Setting up hparams**

In [6]:
hparams = create_hparams()
hparams.sampling_rate = 22050

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



**Downloading the checkpoints of Tacotron2**

In [7]:
import requests

def download_file_from_google_drive(id, destination):
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()

    response = session.get(URL, params = { 'id' : id }, stream = True)
    token = get_confirm_token(response)

    if token:
        params = { 'id' : id, 'confirm' : token }
        response = session.get(URL, params = params, stream = True)

    save_response_content(response, destination)    

def get_confirm_token(response):
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)

print('Downloading tacotron2 model from [https://drive.google.com/file/d/1c5ZTuT7J08wLUoVZ2KkUs_VdZuJ86ZqA/view]')
file_id = '1c5ZTuT7J08wLUoVZ2KkUs_VdZuJ86ZqA'
destination = '/content/tacotron2/tacotron2_statedict.pt'
download_file_from_google_drive(file_id, destination)
print('Model downloaded and saved in: ', destination)

Downloading tacotron2 model from [https://drive.google.com/file/d/1c5ZTuT7J08wLUoVZ2KkUs_VdZuJ86ZqA/view]
Model downloaded and saved in:  /content/tacotron2/tacotron2_statedict.pt


**Loading the checkpoints and the model**

In [8]:
checkpoint_path = "/content/tacotron2/tacotron2_statedict.pt"
model = load_model(hparams)
model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
_ = model.cuda().eval().half()

# Inference using Tacotron2

In [22]:
text = "I don't like our data science project."
sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
sequence = torch.autograd.Variable(torch.from_numpy(sequence)).cuda().long()

mel_outputs, mel_outputs_postnet, some_ret_val, alignments = model.inference(sequence)
plot_data((mel_outputs.float().data.cpu().numpy()[0],
           mel_outputs_postnet.float().data.cpu().numpy()[0],
           alignments.float().data.cpu().numpy()[0].T))

**Displaying results of generated data**

In [24]:
print('Sequence: ')
print(sequence)
print('------')
print('Mel-outputs: ')
print(mel_outputs)
print('------')
print('mel_outputs_postnet: ')
print(mel_outputs_postnet)
print('------')
#print('some_ret_val:')
#print(some_ret_val)
#print('------')
print('algnments: ')
print(alignments)
print('------')

Sequence: 
tensor([[46, 11, 41, 52, 51,  3, 57, 11, 49, 46, 48, 42, 11, 52, 58, 55, 11, 41,
         38, 57, 38, 11, 56, 40, 46, 42, 51, 40, 42, 11, 53, 55, 52, 47, 42, 40,
         57,  7]], device='cuda:0')
------
Mel-outputs: 
tensor([[[-7.7969, -7.0508, -6.5000,  ..., -9.0391, -8.7578, -8.1875],
         [-7.2734, -6.5625, -5.8203,  ..., -7.7617, -7.6406, -7.3672],
         [-6.7617, -5.7969, -4.7812,  ..., -6.6367, -6.6680, -6.7500],
         ...,
         [-9.9062, -9.7109, -9.4062,  ..., -9.4922, -9.5703, -9.6016],
         [-9.7891, -9.3984, -9.0703,  ..., -9.4922, -9.5781, -9.6172],
         [-9.7109, -9.1406, -8.6328,  ..., -9.5312, -9.6016, -9.6484]]],
       device='cuda:0', dtype=torch.float16, grad_fn=<TransposeBackward0>)
------
mel_outputs_postnet: 
tensor([[[-7.7617, -7.0469, -6.5078,  ..., -9.1016, -8.7891, -8.1562],
         [-7.2461, -6.5508, -5.8164,  ..., -7.8164, -7.6641, -7.3438],
         [-6.7422, -5.7930, -4.7773,  ..., -6.6914, -6.6953, -6.7227],
         ..