## Tacotron 2 + HiFi-GAN inference code
Edit the **speakers** list (from which **checkpoint_path** is built) and the input text file (read into **texts**) to match yours, then run the entire notebook to generate Tacotron 2 mel-spectrogram outputs and synthesize audio from the generated mel-spectrograms using HiFi-GAN.

In [1]:
import warnings
# Silence every Python warning for the rest of the session. This keeps cell output
# short, but it also hides deprecation warnings raised by the imported libraries.
warnings.filterwarnings('ignore') 

In [2]:
import matplotlib
%matplotlib inline
import matplotlib.pylab as plt

import IPython.display as ipd

import sys
sys.path.append('waveglow/')
import numpy as np
import torch

from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT, STFT
from audio_processing import griffin_lim
from train import load_model
from text import text_to_sequence
from denoiser import Denoiser


In [4]:
# Build the project's default hyperparameter object, then override two values.
hparams = create_hparams()
hparams.sampling_rate = 22050  # presumably in Hz; should match the rate the checkpoints were trained at — TODO confirm
hparams.max_decoder_steps = 10000  # NOTE(review): looks like an upper bound on decoder iterations per utterance — confirm in hparams.py / model.py

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



#### Load models from checkpoints and generate mel-spectrograms

In [6]:
import scipy
import re
import os
from pydub import AudioSegment
# Step labels kept for reference; the loop below does not read them.
checkpoints = ['84000']
waveglows = ['400000']

# Each entry is (Tacotron 2 checkpoint file, WaveGlow model file), both located in ../Models/.
# A second pair that was previously listed here:
#   ('da_checkpoint_824800_done', 'da_waveglow_1516200')
speakers = [('jej_checkpoint_904500_done', 'jej_waveglow890k_done')]

# Only the first element (a short tag) is used below, as part of the output filename.
settings_groups = [('a', 0.02, 0.666)]

# One prompt per line. The context manager closes the file handle as soon as the
# lines are read instead of leaving it open for the lifetime of the kernel.
with open("../Samples/Work.txt", "r", encoding="utf8") as text_file:
    texts = text_file.readlines()

# Create the output directory once, up front, rather than re-checking it per sample.
output_dir = '../Samples/mel_outputs_postnet/'
os.makedirs(output_dir, exist_ok=True)

for speaker in speakers:
    checkpoint, wg = speaker

    # Tacotron 2: restore weights, move to GPU, switch to eval mode and half precision.
    checkpoint_path = "../Models/" + checkpoint
    model = load_model(hparams)
    model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
    _ = model.cuda().eval().half()

    # WaveGlow + denoiser are not used to produce the mel files in this cell; they are
    # still loaded so that `waveglow` / `denoiser` stay defined for any other cell that
    # relies on them.
    waveglow_path = '../Models/' + wg
    waveglow = torch.load(waveglow_path)['model']
    waveglow.cuda().eval().half()
    for k in waveglow.convinv:
        k.float()
    denoiser = Denoiser(waveglow)

    # `index` is the line number in the text file, so filenames stay stable even when
    # blank lines are skipped.
    for index, line in enumerate(texts):
        # Drop the trailing newline (and surrounding whitespace) so it is not encoded
        # as part of the prompt, and skip lines with no text at all.
        text = line.strip()
        if not text:
            continue

        for settings in settings_groups:
            sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
            # Tensors no longer need the deprecated Variable wrapper.
            sequence = torch.from_numpy(sequence).cuda().long()

            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
            mel_postnet = mel_outputs_postnet.detach().cpu().numpy()

            # <index>_<checkpoint>_wg<waveglow>_<first 30 word chars of the text>_<tag>.npy
            text_slug = re.sub(r'\W+', '', text)[:30]
            filename = output_dir + '{0:03d}_{1}_wg{2}_{3}_{4}.npy'.format(
                index, checkpoint, wg, text_slug, settings[0])
            np.save(filename, mel_postnet)

In [3]:
# Change the kernel's working directory to the hifi-gan checkout. The path is relative
# to the current working directory, so re-running this cell from inside hifi-gan will
# look for a nested hifi-gan directory.
%cd hifi-gan

E:\deepfakes\tacotron2-Offerman\hifi-gan


In [9]:
import glob, os
models_dir = "./pretrained/"
filelist = os.listdir(models_dir)
print(filelist)
for i, filname in enumerate(filelist):
    if os.path.isdir(models_dir+filname):
        output_dir = "../../Samples/hifi-gan_outputs/"+ filname
        #model_dir_files = glob.glob(models_dir+filname+"/generator_v*")
        checkpoint_file = (glob.glob(models_dir+filname+"/generator_v*") or [None])[0]     
        print(filname, output_dir, checkpoint_file)
        !python inference_e2e.py --input_mels_dir "../../Samples/mel_outputs_postnet/"  --output_dir $output_dir  --checkpoint_file $checkpoint_file

['LJ_FT_T2_V1', 'LJ_FT_T2_V2', 'LJ_FT_T2_V3', 'LJ_V1', 'LJ_V2', 'LJ_V3', 'UNIVERSAL_V1', 'VCTK_V1', 'VCTK_V2', 'VCTK_V3']
LJ_FT_T2_V1 ../../Samples/hifi-gan_outputs/LJ_FT_T2_V1 ./pretrained/LJ_FT_T2_V1\generator_v1
Initializing Inference Process..
Loading './pretrained/LJ_FT_T2_V1\generator_v1'
Complete.
Removing weight norm...
../../Samples/hifi-gan_outputs/LJ_FT_T2_V1\000_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Twasthenightbeforecoowedfreeze_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_FT_T2_V1\001_jej_checkpoint_904500_done_wgjej_waveglow890k_done_notaprogramwasworkingnoteventh_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_FT_T2_V1\002_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Theprogrammershungbytheirlapto_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_FT_T2_V1\003_jej_checkpoint_904500_done_wgjej_waveglow890k_done_withhopesthatamiraclesoonwould_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_FT_T2_V1\004_jej_checkpoint_904500_done_

Initializing Inference Process..
Loading './pretrained/LJ_FT_T2_V3\generator_v3'
Complete.
Removing weight norm...
../../Samples/hifi-gan_outputs/LJ_FT_T2_V3\000_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Twasthenightbeforecoowedfreeze_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_FT_T2_V3\001_jej_checkpoint_904500_done_wgjej_waveglow890k_done_notaprogramwasworkingnoteventh_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_FT_T2_V3\002_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Theprogrammershungbytheirlapto_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_FT_T2_V3\003_jej_checkpoint_904500_done_wgjej_waveglow890k_done_withhopesthatamiraclesoonwould_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_FT_T2_V3\004_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Theuserswerenestledallsnuginth_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_FT_T2_V3\005_jej_checkpoint_904500_done_wgjej_waveglow890k_done_whilevisionsofinquiriesdancedi_a_generated

Initializing Inference Process..
Loading './pretrained/LJ_V2\generator_v2'
Complete.
Removing weight norm...
../../Samples/hifi-gan_outputs/LJ_V2\000_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Twasthenightbeforecoowedfreeze_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_V2\001_jej_checkpoint_904500_done_wgjej_waveglow890k_done_notaprogramwasworkingnoteventh_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_V2\002_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Theprogrammershungbytheirlapto_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_V2\003_jej_checkpoint_904500_done_wgjej_waveglow890k_done_withhopesthatamiraclesoonwould_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_V2\004_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Theuserswerenestledallsnuginth_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ_V2\005_jej_checkpoint_904500_done_wgjej_waveglow890k_done_whilevisionsofinquiriesdancedi_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/LJ

Initializing Inference Process..
VCTK_V1 ../../Samples/hifi-gan_outputs/VCTK_V1 ./pretrained/VCTK_V1\generator_v1


Traceback (most recent call last):
  File "inference_e2e.py", line 89, in <module>
    main()
  File "inference_e2e.py", line 70, in main
    with open(config_file) as f:
FileNotFoundError: [Errno 2] No such file or directory: 'config.json'


Initializing Inference Process..
Loading './pretrained/VCTK_V1\generator_v1'
Complete.
Removing weight norm...
../../Samples/hifi-gan_outputs/VCTK_V1\000_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Twasthenightbeforecoowedfreeze_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/VCTK_V1\001_jej_checkpoint_904500_done_wgjej_waveglow890k_done_notaprogramwasworkingnoteventh_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/VCTK_V1\002_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Theprogrammershungbytheirlapto_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/VCTK_V1\003_jej_checkpoint_904500_done_wgjej_waveglow890k_done_withhopesthatamiraclesoonwould_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/VCTK_V1\004_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Theuserswerenestledallsnuginth_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/VCTK_V1\005_jej_checkpoint_904500_done_wgjej_waveglow890k_done_whilevisionsofinquiriesdancedi_a_generated_e2e.wav
../../Samples/hifi-

Initializing Inference Process..
Loading './pretrained/VCTK_V3\generator_v3'
Complete.
Removing weight norm...
../../Samples/hifi-gan_outputs/VCTK_V3\000_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Twasthenightbeforecoowedfreeze_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/VCTK_V3\001_jej_checkpoint_904500_done_wgjej_waveglow890k_done_notaprogramwasworkingnoteventh_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/VCTK_V3\002_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Theprogrammershungbytheirlapto_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/VCTK_V3\003_jej_checkpoint_904500_done_wgjej_waveglow890k_done_withhopesthatamiraclesoonwould_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/VCTK_V3\004_jej_checkpoint_904500_done_wgjej_waveglow890k_done_Theuserswerenestledallsnuginth_a_generated_e2e.wav
../../Samples/hifi-gan_outputs/VCTK_V3\005_jej_checkpoint_904500_done_wgjej_waveglow890k_done_whilevisionsofinquiriesdancedi_a_generated_e2e.wav
../../Samples/hifi-