In [1]:
import tensorflow as tf
import torch
import onnx
from onnx_tf.backend import prepare








The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.











In [80]:
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import keras
from keras.utils import to_categorical

# replace with any text file containing full set of data
mozart_data = './mozart.txt'

with open(mozart_data, 'r') as file:
    text = file.read()
    
# get vocabulary set
words = sorted(tuple(set(text.split())))
n = len(words)

# create word-integer encoder/decoder
word2int = dict(zip(words, list(range(n))))
int2word = dict(zip(list(range(n)), words))

class WordLSTM(nn.ModuleList):
    
    def __init__(self, sequence_len, vocab_size, hidden_dim, batch_size):
        super(WordLSTM, self).__init__()
        
        # init the hyperparameters
        self.vocab_size = vocab_size
        self.sequence_len = sequence_len
        self.batch_size = batch_size
        self.hidden_dim = hidden_dim
        
        # first layer lstm cell
        self.lstm_1 = nn.LSTMCell(input_size=vocab_size, hidden_size=hidden_dim)
        
        # second layer lstm cell
        self.lstm_2 = nn.LSTMCell(input_size=hidden_dim, hidden_size=hidden_dim)
        
        # dropout layer
        self.dropout = nn.Dropout(p=0.5)
        
        # fully connected layer
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)
        
    # forward pass in training   
    def forward(self, x, hc):
        """
            accepts 2 arguments: 
            1. x: input of each batch 
                - shape 128*149 (batch_size*vocab_size)
            2. hc: tuple of init hidden, cell states 
                - each of shape 128*512 (batch_size*hidden_dim)
        """
        
        # create empty output seq
#         print(self.sequence_len)
        output_seq = torch.empty((self.sequence_len,
                                  self.batch_size,
                                  self.vocab_size))
        # if using gpu        
        output_seq = output_seq.to(device)
        
        # init hidden, cell states for lstm layers
        hc_1, hc_2 = hc, hc
        
        # for t-th word in every sequence 
        for t in range(self.sequence_len):
            
            # layer 1 lstm
            hc_1 = self.lstm_1(x[t], hc_1)
            h_1, c_1 = hc_1
            
            # layer 2 lstm
            hc_2 = self.lstm_2(h_1, hc_2)
            h_2, c_2 = hc_2
            
            # dropout and fully connected layer
            output_seq[t] = self.fc(self.dropout(h_2))
            
        return output_seq.view((self.sequence_len * self.batch_size, -1))
          
    def init_hidden(self):
        
        # initialize hidden, cell states for training
        # if using gpu
        return (torch.zeros(self.batch_size, self.hidden_dim).to(device),
                torch.zeros(self.batch_size, self.hidden_dim).to(device))
    
    def init_hidden_generator(self):
        
        # initialize hidden, cell states for prediction of 1 sequence
        # if using gpu
        return (torch.zeros(1, self.hidden_dim).to(device),
                torch.zeros(1, self.hidden_dim).to(device))
    
    def predict(self, seed_seq, top_k=5, pred_len=256):
        """
            accepts 3 arguments: 
            1. seed_seq: seed string sequence for prediction (prompt)
            2. top_k: top k words to sample prediction from
            3. pred_len: number of words to generate after the seed seq
        """
        
        # set evaluation mode
        self.eval()
        
        # split string into list of words
        seed_seq = seed_seq.split()
        
        # get seed sequence length
        seed_len = len(seed_seq)
        
        # create output sequence
        out_seq = np.empty(seed_len+pred_len)
        
        # append input seq to output seq
        out_seq[:seed_len] = np.array([word2int[word] for word in seed_seq])
 
        # init hidden, cell states for generation
        hc = self.init_hidden_generator()
        print(f"hc: {self.batch_size, self.hidden_dim}")
        hc_1, hc_2 = hc, hc
        
        # feed seed string into lstm
        # get the hidden state set up
        for word in seed_seq[:-1]:
            
            # encode starting word to one-hot encoding
            word = to_categorical(word2int[word], num_classes=self.vocab_size)

            # add batch dimension
            word = torch.from_numpy(word).unsqueeze(0)
            # if using gpu
            word = word.to(device) 
            
            # layer 1 lstm
            hc_1 = self.lstm_1(word, hc_1)
            h_1, c_1 = hc_1
            
            # layer 2 lstm
            hc_2 = self.lstm_2(h_1, hc_2)
            h_2, c_2 = hc_2
        
        word = seed_seq[-1]
        
        # encode starting word to one-hot encoding
        word = to_categorical(word2int[word], num_classes=self.vocab_size)

        # add batch dimension
        word = torch.from_numpy(word).unsqueeze(0)
        # if using gpu
        word = word.to(device) 

        # forward pass
        for t in range(pred_len):
            
            # layer 1 lstm
            hc_1 = self.lstm_1(word, hc_1)
            h_1, c_1 = hc_1
            
            # layer 2 lstm
            hc_2 = self.lstm_2(h_1, hc_2)
            h_2, c_2 = hc_2
            
            # fully connected layer without dropout (no need)
            output = self.fc(h_2)
            
            # software to get probabilities of output options
            output = F.softmax(output, dim=1)
            
            # get top k words and corresponding probabilities
            p, top_word = output.topk(top_k)
            # if using gpu           
            p = p.cpu()
            
            # sample from top k words to get next word
            p = p.detach().squeeze().numpy()
            top_word = torch.squeeze(top_word)
            
            word = np.random.choice(top_word, p = p/p.sum())
            
            # add word to sequence
            out_seq[seed_len+t] = word
            
            # encode predicted word to one-hot encoding for next step
            word = to_categorical(word, num_classes=self.vocab_size)
            word = torch.from_numpy(word).unsqueeze(0)
            # if using gpu
            word = word.to(device)
            
        return out_seq

## Saving torch model

In [81]:
# load model
model_path = './models/lstm20_ed'
model = torch.load(model_path, map_location='cpu')

In [82]:
# notes for 25 keys piano
octave = "C,C#,D,D#,E,F,F#,G,G#,A,A#,B,"
notes = (octave*2).split(",")
notes[-1] = "C"

# note index for 25 key piano
noteIdxs = [("00"+str(i))[-3:]  for i in range(25,50)]

noteDict = list(zip(notes, noteIdxs))

In [83]:
print(model)

WordLSTM(
  (lstm_1): LSTMCell(149, 512)
  (lstm_2): LSTMCell(512, 512)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=149, bias=True)
)


## Load torch model and convert to ONNX

In [84]:
def intToText(int_seq):
    text_seq = []
    for i in int_seq:
        text_seq.append("p"+str(i))
        text_seq.append("wait6")
    return " ".join(text_seq)

In [90]:
device = torch.device('cpu')
# dummy_input = intToText([33, 32, 38, 39, 40, 45])
dummy_input = torch.zeros((512,128,149))
# hc = (128,512)
# hc_1, hc_2 = hc, hc
# print(hc_1, hc_2)
# for t in range(512):
#     print(dummy_input[t], hc_1)
# print(dummy_input)
# print(dummy_input.size())
model.forward(dummy_input, (128, 512))
# model.eval()
# model.predict(dummy_input)

AttributeError: 'int' object has no attribute 'size'

In [36]:
ONNX_PATH = './models/lstm20_ed.onnx'
torch.onnx.export(model, dummy_input, ONNX_PATH, input_names=['input'], output_names=['output'])

512


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

## Convert to Tensorflow

In [13]:
from onnx_tf.backend import prepare
import os

# Load ONNX model and convert to TensorFlow format
model_onnx = onnx.load(ONNX_PATH)

tf_rep = prepare(model_onnx)

print(tf_rep)
# Export model as .pb file
# os.mkdir("tfmodel")
TF_PATH = './tfmodel/saved_model.pb'

tf_rep.export_graph(TF_PATH)

<onnx_tf.backend_rep.TensorflowRep object at 0x7ff054d9c128>


In [10]:

def load_pb(path_to_pb):
    with tf.gfile.GFile(path_to_pb, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')
        return graph

In [14]:

tf_graph = load_pb('./tfmodel/saved_model.pb')
sess = tf.Session(graph=tf_graph)
# imported = tf.keras.models.load_model('./tfmodel')


In [15]:
# Show tensor names in graph
for op in tf_graph.get_operations():
  print(op.values())


(<tf.Tensor '49:0' shape=(1,) dtype=int64>,)
(<tf.Tensor '50:0' shape=(1,) dtype=int64>,)
(<tf.Tensor '68:0' shape=(1, 24, 6) dtype=float32>,)
(<tf.Tensor '69:0' shape=(1, 24, 6) dtype=float32>,)
(<tf.Tensor '70:0' shape=(1, 48) dtype=float32>,)
(<tf.Tensor 'hidden2tag.bias:0' shape=(3,) dtype=float32>,)
(<tf.Tensor 'hidden2tag.weight:0' shape=(3, 6) dtype=float32>,)
(<tf.Tensor 'word_embeddings.weight:0' shape=(9, 6) dtype=float32>,)
(<tf.Tensor 'lstm_input:0' shape=(6,) dtype=int64>,)
(<tf.Tensor 'Gather_0/axis:0' shape=() dtype=int32>,)
(<tf.Tensor 'Gather_0:0' shape=(6, 6) dtype=float32>,)
(<tf.Tensor 'Constant_1:0' shape=(3,) dtype=int64>,)
(<tf.Tensor 'Shape:0' shape=(2,) dtype=int64>,)
(<tf.Tensor 'Const:0' shape=() dtype=int64>,)
(<tf.Tensor 'Equal:0' shape=(3,) dtype=bool>,)
(<tf.Tensor 'Where:0' shape=(?, 1) dtype=int64>,)
(<tf.Tensor 'Squeeze:0' shape=(?,) dtype=int64>,)
(<tf.Tensor 'GatherV2/axis:0' shape=() dtype=int32>,)
(<tf.Tensor 'GatherV2:0' shape=(?,) dtype=int64>,)


In [18]:
output_tensor = tf_graph.get_tensor_by_name('lstm_output:0')
input_tensor = tf_graph.get_tensor_by_name('lstm_input:0')

output = sess.run(output_tensor, feed_dict={input_tensor: dummy_input})
print(output)

[[-0.99727446 -1.4073101  -0.95110065]
 [-1.0539446  -1.3397752  -0.94280046]
 [-1.0231315  -1.3710017  -0.950158  ]
 [-1.008065   -1.4240679  -0.9305347 ]
 [-1.0618953  -1.4260795  -0.8820071 ]
 [-1.0299796  -1.4475528  -0.8968785 ]]
