In [1]:
import tensorflow as tf
import torch
import onnx
from onnx_tf.backend import prepare




The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.















In [2]:
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import keras
from keras.utils import to_categorical

# replace with any text file containing full set of data
mozart_data = './mozart.txt'

with open(mozart_data, 'r') as file:
    text = file.read()
    
# get vocabulary set
words = sorted(tuple(set(text.split())))
n = len(words)

# create word-integer encoder/decoder
word2int = dict(zip(words, list(range(n))))
int2word = dict(zip(list(range(n)), words))

class WordLSTM(nn.ModuleList):
    
    def __init__(self, sequence_len, vocab_size, hidden_dim, batch_size):
        super(WordLSTM, self).__init__()
        
        # init the hyperparameters
        self.vocab_size = vocab_size
        self.sequence_len = sequence_len
        self.batch_size = batch_size
        self.hidden_dim = hidden_dim
        
        # first layer lstm cell
        self.lstm_1 = nn.LSTMCell(input_size=vocab_size, hidden_size=hidden_dim)
        
        # second layer lstm cell
        self.lstm_2 = nn.LSTMCell(input_size=hidden_dim, hidden_size=hidden_dim)
        
        # dropout layer
        self.dropout = nn.Dropout(p=0.5)
        
        # fully connected layer
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)
        
    # forward pass in training   
    def forward(self, x, hc):
        """
            accepts 2 arguments: 
            1. x: input of each batch 
                - shape 128*149 (batch_size*vocab_size)
            2. hc: tuple of init hidden, cell states 
                - each of shape 128*512 (batch_size*hidden_dim)
        """
        
        # create empty output seq
        output_seq = torch.empty((self.sequence_len,
                                  self.batch_size,
                                  self.vocab_size))
        # if using gpu        
        output_seq = output_seq.to(device)
        
        # init hidden, cell states for lstm layers
        hc_1, hc_2 = hc, hc
        
        # for t-th word in every sequence 
        for t in range(self.sequence_len):
            # layer 1 lstm
            hc_1 = self.lstm_1(x[t], hc_1)
            h_1, c_1 = hc_1
            
            # layer 2 lstm
            hc_2 = self.lstm_2(h_1, hc_2)
            h_2, c_2 = hc_2
            
            # dropout and fully connected layer
            output_seq[t] = self.fc(self.dropout(h_2))
            
        return output_seq.view((self.sequence_len * self.batch_size, -1))
          
    def init_hidden(self):
        
        # initialize hidden, cell states for training
        # if using gpu
        return (torch.zeros(self.batch_size, self.hidden_dim).to(device),
                torch.zeros(self.batch_size, self.hidden_dim).to(device))
    
    def init_hidden_generator(self):
        
        # initialize hidden, cell states for prediction of 1 sequence
        # if using gpu
        return (torch.zeros(1, self.hidden_dim).to(device),
                torch.zeros(1, self.hidden_dim).to(device))
    
    def predict(self, seed_seq, top_k=5, pred_len=256):
        """
            accepts 3 arguments: 
            1. seed_seq: seed string sequence for prediction (prompt)
            2. top_k: top k words to sample prediction from
            3. pred_len: number of words to generate after the seed seq
        """
        
        # set evaluation mode
        self.eval()
        
        # split string into list of words
        seed_seq = seed_seq.split()
        
        # get seed sequence length
        seed_len = len(seed_seq)
        
        # create output sequence
        out_seq = np.empty(seed_len+pred_len)
        
        # append input seq to output seq
        out_seq[:seed_len] = np.array([word2int[word] for word in seed_seq])
 
        # init hidden, cell states for generation
        hc = self.init_hidden_generator()
        hc_1, hc_2 = hc, hc
        
        # feed seed string into lstm
        # get the hidden state set up
        for word in seed_seq[:-1]:
            
            # encode starting word to one-hot encoding
            word = to_categorical(word2int[word], num_classes=self.vocab_size)
            # add batch dimension
            word = torch.from_numpy(word).unsqueeze(0)
            # if using gpu
            word = word.to(device) 
            
            # layer 1 lstm
            hc_1 = self.lstm_1(word, hc_1)
            h_1, c_1 = hc_1
            
            # layer 2 lstm
            hc_2 = self.lstm_2(h_1, hc_2)
            h_2, c_2 = hc_2
        
        word = seed_seq[-1]
        
        # encode starting word to one-hot encoding
        word = to_categorical(word2int[word], num_classes=self.vocab_size)

        # add batch dimension
        word = torch.from_numpy(word).unsqueeze(0)
        # if using gpu
        word = word.to(device) 

        # forward pass
        for t in range(pred_len):
            
            # layer 1 lstm
            print(word)
            hc_1 = self.lstm_1(word, hc_1)
            h_1, c_1 = hc_1
            
            # layer 2 lstm
            hc_2 = self.lstm_2(h_1, hc_2)
            h_2, c_2 = hc_2
            
            # fully connected layer without dropout (no need)
            output = self.fc(h_2)
            
            # software to get probabilities of output options
            output = F.softmax(output, dim=1)
            
            # get top k words and corresponding probabilities
            p, top_word = output.topk(top_k)
            # if using gpu           
            p = p.cpu()
            
            # sample from top k words to get next word
            p = p.detach().squeeze().numpy()
            top_word = torch.squeeze(top_word)
            
            word = np.random.choice(top_word, p = p/p.sum())
            
            # add word to sequence
            out_seq[seed_len+t] = word
            
            # encode predicted word to one-hot encoding for next step
            word = to_categorical(word, num_classes=self.vocab_size)
            word = torch.from_numpy(word).unsqueeze(0)
            # if using gpu
            word = word.to(device)
            
        return out_seq

Using TensorFlow backend.


##  Loac saved torch model

In [23]:
# notes for 25 keys piano
octave = "C,C#,D,D#,E,F,F#,G,G#,A,A#,B,"
notes = (octave*2).split(",")
notes[-1] = "C"

# note index for 25 key piano
noteIdxs = [("00"+str(i))[-3:]  for i in range(25,50)]

noteDict = list(zip(notes, noteIdxs))

In [24]:
# load model
model_path = './models/lstm20_ed'
model = torch.load(model_path, map_location='cpu')

In [55]:
print(model)
print(model.fc.weight)

WordLSTM(
  (lstm_1): LSTMCell(149, 512)
  (lstm_2): LSTMCell(512, 512)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=149, bias=True)
)
Parameter containing:
tensor([[ 0.2340,  0.2038,  0.4043,  ..., -0.0977, -0.2485,  0.0443],
        [ 0.2057,  0.2226,  0.4094,  ..., -0.1921, -0.1278,  0.0423],
        [ 0.1339,  0.1674,  0.0608,  ..., -0.2454,  0.0240,  0.0481],
        ...,
        [ 0.4821,  0.1234, -0.0768,  ...,  0.1921,  0.0132, -0.1159],
        [ 0.7550, -0.0492, -0.1098,  ...,  0.1216, -0.0119, -0.2602],
        [ 0.2744, -0.2951,  0.0047,  ...,  0.0683, -0.1170, -0.3829]],
       requires_grad=True)


## Load torch model and convert to ONNX

In [26]:
def intToText(int_seq):
    text_seq = []
    for i in int_seq:
        text_seq.append("p"+str(i))
        text_seq.append("wait6")
    return " ".join(text_seq)

In [27]:
device = torch.device('cpu')
dummy_input = intToText([33, 32, 38, 39, 40, 45])
# print(dummy_input)
# dummy_input = torch.zeros((512,128,149))

# hc = model.init_hidden()
# print(dummy_input[0][0].shape)
# for t in range(512):
#     print(dummy_input[t], (128, 512))
#     hc_1 = self.lstm_1(x[t], hc_1)
model.eval()
# model.forward(dummy_input, hc)

model.predict(dummy_input)

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 1., 0., 0., 0.]])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0.]])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0

array([ 89., 145.,  88., 145.,  94., 145.,  95., 145.,  97., 145., 102.,
       145.,  65.,  75., 144.,   3., 124.,  70., 135.,  35., 124.,  95.,
       135.,   8.,  33., 124.,  78., 145.,  78., 144.,  16., 124.,  75.,
        91., 144.,  13., 124.,  78., 135.,  29., 142.,  16., 124.,  70.,
        88.,  97., 144.,   8., 124.,  78., 135.,  26.,  35., 142.,  16.,
       124.,  65.,  70.,  88.,  97., 144.,   3., 124.,  70., 135.,   8.,
        26.,  35., 143., 123.,  67.,  70.,  76.,  83.,  87.,  93., 141.,
       146.,  61.,  61.,   8.,  11.,  14.,  21.,  25.,  27.,  31., 143.,
        68.,  81.,  86.,  91., 145.,  68., 135.,  19.,  24.,  29., 143.,
        68., 145.,  68.,  68., 145.,  68.,  68., 144.,   6., 124.,  68.,
        73.,  87., 145.,  68., 145.,  68., 135.,  25., 142.,   6., 124.,
        65.,  86.,  91., 145.,  68., 145.,  68., 145.,  68., 144.,  16.,
        26., 124.,  70.,  87.,  91., 145.,  73., 145.,  73., 135.,  25.,
        29., 143.,  73.,  87.,  92., 145.,  73., 14

In [28]:
ONNX_PATH = './models/lstm20_ed.onnx'
torch.onnx.export(model, (dummy_input, hc), ONNX_PATH, input_names=['input'], output_names=['output'])



## Convert to Tensorflow

In [None]:
from onnx_tf.backend import prepare
import os
# Load ONNX model and convert to TensorFlow format
model_onnx = onnx.load(ONNX_PATH)

tf_rep = prepare(model_onnx)

# print(tf_rep)
# # Export model as .pb file
# os.mkdir("tfmodel")
# TF_PATH = './tfmodel/saved_model.pb'

# tf_rep.export_graph(TF_PATH)

In [4]:

def load_pb(path_to_pb):
    with tf.gfile.GFile(path_to_pb, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')
        return graph

In [5]:

tf_graph = load_pb('./models/saved_model.pb')
sess = tf.Session(graph=tf_graph)


In [6]:
# Show tensor names in graph
for op in tf_graph.get_operations():
  print(op.values())


(<tf.Tensor 'fc.bias:0' shape=(149,) dtype=float32>,)
(<tf.Tensor 'fc.weight:0' shape=(149, 512) dtype=float32>,)
(<tf.Tensor 'lstm_1.bias_hh:0' shape=(2048,) dtype=float32>,)
(<tf.Tensor 'lstm_1.bias_ih:0' shape=(2048,) dtype=float32>,)
(<tf.Tensor 'lstm_1.weight_hh:0' shape=(2048, 512) dtype=float32>,)
(<tf.Tensor 'lstm_1.weight_ih:0' shape=(2048, 149) dtype=float32>,)
(<tf.Tensor 'lstm_2.bias_hh:0' shape=(2048,) dtype=float32>,)
(<tf.Tensor 'lstm_2.bias_ih:0' shape=(2048,) dtype=float32>,)
(<tf.Tensor 'lstm_2.weight_hh:0' shape=(2048, 512) dtype=float32>,)
(<tf.Tensor 'lstm_2.weight_ih:0' shape=(2048, 512) dtype=float32>,)
(<tf.Tensor 'input:0' shape=(512, 128, 149) dtype=float32>,)
(<tf.Tensor '1:0' shape=(128, 512) dtype=float32>,)
(<tf.Tensor '2:0' shape=(128, 512) dtype=float32>,)
(<tf.Tensor 'output:0' shape=(65536, 149) dtype=float32>,)


In [17]:
output_tensor = tf_graph.get_tensor_by_name('output:0')
input_tensor = tf_graph.get_tensor_by_name('input:0')



dummy_input = torch.rand((512,128,149))
# dummy_input[0][0][-4] = 123234
print(dummy_input)
output = sess.run(output_tensor, feed_dict={input_tensor: dummy_input})
print(output)
# print(sum(output))

print(sess.run(tf_graph.get_tensor_by_name('output:0'), feed_dict={input_tensor: dummy_input}))


tensor([[[0.1803, 0.9417, 0.1169,  ..., 0.7830, 0.1159, 0.3165],
         [0.0518, 0.3072, 0.7975,  ..., 0.7662, 0.8853, 0.5382],
         [0.0884, 0.3464, 0.7999,  ..., 0.7851, 0.9288, 0.7985],
         ...,
         [0.1247, 0.6785, 0.3321,  ..., 0.1549, 0.3650, 0.3078],
         [0.6177, 0.5930, 0.2051,  ..., 0.8262, 0.5611, 0.5394],
         [0.1312, 0.5529, 0.6200,  ..., 0.0025, 0.3866, 0.8852]],

        [[0.6836, 0.8976, 0.8076,  ..., 0.6451, 0.5228, 0.5770],
         [0.5929, 0.0727, 0.5378,  ..., 0.2690, 0.2381, 0.1330],
         [0.5169, 0.4479, 0.7870,  ..., 0.6609, 0.0956, 0.3888],
         ...,
         [0.2028, 0.4218, 0.0767,  ..., 0.5395, 0.5953, 0.9239],
         [0.6088, 0.3417, 0.3688,  ..., 0.3193, 0.5815, 0.2034],
         [0.8113, 0.2777, 0.8962,  ..., 0.1266, 0.6172, 0.2057]],

        [[0.4994, 0.2521, 0.6859,  ..., 0.4312, 0.6159, 0.0576],
         [0.0351, 0.0646, 0.1752,  ..., 0.6360, 0.7052, 0.7990],
         [0.2292, 0.9306, 0.6075,  ..., 0.1223, 0.1423, 0.

In [None]:
# tensorflowjs_converter --input_format=tf_frozen_model --output_node_names='output:0' ./models/saved_model.pb ./models/web/