In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''

In [2]:
S3_PATH_POS = {
    'bert': {
        'model': 'v34/pos/bert-base-pos.pb',
        'vocab': 'tokenizer/sp10m.cased.bert.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.bert.model',
        'setting': 'bert-bahasa/dictionary-pos.json',
    },
    'tiny-bert': {
        'model': 'v34/pos/tiny-bert-pos.pb',
        'vocab': 'tokenizer/sp10m.cased.bert.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.bert.model',
        'setting': 'bert-bahasa/dictionary-pos.json',
    },
    'albert': {
        'model': 'v34/pos/albert-base-pos.pb',
        'vocab': 'tokenizer/sp10m.cased.v10.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.v10.model',
        'setting': 'bert-bahasa/dictionary-pos.json',
    },
    'tiny-albert': {
        'model': 'v34/pos/albert-tiny-pos.pb',
        'vocab': 'tokenizer/sp10m.cased.v10.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.v10.model',
        'setting': 'bert-bahasa/dictionary-pos.json',
    },
    'xlnet': {
        'model': 'v34/pos/xlnet-base-pos.pb',
        'vocab': 'tokenizer/sp10m.cased.v9.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.v9.model',
        'setting': 'bert-bahasa/dictionary-pos.json',
    },
    'alxlnet': {
        'model': 'v34/pos/alxlnet-base-pos.pb',
        'vocab': 'tokenizer/sp10m.cased.v9.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.v9.model',
        'setting': 'bert-bahasa/dictionary-pos.json',
    },
}

In [3]:
for k in S3_PATH_POS.keys():
    if k != 'multinomial':
        print(k)
        os.system(f"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_POS[k]['model']}")

bert
tiny-bert
albert
tiny-albert
xlnet
alxlnet


In [4]:
import tensorflow as tf
from tensorflow.tools.graph_transforms import TransformGraph
from glob import glob
tf.compat.v1.set_random_seed(0)

In [5]:
pbs = glob('*.pb')
pbs

['bert-base-pos.pb',
 'xlnet-base-pos.pb',
 'alxlnet-base-pos.pb',
 'albert-base-pos.pb',
 'tiny-bert-pos.pb',
 'albert-tiny-pos.pb']

In [6]:
transforms = ['add_default_attributes',
             'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',
             'fold_batch_norms',
             'fold_old_batch_norms',
             'quantize_weights(fallback_min=-10, fallback_max=10)',
             'strip_unused_nodes',
             'sort_by_execution_order']

for pb in pbs:
    input_graph_def = tf.compat.v1.GraphDef()
    with tf.compat.v1.gfile.FastGFile(pb, 'rb') as f:
        input_graph_def.ParseFromString(f.read())
    
    if 'bert' in pb:
        inputs = ['Placeholder', 'Placeholder_1']
        outputs = ['dense/BiasAdd']
    if 'xlnet'in pb:
        inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']
        outputs = ['transpose_3']
        
    print(pb, inputs)
    
    transformed_graph_def = TransformGraph(input_graph_def, 
                                           inputs,
                                           ['logits'] + outputs, transforms)
    
    with tf.compat.v1.gfile.GFile(f'{pb}.quantized', 'wb') as f:
        f.write(transformed_graph_def.SerializeToString())

Instructions for updating:
Use tf.compat.v1.gfile.GFile.
bert-base-pos.pb ['Placeholder', 'Placeholder_1']
xlnet-base-pos.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']
alxlnet-base-pos.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']
albert-base-pos.pb ['Placeholder', 'Placeholder_1']
tiny-bert-pos.pb ['Placeholder', 'Placeholder_1']
albert-tiny-pos.pb ['Placeholder', 'Placeholder_1']


In [7]:
def load_graph(frozen_graph_filename, **kwargs):
    with tf.compat.v1.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())

    # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091
    # to fix import T5
    for node in graph_def.node:
        if node.op == 'RefSwitch':
            node.op = 'Switch'
            for index in xrange(len(node.input)):
                if 'moving_' in node.input[index]:
                    node.input[index] = node.input[index] + '/read'
        elif node.op == 'AssignSub':
            node.op = 'Sub'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
        elif node.op == 'AssignAdd':
            node.op = 'Add'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
        elif node.op == 'Assign':
            node.op = 'Identity'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
            if 'validate_shape' in node.attr:
                del node.attr['validate_shape']
            if len(node.input) == 2:
                node.input[0] = node.input[1]
                del node.input[1]

    with tf.compat.v1.Graph().as_default() as graph:
        tf.compat.v1.import_graph_def(graph_def)
    return graph

In [8]:
g = load_graph('xlnet-base-pos.pb.quantized')
x = g.get_tensor_by_name('import/Placeholder:0')
x_len = g.get_tensor_by_name('import/Placeholder_1:0')
x_len2 = g.get_tensor_by_name('import/Placeholder_2:0')
logits = g.get_tensor_by_name('import/logits:0')

In [9]:
# x, x_len, logits

In [10]:
test_sess = tf.compat.v1.InteractiveSession(graph = g)

In [11]:
%%time
test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]],
                                  x_len2: [[1,1,1,1,1]]})

CPU times: user 2.62 s, sys: 318 ms, total: 2.93 s
Wall time: 2.45 s


array([[5, 1, 1, 1, 5]], dtype=int32)

In [12]:
# %%time
# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})

In [13]:
quantized = glob('*.pb.quantized')
quantized

['alxlnet-base-pos.pb.quantized',
 'xlnet-base-pos.pb.quantized',
 'bert-base-pos.pb.quantized',
 'tiny-bert-pos.pb.quantized',
 'albert-tiny-pos.pb.quantized',
 'albert-base-pos.pb.quantized']

In [16]:
!rm *.pb*

In [None]:
# converter = @@#lite.TFLiteConverter.from_frozen_graph(
#     graph_def_file='test.pb',
#     input_arrays=['Placeholder', 'Placeholder_1'],
#     input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},
#     output_arrays=['logits'],
# )
# # converter.allow_custom_ops=True

In [None]:
# converter.target_spec.supported_ops = [tf.compat.v1.lite.OpsSet.TFLITE_BUILTINS, tf.compat.v1.lite.OpsSet.SELECT_TF_OPS]
# converter.target_spec.supported_types = [tf.compat.v1.float16]
# converter.optimizations = [tf.compat.v1.lite.Optimize.DEFAULT]
# converter.experimental_new_converter = True
# tflite_model = converter.convert()

In [None]:
# converter.target_spec.supported_ops = [tf.compat.v1.lite.OpsSet.TFLITE_BUILTINS, 
#                                        tf.compat.v1.lite.OpsSet.SELECT_TF_OPS]
# converter.target_spec.supported_types = [tf.compat.v1.float16]
# converter.optimizations = [tf.compat.v1.lite.Optimize.DEFAULT]
# tflite_model = converter.convert()

# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:
#     f.write(tflite_model)

In [None]:
# converter.target_spec.supported_ops = [tf.compat.v1.lite.OpsSet.TFLITE_BUILTINS, 
#                                        tf.compat.v1.lite.OpsSet.SELECT_TF_OPS]
# converter.optimizations = [tf.compat.v1.lite.Optimize.OPTIMIZE_FOR_SIZE]
# tflite_model = converter.convert()

# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:
#     f.write(tflite_model)

In [None]:
# interpreter = tf.compat.v1.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')
# interpreter.allocate_tensors()