In [1]:
import os

# Tell Google's client libraries which credentials file to use. The path is
# relative, so it is resolved against the notebook's working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'mesolitica-storage.json'

In [2]:
from google.cloud import storage
# Open a Cloud Storage client and take a handle on the 'mesolitica-general' bucket.
# NOTE(review): neither `client` nor `bucket` is referenced again in the visible
# cells; the model is addressed through its gs:// URL instead.
client = storage.Client()
bucket = client.bucket('mesolitica-general')

In [3]:
# !rm -rf t5-base-summary

In [4]:
import os
# Expose only GPU index 3 to CUDA. This is set before TensorFlow is imported
# in the following cell.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

In [5]:
import tensorflow as tf
import tensorflow_datasets as tfds
import t5

In [6]:
# Build the Mesh TensorFlow T5 wrapper around the checkpoints stored under
# model_dir. No TPU is configured (tpu=None) and one GPU device is listed.
# NOTE(review): model_parallelism=2 next to mesh_shape "model:1,batch:1" looks
# inconsistent -- confirm which setting takes effect when mesh_shape is given.
# NOTE(review): learning_rate_schedule / save_checkpoints_steps /
# keep_checkpoint_max look like training settings; the visible cells only
# export and predict with this model.
model = t5.models.MtfModel(
    model_dir='gs://mesolitica-general/t5-small-generator-v1',
    tpu=None,
    tpu_topology=None,
    model_parallelism=2,
    batch_size=1,
    sequence_length={"inputs": 1024, "targets": 1024},
    learning_rate_schedule=0.003,
    save_checkpoints_steps=5000,
    keep_checkpoint_max=3,
    iterations_per_loop=100,
    mesh_shape="model:1,batch:1", 
    mesh_devices=["gpu:0"]
)

In [7]:
# string = '1. Dr M perlu dikekalkan sebagai perdana menteri. 2. Muhyiddin perlulah menolong Dr M. 3. rakyat perlu menolong Muhyiddin.'

In [8]:
# string = '1. kerajaan perlu tolong gotong royong. 2. masyarakat juga perlu menolong kerajaan. 3. ibu bapa perlu memastikan anak menolong kerajaan'

In [9]:
# string = '1. Neelofa tetap dengan keputusan untuk berkahwin akhir tahun ini. 2. Long Tiger sanggup membantu Neelofa. 3. Tiba-tiba Long Tiger bergaduh dengan Husein.'

In [10]:
# string = '1. menambahkan ilmu tentang tempat yang dilawati. 2. memanfaatkan masa yang terluang ke tempat yang berfaedah. 3. menambahkan semangat kecintaan negara. 4. memberikan hiburan - sambil melawat sambil berhibur dan berseronok'

In [11]:
# string = '1. 1MBD menolong ekonomi negara. 2. Najib Razak menggunakan duit 1MBD sebaiknya. 3. Tiada bukti 1MBD mengambil duit rakyat.'

In [12]:
# print(string)
# with tf.compat.v1.io.gfile.GFile('test.txt', "w") as f:
#     f.write("karangan: %s\n" % string)

In [13]:
# model.predict(
#     input_file='test.txt',
#     output_file='out.txt',
#     temperature=0.7,
#     beam_size=1,
#     sentencepiece_model_path='sp10m.cased.t5.model'
# )

In [None]:
# Empty the local 'output' directory, which the export cells write into.
!rm -rf output/*

In [None]:
import gin

from t5.data import sentencepiece_vocabulary

# SentencePiece model file used to build the vocabulary in this notebook.
DEFAULT_SPM_PATH = 'sp10m.cased.t5.model'
# Second argument passed to SentencePieceVocabulary by get_default_vocabulary().
DEFAULT_EXTRA_IDS = 100
# Same GCS location that was given as model_dir when `model` was constructed.
model_dir = 'gs://mesolitica-general/t5-small-generator-v1'

def get_default_vocabulary():
    """Return a SentencePieceVocabulary built from the notebook-level defaults.

    DEFAULT_SPM_PATH is passed as the model file and DEFAULT_EXTRA_IDS as the
    second positional argument.
    """
    vocab_cls = sentencepiece_vocabulary.SentencePieceVocabulary
    return vocab_cls(DEFAULT_SPM_PATH, DEFAULT_EXTRA_IDS)

# Gin bindings applied on top of the operative config stored with the model:
# decoding settings first, then the variable dtypes. Order is preserved.
gin_overrides = (
    ("Bitransformer.decode.beam_size", 1),
    ("Bitransformer.decode.temperature", 0.7),
    ("utils.get_variable_dtype.slice_dtype", "float32"),
    ("utils.get_variable_dtype.activation_dtype", "float32"),
)

# NOTE: `_operative_config_path` is a private helper of t5.models.mtf_model.
with gin.unlock_config():
    gin.parse_config_file(t5.models.mtf_model._operative_config_path(model_dir))
    for binding_key, binding_value in gin_overrides:
        gin.bind_parameter(binding_key, binding_value)

# Vocabulary and estimator used by the manual SavedModel export further down.
vocabulary = t5.data.SentencePieceVocabulary(DEFAULT_SPM_PATH)
estimator = model.estimator(vocabulary, disable_tpu=True)

In [None]:
import os


# Resolve the newest checkpoint step under model_dir and derive its full path.
# NOTE: `_get_latest_checkpoint_from_dir` is a private helper of t5.models.mtf_model.
checkpoint_step = t5.models.mtf_model._get_latest_checkpoint_from_dir(model_dir)
model_ckpt = f"model.ckpt-{checkpoint_step}"
checkpoint_path = os.path.join(model_dir, model_ckpt)
(checkpoint_step, model_ckpt, checkpoint_path)

In [None]:
from mesh_tensorflow.transformer import dataset as transformer_dataset

def serving_input_fn():
    """Build the serving-time input pipeline for the exported estimator.

    The receiver tensor is a 1-D string placeholder named "inputs". Each
    string is encoded with ``vocabulary``, padded (not packed) to
    ``model._sequence_length`` and gathered into one batch of features.

    Returns:
        A ServingInputReceiver whose ``features`` are the encoded batch and
        whose receiver tensor is the raw string placeholder.
    """
    inputs = tf.compat.v1.placeholder(
        dtype=tf.compat.v1.string,
        shape=[None],
        name="inputs")

    # The batch size is read from the fed tensor, so any number of strings
    # can be served in one call.
    batch_size = tf.compat.v1.shape(inputs)[0]
    # Pads the inputs up to a multiple of batch_size. Because batch_size is
    # the input length itself, the pad amount is 0 for a non-empty batch.
    pad_amount = tf.compat.v1.mod(-tf.compat.v1.size(inputs), batch_size)
    padded_inputs = tf.compat.v1.pad(inputs, [(0, pad_amount)])

    dataset = tf.compat.v1.data.Dataset.from_tensor_slices(padded_inputs)
    dataset = dataset.map(lambda x: {"inputs": x})
    dataset = transformer_dataset.encode_all_features(dataset, vocabulary)
    dataset = transformer_dataset.pack_or_pad(
        dataset=dataset,
        length=model._sequence_length,
        pack=False,
        feature_keys=["inputs"]
    )
    # One batch holding every example, then pulled out as plain tensors.
    dataset = dataset.batch(tf.compat.v1.cast(batch_size, tf.compat.v1.int64))
    features = tf.compat.v1.data.experimental.get_single_element(dataset)
    return tf.compat.v1.estimator.export.ServingInputReceiver(
        features=features, receiver_tensors=inputs)

# Export the estimator as a SavedModel under 'output', restoring weights from
# checkpoint_path. `out` holds the value returned by export_saved_model and is
# passed to the SavedModel loader in the next cell.
out = estimator.export_saved_model('output', serving_input_fn, checkpoint_path=checkpoint_path)

In [None]:
# Load the exported SavedModel (serving tag) into an interactive session.
# Soft placement is enabled on the session config.
config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
sess = tf.compat.v1.InteractiveSession(config=config)
meta_graph_def = tf.compat.v1.saved_model.loader.load(
    sess, [tf.compat.v1.saved_model.tag_constants.SERVING], out
)

In [None]:
# Save the trainable variables of the loaded graph as a checkpoint at
# out/model.ckpt; freeze_graph('out', ...) reads this checkpoint and its
# '.meta' file.
saver = tf.compat.v1.train.Saver(tf.compat.v1.trainable_variables())
saver.save(sess, 'out/model.ckpt')

In [None]:
# Names of the graph nodes handed to freeze_graph() as output nodes.
# A node is selected when its NAME contains one of the fragments below and it
# is neither an optimizer node ('adam') nor an assignment node ('Assign').
# Every clause matches on n.name: op types such as 'MatMul' do not carry scope
# names, so testing 'encoder' against n.op would never select anything.
strings = [
    n.name
    for n in tf.compat.v1.get_default_graph().as_graph_def().node
    if any(
        fragment in n.name
        for fragment in (
            'encoder',
            'decoder',
            'shared',
            'inputs',
            'output',
            'SentenceTokenizer',
            'self/Softmax',
        )
    )
    and 'adam' not in n.name
    and 'Assign' not in n.name
]

In [None]:
def freeze_graph(model_dir, output_node_names):
    """Freeze the checkpoint recorded in ``model_dir`` into ``frozen_model.pb``.

    The checkpoint's meta graph is imported into a fresh graph, its variables
    are restored and converted to constants, and the resulting GraphDef is
    written next to the checkpoint as ``frozen_model.pb``.

    Args:
        model_dir: directory containing the checkpoint state, the checkpoint
            and its ``.meta`` file.
        output_node_names: node names to keep as outputs of the frozen graph.

    Raises:
        AssertionError: if ``model_dir`` does not exist or holds no checkpoint.
    """
    if not tf.compat.v1.io.gfile.exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    # get_checkpoint_state returns None when the directory has no checkpoint
    # state file; fail with a clear message instead of an AttributeError.
    checkpoint = tf.compat.v1.train.get_checkpoint_state(model_dir)
    if checkpoint is None or not checkpoint.model_checkpoint_path:
        raise AssertionError(
            'No checkpoint found in directory: %s' % model_dir
        )
    input_checkpoint = checkpoint.model_checkpoint_path

    # Write next to the checkpoint. rpartition keeps a bare file name (no '/')
    # relative to the working directory instead of producing '/frozen_model.pb'.
    checkpoint_dir, separator, _ = input_checkpoint.rpartition('/')
    output_graph = checkpoint_dir + separator + 'frozen_model.pb'

    with tf.compat.v1.Session(graph = tf.compat.v1.Graph()) as sess:
        # clear_devices drops device placements recorded in the meta graph.
        saver = tf.compat.v1.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = True
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
            sess,
            tf.compat.v1.get_default_graph().as_graph_def(),
            output_node_names,
        )
        with tf.compat.v1.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [None]:
# Freeze the checkpoint saved under 'out', keeping the nodes listed in `strings`.
freeze_graph('out', strings)

In [None]:
def load_graph(frozen_graph_filename):
    """Read a frozen GraphDef from disk and import it into a new graph.

    Before importing, stateful ops left in the GraphDef are rewritten:

    * ``RefSwitch`` -> ``Switch``; inputs whose name contains ``'moving_'``
      get ``'/read'`` appended.
    * ``AssignSub`` -> ``Sub`` and ``AssignAdd`` -> ``Add``, dropping the
      ``use_locking`` attribute.
    * ``Assign`` -> ``Identity``, dropping ``use_locking`` and
      ``validate_shape``; a two-input node keeps only its second input.

    Args:
        frozen_graph_filename: path of the serialized GraphDef file.

    Returns:
        The new graph. ``import_graph_def`` is called without a name, so the
        imported nodes carry the default ``import/`` prefix.
    """
    with tf.compat.v1.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())

    for node in graph_def.node:
        if node.op == 'RefSwitch':
            node.op = 'Switch'
            # `range`, not `xrange`: this notebook runs on Python 3.
            for index in range(len(node.input)):
                if 'moving_' in node.input[index]:
                    node.input[index] = node.input[index] + '/read'
        elif node.op == 'AssignSub':
            node.op = 'Sub'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
        elif node.op == 'AssignAdd':
            node.op = 'Add'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
        elif node.op == 'Assign':
            node.op = 'Identity'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
            if 'validate_shape' in node.attr:
                del node.attr['validate_shape']
            if len(node.input) == 2:
                # Identity takes one input: keep the second input, drop the first.
                node.input[0] = node.input[1]
                del node.input[1]

    with tf.compat.v1.Graph().as_default() as graph:
        tf.compat.v1.import_graph_def(graph_def)
    return graph

In [None]:
# Load the frozen graph and look up the tensors used for the smoke test:
# `i` is the 'inputs' tensor and `o` the SentencepieceDetokenizeOp output.
# Both names carry the 'import/' prefix added when the GraphDef was imported.
g = load_graph('out/frozen_model.pb')
i = g.get_tensor_by_name('import/inputs:0')
o = g.get_tensor_by_name('import/SentenceTokenizer_1/SentenceTokenizer/SentencepieceDetokenizeOp:0')
i, o

In [None]:
# Separate interactive session bound to the frozen graph `g`.
# NOTE(review): this session is never closed in the visible cells.
test_sess = tf.compat.v1.InteractiveSession(graph = g)

In [None]:
# Sample prompt (numbered points) used to exercise the exported models.
string = '1. 1MBD menolong ekonomi negara. 2. Najib Razak menggunakan duit 1MBD sebaiknya. 3. Tiada bukti 1MBD mengambil duit rakyat.'

In [None]:
# Run the frozen graph on a one-element batch and decode the first result.
# NOTE(review): the prompt is fed without the 'karangan: ' prefix that the
# SavedModel prediction cells add -- confirm this is intended.
test_sess.run(o, feed_dict = {i: [string]})[0].decode('utf-8')

In [None]:
# Export a SavedModel through the model's own export() helper, using the same
# decoding settings (beam_size=1, temperature=0.7) as the gin overrides.
# NOTE(review): checkpoint_step=-1 presumably selects the latest checkpoint --
# confirm against the t5 documentation.
model.batch_size = 1
saved_model_path = model.export(
    'output',
    checkpoint_step=-1,
    beam_size=1,
    temperature=0.7,
    sentencepiece_model_path='sp10m.cased.t5.model'
)

In [None]:
# export() returned a bytes path; show it as text.
saved_model_path.decode()

In [None]:
import tensorflow_text  

# Start from an empty default graph so the SavedModel is loaded on its own.
# NOTE(review): earlier cells leave InteractiveSessions open (`sess`,
# `test_sess`); TensorFlow documents reset_default_graph() as undefined while
# a session is active -- consider closing them before running this cell.
tf.compat.v1.reset_default_graph()
sess = tf.compat.v1.InteractiveSession()
meta_graph_def = tf.compat.v1.saved_model.load(sess, ["serve"], saved_model_path.decode())
signature_def = meta_graph_def.signature_def["serving_default"]


def pred(x):
    """Run the SavedModel's serving signature on a batch of input strings.

    Args:
        x: value fed to the signature's "input" tensor (the cells below pass
            a list of strings).

    Returns:
        The evaluated "outputs" tensor of the serving signature.
    """
    return sess.run(
        fetches=signature_def.outputs["outputs"].name,
        feed_dict={signature_def.inputs["input"].name: x}
    )

In [None]:
# Add the 'karangan: ' prefix to the prompt, predict on a one-element batch
# and decode the first result as UTF-8.
q = f'karangan: {string}'

pred([q])[0].decode('utf-8')

In [None]:
# Second sample prompt, sent with the same 'karangan: ' prefix.
string = '1. kerajaan perlu tolong gotong royong. 2. masyarakat juga perlu menolong kerajaan. 3. ibu bapa perlu memastikan anak menolong kerajaan'
q = f'karangan: {string}'

pred([q])[0].decode('utf-8')

In [None]:
# Third sample prompt.
# NOTE(review): this cell uses the prefix 'ringkasan: ' where the other
# prediction cells use 'karangan: ' -- confirm the different prefix is intended.
string = '1. Neelofa tetap dengan keputusan untuk berkahwin akhir tahun ini. 2. Long Tiger sanggup membantu Neelofa. 3. Tiba-tiba Long Tiger bergaduh dengan Husein.'
q = f'ringkasan: {string}'

pred([q])[0].decode('utf-8')

In [None]:
# Remove any previous local 'model' directory before the export is moved there.
!rm -rf model

In [None]:
import shutil

# Move the exported SavedModel directory to ./model. shutil.move raises on
# failure (os.system only returned an exit status) and needs no shell quoting
# of the path.
shutil.move(saved_model_path.decode(), 'model')

In [None]:
# Pack the 'model' directory into a gzip-compressed tarball for upload.
!tar -czvf sample-generator-t5-small.tar.gz model

In [None]:
import boto3

# Upload the tarball to S3.
# Naming caveat: upload_file takes (Filename, Bucket, Key), so `Key` below is
# the LOCAL file name and `outPutname` is the destination object key.
bucketName = 'huseinhouse-storage'
Key = 'sample-generator-t5-small.tar.gz'
outPutname = "v35/generator/sample-generator-t5-small.tar.gz"

# Credentials are not passed here, so boto3 resolves them on its own.
s3 = boto3.client('s3')
s3.upload_file(Key,bucketName,outPutname)

In [None]:
# Adjacent string literals inside the parentheses are concatenated, so `x` is
# one str, not a tuple of strings.
x = ('Kenyataan media yang dibuat oleh kepimpinan parti adalah sah. Tidak ada '
 'persoalan peletakan jawatan Dr Mahathir adalah sah atau tidak. Ia sudah '
 'diputuskan oleh semua pihak termasuk Presiden, Tan Sri Muhyiddin Yassin')

In [None]:
# `x` is already a single str, so joining its characters with '' yields an
# equal string; this cell only displays the text.
''.join(x)

In [None]:
import dask