In [1]:
from argparse import ArgumentParser, ArgumentTypeError
import numpy as np
import tensorflow as tf

from config import Config
from interactive_predict import InteractivePredictor
from model import Model
import reader

# Base directory for saved models; a run folder and checkpoint name are
# appended to it to form the path passed to --load.
MODELS_PATH = './models/java-small-model'

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def str2bool(v):
    """Convert a command-line style truthy/falsy value into a ``bool``.

    Suitable as an ``argparse`` ``type=`` callable.

    Args:
        v: Either a ``bool`` (returned unchanged) or a string, which is
            matched case-insensitively.

    Returns:
        ``True`` for 'yes', 'true', 't', 'y', '1';
        ``False`` for 'no', 'false', 'f', 'n', '0'.

    Raises:
        ArgumentTypeError: If ``v`` is a string outside both sets.
    """
    if isinstance(v, bool):
        return v

    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False

    # ArgumentTypeError is imported by name; the bare module name `argparse`
    # is not in scope, so it must not be used as a qualifier here.
    raise ArgumentTypeError('Boolean value expected.')

# Command-line interface definition. Every option is optional.
parser = ArgumentParser()

# Input data locations.
parser.add_argument("-d", "--data", dest="data_path",
                    help="path to preprocessed dataset", required=False)
parser.add_argument("-te", "--test", dest="test_path",
                    help="path to test file", metavar="FILE", required=False)

# Model persistence.
parser.add_argument("-s", "--save_prefix", dest="save_path_prefix",
                    help="path to save file", metavar="FILE", required=False)
parser.add_argument("-l", "--load", dest="load_path",
                    help="path to saved file", metavar="FILE", required=False)
# The two adjacent literals are concatenated by Python, so the first one
# must end with a space for the help text to read "smaller model size.".
parser.add_argument('--release', action='store_true',
                    help='if specified and loading a trained model, release the loaded model for a smaller model '
                         'size.')

# Run mode and reproducibility.
parser.add_argument('--predict', action='store_true')
parser.add_argument('--debug', action='store_true')
parser.add_argument('--seed', type=int, default=239)

# Regularisation coefficients and zeroing threshold.
parser.add_argument('--lasso', action='store', default=0, type=float,
                    help='L1-regularisation on embeddings layer coefficient')
parser.add_argument('--grouplasso', action='store', default=0, type=float,
                    help='Group Lasso regularisation on embeddings layer coefficient')
parser.add_argument('--threshold', action='store', default=-1, type=float,
                    help='Threshold applying for reseting values of tensors to zeros')

# Vocabulary size limits and per-vocabulary sparsification switches.
parser.add_argument('--subtoken_words', action='store', default=190000, type=int,
                    help='SUBTOKEN_VOCAB words max number restriction')
parser.add_argument('--nodes_words', action='store', default=-1, type=int,
                    help='NODES_VOCAB words max number restriction')
parser.add_argument('--sparse_nodes', type=str2bool, default=True,
                    help="Flag responcing for NODES_VOCAB embeddings sparsification")
parser.add_argument('--sparse_subtoken', type=str2bool, default=True,
                    help="Flag responcing for SUBTOKEN_VOCAB embeddings sparsification")


_StoreAction(option_strings=['--sparse_subtoken'], dest='sparse_subtoken', nargs=None, const=None, default=True, type=<function str2bool at 0x7fe3a7742440>, choices=None, help='Flag responcing for SUBTOKEN_VOCAB embeddings sparsification', metavar=None)

In [3]:
# Checkpoint to load: run folder plus the epoch whose snapshot is used.
folder = '2020_05_02__1e-05_0.0001_0.01__bedfb2fa'
epoch = 55

# Dataset paths: the data prefix is <data_dir>/<dataset_name>, and the test
# file is that prefix with a ".val.c2s" suffix.
data_dir = 'data/java-small'
dataset_name = 'java-small'
data = f'{data_dir}/{dataset_name}'
test_data = f'{data}.val.c2s'

# Parse an explicit argument list instead of sys.argv.
args = parser.parse_args([
    '--load', f'{MODELS_PATH}/{folder}/model_iter{epoch}',
    '--data', data,
    '--test', test_data,
])

In [4]:
# -1 stands for "no limit given" on the command line; pass None on instead.
args.nodes_words = None if args.nodes_words == -1 else args.nodes_words

# Seed both NumPy and TensorFlow from the same CLI value.
np.random.seed(args.seed)
tf.set_random_seed(args.seed)

config = Config.get_default_config(args)

model = Model(config)
print('Created model')

# Attach an input reader built from the model's own vocab mappings and config.
model.queue_thread = reader.Reader(
    subtoken_to_index=model.subtoken_to_index,
    node_to_index=model.node_to_index,
    target_to_index=model.target_to_index,
    config=model.config,
)

# Build the training graph on the reader output, then initialise variables.
optimizer, train_loss, print_node = model.build_training_graph(
    model.queue_thread.get_output()
)
model.initialize_session_variables(model.sess)
print('Initalized variables')

if model.config.LOAD_PATH:
    model.load_model(model.sess)

    # Fetch the variable named "model/SUBTOKENS_VOCAB:0" and read its value.
    vocab = [
        var for var in tf.global_variables()
        if var.name == "model/SUBTOKENS_VOCAB:0"
    ][0]
    vocab_np = vocab.eval(session=model.sess)

    # Sum of absolute values along axis 1; a zero sum means every entry
    # along that axis is zero.
    vocab_rows = np.abs(vocab_np).sum(axis=1)
    print(np.count_nonzero(vocab_rows == 0))
    print(vocab_rows)

model.print_hyperparams()

Loading dictionaries from: ./models/java-small-model/2020_05_02__1e-05_0.0001_0.01__bedfb2fa/model_iter55
Done loading dictionaries
Created model

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.print instead of tf.Print. Note that tf.print returns a no-output operator that directly prints the output. Outside of defuns or eager mode, this operator will not be executed unless it is directly specified in session.run or used as a control dependency for other operators. This is only a concern in graph mode. Below is an example of how to ensure tf.print executes in graph mode:
```python
    sess = tf.Session(