In [1]:
import os

# Export every environment variable BEFORE TensorFlow is imported, so the
# settings are already in place however early the library (or the CUDA
# runtime underneath it) decides to read them.

# set gpu
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

# set random seed
seed = 7152020
# NOTE: PYTHONHASHSEED is read by the interpreter at start-up; assigning it
# here does not change hashing in this already-running kernel, it is only
# inherited by child processes. Launch the kernel with the variable set if
# hash determinism in this process is required.
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['TF_DETERMINISTIC_OPS'] = "1"

import argparse
import random
import numpy as np
import tensorflow as tf
tf.compat.v1.enable_eager_execution()

# Seed each random number source used by the notebook.
random.seed(seed)
np.random.seed(seed)
tf.compat.v1.set_random_seed(seed)

from beer import get_beer_datasets, get_beer_annotation
from utils import get_pretained_glove
from beer_model import InvRNNwithSpanPred
from train import train_beer
from evaluate import test_beer, validate_beer

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
######################
# parameters
######################
class Placeholder:
    """Bare attribute container holding the run's hyper-parameters."""


args = Placeholder()

# Fill in every setting in a single pass; each keyword becomes an attribute
# of ``args`` (insertion order follows the listing below).
vars(args).update(
    # dataset parameters
    aspect=1,
    base_dir="./data/invariant_beer_release",
    max_seq_length=300,
    word_thres=2,
    batch_size=500,
    # pretrained embeddings
    glove_path="./embeddings/glove.6B.100d.txt",
    # model parameters
    embedding_dim=100,
    rnn_dim=256,
    num_classes=2,
    num_envs=2,
    rationale_length=20,
    # learning parameters
    num_epochs=50,
    diff_lambda=2.,
    lr=1e-3,
    num_run=5,
)

# Echo the configuration, alphabetically by attribute name.
print("\nParameters:")
for attr, value in sorted(vars(args).items()):
    print("\t{}={}".format(attr.upper(), value))


Parameters:
	ASPECT=1
	BASE_DIR=./data/invariant_beer_release
	BATCH_SIZE=500
	DIFF_LAMBDA=2.0
	EMBEDDING_DIM=100
	GLOVE_PATH=./embeddings/glove.6B.100d.txt
	LR=0.001
	MAX_SEQ_LENGTH=300
	NUM_CLASSES=2
	NUM_ENVS=2
	NUM_EPOCHS=50
	NUM_RUN=5
	RATIONALE_LENGTH=20
	RNN_DIM=256
	WORD_THRES=2


In [3]:
def run_model(args):
    """
    Train one InvRNNwithSpanPred model and report its best epoch.

    Loads the beer datasets and annotations, builds the pretrained embedding,
    the model and three Adam optimizers from ``args``, then trains for
    ``args.num_epochs`` epochs. After every epoch the model is evaluated on
    the dev set (``test_beer``) and on the annotation set (``validate_beer``).
    The epoch with the highest value in the first dev-result column is
    selected.

    Side effects:
        Sets ``args.vocab`` and ``args.pretrained_embedding``.

    Returns:
        ``(best_dev_result, best_cors_ann_result)``: the dev-result row and
        the annotation-result row recorded at the selected epoch.
    """
    # ---- datasets ----
    aspect_dir = os.path.join(args.base_dir, "aspect_%d" % args.aspect)

    args.vocab, train_set, dev_set = get_beer_datasets(
        aspect_dir, args.max_seq_length, args.word_thres)
    ann_set = get_beer_annotation(
        args.base_dir, args.aspect, args.max_seq_length, args.vocab.word2idx)

    # Initial training pipeline; it is rebuilt at the top of every epoch below.
    train_batches = train_set.shuffle(100000).batch(args.batch_size, drop_remainder=False)
    dev_batches = dev_set.batch(args.batch_size, drop_remainder=False)
    ann_batches = ann_set.batch(args.batch_size, drop_remainder=False)

    # ---- pretrained embedding ----
    args.pretrained_embedding = get_pretained_glove(args.vocab.word2idx, args.glove_path)

    # ---- model ----
    inv_rnn = InvRNNwithSpanPred(args)

    # ---- optimizers ----
    # Three independent Adam instances, in the order the trainer receives
    # them: generator, env-invariant predictor, env-enabled predictor.
    opts = [tf.keras.optimizers.Adam(learning_rate=args.lr) for _ in range(3)]

    # ---- learning ----
    global_step = 0
    per_epoch_dev = []
    per_epoch_ann = []

    for _ in range(args.num_epochs):
        # reshuffle the dataset
        train_batches = train_set.shuffle(100000).batch(args.batch_size, drop_remainder=False)

        global_step = train_beer(train_batches, inv_rnn, opts, global_step, args)

        # dev first, then annotation; one row of metrics per epoch
        per_epoch_dev.append(list(test_beer(dev_batches, inv_rnn)))
        per_epoch_ann.append(list(validate_beer(ann_batches, inv_rnn)))

    dev_table = np.array(per_epoch_dev)
    ann_table = np.array(per_epoch_ann)

    # Pick the epoch that maximises the first dev metric and report the
    # annotation metrics recorded at that same epoch.
    best_epoch = dev_table.argmax(axis=0)[0]

    return dev_table[best_epoch, :], ann_table[best_epoch, :]

In [4]:
# One entry per independent run: the best-epoch dev row and the annotation
# row that corresponds to it, as returned by run_model.
dev_results = []
cors_ann_results = []

for run in range(args.num_run):
    print("=========================")
    print("run:", run)
    print("=========================")

    dev_result, ann_result = run_model(args)

    dev_results.append([*dev_result])
    cors_ann_results.append([*ann_result])

# Stack into (num_run, n_metrics) arrays for the cells that inspect them.
np_dev_results = np.array(dev_results)
np_cors_ann_results = np.array(cors_ann_results)

# Choose the run with the highest first dev metric and report that run's rows.
best_dev_run = np_dev_results.argmax(axis=0)[0]
best_dev_result = np_dev_results[best_dev_run]
best_ann_result = np_cors_ann_results[best_dev_run]

print(
    ("{:s} {:s}{:.4f}, {:s}{:.4f}, "
     "{:s}{:.4f}, {:s}{:.4f}, {:s}{:.4f}, {:s}{:.4f}.").format(
        "----> [Final result]",
        "dev inv acc: ", best_dev_result[0],
        "dev enb acc: ", best_dev_result[1],
        "The best annotation result: sparsity: ", best_ann_result[0],
        "precision: ", best_ann_result[1],
        "recall: ", best_ann_result[2],
        "f1: ", best_ann_result[3]),
    flush=True)
    

run: 0
Training set: 
Number of examples 10000
Dev set: 
Number of examples 2000
Annotated rationales: 877
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
13542 out of 16023 words are covered by the pre-trained embedding.
Initialize the embedding from a pre-trained matrix.
Initialize the embedding from a pre-trained matrix.
Initialize the embedding from a pre-trained matrix.

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

run: 1
Training set: 
Number of examples 10000
Dev set: 
Number of examples 2000
Annotated rationales: 877
13542 out of 16023 words are covered by the pre-trained embedding.
Initialize the embedding from a pre-t

In [5]:
# Best-epoch dev results, one row per run; columns are reported by the final
# summary as "dev inv acc" and "dev enb acc".
np_dev_results

array([[0.771 , 0.7495],
       [0.8045, 0.804 ],
       [0.8225, 0.811 ],
       [0.796 , 0.792 ],
       [0.81  , 0.8055]], dtype=float32)

In [6]:
# Annotation results at each run's best dev epoch, one row per run; columns
# are reported by the final summary as sparsity, precision, recall and f1.
np_cors_ann_results

array([[0.1487807 , 0.12457973, 0.11961722, 0.12204805],
       [0.15351497, 0.2655068 , 0.2630426 , 0.26426893],
       [0.1561054 , 0.4962234 , 0.49991354, 0.49806166],
       [0.15406878, 0.20037106, 0.19922753, 0.19979765],
       [0.15633765, 0.4340647 , 0.43794316, 0.4359953 ]], dtype=float32)