# Following instructions from Gong et al to train model 

In [None]:
!pip install tensorflow-gpu==2.4 --user

In [1]:
import param as param
from data_utils import data_helpers, tag_data_helpers
from model.abuse_classifier import AbuseClassifier


In [2]:
#load vocabulary and initial embeddings
vocabulary, pos_vocabulary, init_embed = data_helpers.loadVocabEmb()

pos_vocab_size = len(pos_vocabulary)
vocab_size = len(vocabulary)

x_train, length_train, attention_train, pos_train, pos_length_train, y_train, \
x_dev, length_dev, attention_dev, pos_dev, pos_length_dev, y_dev \
    = data_helpers.loadTrainData()


padded sent: (9232, 100)
feature shape: (9232, 100)
padded pos sentences: (9232, 100)
debug padded_pos_sentences: ['&', 'O', 'V', 'D', 'N', 'N', ',', '&', 'O', 'V']
pos feature shape: (9232, 100)
load train data, input sent size: (9232, 100), input POS size: (9232, 100), label size: (9232, 2)
split into train (7385 examples) and dev sets (1847 examples)


In [2]:
"""
Train abusive language classifier
"""


from sklearn.metrics import roc_curve, precision_recall_curve, auc
import os

import param as param
from data_utils import data_helpers
from model.abuse_classifier import AbuseClassifier

import tensorflow as tf
from tensorflow.compat.v1.app import flags

tf.random.set_seed(111)
tf.compat.v1.disable_eager_execution()

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("Tensorflow version: ",tf.__version__)


Num GPUs Available:  1
Tensorflow version:  2.4.0


In [5]:
# Model Hyperparameters
flags.DEFINE_integer("embedding_dim", 300, "Dimensionality of character embedding (default: 128)")
flags.DEFINE_integer("pos_vocab_size", 26, "Vocab size of POS tags")
flags.DEFINE_integer("pos_embedding_dim", 25, "Dimensionality of pos tag embedding (default: 20)")
flags.DEFINE_float("dropout_keep_prob", 0.99, "Dropout keep probability (default: 0.5)")
flags.DEFINE_float("attention_lambda", 0.2, "Supervised attention lambda (default: 0.05)")
flags.DEFINE_string("attention_loss_type", 'encoded', "loss function of attention")
flags.DEFINE_float("l2_reg_lambda", 0.02, "L2 regularizaion lambda (default: 0.05)")
flags.DEFINE_integer("hidden_size", 300, "Dimensionality of RNN cell (default: 300)")
flags.DEFINE_integer("pos_hidden_size", 25, "Dimensionality of POS-RNN cell")
flags.DEFINE_integer("attention_size", 20, "Dimensionality of attention scheme (default: 50)")
flags.DEFINE_boolean("use_pos_flag", True, "use the sequence of POS tags")
# Training parameters -- evaluate_every should be 100
flags.DEFINE_integer("batch_size", 32, "Batch Size (default: 32)")
flags.DEFINE_integer("num_epochs", 60, "Number of training epochs (default: 200)")
flags.DEFINE_integer("evaluate_every", 50, "Evaluate model on dev set after this many steps (default: 100)")
flags.DEFINE_integer("checkpoint_every", 500000, "Save model after this many steps (default: 100)")
# flags.DEFINE_float("train_ratio", 1.0, "Ratio of training data")
# Misc Parameters
flags.DEFINE_string("checkpoint", '', "model")
flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

#added so it works in command line 
flags.DEFINE_string('f', '', 'kernel')
FLAGS = flags.FLAGS





In [3]:
# -----------------------------  load data  -----------------------------
vocabulary, pos_vocabulary, init_embed = data_helpers.loadVocabEmb()
pos_vocab_size = len(pos_vocabulary)
x_train, length_train, attention_train, pos_train, pos_length_train, y_train, \
x_dev, length_dev, attention_dev, pos_dev, pos_length_dev, y_dev \
    = data_helpers.loadTrainData()






padded sent: (9232, 100)
feature shape: (9232, 100)
padded pos sentences: (9232, 100)
debug padded_pos_sentences: ['&', 'O', 'V', 'D', 'N', 'N', ',', '&', 'O', 'V']
pos feature shape: (9232, 100)
load train data, input sent size: (9232, 100), input POS size: (9232, 100), label size: (9232, 2)
split into train (7385 examples) and dev sets (1847 examples)


In [7]:
# -------------------------- model training --------------------------
with tf.Graph().as_default():
    
    #initialization code required to make tensorflow work on my systemabs

    config = tf.compat.v1.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    
    
    
    sess = tf.compat.v1.Session(config=config)

    with sess.as_default():
        model = AbuseClassifier(
            max_sequence_length=param.max_sent_len,
            num_classes=2,
            pos_vocab_size=pos_vocab_size,
            init_embed=init_embed,
            hidden_size=FLAGS.hidden_size,
            attention_size=FLAGS.attention_size,
            keep_prob=FLAGS.dropout_keep_prob,
            attention_lambda=FLAGS.attention_lambda,
            attention_loss_type=FLAGS.attention_loss_type,
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            use_pos_flag=FLAGS.use_pos_flag)

        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.compat.v1.train.AdamOptimizer()
        grads_and_vars = optimizer.compute_gradients(model.loss, aggregation_method=2)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # save models
        if FLAGS.checkpoint == "":
            out_dir = os.path.abspath(os.path.join(os.path.pardir, "model"))
            print("Writing to {}\n".format(out_dir))
        else:
            out_dir = FLAGS.checkpoint
        if (FLAGS.attention_lambda == 0.0):
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "model_noatt_checkpoints"))
        else:
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "model_att=" + FLAGS.attention_loss_type + "_checkpoints"))
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver =  tf.compat.v1.train.Saver(tf.compat.v1.global_variables())
        # initalize variables
        sess.run(tf.compat.v1.global_variables_initializer())
        #restore models
        try:
            saver.restore(sess, os.path.join(checkpoint_dir, "best_model"))
            print("restoring from trained model...")
        except:
            
            print("something went wrong")
        print("train a new model...")
        print(tf.compat.v1.trainable_variables())


        def train_step(x_batch, pos_batch, y_batch, sequence_length, pos_sequence_length, attention_batch):
            feed_dict = {
                model.input_word: x_batch,
                model.input_pos: pos_batch,
                model.input_y: y_batch,
                model.sequence_length: sequence_length,
                model.input_attention: attention_batch,
                model.dropout_keep_prob: FLAGS.dropout_keep_prob
            }
            _, step, loss = sess.run(
                [train_op, global_step, model.loss],
                feed_dict)
            if (step % FLAGS.evaluate_every == 0):
                print("step {}, loss {:} ".format(step, loss))


        def dev_step(x_dev, pos_dev, y_dev, length_dev, pos_length_dev, writer=None):
            dev_scores = []
            # loss_list = []
            pos = 0
            gap = 50
            while (pos < len(x_dev)):
                x_batch = x_dev[pos:pos + gap]
                pos_batch = pos_dev[pos:pos + gap]
                y_batch = y_dev[pos:pos + gap]
                sequence_length = length_dev[pos:pos + gap]
                pos_sequence_length = pos_length_dev[pos:pos + gap]
                pos += gap
                feed_dict = {
                    model.input_word: x_batch,
                    model.input_pos: pos_batch,
                    model.input_y: y_batch,
                    model.sequence_length: sequence_length,
                    model.dropout_keep_prob: 0.99999
                }
                # step, loss, scores = sess.run(
                #    [global_step, model.loss, model.prob],
                #    feed_dict)
                step, scores = sess.run(
                    [global_step, model.prob],
                    feed_dict)
                dev_scores = dev_scores + list([s[0] for s in scores])
                # loss_list.append(loss)
            gold_scores = [t[0] for t in y_dev]
            pred_scores = dev_scores[:]
            fpr, tpr, _ = roc_curve(gold_scores, pred_scores, pos_label=1)
            roc_auc = auc(fpr, tpr)
            prec, recall, _ = precision_recall_curve(gold_scores, pred_scores, pos_label=1)
            pr_auc = auc(recall, prec)
            # avg_loss = np.mean(loss_list)
            print("dev roc_auc:", roc_auc, "dev pr_auc:", pr_auc)
            return roc_auc, pr_auc  # , avg_loss


        # Generate batches
        batches = data_helpers.batch_iter(
            list(zip(x_train, y_train, pos_train, length_train, pos_length_train, attention_train)), FLAGS.batch_size,
            FLAGS.num_epochs)
        best_auc = 0.10

        for batch in batches:
            x_batch, y_batch, pos_batch, length_batch, pos_length_batch, attention_batch = zip(*batch)
            train_step(x_batch, pos_batch, y_batch, length_batch, pos_length_batch, attention_batch)
            current_step = tf.compat.v1.train.global_step(sess, global_step)
            if (current_step % FLAGS.evaluate_every == 0):
                print("\n Evaluation:")
                roc_auc, pr_auc = dev_step(x_dev, pos_dev, y_dev, length_dev, pos_length_dev)
                # model selection criteria: roc_auc
                # if (best_auc < roc_auc):
                #    best_auc = roc_auc
                if (best_auc < pr_auc):
                    best_auc = pr_auc
                    print("best pr auc:", best_auc)
                    checkpoint_prefix = os.path.join(checkpoint_dir, "best_model")
                    path = saver.save(sess, checkpoint_prefix)
                    print("Saved best model checkpoint.")



Supervised attention with encoded loss.
Writing to C:\Users\apra\Desktop\FALL 2021\CY 7990\model

INFO:tensorflow:Restoring parameters from C:\Users\apra\Desktop\FALL 2021\CY 7990\model\model_att=encoded_checkpoints\best_model
restoring from trained model...
train a new model...
[<tf.Variable 'embedding/W:0' shape=(18161, 300) dtype=float32>, <tf.Variable 'pos_embedding/W_pos:0' shape=(26, 26) dtype=float32>, <tf.Variable 'bi-rnn/bidirectional_rnn/fw/lstm_cell/kernel:0' shape=(626, 1200) dtype=float32>, <tf.Variable 'bi-rnn/bidirectional_rnn/fw/lstm_cell/bias:0' shape=(1200,) dtype=float32>, <tf.Variable 'bi-rnn/bidirectional_rnn/bw/lstm_cell/kernel:0' shape=(626, 1200) dtype=float32>, <tf.Variable 'bi-rnn/bidirectional_rnn/bw/lstm_cell/bias:0' shape=(1200,) dtype=float32>, <tf.Variable 'bi-rnn/Variable:0' shape=(600, 20) dtype=float32>, <tf.Variable 'bi-rnn/Variable_1:0' shape=(20,) dtype=float32>, <tf.Variable 'bi-rnn/Variable_2:0' shape=(20,) dtype=float32>, <tf.Variable 'fc-layer-1

  data = np.array(data)


step 8050, loss 0.18522103130817413 

 Evaluation:
dev roc_auc: 0.8386485369848461 dev pr_auc: 0.701542963756046
best pr auc: 0.701542963756046
Saved best model checkpoint.
step 8100, loss 0.09785278141498566 

 Evaluation:
dev roc_auc: 0.8801980198019802 dev pr_auc: 0.7207924735675048
best pr auc: 0.7207924735675048
Saved best model checkpoint.
step 8150, loss 0.03524046018719673 

 Evaluation:
dev roc_auc: 0.8452420651901256 dev pr_auc: 0.7118638527303766
step 8200, loss -0.010463258251547813 


KeyboardInterrupt: 