In [35]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [36]:
import os
os.chdir('/content/drive/MyDrive/CSC2515 Project Part 2') # change directory to project folder 

In [37]:
os.getcwd()

'/content/drive/MyDrive/CSC2515 Project Part 2'

## import libraries

In [38]:
#!pip uninstall bert-tensorflow
!pip install bert-for-tf2 # bert for tensorflow 2.0 (NOT 1.0 - that's the tf version that causes headaches for tokenizer.py in BERT)



In [39]:
# mount drive , then do:
import pandas as pd
import numpy as np
import tensorflow as tf # version 2.3.0 required
import matplotlib.pyplot as plt

import tensorflow_hub as hub
from datetime import datetime
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
import bert
from bert.tokenization import bert_tokenization
%matplotlib inline

from pathlib import Path # path for .TFRecord files
import collections # for 2 bert preprocessing functions

os.chdir('/content/drive/MyDrive/CSC2515 Project Part 2') # change directory to project folder

In [40]:
os.chdir('bert')
import modeling # manually import modeling.py script on drive - reference: https://github.com/google-research/bert/blob/master/modeling.py

## Define Bert files & import train data

note: we will be using the pre-trained 'BERT-base, uncased' model i.e. `uncased_L-12_H-768_A-12` downloaded at the source code repo README link [here](https://github.com/google-research/bert), with documentation. 

About `uncased_L-12_H-768_A-12` model: 
12-layer, 768-hidden, 12-heads, 110M parameters

In the 'bert' subfolder: 
- A TensorFlow checkpoint (bert_model.ckpt) containing the pre-trained weights (which is actually 3 files).
- A vocab file (vocab.txt) to map subwords in BERT vocab to word indices in comment text.
- A config file (bert_config.json) which specifies the hyperparameters of the model.

In [41]:
os.chdir('..')
os.getcwd()

'/content/drive/MyDrive/CSC2515 Project Part 2'

In [42]:
# import train data (note: won't use test.csv since has no true labels)
trainSet = pd.read_csv('train.csv')
trainSet.head(3)

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0


In [43]:
vocab= 'bert/vocab.txt'
checkpoint = 'bert/bert_model.ckpt'
config_file = 'bert/bert_config.json'

In [44]:
bert_tokenization.validate_case_matches_checkpoint(True,checkpoint) # check

# initialize BERT tokenizer (which expects lower case words only)
tokenizer = bert_tokenization.FullTokenizer(vocab_file=vocab, do_lower_case=True)

## Data preprocessing 


### Split train/test data

In [45]:
id = 'id' # user ID
comment = 'comment_text' # corpus of comments
classes = ['toxic','severe_toxic','obscene','threat','insult','identity_hate'] # labels

In [46]:
# define train/test ratio
split = 0.9

In [47]:
# perform 90/10 train/test split
length = trainSet.shape[0]
splitIndex = int(split*length)
train = trainSet[:splitIndex] # train set
test = trainSet[splitIndex:] # test set

In [48]:
# check 
train.head()
# train.shape

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


In [49]:
# the below 2 functions (docstrings) are from BERT's source code 'run_classifier.py' here (https://github.com/google-research/bert/blob/master/run_classifier.py)
# convert each row in df from `InputExample`instance -> `InputFeatures` instance to feed to BERT
class InputExample(object):
    """A single training/test example (e.g. single row of df as separate elements like `guid`, `text_a` etc."""

    def __init__(self, guid, text_a, text_b=None, labels=None):
        """Constructs a InputExample.

        Args:
            guid: Unique id for the example (= None in our case)
            text_a: string. The untokenized text of the first sequence. 
            text_b: don't need - only used for sequence *pair* (i.e. compare 2 sentences rather than only use 1 for classification) tasks.
            labels: (Optional) [string]. The label of the example. 
        """
        self.guid = guid # ID
        self.text_a = text_a # comment
        self.text_b = text_b # None
        self.labels = labels # list of toxicity labels


class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self, input_ids, input_mask, segment_ids, label_ids, is_real_example=True):
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.label_ids = label_ids,
        self.is_real_example=is_real_example # True, flags example as valid row in df (for BERT real token masking later)

In [50]:
# The following function is adapted from the 'create_examples()' function from the 'run_classifier.py' source code https://github.com/google-research/bert/blob/master/run_classifier.py
#  Because need to accommodate for the multilabel (6 toxicity classes), and 1-hot encoded columns.
def createBertInput(data, labelsExist=True):
    """Creates examples for train set with labels (examples = a format understandable by BERT )
    Return: a list of class InputExample instances, each corresponding to a row in the `df` (as argument)
    """
    input = [] # init. list
    # for each row index (i), and each row (line) of df as a string-delimited list
    for (i, line) in enumerate(data.values):
        # assuming labels available (for train data)
        if labelsExist:
            labels = line[2:] # get toxicity labels for that row 
        else:
            labels = [0,0,0,0,0,0] # if no labels then assume clean comment
        guid = line[0] # extract'id' for that row
        text_a = line[1] # extract 'comment' for that row

        input.append(InputExample(guid=guid, text_a=text_a, labels=labels)) # append as InputExample to list
    return input

In [51]:
# create InputExamples()
trainInput = createBertInput(train) # a list of 'InputExample` class instances, len = train

### Below 2 functions take input embeddings (i.e. examples) and converts them to features usable by BERT. 
**Features = a combination of :** 

a) token embeddings  (tokenized words, with [CLS] and [SEP] tokens added to signal beginning & ending of a phrase)

b) segmentation embeddings (akak *type or segment IDs*)  

c) position embeddings (e.g. where word is in a sentence) 

In [52]:
# Preprocessing params
maxSeqLength = 128 # max number of tokens in a tokenized comment

In [53]:
# taken from source code https://github.com/google-research/bert/blob/master/run_classifier.py
# if don't include might throw errors when padding tokens
class PaddingInputExample(object): 
    """Fake example so the num input examples is a multiple of the batch size.
    When running eval/predict on the GPU, we need to pad the number of examples
    to be a multiple of the batch size, because the GPU requires a fixed batch
    size. The alternative is to drop the last batch, which is bad because it means
    the entire output data won't be generated.
    We use this class instead of `None` because treating `None` as padding
    batches could cause silent errors.
    """

Below 2 functions adapted from source code https://github.com/google-research/bert/blob/master/run_classifier.py 

Changes made: 1) omitted `label_list` argument since already in `example` argument 2) omitted `tokens_b` cases
3) added label_list to append to `inputFeature`

In [54]:
def convert_single_example(ex_index, example, max_seq_length, tokenizer):
    """Convert single `InputExample()` -> single `InputFeature()`."""
    # for non-InputExamples
    if isinstance(example, PaddingInputExample):
        return InputFeatures(
            input_ids=[0] * max_seq_length,
            input_mask=[0] * max_seq_length,
            segment_ids=[0] * max_seq_length,
            label_ids=0,
            is_real_example=False)
    # tokenizes single comment
    tokens_a = tokenizer.tokenize(example.text_a)
    tokens_b = None # remove tokens_b cases since sequence pair classification not apply
 
    # '-2' because [CLS] and [SEP] tokens added to each example comment
    if len(tokens_a) > max_seq_length - 2:
        tokens_a = tokens_a[0:(max_seq_length - 2)] # cut token short to be same length 

    tokens = []
    segment_ids = []
    tokens.append("[CLS]") # marks beginning of token
    segment_ids.append(0) # add '0' to segment_id for [CLS] token
    for token in tokens_a:
        tokens.append(token)
        segment_ids.append(0) # for each token in tokens_a, add '0' to segment_ids (list), so len(segment_ids) == len(tokens)
    tokens.append("[SEP]") # marks end of token
    segment_ids.append(0) # add '0' to segment_id for [SEP] token
    
    # convert from token (list of strings) -> a list of numbers
    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    # input mask: 1 = REAL tokens, 0 = padding tokens
    input_mask = [1] * len(input_ids) 

    # add zero padding until reach max_seq_length.
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)
    # check
    assert len(input_ids) == max_seq_length
    assert len(input_mask) == max_seq_length
    assert len(segment_ids) == max_seq_length
    # append labels for that example
    labels = []
    for tox_label in example.labels:
        labels.append(int(tox_label))

    # create 'InputFeatures()' instance for the single example
    feature = InputFeatures(
        input_ids=input_ids, # a list of numbers , each number represents a word in the tokenized comment
        input_mask=input_mask, # a vector of 1's
        segment_ids=segment_ids, # a vector of 0's
        label_ids=labels, # a vector of labels (length 6, since 6 toxicity classes) for that example e.g. [1, 0, 1, 0, 0, 0]
        is_real_example=True) # flag
    return feature

In [55]:
# calls above function to create single `InputFeature` instance
def file_based_convert_examples_to_features(
        examples, max_seq_length, tokenizer, output_file):
    """Convert a set of `InputExample`s to a TFRecord file."""

    writer = tf.io.TFRecordWriter(output_file) # init. writer to write to tf.record file

    # for each index, example: 
    for (ex_index, example) in enumerate(examples):
        # convert single example -> feature (`InputFeature()` instance )
        feature = convert_single_example(ex_index, example,
                                         max_seq_length, tokenizer)

        def create_int_feature(values):
          ''' Convert features to TFRecord format
          '''
          f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
          return f

        # initialize ordered dictionary e.g. {'input_ids': vector of input_ids, 'input_mask', vector of input_mask, ...}
        features = collections.OrderedDict()
        # append embeddings and labels to dict
        if isinstance(feature.label_ids, list): # check if list
            labels_feat = feature.label_ids
        else:
            labels_feat = feature.label_ids[0]
        features["label_ids"] = create_int_feature(labels_feat)
        features["input_ids"] = create_int_feature(feature.input_ids)
        features["input_mask"] = create_int_feature(feature.input_mask)
        features["segment_ids"] = create_int_feature(feature.segment_ids)
        features["is_real_example"] = create_int_feature(
            [int(feature.is_real_example)])

        # convert `features` to type `tf.train.Example` (format suitable for tf.record file)
        tf_example = tf.train.Example(features=tf.train.Features(feature=features))
        # write to tf.record file
        writer.write(tf_example.SerializeToString())  
    writer.close()

## Training

### Configure training files (.TFRecord)

In [57]:
# .TFRecord file to store training output
trainFile = os.path.join('bert_output', "train.tf_record") # file path 

# create file if not exist in directory
if not os.path.exists(trainFile): 
    open(trainFile, 'w').close()

# write each `InputExample()` (from train df) as tf.Example into training.tf_record file
file_based_convert_examples_to_features(
            trainInput, maxSeqLength, tokenizer, trainFile) # file size: 'training.tf_record' should be ~81 MB

### Configure training function

In [None]:
# below function taken from source code https://github.com/google-research/bert/blob/master/run_classifier.py 

def file_based_input_fn_builder(input_file, seq_length, is_training,
                                drop_remainder):
    """Creates an `input_fn` closure to be passed to TPUEstimator.
    Returns: `input_fn` object"""

    name_to_features = {
        "input_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
        "input_mask": tf.io.FixedLenFeature([seq_length], tf.int64),
        "segment_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
        "label_ids": tf.io.FixedLenFeature([6], tf.int64), # specify 6 toxicity labels
        "is_real_example": tf.io.FixedLenFeature([], tf.int64),
    }

    def _decode_record(record, name_to_features):
        """Decodes a record to a TensorFlow example."""
        example = tf.io.parse_single_example(record, name_to_features)
        # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
        # So cast all int64 to int32.
        for name in list(example.keys()):
            t = example[name]
            if t.dtype == tf.int64:
                t = tf.cast(t, tf.int32)
            example[name] = t

        return example

    def input_fn(params):
        """The actual input function."""
        batch_size = params["batch_size"]

        # For training, we want a lot of parallel reading and shuffling.
        # For eval, we want no shuffling and parallel reading doesn't matter.
        d = tf.data.TFRecordDataset(input_file)
        if is_training:
            d = d.repeat()
            d = d.shuffle(buffer_size=100)

        d = d.apply(
              tf.data.experimental.map_and_batch(
                lambda record: _decode_record(record, name_to_features),
                batch_size=batch_size,
                drop_remainder=drop_remainder))

        return d

    return input_fn

In [None]:
# define training function
trainFunction = file_based_input_fn_builder(
    input_file=trainFile,
    seq_length=maxSeqLength,
    is_training=True,
    drop_remainder=True)

### Functions to initialize BERT 

Below model functions **adapted** from *same* source code (https://github.com/google-research/bert/blob/master/run_classifier.py)
- Changes made to `create_model()` function : 1) labels casting to float32 since already one-hot encoded in train data 2) use sigmoid not softmax activation since labels NOT mutually exclusive
- Changes made to `model_fn_builder()` function: 1) omitted tpu argument & code (since gpu instance used) 2) changed logit and probability computations, added AUC scores for multilabel case

In [None]:
os.chdir('bert')
import optimization # importing optimization.py from BERT source code
os.chdir('..')

In [None]:
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """creates BERT model for text classification."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings) #creating the BERT model

    output_layer = model.get_pooled_output() # "pool" model by taking hidden state corresponding
        # to the first token. Since model has been pre-trained.

    #size of the last hidden layer
    hidden_size = output_layer.shape[-1]
    
    output_weights = tf.compat.v1.get_variable( #this function creates a new variable for tensorflow to work with
        "output_weights", [num_labels, hidden_size],
        initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.compat.v1.get_variable( # add bias term
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.compat.v1.variable_scope("loss"): #apparently another way to create a variable
        if is_training:
            # 0.1 dropout regularization layer
            output_layer = tf.compat.v1.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True) #multiplying matrices output_layer and output_weights
        logits = tf.nn.bias_add(logits, output_bias) # adding bias to the logits matrix
        probabilities = tf.nn.sigmoid(logits) # use sigmoid not softmax since labels not mutually exclusive 
        
        toxic_labels = tf.cast(labels, tf.float32) # labels already one-hot encoded # change to float32 compatible type
       
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=toxic_labels, logits=logits) # for multilabel classification
        loss = tf.reduce_mean(per_example_loss) #computing the mean across the per_example_loss

        return (loss, per_example_loss, logits, probabilities)

In [12]:
def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
                     num_train_steps, num_warmup_steps, use_tpu, use_one_hot_embeddings):
    """Returns `model_fn` closure for Estimator."""

    def model_fn(features, labels, mode, params):  
        """The `model_fn` for TPUEstimator."""

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
             is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
             is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        
        # call above create_model() function
        (total_loss, per_example_loss, logits, probabilities) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)
        
        tvars = tf.compat.v1.trainable_variables() #tensorflow adds variables to its Graph
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint: 
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu: 
                def tpu_scaffold():
                    tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map) 
                    return tf.compat.v1.train.Scaffold()
                scaffold_fn = tpu_scaffold
            else: 
                tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)
            

        tf.compat.v1.logging.info("**** Trainable Variables ****") # edit Dec 5
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"

        output_spec = None

        ## IF TRAIN
        if mode == tf.estimator.ModeKeys.TRAIN: 

            train_op = optimization.create_optimizer( 
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) 

            output_spec = tf.estimator.EstimatorSpec( #defines the model to be run by Estimator
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold=scaffold_fn)
        ## ELIF VALIDATION
        elif mode == tf.estimator.ModeKeys.EVAL: 
            # modified `metric_fn()`
            def metric_fn(per_example_loss, label_ids, is_real_example, prob):
                sep_logits = tf.split(prob, num_labels, axis=-1) # split into subtensors
                sep_labels = tf.split(label_ids, num_labels, axis=-1) # ^

                metrics_dict = {}
                # AUC per label class
                for ind, logit in enumerate(sep_logits):
                    label = tf.cast(sep_labels[ind], dtype=tf.int32)
                    auc1, auc2 = tf.compat.v1.metrics.auc(label, logit) # compute AUCs via riemann sum
                    metrics_dict[str(ind)] = (auc1, auc2)
                metrics_dict['test_loss'] = tf.compat.v1.metrics.mean(values=per_example_loss) # eval loss
                preds = tf.compat.v1.argmax(logit, axis=-1, output_type=tf.int32)
                preds = tf.reshape(preds, [tf.shape(probabilities)[0], -1])
                metrics_dict['accuracy'] = tf.compat.v1.metrics.accuracy(labels=label_ids, # eval accuracy
                                                                      predictions=preds, 
                                                                      weights=is_real_example)
                return metrics_dict

            eval_metrics = metric_fn(per_example_loss, label_ids,is_real_example,  probabilities)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metric_ops=eval_metrics,
                scaffold=scaffold_fn)
        ## ELSE TEST
        else:
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                predictions={"probabilities": probabilities},
                scaffold=scaffold_fn)
        return output_spec

    return model_fn

In [None]:
os.getcwd() 

'/content/drive/My Drive/CSC2515 Project Part 2'

### Define training hyperparameters, checkpoint parameters, and initialize model & tf.estimator


In [None]:
# CHECKPOINT PARAMS
checkpointEverySteps = 1000 # save checkpoint every 1000 steps
summaryEverySteps = 500
# created folder 'bert_output' in our shared drive folder to store model + checkpoints
output = "bert_output" # output directory created 
# Specify output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig( #this is set up for tf.estimator
    model_dir=output,
    save_summary_steps=summaryEverySteps,
    keep_checkpoint_max= 1 ,
    save_checkpoints_steps=checkpointEverySteps)

In [None]:
# HYPERPARAMETERS
epochs = 2.0
lr = 2e-5 # default 
batchSize = 32 # suitable for gpu
warmup = 0.1

In [None]:
# compute no. of train steps & warmup steps 
trainSteps = int(len(trainInput) / batchSize * epochs)
warmupSteps = int(trainSteps * warmup) # starts training w lower learning rate to help optimizer & attention mechanism 

In [None]:
!pip install transformers
from transformers import BertConfig

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/ed/db/98c3ea1a78190dac41c0127a063abf92bd01b4b0b6970a6db1c2f5b66fa0/transformers-4.0.1-py3-none-any.whl (1.4MB)
[K     |▎                               | 10kB 19.1MB/s eta 0:00:01[K     |▌                               | 20kB 25.4MB/s eta 0:00:01[K     |▊                               | 30kB 24.6MB/s eta 0:00:01[K     |█                               | 40kB 18.2MB/s eta 0:00:01[K     |█▏                              | 51kB 15.4MB/s eta 0:00:01[K     |█▌                              | 61kB 17.2MB/s eta 0:00:01[K     |█▊                              | 71kB 13.9MB/s eta 0:00:01[K     |██                              | 81kB 15.1MB/s eta 0:00:01[K     |██▏                             | 92kB 15.3MB/s eta 0:00:01[K     |██▍                             | 102kB 14.2MB/s eta 0:00:01[K     |██▋                             | 112kB 14.2MB/s eta 0:00:01[K     |███                             | 

In [None]:
# define model 
bert_config = BertConfig.from_json_file(config_file) # get model params from downloaded BERT config file

model_fn = model_fn_builder(
  bert_config=bert_config,
  num_labels= len(classes), # 6 toxicity classes
  init_checkpoint=checkpoint,
  learning_rate=lr,
  num_train_steps=trainSteps,
  num_warmup_steps=warmupSteps,
  use_tpu = False, 
  use_one_hot_embeddings=False)

In [None]:
# define estimator (for train and eval)
estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": batchSize})

INFO:tensorflow:Using config: {'_model_dir': 'bert_output', '_tf_random_seed': None, '_save_summary_steps': 500, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 1, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


### Begin training!

In [None]:
print('Training Now...')
time_now = datetime.now()
estimator.train(input_fn=trainFunction, max_steps=trainSteps)
print("Training took ", datetime.now() - time_now)

Beginning Training!
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
Use `tf.data.Dataset.map(map_func, num_parallel_calls)` followed by `tf.data.Dataset.batch(batch_size, drop_remainder)`. Static tf.data optimizations will take care of using the fused implementation.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
INFO:tensorflow:num_labels:6;logits:Tensor("loss/BiasAdd:0", shape=(32, 6), dtype=float32);labels:Tensor("loss/Cast:0", shape=(32, 6), dtype=float32)
INFO:tensorflow:**** Trainable Variables ****
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:te

## Testing (evaluating)

### Configure validation .TFRecord file 

In [None]:
fileToEval = os.path.join('bert_output', "test.tf_record") # ~9 mb in size

# create file if not exist
if not os.path.exists(fileToEval):
    open(fileToEval, 'w').close()

evalInput = createBertInput(test) # write test set examples to TFRecord file
# convert examples -> features for BERT eval
file_based_convert_examples_to_features(
   evalInput, maxSeqLength, tokenizer, fileToEval)

### Configure evaluation function

In [None]:
evalFunction = file_based_input_fn_builder(
    input_file=fileToEval,
    seq_length=maxSeqLength,
    is_training=False, # since validation not train
    drop_remainder=False)

### Begin evaluating test set!

In [None]:
evaluation = estimator.evaluate(input_fn=evalFunction, steps=None)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:num_labels:6;logits:Tensor("loss/BiasAdd:0", shape=(None, 6), dtype=float32);labels:Tensor("loss/Cast:0", shape=(None, 6), dtype=float32)
INFO:tensorflow:**** Trainable Variables ****
Instructions for updating:
The value of AUC returned by this may race with the update so this is deprecated. Please use tf.keras.metrics.AUC instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-12-11T18:18:38Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from bert_output/model.ckpt-8975
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 46.66910s
INFO:tensorflow:Finished evaluation at 2020-12-11-18:19:25
INFO:tensorflow:Saving dict for global step 8975: 0 = 0.5102444, 1 = 0.5612399, 2 = 0.534467, 3 = 0.5092827, 4 = 0.5490346, 5 = 0.52136266, accuracy = 0.96273553, eval_loss = 0.19433822, global_step = 8975, loss = 0.19434963


In [None]:
# put test metrics into .txt file
evalOutput = os.path.join("bert_output", "evalOutput.txt")

In [None]:
# write to .txt file
with tf.compat.v1.gfile.GFile(evalOutput, "w") as w:
    tf.compat.v1.logging.info("Evaluating test set...")
    for metric in sorted(evaluation.keys()):
        tf.compat.v1.logging.info("  %s = %s", metric, str(evaluation[metric]))
        w.write("%s = %s\n" % (metric, str(evaluation[metric])))

INFO:tensorflow:***** Eval results *****
INFO:tensorflow:  0 = 0.5102444
INFO:tensorflow:  1 = 0.5612399
INFO:tensorflow:  2 = 0.534467
INFO:tensorflow:  3 = 0.5092827
INFO:tensorflow:  4 = 0.5490346
INFO:tensorflow:  5 = 0.52136266
INFO:tensorflow:  accuracy = 0.96273553
INFO:tensorflow:  eval_loss = 0.19433822
INFO:tensorflow:  global_step = 8975
INFO:tensorflow:  loss = 0.19434963


## Unmount drive

In [None]:
# unmount drive when done
from google.colab import drive
drive.flush_and_unmount()