<a href="https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##### Copyright 2018 The TensorFlow Hub Authors.

Licensed under the Apache License, Version 2.0 (the "License");

In [0]:
# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# BERT All model master

### Set up your TPU environment

In this section, you perform the following tasks:

*   Set up a Colab TPU running environment
*   Verify that you are connected to a TPU device
*   Upload your credentials to TPU to access your GCS bucket.

In [0]:
import datetime
import importlib
import json
import os
import pprint
import random
import string
import sys

import numpy as np
import pandas as pd
import tensorflow as tf

# Fail fast when the Colab runtime is not a TPU runtime: the TPU address is
# read from the COLAB_TPU_ADDR environment variable.
assert 'COLAB_TPU_ADDR' in os.environ, 'ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!'
TPU_ADDRESS = 'grpc://' + os.environ['COLAB_TPU_ADDR']
print('TPU address is', TPU_ADDRESS)

# Authenticate the Colab user before the credentials file is read below.
from google.colab import auth
auth.authenticate_user()
with tf.Session(TPU_ADDRESS) as session:
  print('TPU devices:')
  pprint.pprint(session.list_devices())

  # Upload credentials to TPU.
  # NOTE(review): /content/adc.json is presumably written by
  # auth.authenticate_user() above -- confirm.
  with open('/content/adc.json', 'r') as f:
    auth_info = json.load(f)
  tf.contrib.cloud.configure_gcs(session, credentials=auth_info)
  # Now credentials are set for all future sessions on this TPU.

### Prepare and import BERT modules
​
With your environment configured, you can now prepare and import the BERT modules. The following step clones the source code from GitHub and imports the modules from the source. Alternatively, you can install BERT using pip (`!pip install bert-tensorflow`).

In [0]:
import sys

!test -d bert_repo || git clone https://github.com/google-research/bert bert_repo
if not 'bert_repo' in sys.path:
  sys.path += ['bert_repo']

# import python modules defined by BERT
import modeling
import optimization
import run_classifier
import run_classifier_with_tfhub
import tokenization

# import tfhub 
import tensorflow_hub as hub

### Prepare for training

This next section of code performs the following tasks:

*  Specify task and download training data.
*  Specify BERT pretrained model
*  Specify GS bucket, create output directory for model checkpoints and eval results.




In [0]:
# Model : PAWS_QQP_Model ; Data : Final_Data/paws_qqp/
# Model : PAWS_Wiki_Model ; Data : Final_Data/paws_wiki/
BERT_MODEL = 'cased_L-24_H-1024_A-16' #@param {type:"string"}
TEST = 'NO_FINE_TUNNING' #@param {type:"string"}
assert TEST in ('DENSE_CLASSIFICATION','NORMAL',"CNN",'NO_FINE_TUNNING'), 'Test not supported'
BUCKET = 'w266-duplicate-questions-rl-gg' #@param {type:"string"}
bucket_name=BUCKET
assert BUCKET, 'Must specify an existing GCS bucket name'
TASK = 'QQP' #@param {type:"string"}
assert TASK in ('MRPC', 'CoLA','QQP','PAWS'), 'Only (MRPC, CoLA, QQP, PAWS) are demonstrated here.'
SUB_TASK="QQP" #@param {type:"string"}
assert SUB_TASK in ('QQP','WIKI','ALL_FILE','STACK'), 'Only (QQP, PAWS) are demonstrated here.'
# Download glue data.
if TASK != "PAWS":
  #QQP Original
  !gsutil -m cp gs://{bucket_name}/Final_Data/QQP/dev.tsv /content/Final_Data/QQP/dev.tsv
  !gsutil -m cp gs://{bucket_name}/Final_Data/QQP/train.tsv /content/Final_Data/QQP/train.tsv
  TASK_DATA_DIR = "Final_Data/QQP/"
  OUTPUT_DIR = "gs://{}/thursday/{}/QQP_Model/".format(BUCKET, TEST)
  SUB_TASK="NA"
else:
  if SUB_TASK=="QQP":
    # PAWS_QQP
    !gsutil -m cp gs://{bucket_name}/Final_Data/paws_qqp/dev.tsv /content/Final_Data/paws_qqp/dev.tsv
    !gsutil -m cp gs://{bucket_name}/Final_Data/paws_qqp/train.tsv /content/Final_Data/paws_qqp/train.tsv 
    TASK_DATA_DIR = 'Final_Data/paws_qqp/'
    OUTPUT_DIR = 'gs://{}/thursday/{}/PAWS_QQP_Model/'.format(BUCKET, TEST) 
    # PAWS_WIKI
  if SUB_TASK=="WIKI":
    !gsutil -m cp gs://{bucket_name}/Final_Data/paws_wiki/dev.tsv /content/Final_Data/paws_wiki/dev.tsv
    !gsutil -m cp gs://{bucket_name}/Final_Data/paws_wiki/train.tsv /content/Final_Data/paws_wiki/train.tsv
    TASK_DATA_DIR = 'Final_Data/paws_wiki/'
    OUTPUT_DIR = 'gs://{}/thursday/{}/PAWS_WIKI_Model/'.format(BUCKET, TEST) 
  if SUB_TASK=="ALL_FILE":
    !gsutil -m cp gs://{bucket_name}/Final_Data/all_data/dev.tsv /content/Final_Data/all_data/dev.tsv
    !gsutil -m cp gs://{bucket_name}/Final_Data/all_data/train.tsv /content/Final_Data/all_data/train.tsv
    TASK_DATA_DIR = 'Final_Data/paws_wiki/'
    OUTPUT_DIR = 'gs://{}/thursday/{}/PAWS_WIKI_Model/'.format(BUCKET, TEST)
  
print('***** Task data directory: {} *****'.format(TASK_DATA_DIR))
!ls $TASK_DATA_DIR


PREDICT=True #@param{type:"boolean"}
STACK=True #@param{type:"boolean"}
RUN_FROM_CHECKPOINT=False #@param{type:"boolean"}
if STACK:
    !gsutil -m cp gs://{bucket_name}/Final_Data/StackExchange/dev.tsv /content/Final_Data/StackExchange/dev.tsv
    !gsutil -m cp gs://{bucket_name}/Final_Data/StackExchange/train.tsv /content/Final_Data/StackExchange/train.tsv
    STACK_DATA_DIR = 'Final_Data/paws_wiki/'
    
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))
#https://storage.googleapis.com/bert_models/2018_10_18/cased_L-24_H-1024_A-16.zip
# Available pretrained model checkpoints:
#   uncased_L-12_H-768_A-12: uncased BERT base model
#   uncased_L-24_H-1024_A-16: uncased BERT large model
#   cased_L-12_H-768_A-12: cased BERT large model

BERT_MODEL_HUB = 'https://tfhub.dev/google/bert_' + BERT_MODEL + '/1'

#Replace run_classifier_with_tfhub.py with the correct 
if TEST!="NORMAL":
  !rm /content/bert_repo/run_classifier_with_tfhub.py
  !git clone https://github.com/dataSci-rigo/xlnet-QQP-TPU.git
  if TEST=="NO_FINE_TUNNING":
    !cp xlnet-QQP-TPU/run_classifier_with_tfhub.py /content/bert_repo/run_classifier_with_tfhub.py
  if TEST=="CNN":
    !cp xlnet-QQP-TPU/CNN/run_classifier_with_tfhub.py /content/bert_repo/run_classifier_with_tfhub.py
  if TEST=="DENSE_CLASSIFICATION":
    !cp xlnet-QQP-TPU/class_layer/run_classifier_with_tfhub.py /content/bert_repo/run_classifier_with_tfhub.py
!ls bert_repo/

Now let's load the tokenizer module from TF Hub and try it out.

## Data Processors


In [0]:
# Build the tokenizer from the selected TF Hub BERT module; `tokenizer` is
# reused by the training, evaluation and prediction helpers below.
tokenizer = run_classifier_with_tfhub.create_tokenizer_from_hub_module(BERT_MODEL_HUB)
# Quick check of the tokenizer on a sample sentence (the result is the cell output).
tokenizer.tokenize("This here's an example of using the bert tokenizer")

In [0]:
class QQPProcessor(run_classifier.DataProcessor):
  """Processor for the Quora Question Pairs (QQP) data set.

  TSV rows are turned into `run_classifier.InputExample` objects. Train/dev
  rows must have exactly 6 fields (texts in fields 3 and 4, integer label in
  field 5); test rows must have exactly 3 fields (texts in fields 1 and 2).
  The first row of every file is treated as a header and skipped.
  """

  def get_train_examples(self, data_dir):
    """Reads `train.tsv` from `data_dir` and returns a list of InputExample."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, "train.tsv")), 'train')

  def get_dev_examples(self, data_dir):
    """Reads `dev.tsv` from `data_dir` and returns a list of InputExample."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, "dev.tsv")), 'dev')

  def get_test_examples(self, data_dir):
    """Reads `test.tsv` from `data_dir` and returns a list of InputExample."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, "test.tsv")), 'test')

  def get_predict_examples(self, sentence_pairs):
    """Converts unlabeled question pairs into a list of InputExample.

    Args:
      sentence_pairs: iterable of indexable items; item[0] and item[1] are the
        two texts.

    Returns:
      List of InputExample with guid "predict-<i>" and label 0. A dummy label
      is used because None is not supported when converting to features.
    """
    return self._pairs_to_examples(sentence_pairs, use_given_label=False)

  def convert_examples(self, sentence_pairs):
    """Converts labeled question pairs into a list of InputExample.

    Args:
      sentence_pairs: iterable of indexable items; item[0] and item[1] are the
        two texts and item[2] is the label (converted with `int`).

    Returns:
      List of InputExample with guid "predict-<i>" and the given label.
    """
    return self._pairs_to_examples(sentence_pairs, use_given_label=True)

  def _pairs_to_examples(self, sentence_pairs, use_given_label):
    """Shared loop behind get_predict_examples and convert_examples."""
    examples = []
    for (i, qpair) in enumerate(sentence_pairs):
      # Texts are normalised to unicode before building the example.
      text_a = tokenization.convert_to_unicode(qpair[0])
      text_b = tokenization.convert_to_unicode(qpair[1])
      label = int(qpair[2]) if use_given_label else 0
      examples.append(
          run_classifier.InputExample(
              guid="predict-%d" % (i), text_a=text_a, text_b=text_b, label=label))
    return examples

  def _create_examples(self, lines, set_type):
    """Creates examples for the training, dev and test sets.

    Args:
      lines: rows as returned by `_read_tsv` (each row a list of fields).
      set_type: 'train', 'dev' or 'test'; used in the guid and to select the
        expected row layout.

    Returns:
      List of InputExample. Header and malformed rows are skipped; skipped
      test rows are printed.
    """
    examples = []
    for (i, line) in enumerate(lines):
      guid = "%s-%d" % (set_type, i)
      if set_type=='test':
        # removing header and invalid data
        if i == 0 or len(line)!=3:
          print(guid, line)
          continue
        text_a = tokenization.convert_to_unicode(line[1])
        text_b = tokenization.convert_to_unicode(line[2])
        label = 0 # We will use zero for test as convert_example_to_features doesn't support None
      else:
        # removing header and invalid data
        if i == 0 or len(line)!=6:
          continue
        text_a = tokenization.convert_to_unicode(line[3])
        text_b = tokenization.convert_to_unicode(line[4])
        label = int(line[5])
      examples.append(
          run_classifier.InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
    return examples

  def get_labels(self):
    """Returns the class labels."""
    return [0,1]

We also initialize our hyperparameters, prepare the training data, and initialize the TPU config.

In [0]:
class PAWSProcessor(run_classifier.DataProcessor):
  """Processor for the PAWS paraphrase data sets.

  TSV rows are turned into `run_classifier.InputExample` objects. Every row
  (train, dev and test) must have exactly 4 fields: texts in fields 1 and 2
  and an integer label in field 3. The first row of every file is treated as
  a header and skipped.
  """

  def get_train_examples(self, data_dir):
    """Reads `train.tsv` from `data_dir` and returns a list of InputExample."""
    # os.path.join gives the same path as string concatenation when data_dir
    # ends with '/', and also works when it does not.
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, 'train.tsv')), 'train')

  def get_dev_examples(self, data_dir):
    """Reads `dev.tsv` from `data_dir` and returns a list of InputExample."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, 'dev.tsv')), 'dev')

  def get_test_examples(self, data_dir):
    """Reads `test.tsv` from `data_dir` and returns a list of InputExample."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, 'test.tsv')), 'test')

  def convert_examples(self, sentence_pairs):
    """Converts labeled sentence pairs into a list of InputExample.

    Args:
      sentence_pairs: iterable of indexable items; item[0] and item[1] are the
        two texts and item[2] is the label (converted with `int`).

    Returns:
      List of InputExample with guid "predict-<i>" and the given label.
    """
    return self._pairs_to_examples(sentence_pairs, use_given_label=True)

  def get_predict_examples(self, sentence_pairs):
    """Converts unlabeled sentence pairs into a list of InputExample.

    Args:
      sentence_pairs: iterable of indexable items; item[0] and item[1] are the
        two texts.

    Returns:
      List of InputExample with guid "predict-<i>" and label 0. A dummy label
      is used because None is not supported when converting to features.
    """
    return self._pairs_to_examples(sentence_pairs, use_given_label=False)

  def _pairs_to_examples(self, sentence_pairs, use_given_label):
    """Shared loop behind convert_examples and get_predict_examples."""
    examples = []
    for (i, qpair) in enumerate(sentence_pairs):
      # Texts are normalised to unicode before building the example.
      text_a = tokenization.convert_to_unicode(qpair[0])
      text_b = tokenization.convert_to_unicode(qpair[1])
      label = int(qpair[2]) if use_given_label else 0
      examples.append(
          run_classifier.InputExample(
              guid="predict-%d" % (i), text_a=text_a, text_b=text_b, label=label))
    return examples

  def _create_examples(self, lines, set_type):
    """Creates examples for the training, dev and test sets.

    Args:
      lines: rows as returned by `_read_tsv` (each row a list of fields).
      set_type: 'train', 'dev' or 'test'; used in the guid. The row layout is
        the same for all three; only 'test' prints the rows it skips.

    Returns:
      List of InputExample built from the well-formed, non-header rows.
    """
    examples = []
    for (i, line) in enumerate(lines):
      guid = "%s-%d" % (set_type, i)
      # removing header and invalid data
      if i == 0 or len(line)!=4:
        if set_type=='test':
          print(guid, line)
        continue
      text_a = tokenization.convert_to_unicode(line[1])
      text_b = tokenization.convert_to_unicode(line[2])
      # Unlike QQPProcessor, the test split also carries a real label.
      label = int(line[3])
      examples.append(
          run_classifier.InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
    return examples

  def get_labels(self):
    """Returns the class labels."""
    return [0,1]

## Model Specifications

In [0]:
# Batch sizes and optimisation hyperparameters used by the train / eval /
# predict helpers and by the TPUEstimator constructed later in the notebook.
TRAIN_BATCH_SIZE = 32
EVAL_BATCH_SIZE = 8
PREDICT_BATCH_SIZE = 8
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 8.0
MAX_SEQ_LENGTH = 256
# Warmup is a period of time where the learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 1000
# NOTE(review): SAVE_SUMMARY_STEPS is defined here but not passed to the
# RunConfig built by get_run_config -- confirm whether it should be.
SAVE_SUMMARY_STEPS = 500

# Maps the lower-cased TASK name to the processor class that reads its data.
processors = {
  "cola": run_classifier.ColaProcessor,
  "mnli": run_classifier.MnliProcessor,
  "mrpc": run_classifier.MrpcProcessor,
  "qqp" : QQPProcessor,
  "paws": PAWSProcessor
}
processor = processors[TASK.lower()]()
label_list = processor.get_labels()

# Compute number of train and warmup steps from batch size
train_examples = processor.get_train_examples(TASK_DATA_DIR)
print(len(train_examples))
num_train_steps = int(len(train_examples) / TRAIN_BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

# Setup TPU related config
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)
NUM_TPU_CORES = 8
ITERATIONS_PER_LOOP = 1000

def get_run_config(output_dir):
  """Builds the TPU RunConfig whose model directory is `output_dir`.

  Args:
    output_dir: directory passed to the RunConfig as `model_dir`.

  Returns:
    A `tf.contrib.tpu.RunConfig` using the module-level cluster resolver,
    checkpoint interval and TPU settings.
  """
  tpu_settings = tf.contrib.tpu.TPUConfig(
      iterations_per_loop=ITERATIONS_PER_LOOP,
      num_shards=NUM_TPU_CORES,
      per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2)
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=output_dir,
      save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS,
      tpu_config=tpu_settings)
  return run_config


# Fine-tune and Run Predictions on a pretrained BERT Model from TF Hub

This section demonstrates fine-tuning from a pre-trained BERT TF Hub module and running predictions.


In [0]:
# Force TF Hub writes to the GS bucket we provide.
os.environ['TFHUB_CACHE_DIR'] = OUTPUT_DIR

# Model function for the TF Hub based classifier; it is handed to the
# TPUEstimator below. Step counts come from the hyperparameter cell.
model_fn = run_classifier_with_tfhub.model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps,
  use_tpu=True,
  bert_hub_module_handle=BERT_MODEL_HUB
)


At this point, you can now fine-tune the model, evaluate it, and run predictions on it.

In [0]:
# Train the model
def model_train(estimator):
  """Trains `estimator` on the module-level `train_examples`.

  Converts the examples to features with the shared tokenizer, builds a
  training input function and runs `estimator.train` until
  `num_train_steps` is reached. Progress is reported via print/tf.logging.

  Args:
    estimator: estimator exposing `train(input_fn=..., max_steps=...)`.
  """
  print('MRPC/CoLA on BERT base model normally takes about 2-3 minutes. Please wait...')
  # The sequence length handed to the feature converter is MAX_SEQ_LENGTH.
  features = run_classifier.convert_examples_to_features(
      train_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

  print('***** Started training at {} *****'.format(datetime.datetime.now()))
  example_count = len(train_examples)
  print('  Num examples = {}'.format(example_count))
  print('  Batch size = {}'.format(TRAIN_BATCH_SIZE))
  tf.logging.info("  Num steps = %d", num_train_steps)

  input_fn = run_classifier.input_fn_builder(
      features=features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=True,
      drop_remainder=True)
  estimator.train(input_fn=input_fn, max_steps=num_train_steps)

  print('***** Finished training at {} *****'.format(datetime.datetime.now()))

# Estimator for the TF Hub based model; checkpoints and results go to
# OUTPUT_DIR via the RunConfig built by get_run_config.
estimator_from_tfhub = tf.contrib.tpu.TPUEstimator(
  use_tpu=True,
  model_fn=model_fn,
  config=get_run_config(OUTPUT_DIR),
  train_batch_size=TRAIN_BATCH_SIZE,
  eval_batch_size=EVAL_BATCH_SIZE,
  predict_batch_size=PREDICT_BATCH_SIZE,
)


In [0]:
def model_eval(estimator):
  """Evaluates `estimator` on the dev split of the configured task.

  Reads the dev examples from TASK_DATA_DIR with the module-level
  `processor`, runs `estimator.evaluate`, prints every metric and writes
  them to `eval_results.txt` under OUTPUT_DIR.

  Args:
    estimator: estimator exposing `evaluate(input_fn=..., steps=...)`.
  """
  dev_examples = processor.get_dev_examples(TASK_DATA_DIR)
  dev_features = run_classifier.convert_examples_to_features(
      dev_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  print('***** Started evaluation at {} *****'.format(datetime.datetime.now()))
  print('  Num examples = {}'.format(len(dev_examples)))
  print('  Batch size = {}'.format(EVAL_BATCH_SIZE))

  # Eval will be slightly WRONG on the TPU because it will truncate
  # the last batch.
  num_eval_steps = len(dev_examples) // EVAL_BATCH_SIZE
  input_fn = run_classifier.input_fn_builder(
      features=dev_features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=False,
      drop_remainder=True)
  result = estimator.evaluate(input_fn=input_fn, steps=num_eval_steps)
  print('***** Finished evaluation at {} *****'.format(datetime.datetime.now()))

  results_path = os.path.join(OUTPUT_DIR, "eval_results.txt")
  with tf.gfile.GFile(results_path, "w") as writer:
    print("***** Eval results *****")
    # Metrics are reported in key order, both on screen and in the file.
    for metric_name in sorted(result.keys()):
      metric_text = str(result[metric_name])
      print('  {} = {}'.format(metric_name, metric_text))
      writer.write("%s = %s\n" % (metric_name, metric_text))


## Training and Evaluation

In [0]:
#model_train(estimator_from_tfhub)

In [0]:
#model_eval(estimator_from_tfhub)

In [0]:
if STACK:
  def model_eval_stack(estimator):
    """Evaluates `estimator` on the dev split stored in STACK_DATA_DIR.

    The data is read with the 'paws' processor (4-field rows). Metrics are
    printed and written to `eval_results_stack.txt` under OUTPUT_DIR, so the
    file written by `model_eval` (`eval_results.txt`) is not overwritten.

    Args:
      estimator: estimator exposing `evaluate(input_fn=..., steps=...)`.
    """
    # Local processor/labels: the module-level `processor` belongs to TASK.
    # NOTE(review): the prediction loop later in this notebook reads
    # StackExchange with the QQP processor -- confirm which layout is right.
    stack_processor = processors['paws']()
    stack_label_list = stack_processor.get_labels()

    eval_examples = stack_processor.get_dev_examples(STACK_DATA_DIR)
    eval_features = run_classifier.convert_examples_to_features(
        eval_examples, stack_label_list, MAX_SEQ_LENGTH, tokenizer)
    print('***** Started evaluation at {} *****'.format(datetime.datetime.now()))
    print('  Num examples = {}'.format(len(eval_examples)))
    print('  Batch size = {}'.format(EVAL_BATCH_SIZE))

    # Eval will be slightly WRONG on the TPU because it will truncate
    # the last batch.
    eval_steps = int(len(eval_examples) / EVAL_BATCH_SIZE)
    eval_input_fn = run_classifier.input_fn_builder(
        features=eval_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=True)
    result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
    print('***** Finished evaluation at {} *****'.format(datetime.datetime.now()))
    output_eval_file = os.path.join(OUTPUT_DIR, "eval_results_stack.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
      print("***** Eval results *****")
      for key in sorted(result.keys()):
        print('  {} = {}'.format(key, str(result[key])))
        writer.write("%s = %s\n" % (key, str(result[key])))

## Prediction and Analysis:


In [0]:
def model_predict_from(estimator,examples):
  """Runs prediction on caller-supplied labeled sentence pairs.

  Args:
    estimator: estimator exposing `predict(input_fn)`.
    examples: iterable of (text_a, text_b, label) items, converted with the
      module-level `processor.convert_examples`.

  Returns:
    List of dicts with string values under the keys 'q1', 'q2', 'lbl' and
    'pred' (the stringified `probabilities` output), one per prediction.
  """
  prediction_examples = processor.convert_examples(examples)
  input_features = run_classifier.convert_examples_to_features(prediction_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  # NOTE(review): drop_remainder=True presumably drops a final partial batch,
  # in which case zip() below silently omits those examples -- confirm.
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=True)
  predictions = estimator.predict(predict_input_fn)

  # Start from a fresh list on every call; previously `rows` was never
  # initialised, so the function raised NameError or appended to a global.
  rows=[]
  for example, prediction in zip(prediction_examples, predictions):
    row={'q1': str(example.text_a),'q2': str(example.text_b),'lbl': str(example.label),'pred': str(prediction['probabilities'])}
    rows.append(row)
  return rows
def model_predict(estimator, data_dir=None, task_processor=None):
  """Runs prediction on the dev split of a data directory.

  Args:
    estimator: estimator exposing `predict(input_fn)`.
    data_dir: directory holding `dev.tsv`. Defaults to the module-level
      TASK_DATA_DIR.
    task_processor: processor used to read the data. Defaults to the
      module-level `processor` (with the module-level `label_list`); when
      given, its own `get_labels()` is used.

  Returns:
    List of dicts with string values under the keys 'q1', 'q2', 'lbl' and
    'pred' (the stringified `probabilities` output), one per prediction.
  """
  # The optional arguments let callers predict on another data set without
  # rebinding module-level names; with no extra arguments the behaviour is
  # the same as before.
  if data_dir is None:
    data_dir = TASK_DATA_DIR
  if task_processor is None:
    task_processor = processor
    labels = label_list
  else:
    labels = task_processor.get_labels()

  prediction_examples = task_processor.get_dev_examples(data_dir)
  input_features = run_classifier.convert_examples_to_features(prediction_examples, labels, MAX_SEQ_LENGTH, tokenizer)
  # NOTE(review): drop_remainder=True presumably drops a final partial batch,
  # in which case zip() below silently omits those examples -- confirm.
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=True)
  predictions = estimator.predict(predict_input_fn)
  rows=[]
  for example, prediction in zip(prediction_examples, predictions):
    row={'q1': str(example.text_a),'q2': str(example.text_b),'lbl': str(example.label),'pred': str(prediction['probabilities'])}
    rows.append(row)
  return rows
data = model_predict(estimator_from_tfhub)

In [0]:
# Data sets to predict on. The first assignment is immediately overridden by
# the second; it appears to be kept as a manual toggle.
current_tasks=["QQP"]
current_tasks=["StackExchange",'StackCola']
predict_results=[]
predict_setups=[]
# NOTE: the loop rebinds the module-level SUB_TASK, processor and label_list;
# later cells see the values from the last iteration.
for SUB_TASK in current_tasks:
  !gsutil -m cp gs://{BUCKET}/Final_Data/{SUB_TASK}/dev.tsv /content/Final_Data/{SUB_TASK}/dev.tsv
  
  # QQP, StackExchange and StackCola are read with the QQP processor;
  # anything else falls back to the PAWS processor.
  TASK_PREDICT="QQP" if SUB_TASK in ["QQP",'StackExchange','StackCola'] else "PAWS"
  TASK_PREDICT_DIR = 'Final_Data/'+SUB_TASK+'/'
  #TASK_PREDICT_DIR='/content'
  processor = processors[TASK_PREDICT.lower()]()
  label_list = processor.get_labels()
  # Requires a model_predict that accepts (estimator, data_dir, processor)
  # positionally.
  predict_result =model_predict(estimator_from_tfhub,TASK_PREDICT_DIR,processor)
  predict_setup={'TASK_PREDICT':TASK_PREDICT+"_"+SUB_TASK}
  predict_results.append(predict_result)
  predict_setups.append(predict_setup)

In [0]:

def postprocessing(data,predict_set_up):
  df = pd.DataFrame(data)
  df['pred_lbl']=0
  df['type_error']="None"

  df['lbl']=df['lbl'].astype(int)
  preds=[]
  for table in df.pred.values:

    pred_row=json.loads(table.replace(" ",',',1))
    preds.append(pred_row)
  preds=np.array(preds)
  print(preds.shape)
  df['first']=preds[:,0].astype(float)
  df['second']=preds[:,1].astype(float)
  df.loc[ ((df['first'])< (df['second'])),['pred_lbl']]= 1
  df.loc[((df['lbl']==1) & (df['pred_lbl']==1)),['type_error']]="TP" 
  df.loc[((df['lbl']==0) & (df['pred_lbl']==0)),['type_error']]="TN" 
  df.loc[((df['lbl']==0) & (df['pred_lbl']==1)),['type_error']]="FP" 
  df.loc[((df['lbl']==1) & (df['pred_lbl']==0)),['type_error']]="FN" 
  filename="prediction_"+BERT_MODEL+TASK+"_"+SUB_TASK+".csv"
  df.to_csv(filename)
  !gsutil  -m cp /content/{filename} gs://{BUCKET}/prediction_results/{filename}
  error_acronyms=['TN','TP','FN','FP']
  met_dict={error_type:df[df['type_error']==error_type] for error_type in error_acronyms}
  met={error_type:met_dict[error_type].shape[0] for error_type in error_acronyms}

  #Precision = TP/TP+FP
  precision=met['TP']/(met['TP']+met['FP'])
  #Recall = TP/TP+FN
  recall=met['TP']/(met['TP']+met['FN'])
  #F1 Score = 2*(Recall * Precision) / (Recall + Precision)
  f1_score=2*(recall * precision) / (recall + precision)


  accuracy=(met['TP']+met['TN'])/sum([met[m] for m in error_acronyms])
  score_names=('accuracy','precision','recall', 'f1_score')
  sc=(accuracy,precision,recall, f1_score)
  scores={}
  for name,score in zip(score_names,sc):
    print(name, score)
    scores={name:score}
  out={**predict_set_up,**scores,**met}

  e_a=['TN','TP','FN','FP']
  errors=['True Negatives','True positives','False Negatives','False positives']
  for i, error_type in zip(e_a,errors):
    print('The number of '+error_type+' is: ',met_dict[i].shape[0])
  print('using ',TASK, SUB_TASK)
  print('Analysis of ',TASK, SUB_TASK)
  print('Examples of False positives:')
  FP=met_dict['FP'][['q1','q2']].head(15).to_numpy()
  FN=met_dict['FN'][['q1','q2']].head(15).to_numpy()
  for q in range(1,10):
    if q<=FP.shape[0]:
      print('Example: ',q)
      print('Statement 1:',FP[q,0])
      print('Statement 2:',FP[q,1])

  print()  
  print('Examples of False Negatives:')
  for q in range(1,10):
    if q<=FN.shape[0]:
      print('Example: ',q)
      print('Statement 1:',FN[q,0])
      print('Statement 2:',FN[q,1])
      print()
  return out

In [0]:
import numpy as np

# Score and analyse the first data set from the prediction loop; the returned
# summary dict is shown as the cell output.
postprocessing(predict_results[0],predict_setups[0])

In [0]:
import numpy as np

import json
import pandas as pd

df = pd.DataFrame(data)
df['pred_lbl']=0
df['type_error']="None"

df['lbl']=df['lbl'].astype(int)
preds=[]
for table in df.pred.values:

  pred_row=json.loads(table.replace(" ",',',1))
  preds.append(pred_row)
preds=np.array(preds)
print(preds.shape)
df['first']=preds[:,0].astype(float)
df['second']=preds[:,1].astype(float)
df.loc[ ((df['first'])< (df['second'])),['pred_lbl']]= 1
df.loc[((df['lbl']==1) & (df['pred_lbl']==1)),['type_error']]="TP" 
df.loc[((df['lbl']==0) & (df['pred_lbl']==0)),['type_error']]="TN" 
df.loc[((df['lbl']==0) & (df['pred_lbl']==1)),['type_error']]="FP" 
df.loc[((df['lbl']==1) & (df['pred_lbl']==0)),['type_error']]="FN" 
filename="prediction_"+BERT_MODEL+"_label_wiki.csv"
df.to_csv(filename)
!gsutil  -m cp /content/{filename} gs://{BUCKET}/prediction_results/{filename}

In [0]:
# Report how many false positives / negatives there are and print a few of each.
FN=df[df['type_error']=="FN"]
FP=df[df['type_error']=="FP"]
print('The number of False positives is: ',FP[['q1','q2']].shape[0])
print('The number of False Negatives is: ',FN[['q1','q2']].shape[0])

# NOTE(review): the heading is fixed to PAWS WIKI regardless of the configured task.
print('Analysis of PAWS WIKI')
print('Examples of False positives:')
FP=FP[['q1','q2']].head(15).to_numpy()
FN=FN[['q1','q2']].head(15).to_numpy()
# Show at most 9 examples, numbered from 1 and starting at the first row.
# Indexing row q directly skipped row 0 and raised IndexError whenever
# fewer than 10 rows were available.
for q, (statement_1, statement_2) in enumerate(FP[:9], start=1):
  print('Example: ',q)
  print('Statement 1:',statement_1)
  print('Statement 2:',statement_2)

print()
print('Examples of False Negatives:')
for q, (statement_1, statement_2) in enumerate(FN[:9], start=1):
  print('Example: ',q)
  print('Statement 1:',statement_1)
  print('Statement 2:',statement_2)
  print()

Alternatively, you can also load pre-trained BERT models from saved checkpoints.

In [0]:
if RUN_FROM_CHECKPOINT:
  # Setup task specific model and TPU running config.
  BERT_PRETRAINED_DIR = 'gs://cloud-tpu-checkpoints/bert/' + BERT_MODEL 
  print('***** BERT pretrained directory: {} *****'.format(BERT_PRETRAINED_DIR))
  !gsutil ls $BERT_PRETRAINED_DIR

  CONFIG_FILE = os.path.join(BERT_PRETRAINED_DIR, 'bert_config.json')
  INIT_CHECKPOINT = os.path.join(BERT_PRETRAINED_DIR, 'bert_model.ckpt')

  model_fn = run_classifier.model_fn_builder(
    bert_config=modeling.BertConfig.from_json_file(CONFIG_FILE),
    num_labels=len(label_list),
    init_checkpoint=INIT_CHECKPOINT,
    learning_rate=LEARNING_RATE,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    use_tpu=True,
    use_one_hot_embeddings=True
  )

  OUTPUT_DIR = OUTPUT_DIR.replace('bert-tfhub', 'bert-checkpoints')
  tf.gfile.MakeDirs(OUTPUT_DIR)

  estimator_from_checkpoints = tf.contrib.tpu.TPUEstimator(
    use_tpu=True,
    model_fn=model_fn,
    config=get_run_config(OUTPUT_DIR),
    train_batch_size=TRAIN_BATCH_SIZE,
    eval_batch_size=EVAL_BATCH_SIZE,
    predict_batch_size=PREDICT_BATCH_SIZE,
  )
  model_train(estimator_from_checkpoints)
  model_eval(estimator_from_checkpoints)

Now, you can repeat the training, evaluation, and prediction steps.