Try to Merge Google's method with my pickled files.

In [7]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

import os
import pickle

import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

In [2]:
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]

In [9]:
batch_size = 32
def get_input_fn(data_dir, num_examples=None, num_epochs=10):
    data_files = os.listdir(data_dir)
    
    # open pre-embedded data
    feature_list = []
    label_list = []
    for data_file in data_files:
        with open(os.path.join(data_dir, data_file), 'rb') as f:
            features, labels = pickle.load(f)
            feature_list.append(features)
            label_list.append(labels)
    features = np.concatenate(feature_list)
    labels = [label for labels in label_list for label in labels]
    
    # split into train and dev set
    train_features = features[0:int(0.8*len(features))]
    train_labels = labels[0:int(0.8*len(features))]
    dev_features = features[int(0.8*len(features)):len(features)]
    dev_labels = labels[int(0.8*len(features)):len(features)]
    
    train_labels = np.array(train_labels).astype('int32')
    dev_labels = np.array(dev_labels).astype('int32')
    
    if num_examples is not None:
        train_features = train_features[0:num_examples]
        train_labels = train_labels[0:num_examples]
    
#     print('{} train data points'.format(len(train_features)))
#     print('{} dev data points'.format(len(dev_features)))
    
    train_fn = tf.estimator.inputs.numpy_input_fn(
        x={'feature': train_features},
        y=train_labels,
        num_epochs=num_epochs,
        batch_size=batch_size,
        shuffle=True
    )
    dev_fn = tf.estimator.inputs.numpy_input_fn(
        x={'feature': dev_features},
        y=dev_labels,
        num_epochs=1,
        batch_size=batch_size,
        shuffle=False
    )
    return (train_fn, dev_fn)

In [10]:
bert_model = 'train_uncased_small'
train_input_fn, dev_input_fn = get_input_fn('/home/eugenet/final_project/cached_data/{}/'.format(bert_model), None, 3)

In [23]:
def create_model(is_predicting, features, labels, num_labels):
    """Creates a classification model."""
    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_outputs" for token-level output.
    output_layer = features

    hidden_size = output_layer.shape[-1].value

    # Create our own layer to tune for politeness data.
    output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):

        # Dropout helps prevent overfitting
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
        # If we're predicting, we want predicted labels and the probabiltiies.
        if is_predicting:
            return (predicted_labels, log_probs)

        # If we're train/eval, compute loss between predicted and actual label
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, predicted_labels, log_probs)


In [38]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
    """Returns `model_fn` closure for TPUEstimator."""
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        embeddings = features['feature']
        label_ids = labels
        print(features, labels)
        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
    
        # TRAIN and EVAL
        if not is_predicting:
            (loss, predicted_labels, log_probs) = create_model(
                is_predicting, embeddings, label_ids, num_labels)

            train_op = bert.optimization.create_optimizer(
              loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

            # Calculate evaluation metrics. 
            def metric_fn(label_ids, predicted_labels):
                accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
                f1_score = tf.contrib.metrics.f1_score(
                    label_ids,
                    predicted_labels)
                auc = tf.metrics.auc(
                    label_ids,
                    predicted_labels)
                recall = tf.metrics.recall(
                    label_ids,
                    predicted_labels)
                precision = tf.metrics.precision(
                    label_ids,
                    predicted_labels) 
                true_pos = tf.metrics.true_positives(
                    label_ids,
                    predicted_labels)
                true_neg = tf.metrics.true_negatives(
                    label_ids,
                    predicted_labels)   
                false_pos = tf.metrics.false_positives(
                    label_ids,
                    predicted_labels)  
                false_neg = tf.metrics.false_negatives(
                    label_ids,
                    predicted_labels)
                return {
                    "eval_accuracy": accuracy,
                    "f1_score": f1_score,
                    "auc": auc,
                    "precision": precision,
                    "recall": recall,
                    "true_positives": true_pos,
                    "true_negatives": true_neg,
                    "false_positives": false_pos,
                    "false_negatives": false_neg
                }

            eval_metrics = metric_fn(label_ids, predicted_labels)

            if mode == tf.estimator.ModeKeys.TRAIN:
                return tf.estimator.EstimatorSpec(mode=mode,
                  loss=loss,
                  train_op=train_op)
            else:
                return tf.estimator.EstimatorSpec(mode=mode,
                loss=loss,
                eval_metric_ops=eval_metrics)
        else:
            (predicted_labels, log_probs) = create_model(
            is_predicting, embeddings, label_ids, num_labels)

            predictions = {
              'probabilities': log_probs,
              'labels': predicted_labels
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Return the actual model function in the closure
    return model_fn


In [39]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

# Compute # train and warmup steps from batch size
num_train_steps = int(786 / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [40]:
# Specify outpit directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir='/home/eugenet/final_project/trained_models/imdb_google_example2',
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})

INFO:tensorflow:Using config: {'_model_dir': '/home/eugenet/final_project/trained_models/imdb_google_example2', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f48f9a24ba8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


I0409 02:14:59.207397 139952757946176 estimator.py:201] Using config: {'_model_dir': '/home/eugenet/final_project/trained_models/imdb_google_example2', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f48f9a24ba8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [44]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=500)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
INFO:tensorflow:Calling model_fn.


I0409 02:43:30.629841 139952757946176 estimator.py:1111] Calling model_fn.


{'feature': <tf.Tensor 'random_shuffle_queue_DequeueUpTo:1' shape=(?, 768) dtype=float32>} Tensor("random_shuffle_queue_DequeueUpTo:2", shape=(?,), dtype=int32, device=/device:CPU:0)
INFO:tensorflow:Done calling model_fn.


I0409 02:43:31.536596 139952757946176 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


I0409 02:43:31.539974 139952757946176 basic_session_run_hooks.py:527] Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


I0409 02:43:31.632353 139952757946176 monitored_session.py:222] Graph was finalized.


INFO:tensorflow:Restoring parameters from /home/eugenet/final_project/trained_models/imdb_google_example2/model.ckpt-100


I0409 02:43:31.639273 139952757946176 saver.py:1270] Restoring parameters from /home/eugenet/final_project/trained_models/imdb_google_example2/model.ckpt-100


Instructions for updating:
Use standard file utilities to get mtimes.


W0409 02:43:31.690744 139952757946176 deprecation.py:323] From /home/eugenet/miniconda3/envs/bert/lib/python3.7/site-packages/tensorflow/python/training/saver.py:1070: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.


INFO:tensorflow:Running local_init_op.


I0409 02:43:31.723484 139952757946176 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0409 02:43:31.768735 139952757946176 session_manager.py:493] Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 100 into /home/eugenet/final_project/trained_models/imdb_google_example2/model.ckpt.


I0409 02:43:32.181933 139952757946176 basic_session_run_hooks.py:594] Saving checkpoints for 100 into /home/eugenet/final_project/trained_models/imdb_google_example2/model.ckpt.


INFO:tensorflow:loss = 0.64635396, step = 101


I0409 02:43:32.563053 139952757946176 basic_session_run_hooks.py:249] loss = 0.64635396, step = 101


INFO:tensorflow:global_step/sec: 109.788


I0409 02:43:33.473622 139952757946176 basic_session_run_hooks.py:680] global_step/sec: 109.788


INFO:tensorflow:loss = 0.66941243, step = 201 (0.912 sec)


I0409 02:43:33.475437 139952757946176 basic_session_run_hooks.py:247] loss = 0.66941243, step = 201 (0.912 sec)


INFO:tensorflow:global_step/sec: 601.267


I0409 02:43:33.639870 139952757946176 basic_session_run_hooks.py:680] global_step/sec: 601.267


INFO:tensorflow:loss = 0.6769428, step = 301 (0.166 sec)


I0409 02:43:33.641643 139952757946176 basic_session_run_hooks.py:247] loss = 0.6769428, step = 301 (0.166 sec)


INFO:tensorflow:global_step/sec: 632.094


I0409 02:43:33.798088 139952757946176 basic_session_run_hooks.py:680] global_step/sec: 632.094


INFO:tensorflow:loss = 0.6821265, step = 401 (0.159 sec)


I0409 02:43:33.800198 139952757946176 basic_session_run_hooks.py:247] loss = 0.6821265, step = 401 (0.159 sec)


INFO:tensorflow:Saving checkpoints for 500 into /home/eugenet/final_project/trained_models/imdb_google_example2/model.ckpt.


I0409 02:43:33.954846 139952757946176 basic_session_run_hooks.py:594] Saving checkpoints for 500 into /home/eugenet/final_project/trained_models/imdb_google_example2/model.ckpt.


INFO:tensorflow:Loss for final step: 0.6505876.


I0409 02:43:34.076574 139952757946176 estimator.py:359] Loss for final step: 0.6505876.


Training took time  0:00:03.508472


In [45]:
estimator.evaluate(input_fn=dev_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


I0409 02:43:37.925430 139952757946176 estimator.py:1111] Calling model_fn.


{'feature': <tf.Tensor 'fifo_queue_DequeueUpTo:1' shape=(?, 768) dtype=float32>} Tensor("fifo_queue_DequeueUpTo:2", shape=(?,), dtype=int32, device=/device:CPU:0)
INFO:tensorflow:Done calling model_fn.


I0409 02:43:38.751626 139952757946176 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2019-04-09T02:43:38Z


I0409 02:43:38.772929 139952757946176 evaluation.py:257] Starting evaluation at 2019-04-09T02:43:38Z


INFO:tensorflow:Graph was finalized.


I0409 02:43:38.863159 139952757946176 monitored_session.py:222] Graph was finalized.


INFO:tensorflow:Restoring parameters from /home/eugenet/final_project/trained_models/imdb_google_example2/model.ckpt-500


I0409 02:43:38.866685 139952757946176 saver.py:1270] Restoring parameters from /home/eugenet/final_project/trained_models/imdb_google_example2/model.ckpt-500


INFO:tensorflow:Running local_init_op.


I0409 02:43:38.941139 139952757946176 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0409 02:43:38.992495 139952757946176 session_manager.py:493] Done running local_init_op.


INFO:tensorflow:Finished evaluation at 2019-04-09-02:43:40


I0409 02:43:40.129551 139952757946176 evaluation.py:277] Finished evaluation at 2019-04-09-02:43:40


INFO:tensorflow:Saving dict for global step 500: auc = 0.65139997, eval_accuracy = 0.6514, f1_score = 0.6685681, false_negatives = 742.0, false_positives = 1001.0, global_step = 500, loss = 0.6671107, precision = 0.63718736, recall = 0.7032, true_negatives = 1499.0, true_positives = 1758.0


I0409 02:43:40.131109 139952757946176 estimator.py:1979] Saving dict for global step 500: auc = 0.65139997, eval_accuracy = 0.6514, f1_score = 0.6685681, false_negatives = 742.0, false_positives = 1001.0, global_step = 500, loss = 0.6671107, precision = 0.63718736, recall = 0.7032, true_negatives = 1499.0, true_positives = 1758.0


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 500: /home/eugenet/final_project/trained_models/imdb_google_example2/model.ckpt-500


I0409 02:43:40.132520 139952757946176 estimator.py:2039] Saving 'checkpoint_path' summary for global step 500: /home/eugenet/final_project/trained_models/imdb_google_example2/model.ckpt-500


{'auc': 0.65139997,
 'eval_accuracy': 0.6514,
 'f1_score': 0.6685681,
 'false_negatives': 742.0,
 'false_positives': 1001.0,
 'loss': 0.6671107,
 'precision': 0.63718736,
 'recall': 0.7032,
 'true_negatives': 1499.0,
 'true_positives': 1758.0,
 'global_step': 500}