A series of experiments run on BERT data *after* all the sentences have already been embedded.

In [168]:
import json
import os
import random

import tensorflow as tf
import numpy as np
import pandas as pd
import pickle
from tensorflow.estimator import BaselineClassifier
from tensorflow.python.estimator.canned.dnn import DNNClassifier
from tensorflow.python.estimator.run_config import RunConfig
from tensorflow.python.estimator.training import TrainSpec, EvalSpec, train_and_evaluate

# Set TensorFlow's logging threshold to ERROR for the whole notebook.
tf.logging.set_verbosity(tf.logging.ERROR)

# Helper Functions

Build the input functions. Assumes the first 80% of the cached examples form the training set and the remaining 20% form the dev set.

In [118]:
def get_input_fn(data_dir, num_examples=None, num_epochs=10, batch_size=128):
    """Build train and dev input functions from cached, pre-embedded data.

    Every file in ``data_dir`` is unpickled as a ``(features, labels)`` pair.
    The pairs are concatenated in sorted filename order; the first 80% of the
    examples become the training set and the remaining 20% the dev set.

    Args:
        data_dir: Directory in which every file is a pickled
            ``(features, labels)`` tuple.
        num_examples: If not None, keep only the first ``num_examples``
            training examples. The dev set is never truncated.
        num_epochs: Number of epochs passed to the training input function.
            The dev input function always uses a single epoch.
        batch_size: Batch size used for both input functions. This used to be
            read from a global that the visible notebook never defines.
            NOTE(review): 128 is assumed to match the recorded runs - confirm.

    Returns:
        A ``(train_fn, dev_fn)`` tuple of ``numpy_input_fn`` input functions.
        Features are exposed under the key ``'feature'``.

    Raises:
        ValueError: If ``data_dir`` contains no files, or if the number of
            features and labels loaded from it differ.
    """
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # positional 80/20 split identical from one run to the next.
    data_files = sorted(os.listdir(data_dir))
    if not data_files:
        raise ValueError('No cached data files found in {}'.format(data_dir))

    # Open the pre-embedded data.
    feature_list = []
    label_list = []
    for data_file in data_files:
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # cache files you produced yourself.
        with open(os.path.join(data_dir, data_file), 'rb') as f:
            file_features, file_labels = pickle.load(f)
        feature_list.append(file_features)
        label_list.extend(file_labels)
    features = np.concatenate(feature_list)
    labels = np.array(label_list).astype('int32')
    if len(features) != len(labels):
        raise ValueError('Loaded {} feature rows but {} labels from {}'.format(
            len(features), len(labels), data_dir))

    # Split into train and dev sets (first 80% / last 20%).
    split = int(0.8 * len(features))
    train_features, dev_features = features[:split], features[split:]
    train_labels, dev_labels = labels[:split], labels[split:]

    if num_examples is not None:
        train_features = train_features[:num_examples]
        train_labels = train_labels[:num_examples]

    train_fn = tf.estimator.inputs.numpy_input_fn(
        x={'feature': train_features},
        y=train_labels,
        num_epochs=num_epochs,
        batch_size=batch_size,
        shuffle=True
    )
    dev_fn = tf.estimator.inputs.numpy_input_fn(
        x={'feature': dev_features},
        y=dev_labels,
        num_epochs=1,
        batch_size=batch_size,
        shuffle=False
    )
    return (train_fn, dev_fn)

Train and evaluate a DNN

In [183]:
def train_and_evaluate_DNN(bert_model, hidden_units, learning_rate, dropout_rate, 
                           train_num_epochs, input_size, activation_function, 
                           optimizer, output_type,
                           model_root='/home/eugenet/final_project/trained_models',
                           data_root='/home/eugenet/final_project/cached_data',
                           fresh_start=False):
    """Train a DNNClassifier on cached embeddings and return the requested output.

    Args:
        bert_model: Name of the cached-data sub-directory under ``data_root``.
            Must contain ``'large'`` (1024-d input) or ``'small'`` (768-d input).
        hidden_units: List of hidden layer sizes passed to ``DNNClassifier``.
        learning_rate: Learning rate given to the optimizer.
        dropout_rate: Value passed as ``dropout`` to ``DNNClassifier``.
        train_num_epochs: Number of epochs for the training input function.
        input_size: Maximum number of training examples (None for all).
        activation_function: One of ``'relu'``, ``'softmax'``, ``'tanh'``.
        optimizer: One of ``'AdaGrad'``, ``'AdamW'``.
        output_type: ``'train_accuracy'`` (evaluate on the training input),
            ``'accuracy'`` (evaluate on the dev input) or ``'predictions'``
            (predict on the dev input).
        model_root: Directory under which the per-configuration model
            directory is created.
        data_root: Directory containing one sub-directory per ``bert_model``.
        fresh_start: If True, delete this configuration's model directory
            before training. The directory name depends only on the
            hyper-parameters, so repeated calls with the same configuration
            share it; an Estimator pointed at a directory holding checkpoints
            continues from them instead of training from scratch.

    Returns:
        The result of ``estimator.evaluate(...)`` for the two accuracy output
        types, or of ``estimator.predict(...)`` for ``'predictions'``.

    Raises:
        ValueError: If ``bert_model``, ``activation_function``, ``optimizer``
            or ``output_type`` is not one of the supported values.
    """
    # Validate everything up front so a typo fails immediately instead of
    # silently producing None (activation, optimizer, embedding size) or
    # returning None after a full training run (output type).
    if output_type not in ('train_accuracy', 'accuracy', 'predictions'):
        raise ValueError('Unknown output_type: {!r}'.format(output_type))
    if 'large' in bert_model:
        embedding_size = 1024
    elif 'small' in bert_model:
        embedding_size = 768
    else:
        raise ValueError(
            "bert_model must contain 'large' or 'small': {!r}".format(bert_model))
    if activation_function not in ('relu', 'softmax', 'tanh'):
        raise ValueError(
            'Unknown activation_function: {!r}'.format(activation_function))
    if optimizer not in ('AdaGrad', 'AdamW'):
        raise ValueError('Unknown optimizer: {!r}'.format(optimizer))

    classifier = 'DNN'
    model_dir = '{}/imdb_{}_{}_input{}_epochs{}_hu{}_lr{}_dropout{}_activation{}_opt{}'.format(
        model_root, bert_model, classifier, input_size, train_num_epochs,
        '_'.join([str(x) for x in hidden_units]), learning_rate, dropout_rate,
        activation_function, optimizer)
    if fresh_start:
        import shutil
        shutil.rmtree(model_dir, ignore_errors=True)

    config = tf.ConfigProto()
    run_config = RunConfig(model_dir=model_dir,
                           session_config=config,
                           save_checkpoints_steps=1000)

    activation_fn = {
        'relu': tf.nn.relu,
        'softmax': tf.nn.softmax,
        'tanh': tf.nn.tanh,
    }[activation_function]

    if optimizer == 'AdaGrad':
        opt = tf.train.AdagradOptimizer(learning_rate=learning_rate)
    else:  # 'AdamW' (validated above)
        opt = tf.contrib.opt.AdamWOptimizer(learning_rate=learning_rate, weight_decay=0.01)

    estimator = DNNClassifier(
        hidden_units=hidden_units,
        feature_columns=[tf.feature_column.numeric_column('feature', shape=(embedding_size,))],
        n_classes=2,
        config=run_config,
        optimizer=opt,
        dropout=dropout_rate,
        activation_fn=activation_fn)
    train_input_fn, dev_input_fn = get_input_fn(
        '{}/{}/'.format(data_root, bert_model), input_size, train_num_epochs)
    estimator.train(input_fn=train_input_fn)

    if output_type == 'train_accuracy':
        # Evaluates with the very same input function used for training,
        # i.e. with its epoch count and shuffling.
        return estimator.evaluate(train_input_fn)
    if output_type == 'accuracy':
        return estimator.evaluate(dev_input_fn)
    return estimator.predict(dev_input_fn)


# Defaults
Defaults to use for all parameters (generally a "good" set of parameters)

In [172]:
# --- Data -------------------------------------------------------------------
bert_model = 'train_uncased_large_max200'
input_size = None  # None -> use all training examples

# --- Architecture -----------------------------------------------------------
hidden_units = [1024]
activation_function = 'relu'
dropout_rate = 0.1

# --- Optimisation -----------------------------------------------------------
optimizer = 'AdaGrad'
learning_rate = 0.003
train_num_epochs = 30

# Experiments

## Effect of Number of Hidden Units
Assuming we only have one hidden layer for now ("fine-tuning"). We single out 1024 in particular because that is the number of dimensions in the embedding.

In [181]:
# Hidden-layer configurations to compare. The wider sweep used previously was:
#   [[1], [2], [5], [10], [50], [100], [500], [1024], [2000], [10000]]
# Currently only an empty hidden_units list is evaluated.
hidden_units_list = [[]]

res = []
for units in hidden_units_list:
    print('Progress {}'.format(units))
    metrics = train_and_evaluate_DNN(
        bert_model=bert_model,
        hidden_units=units,
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        train_num_epochs=train_num_epochs,
        input_size=input_size,
        activation_function=activation_function,
        optimizer=optimizer,
        output_type='accuracy')
    column_name = '{}_Hidden_Units'.format(units)
    res.append(pd.DataFrame.from_dict(metrics, orient='index', columns=[column_name]))
pd.concat(res, axis=1)

Unnamed: 0,[]_Hidden_Units
accuracy,0.865
accuracy_baseline,0.5
auc,0.938264
auc_precision_recall,0.937723
average_loss,0.337109
label/mean,0.5
loss,42.138588
precision,0.864271
prediction/mean,0.502513
recall,0.866


## Effect of Number of Epochs

In [170]:
# Epoch counts to compare; every other hyper-parameter stays at its default.
train_num_epochs_list = [1, 2, 5, 10, 20, 30, 40, 50, 100, 150, 200]

res = []
for epochs in train_num_epochs_list:
    print('Progress {}'.format(epochs))
    metrics = train_and_evaluate_DNN(
        bert_model=bert_model,
        hidden_units=hidden_units,
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        train_num_epochs=epochs,
        input_size=input_size,
        activation_function=activation_function,
        optimizer=optimizer,
        output_type='accuracy')
    column_name = '{}_Epochs'.format(epochs)
    res.append(pd.DataFrame.from_dict(metrics, orient='index', columns=[column_name]))
pd.concat(res, axis=1)

Progress 1
Progress 2
Progress 5
Progress 10
Progress 20
Progress 30
Progress 40
Progress 50
Progress 100
Progress 150
Progress 200


Unnamed: 0,[10000]_Epochs,[10000]_Epochs.1,[10000]_Epochs.2,[10000]_Epochs.3,[10000]_Epochs.4,[10000]_Epochs.5,[10000]_Epochs.6,[10000]_Epochs.7,[10000]_Epochs.8,[10000]_Epochs.9,[10000]_Epochs.10
accuracy,0.875,0.8734,0.882,0.8886,0.8858,0.889,0.8834,0.8968,0.8938,0.8888,0.8944
accuracy_baseline,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
auc,0.940479,0.945295,0.949209,0.952301,0.954016,0.957141,0.956707,0.957187,0.957121,0.955594,0.954894
auc_precision_recall,0.940168,0.945182,0.949012,0.95212,0.953607,0.95619,0.955996,0.956493,0.956405,0.955305,0.955739
average_loss,0.318056,0.302333,0.287993,0.27903,0.280741,0.270329,0.280951,0.265619,0.280871,0.300794,0.326381
label/mean,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
loss,39.756992,37.791618,35.999123,34.878803,35.092659,33.791084,35.118931,33.202381,35.108852,37.599197,40.79768
precision,0.866641,0.885266,0.879269,0.886282,0.861371,0.872463,0.850713,0.899356,0.883522,0.878505,0.897581
prediction/mean,0.511703,0.478039,0.494844,0.495509,0.533123,0.519556,0.546171,0.494667,0.513091,0.513341,0.49293
recall,0.8864,0.858,0.8856,0.8916,0.9196,0.9112,0.93,0.8936,0.9072,0.9024,0.8904


## Effect of Activation Function

In [173]:
# Hidden-layer activation functions to compare.
activation_functions = ['relu', 'softmax', 'tanh']

res = []
for activation_name in activation_functions:
    print('Progress {}'.format(activation_name))
    metrics = train_and_evaluate_DNN(
        bert_model=bert_model,
        hidden_units=hidden_units,
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        train_num_epochs=train_num_epochs,
        input_size=input_size,
        activation_function=activation_name,
        optimizer=optimizer,
        output_type='accuracy')
    column_name = '{}'.format(activation_name)
    res.append(pd.DataFrame.from_dict(metrics, orient='index', columns=[column_name]))
pd.concat(res, axis=1)

Progress relu
Progress softmax
Progress tanh


Unnamed: 0,relu,softmax,tanh
accuracy,0.8926,0.8726,0.8874
accuracy_baseline,0.5,0.5,0.5
auc,0.956003,0.939229,0.952604
auc_precision_recall,0.955487,0.93813,0.952418
average_loss,0.268104,0.533164,0.280663
label/mean,0.5,0.5,0.5
loss,33.513008,66.645523,35.082836
precision,0.892757,0.867166,0.896764
prediction/mean,0.494607,0.501442,0.479811
recall,0.8924,0.88,0.8756


## Longer Sentences

In [177]:
# Cached embedding sets to compare (names of sub-directories of the cache).
bert_models = ['train_uncased_large', 'train_uncased_large_max200', 'train_uncased_large_max300']

res = []
for model_name in bert_models:
    print('Progress {}'.format(model_name))
    metrics = train_and_evaluate_DNN(
        bert_model=model_name,
        hidden_units=hidden_units,
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        train_num_epochs=train_num_epochs,
        input_size=input_size,
        activation_function=activation_function,
        optimizer=optimizer,
        output_type='accuracy')
    column_name = '{}'.format(model_name)
    res.append(pd.DataFrame.from_dict(metrics, orient='index', columns=[column_name]))
pd.concat(res, axis=1)

Progress train_uncased_large
Progress train_uncased_large_max200
Progress train_uncased_large_max300


Unnamed: 0,train_uncased_large,train_uncased_large_max200,train_uncased_large_max300
accuracy,0.8456,0.8924,0.895996
accuracy_baseline,0.5,0.5,0.5
auc,0.929463,0.95712,0.963195
auc_precision_recall,0.927262,0.956087,0.963974
average_loss,0.344465,0.268681,0.245833
label/mean,0.5,0.5,0.5
loss,43.058144,33.58511,31.466631
precision,0.867035,0.902709,0.887667
prediction/mean,0.469592,0.484267,0.509269
recall,0.8164,0.8796,0.906738


## BERT Model Type

In [175]:
# Cased/uncased and small/large cached embedding sets to compare.
bert_models = ['train_uncased_small', 'train_uncased_large', 'train_cased_small', 'train_cased_large']

res = []
for model_name in bert_models:
    print('Progress {}'.format(model_name))
    metrics = train_and_evaluate_DNN(
        bert_model=model_name,
        hidden_units=hidden_units,
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        train_num_epochs=train_num_epochs,
        input_size=input_size,
        activation_function=activation_function,
        optimizer=optimizer,
        output_type='accuracy')
    column_name = '{}'.format(model_name)
    res.append(pd.DataFrame.from_dict(metrics, orient='index', columns=[column_name]))
pd.concat(res, axis=1)

Progress train_uncased_small
Progress train_uncased_large
Progress train_cased_small
Progress train_cased_large


Unnamed: 0,train_uncased_small,train_uncased_large,train_cased_small,train_cased_large
accuracy,0.8198,0.8512,0.8094,0.8342
accuracy_baseline,0.5,0.5,0.5,0.5
auc,0.906073,0.928734,0.892808,0.917374
auc_precision_recall,0.905724,0.926632,0.891176,0.917563
average_loss,0.387226,0.340968,0.41195,0.364618
label/mean,0.5,0.5,0.5,0.5
loss,48.403313,42.621048,51.493755,45.577282
precision,0.816133,0.838473,0.817659,0.826495
prediction/mean,0.506833,0.514889,0.489571,0.508394
recall,0.8256,0.87,0.7964,0.846


## Learning Rate

In [176]:
# Learning rates to compare, spanning four orders of magnitude.
learning_rates = [0.3, 0.03, 0.003, 0.0003, 0.00003]

res = []
for rate in learning_rates:
    print('Progress {}'.format(rate))
    metrics = train_and_evaluate_DNN(
        bert_model=bert_model,
        hidden_units=hidden_units,
        learning_rate=rate,
        dropout_rate=dropout_rate,
        train_num_epochs=train_num_epochs,
        input_size=input_size,
        activation_function=activation_function,
        optimizer=optimizer,
        output_type='accuracy')
    column_name = '{}_Learning_Rate'.format(rate)
    res.append(pd.DataFrame.from_dict(metrics, orient='index', columns=[column_name]))
pd.concat(res, axis=1)

Progress 0.3
Progress 0.03
Progress 0.003
Progress 0.0003
Progress 3e-05


Unnamed: 0,0.3_Learning_Rate,0.03_Learning_Rate,0.003_Learning_Rate,0.0003_Learning_Rate,3e-05_Learning_Rate
accuracy,0.8868,0.8882,0.892,0.876,0.8384
accuracy_baseline,0.5,0.5,0.5,0.5,0.5
auc,0.952882,0.954347,0.95737,0.945922,0.917904
auc_precision_recall,0.951977,0.954121,0.95643,0.945547,0.91672
average_loss,0.279179,0.273848,0.265801,0.300285,0.479364
label/mean,0.5,0.5,0.5,0.5,0.5
loss,34.897385,34.230988,33.225075,37.535595,59.920483
precision,0.883122,0.886192,0.884013,0.864341,0.842788
prediction/mean,0.5052,0.493668,0.506328,0.508961,0.498696
recall,0.8916,0.8908,0.9024,0.892,0.832


## Dropout Rates

In [188]:
# Dropout rates to compare; every other hyper-parameter stays at its default.
dropout_rates = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
res = []
for dr in dropout_rates:
    print('Progress {}'.format(dr))
    # Pass the shared default `train_num_epochs`, as every other experiment
    # does. The previous argument, `num_epochs`, is not defined by any cell in
    # this notebook and only resolved through leftover kernel state.
    result = train_and_evaluate_DNN(bert_model, hidden_units, learning_rate, dr, train_num_epochs, input_size, activation_function, optimizer, 'accuracy')
    df_result = pd.DataFrame.from_dict(result, orient='index', columns=['{}_Dropout_Rate'.format(dr)])
    res.append(df_result)
pd.concat(res, axis=1)

Progress 0.1
Progress 0.2
Progress 0.3
Progress 0.4
Progress 0.5
Progress 0.6
Progress 0.7


Unnamed: 0,0.1_Dropout_Rate,0.2_Dropout_Rate,0.3_Dropout_Rate,0.4_Dropout_Rate,0.5_Dropout_Rate,0.6_Dropout_Rate,0.7_Dropout_Rate
accuracy,0.8916,0.8898,0.891,0.8904,0.8884,0.889,0.8876
accuracy_baseline,0.5,0.5,0.5,0.5,0.5,0.5,0.5
auc,0.955017,0.955052,0.95469,0.954391,0.954028,0.954151,0.953673
auc_precision_recall,0.95452,0.954341,0.954274,0.953746,0.953463,0.953789,0.953284
average_loss,0.272572,0.275813,0.272283,0.273443,0.276286,0.273683,0.277831
label/mean,0.5,0.5,0.5,0.5,0.5,0.5,0.5
loss,34.071541,34.476566,34.035358,34.180382,34.53574,34.210354,34.728821
precision,0.899266,0.905535,0.881985,0.893548,0.868082,0.888224,0.867045
prediction/mean,0.48421,0.471242,0.506237,0.489461,0.520649,0.495092,0.522003
recall,0.882,0.8704,0.9028,0.8864,0.916,0.89,0.9156


## Front and Back

In [190]:
# Single cached embedding set for the front-and-back variant.
bert_models = ['train_uncased_small_max100_frontback']

res = []
for model_name in bert_models:
    print('Progress {}'.format(model_name))
    metrics = train_and_evaluate_DNN(
        bert_model=model_name,
        hidden_units=hidden_units,
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        train_num_epochs=train_num_epochs,
        input_size=input_size,
        activation_function=activation_function,
        optimizer=optimizer,
        output_type='accuracy')
    column_name = '{}'.format(model_name)
    res.append(pd.DataFrame.from_dict(metrics, orient='index', columns=[column_name]))
pd.concat(res, axis=1)

Progress train_uncased_small_max100_frontback


Unnamed: 0,train_uncased_small_max100_frontback
accuracy,0.8578
accuracy_baseline,0.5
auc,0.930069
auc_precision_recall,0.93103
average_loss,0.337812
label/mean,0.5
loss,42.226532
precision,0.855383
prediction/mean,0.502964
recall,0.8612


## Optimizers

In [180]:
# Optimizer names understood by train_and_evaluate_DNN.
optimizers = ['AdaGrad', 'AdamW']

res = []
for optimizer_name in optimizers:
    print('Progress {}'.format(optimizer_name))
    metrics = train_and_evaluate_DNN(
        bert_model=bert_model,
        hidden_units=hidden_units,
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        train_num_epochs=train_num_epochs,
        input_size=input_size,
        activation_function=activation_function,
        optimizer=optimizer_name,
        output_type='accuracy')
    column_name = '{}'.format(optimizer_name)
    res.append(pd.DataFrame.from_dict(metrics, orient='index', columns=[column_name]))
pd.concat(res, axis=1)

Progress AdaGrad
Progress AdamW


Unnamed: 0,AdaGrad,AdamW
accuracy,0.8934,0.8732
accuracy_baseline,0.5,0.5
auc,0.956967,0.946567
auc_precision_recall,0.955776,0.946463
average_loss,0.270448,0.313827
label/mean,0.5,0.5
loss,33.805943,39.228344
precision,0.892615,0.835853
prediction/mean,0.498137,0.553442
recall,0.8944,0.9288


## Number of Training Samples

In [189]:
# Training-set sizes to compare (number of training examples kept).
input_sizes = [1, 10, 100, 500, 1000, 5000, 10000, 15000, 20000]

res = []
for n_train in input_sizes:
    print('Progress {}'.format(n_train))
    metrics = train_and_evaluate_DNN(
        bert_model=bert_model,
        hidden_units=hidden_units,
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        train_num_epochs=train_num_epochs,
        input_size=n_train,
        activation_function=activation_function,
        optimizer=optimizer,
        output_type='accuracy')
    column_name = '{}_examples'.format(n_train)
    res.append(pd.DataFrame.from_dict(metrics, orient='index', columns=[column_name]))
pd.concat(res, axis=1)
    

Progress 1
Progress 10
Progress 100
Progress 500
Progress 1000
Progress 5000
Progress 10000
Progress 15000
Progress 20000


Unnamed: 0,1_examples,10_examples,100_examples,500_examples,1000_examples,5000_examples,10000_examples,15000_examples,20000_examples
accuracy,0.5,0.5106,0.803,0.853,0.8654,0.8774,0.8814,0.8852,0.8908
accuracy_baseline,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
auc,0.46164,0.762616,0.885826,0.930019,0.939475,0.946812,0.950486,0.952998,0.954151
auc_precision_recall,0.505561,0.73926,0.881257,0.929774,0.938564,0.946737,0.950522,0.953064,0.953761
average_loss,2.370815,0.788171,0.463019,0.341427,0.317231,0.294282,0.286849,0.282498,0.274466
label/mean,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
loss,296.351898,98.521355,57.877411,42.678318,39.653831,36.785309,35.856133,35.312244,34.308189
precision,0.5,0.884058,0.79281,0.850616,0.883019,0.874553,0.862962,0.861758,0.89427
prediction/mean,0.990626,0.235611,0.509609,0.504742,0.476736,0.50158,0.521222,0.530584,0.487267
recall,1.0,0.0244,0.8204,0.8564,0.8424,0.8812,0.9068,0.9176,0.8864


## Training set evaluation

In [185]:
# Compare metrics on the training input against metrics on the dev input.
# NOTE(review): both iterations use identical hyper-parameters, so they share
# one model directory and each iteration calls estimator.train again. The
# second run presumably resumes from the first run's checkpoints rather than
# starting fresh - confirm before comparing the two columns.
output_types = ['train_accuracy', 'accuracy']

res = []
for kind in output_types:
    print('Progress {}'.format(kind))
    metrics = train_and_evaluate_DNN(
        bert_model=bert_model,
        hidden_units=hidden_units,
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        train_num_epochs=train_num_epochs,
        input_size=input_size,
        activation_function=activation_function,
        optimizer=optimizer,
        output_type=kind)
    column_name = '{}'.format(kind)
    res.append(pd.DataFrame.from_dict(metrics, orient='index', columns=[column_name]))
pd.concat(res, axis=1)

Progress train_accuracy
Progress accuracy


Unnamed: 0,0.1_Dropout_Rate,0.2_Dropout_Rate,0.3_Dropout_Rate,0.4_Dropout_Rate,0.5_Dropout_Rate,0.6_Dropout_Rate,0.7_Dropout_Rate,train_accuracy,accuracy
accuracy,0.8932,0.892,0.886,0.8894,0.8882,0.8874,0.8882,0.97265,0.8904
accuracy_baseline,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
auc,0.956792,0.95385,0.953909,0.953723,0.953071,0.95281,0.952604,0.995915,0.956435
auc_precision_recall,0.955753,0.953374,0.953411,0.953326,0.952562,0.952251,0.952275,0.99615,0.95582
average_loss,0.274101,0.274592,0.278475,0.274842,0.277718,0.278925,0.277938,0.103875,0.281596
label/mean,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
loss,34.262611,34.323967,34.809357,34.355209,34.714706,34.865616,34.742275,13.294526,35.199451
precision,0.884886,0.884013,0.863602,0.886156,0.873126,0.870647,0.880738,0.977183,0.876543
prediction/mean,0.508933,0.505995,0.526587,0.498705,0.513469,0.516423,0.50012,0.491556,0.516529
recall,0.904,0.9024,0.9168,0.8936,0.9084,0.91,0.898,0.9679,0.9088
