In [52]:
import json
import os
import random
import pandas as pd
import numpy as np

In [2]:
from bert_serving.client import BertClient
bc = BertClient()

In [3]:
DATA_DIR = "data/"

def get_datasets(type='medium'):
    if type not in ['medium', 'all', 'mini']:
        raise Exception("Invalid type!")
    cols =  ['requester_received_pizza', 'request_text']
    train_df= pd.read_csv(DATA_DIR + f'train_{type}.csv', usecols = cols).reindex(columns=cols)
    train_df[cols[0]] = (train_df[cols[0]] == True ).astype(int)
    dev_df = pd.read_csv(DATA_DIR + f'val_{type}.csv', usecols = cols).reindex(columns=cols)
    dev_df[cols[0]] = (dev_df[cols[0]] == True ).astype(int)
    return train_df, dev_df

In [20]:
train_df, dev_df = get_datasets(type='medium')

In [30]:
def get_encodes(df):
    samples = list(df['request_text'])
    text = [s[:50] + s[-50:] for s in samples]
    features = bc.encode(text)
    return features
  

In [82]:
import tensorflow as tf
from tensorflow.python.estimator.canned.dnn import DNNClassifier
from tensorflow.python.estimator.run_config import RunConfig
from tensorflow.python.estimator.training import TrainSpec, EvalSpec, train_and_evaluate


classifier = DNNClassifier(
    hidden_units=[64, 32],
    feature_columns=[tf.feature_column.numeric_column('feature', shape=(768,))],
    n_classes=2,
    dropout=0.1)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/c1/qyvzj96j1bj6cl7g51yg09pw0000gn/T/tmpuio2tuf7', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x13b4733c8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [83]:
def input(df):
    features = get_encodes(df)
    return features, np.array(df['requester_received_pizza'].astype(np.int32))

In [84]:
train_input_fn = tf.estimator.inputs.numpy_input_fn(
 x={"feature": input(train_df)[0]},
 y=input(train_df)[1],
 num_epochs=None,
 batch_size=50,
 shuffle=True
)

In [85]:
classifier.train(input_fn=train_input_fn, steps=10)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/c1/qyvzj96j1bj6cl7g51yg09pw0000gn/T/tmpuio2tuf7/model.ckpt.
INFO:tensorflow:loss = 53.354557, step = 1
INFO:tensorflow:Saving checkpoints for 10 into /var/folders/c1/qyvzj96j1bj6cl7g51yg09pw0000gn/T/tmpuio2tuf7/model.ckpt.
INFO:tensorflow:Loss for final step: 23.010542.


<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x10f755d68>

In [86]:
test_input_fn = tf.estimator.inputs.numpy_input_fn(
 x={"feature": input(dev_df)[0]},
 y=input(dev_df)[1],
 num_epochs=1,
 shuffle=False
)

In [87]:
accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-11-15T00:35:30Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/c1/qyvzj96j1bj6cl7g51yg09pw0000gn/T/tmpuio2tuf7/model.ckpt-10
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-11-15-00:35:30
INFO:tensorflow:Saving dict for global step 10: accuracy = 0.75454545, accuracy_baseline = 0.75454545, auc = 0.55644804, auc_precision_recall = 0.2558612, average_loss = 0.6036542, global_step = 10, label/mean = 0.24545455, loss = 66.40196, precision = 0.0, prediction/mean = 0.13286994, recall = 0.0
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 10: /var/folders/c1/qyvzj96j1bj6cl7g51yg09pw0000gn/T/tmpuio2tuf7/model.ckpt-10


In [89]:
print("\nTest Accuracy: {0:f}%\n".format(accuracy_score*100))



Test Accuracy: 75.454545%

