In [1]:
import numpy as np
import tensorflow as tf
import keras as keras
import os

from keras_bert import load_trained_model_from_checkpoint, load_vocabulary
from keras_bert import Tokenizer
from keras_bert import AdamWarmup, calc_train_steps

import flask
from flask import request, jsonify

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
# --- Run-wide settings -------------------------------------------------------
# Sequence length: given to the BERT loader as `seq_len` and to the tokenizer
# as `max_len`.
SEQ_LEN = 256

# Schedule inputs handed to calc_train_steps.
BATCH_SIZE = 128
EPOCHS = 1

# Learning rate passed to the AdamWarmup optimizer.
LR = 2e-5

In [3]:
# Locations of the pretrained BERT files (config, checkpoint, vocabulary),
# all resolved relative to the current working directory.
pretrained_path = './uncased_L-12_H-768_A-12'
config_path = os.path.join(pretrained_path, 'bert_config.json')
checkpoint_path = os.path.join(pretrained_path, 'bert_model.ckpt')
vocab_path = os.path.join(pretrained_path, 'vocab.txt')

# NOTE(review): these two names are not referenced anywhere else in the
# visible notebook - presumably left over from the training notebook.
DATA_COLUMN = 'comment_text'
LABEL_COLUMN = 'target'

# Build the tokenizer from the vocabulary file; `tokenizer` is read as a
# module-level global by convert_test().
token_dict = load_vocabulary(vocab_path)
tokenizer = Tokenizer(token_dict)

# Load the pretrained BERT graph with sequence length SEQ_LEN.
model = load_trained_model_from_checkpoint(
    config_path,
    checkpoint_path,
    training=True,
    trainable=True,
    seq_len=SEQ_LEN,
)

# Re-head the network: keep the first two inputs, take the output of the
# third-from-last layer and attach a single sigmoid unit named 'real_output'.
# NOTE(review): which layer `layers[-3]` is depends on how keras_bert builds
# the training graph - confirm it is the intended pooled representation.
inputs = model.inputs[:2]
dense = model.layers[-3].output
outputs = keras.layers.Dense(1, activation='sigmoid', kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02),
                             name = 'real_output')(dense)

# Schedule lengths for the AdamWarmup optimizer below.
# NOTE(review): 1804874 is presumably the number of training examples - confirm.
decay_steps, warmup_steps = calc_train_steps(
    1804874,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

# Rebind `model` to the re-headed network and compile it.
model = keras.models.Model(inputs, outputs)
model.compile(
    AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=LR),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Load weights from a local HDF5 file into the re-headed model; the file must
# exist next to the notebook for this cell to succeed.
model.load_weights('./model_tpu_256_1.h5')





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [4]:
def convert_test(test_df):
    """Encode the first entry of ``test_df`` into a one-row model input.

    Only ``test_df[0]`` is tokenized (using the module-level ``tokenizer``
    with ``max_len=SEQ_LEN``); any further entries are ignored. The segment
    ids produced by the tokenizer are discarded and an all-zero array of the
    same shape as the token ids is used in their place.

    Returns:
        A two-element list ``[token_ids, zeros]`` of NumPy arrays with
        identical shape, the first holding the token ids as a single row.
    """
    token_ids, _ = tokenizer.encode(test_df[0], max_len=SEQ_LEN)
    token_batch = np.array([token_ids])
    return [token_batch, np.zeros_like(token_batch)]

def load_test(comment):
    """Wrap a single comment in a one-element list and pass it to ``convert_test``.

    Returns whatever ``convert_test`` returns for that list.
    """
    return convert_test([comment])

In [6]:
# Score one sample comment and print the single sigmoid output of the model.
test_x1 = load_test('How are you')
prediction1 = model.predict(test_x1)
print(prediction1[0, 0])

0.004777763


In [7]:
# Score a second sample comment; this rebinds test_x1 / prediction1 from the
# previous cell.
test_x1 = load_test('you should not be doing anything')
prediction1 = model.predict(test_x1)
print(prediction1[0, 0])

0.020211812
