In [1]:
import os
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import bert
import numpy as np 
import tokenization
from tensorflow.keras.models import Model
from keras_bert import Tokenizer, load_vocabulary
from sklearn.model_selection import train_test_split


Using TensorFlow backend.


In [2]:

# BERT encoder (bert_en_uncased_L-12_H-768_A-12, version 1) wrapped as a Keras layer.
# trainable=True lets the encoder weights be updated during model.fit.
bert_layer = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1",
    trainable=True,
)


In [3]:
# --- Hyperparameters ---
SEQ_LEN = 128
BATCH_SIZE = 128
EPOCHS = 5
LR = 1e-4

# --- Files of the locally stored pretrained checkpoint ---
pretrained_path = 'uncased_L-12_H-768_A-12'
config_path, checkpoint_path, vocab_path = (
    os.path.join(pretrained_path, file_name)
    for file_name in ('bert_config.json', 'bert_model.ckpt', 'vocab.txt')
)

# --- keras_bert vocabulary and the tokenizer built from it ---
token_dict = load_vocabulary(vocab_path)
tokenizer = Tokenizer(token_dict)



# Fixed number of token positions per example. Derived from SEQ_LEN so the
# sequence length is configured in exactly one place.
MAX_SEQ_LEN = SEQ_LEN

# The three integer inputs passed to the hub BERT layer, all of length MAX_SEQ_LEN.
input_word_ids = tf.keras.layers.Input(shape=(MAX_SEQ_LEN,), dtype=tf.int32,
                                       name="input_word_ids")
input_mask = tf.keras.layers.Input(shape=(MAX_SEQ_LEN,), dtype=tf.int32,
                                   name="input_mask")
segment_ids = tf.keras.layers.Input(shape=(MAX_SEQ_LEN,), dtype=tf.int32,
                                    name="segment_ids")

pooled_output, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])

# Classification head: mean over token positions -> dropout -> 4 independent sigmoids.
# NOTE(review): no mask is passed to the pooling layer, so padded positions appear
# to be included in the average - confirm whether that is intended.
x = tf.keras.layers.GlobalAveragePooling1D()(sequence_output)
x = tf.keras.layers.Dropout(0.2)(x)
out = tf.keras.layers.Dense(4, activation="sigmoid", name="dense_output")(x)

model = tf.keras.models.Model(
      inputs=[input_word_ids, input_mask, segment_ids], outputs=out)

# Use the configured learning rate LR. The string 'adam' would silently fall back
# to the optimizer's default rate and leave LR unused.
model.compile(loss='binary_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
              metrics=['accuracy'])

In [4]:
from tqdm import tqdm
import bert_tokenizer as tokenizerBT
# Build one bert FullTokenizer from the local vocab file (lower-casing enabled) and
# bind the same instance to both names: `tokenizer` and `FullTokenizer`.
tokenizer = FullTokenizer = bert.bert_tokenization.FullTokenizer(
    vocab_path, do_lower_case=True
)

In [5]:
def create_single_input(sentence,MAX_LEN):
    """Encode one sentence into the three fixed-length BERT input lists.

    Args:
        sentence: Text handed to the module-level `tokenizer`.
        MAX_LEN: Maximum number of tokens to keep, not counting the added
            "[CLS]" and "[SEP]" markers.

    Returns:
        A tuple `(ids, masks, segments)`; each list is padded to MAX_SEQ_LEN
        by `get_ids`, `get_masks` and `get_segments` respectively.
    """
    # Padding targets the global MAX_SEQ_LEN, so the token budget must never
    # exceed MAX_SEQ_LEN - 2. Otherwise a large MAX_LEN would yield lists longer
    # than MAX_SEQ_LEN and rows of unequal length.
    token_budget = max(0, min(MAX_LEN, MAX_SEQ_LEN - 2))

    stokens = tokenizer.tokenize(sentence)[:token_budget]
    stokens = ["[CLS]"] + stokens + ["[SEP]"]

    ids = get_ids(stokens, tokenizer, MAX_SEQ_LEN)
    masks = get_masks(stokens, MAX_SEQ_LEN)
    segments = get_segments(stokens, MAX_SEQ_LEN)

    return ids, masks, segments

def create_input_array(sentences):
    """Encode every sentence and stack the results into three int32 arrays.

    Each sentence is passed to `create_single_input` with a budget of
    MAX_SEQ_LEN - 2 tokens; progress is reported through tqdm.

    Returns:
        A list `[ids, masks, segments]` of numpy int32 arrays, one row per sentence.
    """
    encoded = [
        create_single_input(text, MAX_SEQ_LEN - 2)
        for text in tqdm(sentences, position=0, leave=True)
    ]

    # Column 0 holds the ids, column 1 the masks, column 2 the segments.
    return [
        np.asarray([row[column] for row in encoded], dtype=np.int32)
        for column in range(3)
    ]

def get_masks(tokens, max_seq_length):
    """Return a mask with 1 for every token followed by 0s up to max_seq_length."""
    token_count = len(tokens)
    mask = [1] * token_count
    mask.extend([0] * (max_seq_length - token_count))
    return mask

def get_segments(tokens, max_seq_length):
    """Segment ids: 0 up to and including the first "[SEP]", 1 afterwards, 0-padded."""
    segment_ids = []
    past_first_sep = False
    for tok in tokens:
        segment_ids.append(1 if past_first_sep else 0)
        if tok == "[SEP]":
            # Every token after the first separator belongs to segment 1.
            past_first_sep = True
    padding = [0] * (max_seq_length - len(tokens))
    return segment_ids + padding
def get_ids(tokens, tokenizer, max_seq_length):
    """Map tokens to ids with the given tokenizer and right-pad with 0 to max_seq_length."""
    ids = tokenizer.convert_tokens_to_ids(tokens)
    pad_count = max_seq_length - len(ids)
    return ids + [0] * pad_count

In [6]:
# Read the labelled orders from the CSV next to the notebook and preview the first rows.
datasetV1 = pd.read_csv("datasetV1.csv")
datasetV1.head(5)

Unnamed: 0,customerMesssage,orderSummary,Salads,Drinks,Pizzas,Desserts
0,1 Large Hand Tossed Round Pizza with Pepperon...,Your Order for Pickup at: \nJet's MI 012 - 193...,1,0,1,0
1,I would like to order 1 large Hawaiian with tu...,Your Order for Delivery To: \n7560 Weddel St T...,1,0,1,0
2,Hi Jets! I want... \n1 - 8 corner pizza With h...,Your Order for Pickup at: \nJet's MI 097 - 444...,0,1,0,0
3,Hi Jets I want... DEEP DISH DUO Detroit-Style ...,Your Order for Pickup at: \nJet's OH 008 - 888...,0,0,1,0
4,Hi Jets! I want...delivery. 1large thin crust ...,Your Order for Delivery To: \n1304 N Rumbaugh ...,0,0,1,0


In [7]:
# Hold out 20% of the rows. random_state pins the shuffle so that
# Restart Kernel -> Run All reproduces the same train/test split.
train_examples, test_examples = train_test_split(
    datasetV1, test_size=0.2, random_state=42
)

# Label columns, one binary target per menu category.
list_classes = ["Salads", "Drinks", "Pizzas", "Desserts"]

# Missing customer messages are replaced by the placeholder string "CVxTz".
train_sentences = train_examples["customerMesssage"].fillna("CVxTz").values

# Encode the training text into the [ids, masks, segments] arrays and collect the labels.
inputs = create_input_array(train_sentences)
train_y = train_examples[list_classes].values

100%|████████████████████████████████████████████████████████████████████████████| 1593/1593 [00:00<00:00, 2641.53it/s]


In [8]:
# Fine-tune on the encoded training set; the last 20% of the rows is held out for validation.
# The returned History is kept so the per-epoch metrics can be inspected afterwards.
history = model.fit(
    inputs,
    train_y,
    epochs=EPOCHS,
    batch_size=50,
    validation_split=0.2,
    shuffle=True,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x271fe8ef040>