In [None]:
# https://github.com/google-research/bert
# https://github.com/CyberZHG/keras-bert

# folder name for BERT storing
folder = 'multi_cased_L-12_H-768_A-12'
# link to download pre-trained neural network BERT
download_url = 'https://storage.googleapis.com/bert_models/2018_11_23/multi_cased_L-12_H-768_A-12.zip' 

print('Downloading model...')
zip_path = '{}.zip'.format(folder)
!test -d $folder || (wget $download_url && unzip $zip_path)

# dowload tokenization.py from BERT repository
!wget https://raw.githubusercontent.com/google-research/bert/master/tokenization.py

# install Keras BERT
!pip install keras-bert

import sys
import numpy as np
from keras_bert import load_trained_model_from_checkpoint
import tokenization

# Locations of the files inside the unpacked model folder
config_path = '{}/bert_config.json'.format(folder)
checkpoint_path = '{}/bert_model.ckpt'.format(folder)
vocab_path = '{}/vocab.txt'.format(folder)

# Tokenizer that turns a plain-text sentence into tokens from the model
# vocabulary; lower-casing is switched off
tokenizer = tokenization.FullTokenizer(
    vocab_file=vocab_path,
    do_lower_case=False,
)

# Build the network from the config file and load the checkpoint weights
print('Loading model...')
model = load_trained_model_from_checkpoint(
    config_path,
    checkpoint_path,
    training=True,
)
#model.summary()          # neural network parameters
print('OK')


In [13]:
# BERT mode #2: what is the probability that the 2nd sentence is a logical continuation of the 1st sentence?
sentence_1 = 'In need of some fencing to hold a pig in and food got all burnt out through fire anyone able to help please? '      #@param {type:"string"}
sentence_2 = 'I have fence material.'          #@param {type:"string"}


print(sentence_1, '->', sentence_2)

# Tokenize both sentences
tokens_sen_1 = tokenizer.tokenize(sentence_1)
tokens_sen_2 = tokenizer.tokenize(sentence_2)

tokens = ['[CLS]'] + tokens_sen_1 + ['[SEP]'] + tokens_sen_2 + ['[SEP]']

# Fixed length every model input in this cell is padded to
max_seq_len = 512

# Fail early with a readable message: a longer pair would otherwise get no
# padding at all and then break with an obscure IndexError further down
if len(tokens) > max_seq_len:
    raise ValueError(
        'Sentence pair produces {} tokens, but at most {} are supported; '
        'please shorten the sentences.'.format(len(tokens), max_seq_len)
    )

# Transform string tokens into integer indexes and zero-pad up to max_seq_len
token_input = tokenizer.convert_tokens_to_ids(tokens)
token_input = token_input + [0] * (max_seq_len - len(token_input))

# The mask consists of all zeros
# NOTE(review): presumably this input marks masked-LM positions (none here) - confirm against keras-bert docs
mask_input = [0] * max_seq_len

# Segment ids: 0 for [CLS] + 1st sentence + delimiter [SEP],
# 1 for the 2nd sentence + final [SEP], 0 again for the padding
len_1 = len(tokens_sen_1) + 2                   # 1st phrase length, +2 for the leading [CLS] and the delimiter [SEP]
len_2 = len(tokens_sen_2) + 1                   # 2nd phrase length, +1 for the final [SEP]
seg_input = [0] * len_1 + [1] * len_2 + [0] * (max_seq_len - len_1 - len_2)


# Wrap each list into a numpy array with a leading batch axis: (512,) -> (1, 512)
token_input = np.asarray([token_input])
mask_input = np.asarray([mask_input])
seg_input = np.asarray([seg_input])


# Run the sentences through the neural network; output [1] answers the question
# whether the 2nd sentence logically continues the 1st one.
# Example: [[0.9657724  0.03422766]] - the left number is the probability that the
# 2nd sentence logically continues the 1st sentence, the right number is the
# probability that it is a random sentence.
predicts = model.predict([token_input, seg_input, mask_input])[1]
print('Sentences is matched on:', round(predicts[0][0]*100,2), '%')
out = int(round(predicts[0][0]*100)) 


In need of some fencing to hold a pig in and food got all burnt out through fire anyone able to help please?  -> I have fence material.
Sentences is matched on: 99.29 %
