<a href="https://colab.research.google.com/github/esalimi/Kaggle_NLI/blob/master/Watson.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Step 1: Building the BERT Model**



In [None]:
!pip install keras-bert

Collecting keras-bert
  Downloading https://files.pythonhosted.org/packages/e2/7f/95fabd29f4502924fa3f09ff6538c5a7d290dfef2c2fe076d3d1a16e08f0/keras-bert-0.86.0.tar.gz
Collecting keras-transformer>=0.38.0
  Downloading https://files.pythonhosted.org/packages/89/6c/d6f0c164f4cc16fbc0d0fea85f5526e87a7d2df7b077809e422a7e626150/keras-transformer-0.38.0.tar.gz
Collecting keras-pos-embd>=0.11.0
  Downloading https://files.pythonhosted.org/packages/09/70/b63ed8fc660da2bb6ae29b9895401c628da5740c048c190b5d7107cadd02/keras-pos-embd-0.11.0.tar.gz
Collecting keras-multi-head>=0.27.0
  Downloading https://files.pythonhosted.org/packages/e6/32/45adf2549450aca7867deccfa04af80a0ab1ca139af44b16bc669e0e09cd/keras-multi-head-0.27.0.tar.gz
Collecting keras-layer-normalization>=0.14.0
  Downloading https://files.pythonhosted.org/packages/a4/0e/d1078df0494bac9ce1a67954e5380b6e7569668f0f3b50a9531c62c1fc4a/keras-layer-normalization-0.14.0.tar.gz
Collecting keras-position-wise-feed-forward>=0.6.0
  Downloading

In [None]:
# Importing required packages keras, keras_bert, os
import os
import json
import keras
from keras import layers

# Importing required packages from keras_bert
from keras_bert.loader import load_trained_model_from_checkpoint
from keras_bert.bert import *
from keras_bert import extract_embeddings 

In [None]:
# Loading bert_base from google drive
# Mount Google Drive (Colab runtime) so files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Checkpoint directory: get_bert_model reads bert_config.json and bert_model.ckpt from it.
model_path = '/content/drive/My Drive/uncased_L-12_H-768_A-12'


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Smoke test: push two short sentences through the pre-trained checkpoint
texts = [
    'all work and play',
    'makes jack a dull boy~',
]
embeddings = extract_embeddings(model_path, texts)
# One embedding array per input sentence
embedding_sentence1, embedding_sentence2 = embeddings[0], embeddings[1]

# First line: length of one token vector; then the full array for sentence 1
print(len(embedding_sentence1[0]), embeddings[0], sep='\n')

768
[[-0.3183194   0.17749757  0.14557648 ... -0.47990972 -0.685876
  -0.14804517]
 [-0.2539365   0.20985767  0.24948463 ... -0.24461871 -0.4896369
  -0.24464582]
 [ 0.08667843  0.17276856  0.31595454 ... -0.75597656 -0.7137721
  -0.20142424]
 [ 0.1849249   0.46440715 -0.4347621  ...  0.4690967   0.11448962
  -0.4629252 ]
 [ 0.25327638 -0.06883571 -0.16352269 ... -1.0589056  -0.70101875
   0.05821889]
 [-0.06629732  1.3095123   0.25281972 ... -1.1509613  -0.30832586
  -0.14396843]]


In [None]:
# Function to define the structure of the bert

def get_bert_model(model_path, trainable = True, max_tokens=512, mode = 'pair'):
  """
  Build a 3-class softmax classifier on top of a pre-trained BERT checkpoint.

  Args:
    model_path: directory containing `bert_config.json` and `bert_model.ckpt`.
    trainable: forwarded to the keras_bert loader; True lets the BERT weights
      be updated during training, False freezes them.
    max_tokens: length of the token sequences fed to the model; also forwarded
      to the loader as `seq_len` so BERT is built for the same length.
    mode: 'pair' expects input of shape (2, max_tokens), where row 0 holds the
      token ids and row 1 the segment ids (premise & hypothesis);
      'single' expects input of shape (max_tokens,) holding token ids only,
      and uses segment id 0 for every position.

  Returns:
    An uncompiled keras Model mapping the encoded input to a softmax over
    3 classes. Call `model.summary()` on the result to inspect the layers.

  Raises:
    ValueError: if `mode` is neither 'pair' nor 'single'.
  """
  # Fail fast, before the slow checkpoint load, instead of hitting an
  # unbound-variable error further down.
  if mode not in ('pair', 'single'):
    raise ValueError("mode must be 'pair' or 'single', got {!r}".format(mode))

  config_file = os.path.join(model_path, 'bert_config.json')
  checkpoint_file = os.path.join(model_path, 'bert_model.ckpt')
  bert = load_trained_model_from_checkpoint(
      config_file,
      checkpoint_file,
      training=False,
      trainable=trainable,
      seq_len=max_tokens,
  )

  if mode == 'pair':
    # Lambda layers split the stacked input back into its two rows.
    input_tensor = layers.Input(shape=(2,max_tokens, ))
    word_tokens = layers.Lambda(lambda x: x[:,0, :])(input_tensor)
    segment_encode = layers.Lambda(lambda x: x[:,1,:])(input_tensor)
  else:
    input_tensor = layers.Input(shape=(max_tokens, ))
    word_tokens = input_tensor
    # Single-sentence input: every position gets segment id 0.
    segment_encode = layers.Lambda(lambda x: x*0)(input_tensor)

  output0 = bert([word_tokens, segment_encode])
  # Keep only position 0, i.e. [CLS] in "[CLS] ... [SEP] ... [SEP]".
  output1 = layers.Lambda(lambda x: x[:,0,:])(output0)
  output = layers.Dense(768)(output1)
  final_output = layers.Dense(3, activation='softmax')(output)

  model = keras.models.Model(inputs =input_tensor , outputs =final_output )
  return model

In [None]:
# Test for model installation: build the classifier and show its layer summary
model = get_bert_model(model_path)
# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

<bound method Model.summary of <tensorflow.python.keras.engine.functional.Functional object at 0x7f5bc80b0748>>
Model: "functional_9"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 2, 512)]     0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 512)          0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 512)          0           input_1[0][0]                    
__________________________________________________________________________________________________
functional_7 (Functional)       (None, 512, 768)     108891648   lambda[0]

# **Step 2: Importing Data from Google Drive**

In [1]:
# Importing required packages
import pandas as pd
from sklearn import preprocessing

In [5]:
# Reading the training data (all languages) from Google Drive
data =  pd.read_csv('/content/drive/My Drive/train.csv')
# Only the last expression of a cell is rendered automatically, so show this preview explicitly
display(data.head())

# Picking only English sentences
data_en = data[data['language']=='English']
data_en.head()


Unnamed: 0,id,premise,hypothesis,lang_abv,language,label
0,5130fd2cb5,and these comments were considered in formulat...,The rules developed in the interim were put to...,en,English,0
1,5b72532a0b,These are issues that we wrestle with in pract...,Practice groups are not permitted to work on t...,en,English,2
3,5622f0c60b,you know they can't really defend themselves l...,They can't defend themselves because of their ...,en,English,0
7,fdcd1bd867,From Cockpit Country to St. Ann's Bay,From St. Ann's Bay to Cockpit Country.,en,English,2
8,7cfb3d272c,"Look, it's your skin, but you're going to be i...",The boss will fire you if he sees you slacking...,en,English,1


In [3]:
# Model inputs: the sentence pair of every English example
X_train = data_en.loc[:, ['premise', 'hypothesis']]
# Targets: the class id of every English example
labels = data_en.loc[:, 'label']

# One-hot encode the class ids (0, 1, 2): one indicator column per class
LB = preprocessing.LabelBinarizer()
LB.fit(labels)
y_train = LB.transform(labels)


In [4]:
# Printing data to verify the import process
# Only a cell's last expression is rendered automatically, so display the frame explicitly
display(X_train.head())
y_train[:10]

array([[1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1]])

# **Step 3: Fine-Tuning BERT**


In [None]:
import numpy as np
import codecs
from keras_bert import load_trained_model_from_checkpoint

# Build the token -> id lookup from the vocabulary file stored next to the checkpoint.
# The built-in open() only treats \n, \r and \r\n as line ends; codecs readers also break
# lines on characters such as U+2028 or U+0085, which would shift the ids of later tokens.
token_dict = {}
with open(os.path.join(model_path, 'vocab.txt'), 'r', encoding='utf8') as reader:
    # A token's id is its 0-based line number, even if two lines strip to the same token.
    for token_id, line in enumerate(reader):
        token = line.strip()
        token_dict[token] = token_id

In [None]:
from keras_bert import Tokenizer
# keras_bert Tokenizer built from the token -> id vocabulary; bertEncoder calls its encode()
tokenizer = Tokenizer(token_dict)
  

In [None]:
def bertEncoder(sentences_df,max_len =512):
  """
  Encode sentence pairs into BERT token indices and segment ids.

  inputs:
    sentences_df: a dataframe whose first two columns are [sentence1, sentence2]
    max_len: sequence length passed to the tokenizer for every encoded pair
  output : numpy array of shape (n_rows, 2, max_len); for each row, entry 0
    holds the token indices and entry 1 the segment ids
  Note: for pretraining you will need Mask input as well.
  """
  # Select the two sentence columns by position, independent of their labels.
  first_sentences = sentences_df.iloc[:, 0]
  second_sentences = sentences_df.iloc[:, 1]

  encoded_data = []
  for first, second in zip(first_sentences, second_sentences):
    # Forward the caller's max_len so the encoded length follows the argument.
    indices, segments = tokenizer.encode(first=first, second=second, max_len=max_len)
    encoded_data.append([indices, segments])

  if not encoded_data:
    # Keep the (n_rows, 2, max_len) layout even when there is nothing to encode.
    return np.empty((0, 2, max_len), dtype=int)
  return np.array(encoded_data)

# **Step 4: Encoding the Training Data**

In [None]:
# Encode every sentence pair of X_train with bertEncoder, requesting sequence length 512
train_data= bertEncoder(X_train, max_len=512)

In [None]:
# There must be exactly one encoded example per label row before training
assert len(train_data) == len(y_train), "encoded inputs and labels are misaligned"
# Displayed as (inputs shape, labels shape)
np.shape(train_data), np.shape(y_train)

((6870, 2, 512), (6870, 3))