In [1]:
# Install the notebook's dependencies into the current environment.
# The recorded output of this cell shows it ran against tensorflow 2.2.0 and
# keras 2.4.3. `--upgrade` installs whatever is newest at run time, so
# NOTE(review): consider pinning these versions to keep the notebook reproducible.
!pip install --upgrade tensorflow
!pip install --upgrade keras
# sentencepiece and bert-for-tf2 are installed unpinned as well.
!pip install sentencepiece
!pip install bert-for-tf2

import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
import bert
import os
from bert import BertModelLayer

Requirement already up-to-date: tensorflow in /usr/local/lib/python3.6/dist-packages (2.2.0)
Requirement already up-to-date: keras in /usr/local/lib/python3.6/dist-packages (2.4.3)


Import data

In [2]:
# Example data: a sample from the OffensEval 2020 Danish corpus.
# The parallel English text was obtained with the Google Translate API.

data_file = "example_data.tsv"
LAN = "DA"  # change this according to the language you are working with

# Column names shared by every per-language frame built below
ID_COLUMN = "ID"
TEXT_COLUMN = "tweet"
LABEL_COLUMN = "label"
label_list = [0, 1]

# The file's own header row is consumed (header=0) and replaced by these names
data = pd.read_csv(
    data_file,
    sep="\t",
    header=0,
    names=["ID", LAN, "label", "EN"],
)

# Map labels to 0 and 1 (an unknown label raises KeyError)
mapping = {"OFF": 1, "NOT": 0}
data["label"] = data["label"].apply(lambda raw_label: mapping[raw_label])

# One split for both languages, so train_1/train_2 (and test_1/test_2)
# contain the same rows in the same order
train, test = train_test_split(data, test_size=0.33, random_state=22)

# Channel 1: text in language LAN
train_1 = train[["ID", LAN, "label"]].rename(columns={LAN: "tweet"})
test_1 = test[["ID", LAN, "label"]].rename(columns={LAN: "tweet"})

# Channel 2: English translation
train_2 = train[["ID", "EN", "label"]].rename(columns={"EN": "tweet"})
test_2 = test[["ID", "EN", "label"]].rename(columns={"EN": "tweet"})

Import the two BERT models which will constitute the channels of the multi-channel model

In [3]:
# Checkpoint names for the two channels
model_name1 = "multi_cased_L-12_H-768_A-12"
model_name2 = "cased_L-12_H-768_A-12"

# Fetch both checkpoints into ".models" (model 1 first, then model 2)
model_dir1, model_dir2 = (
    bert.fetch_google_bert_model(name, ".models")
    for name in (model_name1, model_name2)
)

# Path prefix of the checkpoint inside each model directory
model_ckpt1, model_ckpt2 = (
    os.path.join(directory, "bert_model.ckpt")
    for directory in (model_dir1, model_dir2)
)

Already  fetched:  multi_cased_L-12_H-768_A-12.zip
already unpacked at: .models/multi_cased_L-12_H-768_A-12
Already  fetched:  cased_L-12_H-768_A-12.zip
already unpacked at: .models/cased_L-12_H-768_A-12


Preparation of inputs to BERT

In [4]:
max_seq_len = 128


def _load_tokenizer(model_name, model_dir, model_ckpt):
  """Build the tokenizer that belongs to one pretrained BERT checkpoint.

  Returns a tuple (do_lower_case, vocab_file, tokenizer).
  """
  # Lower-casing is enabled only for checkpoints whose name contains "uncased"
  lower = "uncased" in model_name
  # Let the bert library check the casing flag against the checkpoint path
  bert.bert_tokenization.validate_case_matches_checkpoint(lower, model_ckpt)
  vocab = os.path.join(model_dir, "vocab.txt")
  return lower, vocab, bert.bert_tokenization.FullTokenizer(vocab, lower)


# first model
do_lower_case1, vocab_file1, tokenizer1 = _load_tokenizer(
    model_name1, model_dir1, model_ckpt1)

# second model
do_lower_case2, vocab_file2, tokenizer2 = _load_tokenizer(
    model_name2, model_dir2, model_ckpt2)

def prepare_input(df, tokenizer, max_seq_len):
  """Turn a frame of texts and labels into fixed-length BERT input ids.

  Each text is tokenized, wrapped as ``[CLS] tokens [SEP]``, converted to ids
  and right-padded with 0 up to ``max_seq_len``. Texts that are too long are
  truncated *before* the markers are added, so the closing ``[SEP]`` is always
  kept (slicing the finished id list would cut it off).

  Args:
    df: DataFrame holding the text in column TEXT_COLUMN and the label in
      column LABEL_COLUMN.
    tokenizer: object providing ``tokenize(text)`` (returning a list) and
      ``convert_tokens_to_ids(tokens)``.
    max_seq_len: length of every returned id sequence.

  Returns:
    Tuple ``(x, y)`` of numpy arrays: ``x`` holds one row of ``max_seq_len``
    ids per input row, ``y`` holds the labels converted with ``int``.
  """
  # Two positions are reserved for the [CLS] and [SEP] markers
  max_tokens = max(max_seq_len - 2, 0)
  x, y = [], []
  for text, label in zip(df[TEXT_COLUMN], df[LABEL_COLUMN]):
    tokens = tokenizer.tokenize(text)[:max_tokens]
    ids = tokenizer.convert_tokens_to_ids(["[CLS]"] + tokens + ["[SEP]"])
    # Only has an effect for max_seq_len < 2; keeps the output length fixed
    ids = ids[:max_seq_len]
    ids = ids + [0] * (max_seq_len - len(ids))
    x.append(ids)
    y.append(int(label))
  return np.array(x), np.array(y)

# first dataset: LAN text, encoded with the first model's tokenizer
train_1_x, train_1_y = prepare_input(train_1, tokenizer1, max_seq_len)
test_1_x, test_1_y = prepare_input(test_1, tokenizer1, max_seq_len)

# second dataset: English text, encoded with the second model's tokenizer
train_2_x, train_2_y = prepare_input(train_2, tokenizer2, max_seq_len)
test_2_x, test_2_y = prepare_input(test_2, tokenizer2, max_seq_len)


Creation of model

In [6]:
# Two-channel classifier: each channel runs its own BERT encoder over one
# language version of the tweet; the channel outputs are summed and classified.

# language model 1
bert_params_1 = bert.params_from_pretrained_ckpt(model_dir1)
# NOTE(review): this channel passes dtype="int32" while channel 2 does not; confirm it is intended.
l_bert_1 = bert.BertModelLayer.from_params(bert_params_1, dtype="int32", name="bert1")
input_ids_1 = keras.layers.Input(shape=(max_seq_len,), dtype="int32", name="input_ids1")
output_1 = l_bert_1(input_ids_1)

print("bert shape", output_1.shape)
# Keep only the first sequence position, i.e. the [CLS] token that
# prepare_input puts at the start of every sequence
pooling_1 = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output_1)  # pooling layer
dropout_1 = keras.layers.Dropout(0.1)(pooling_1)  # dropout layer
forward_1 = keras.layers.Dense(units=768, activation="relu",
                              kernel_regularizer=keras.regularizers.l2(0.01))(dropout_1)

# language model 2
bert_params_2 = bert.params_from_pretrained_ckpt(model_dir2)
l_bert_2 = bert.BertModelLayer.from_params(bert_params_2, name="bert2")
input_ids_2 = keras.layers.Input(shape=(max_seq_len,), dtype="int32", name="input_ids2")
output_2 = l_bert_2(input_ids_2)

print("bert shape", output_2.shape)
pooling_2 = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output_2)  # pooling layer
dropout_2 = keras.layers.Dropout(0.1)(pooling_2)  # dropout layer
forward_2 = keras.layers.Dense(units=768, activation="relu",
                              kernel_regularizer=keras.regularizers.l2(0.01))(dropout_2)

# add hidden states together (element-wise; both channels output 768 units)
added = keras.layers.add([forward_1, forward_2])

# final forward layers
# Despite its name, `logits` is a 128-unit ReLU hidden layer; the name is kept
# so that any later reference to it still resolves.
logits = keras.layers.Dense(units=128, activation="relu",
                            kernel_regularizer=keras.regularizers.l2(0.01))(added)
# The output layer already applies softmax, so `out` holds class probabilities
out = keras.layers.Dense(units=2, activation="softmax",
                            kernel_regularizer=keras.regularizers.l2(0.01))(logits)

model = keras.Model(inputs=[input_ids_1, input_ids_2], outputs=out)
model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

# bert model weights loading (done after the layers have been called above)
bert.load_bert_weights(l_bert_1, model_ckpt1)
bert.load_bert_weights(l_bert_2, model_ckpt2)

# from_logits must be False: the model outputs softmax probabilities, and
# from_logits=True would apply softmax a second time inside the loss.
# `learning_rate` replaces the deprecated `lr` alias.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.00002),
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=[keras.metrics.SparseCategoricalAccuracy(name="sparse_categorical_accuracy")])

model.summary()

bert shape (None, 128, 768)
bert shape (None, 128, 768)
Done loading 196 BERT weights from: .models/multi_cased_L-12_H-768_A-12/multi_cased_L-12_H-768_A-12/bert_model.ckpt into <bert.model.BertModelLayer object at 0x7f1183e74710> (prefix:bert1). Count of weights not found in the checkpoint was: [0]. Count of weights with mismatched shape: [0]
Unused weights from checkpoint: 
	bert/embeddings/token_type_embeddings
	bert/pooler/dense/bias
	bert/pooler/dense/kernel
	cls/predictions/output_bias
	cls/predictions/transform/LayerNorm/beta
	cls/predictions/transform/LayerNorm/gamma
	cls/predictions/transform/dense/bias
	cls/predictions/transform/dense/kernel
	cls/seq_relationship/output_bias
	cls/seq_relationship/output_weights
Done loading 196 BERT weights from: .models/cased_L-12_H-768_A-12/cased_L-12_H-768_A-12/bert_model.ckpt into <bert.model.BertModelLayer object at 0x7f117ce2e5c0> (prefix:bert2). Count of weights not found in the checkpoint was: [0]. Count of weights with mismatched shap

In [9]:
TOTAL_EPOCHS = 2

# One id matrix per channel, in the order of the model's inputs.
# train_2_y is used as the target for both channels.
train_inputs = [train_1_x, train_2_x]

model.fit(
    train_inputs,
    train_2_y,
    epochs=TOTAL_EPOCHS,
    batch_size=32,
    shuffle=True,
    validation_split=0.,  # no samples held out for validation
)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7f117a2f9eb8>

In [10]:
from sklearn.metrics import classification_report

# Per-class scores for the held-out tweets, one id matrix per channel
test_scores = model.predict([test_1_x, test_2_x])
# Predicted class = index of the highest score along the last axis
y_pred = test_scores.argmax(axis=-1)
print(classification_report(test_2_y, y_pred))

              precision    recall  f1-score   support

           0       0.60      1.00      0.75         3
           1       0.00      0.00      0.00         2

    accuracy                           0.60         5
   macro avg       0.30      0.50      0.37         5
weighted avg       0.36      0.60      0.45         5



  _warn_prf(average, modifier, msg_start, len(result))
