# Santander Questions Classification Using:
### A Bidirectional Encoder Representations from Transformers (BERT) Model
> Author: Jefferson Licet

> Email: jeffersonlicet@gmail.com

In [1]:
# Install the Hugging Face `transformers` package, used below for the BERT tokenizer and model.
# NOTE(review): the version is not pinned, so a fresh run may pick up a different release — consider pinning.
!pip install transformers



In [2]:
import os
import gc
import random as rn
import numpy as np
import pandas as pd
import transformers
import tensorflow as tf
from tqdm.notebook import tqdm

# Define constants shared by the rest of the notebook

SEED_NUMBER = 44 # Seed value fed to every random number generator below
CACHE_PATH = '/cache' # Root folder under which the tokenizer vocabulary is cached
BERT_MODEL = 'dccuchile/bert-base-spanish-wwm-cased' # BETO - Spanish BERT model identifier

# Seed the random number generators; this only works when running on CPU/GPU
rn.seed(SEED_NUMBER)
np.random.seed(SEED_NUMBER)
tf.random.set_seed(SEED_NUMBER)
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it
# here presumably only affects child processes — confirm this has the intended effect.
os.environ['PYTHONHASHSEED'] = '0'

# When using a pool of TPUs, random functions can't use the seed

In [3]:
# Enable TPUStrategy: resolve the TPU cluster, connect to it, initialize the TPU
# system and build the distribution strategy whose scope is used when creating models.

# AUTOTUNE constant; passed to Dataset.prefetch() when the training datasets are built
AUTO = tf.data.experimental.AUTOTUNE
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
strategy = tf.distribute.experimental.TPUStrategy(tpu)

# Register pandas progress bar using tqdm
tqdm.pandas()

INFO:absl:Entering into master device scope: /job:worker/replica:0/task:0/device:CPU:0


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


In [4]:
from tokenizers import BertWordPieceTokenizer

# Load the pretrained tokenizer for BERT_MODEL; in this notebook it is only used
# to write its files into the cache directory via save_pretrained().
tokenizer = transformers.BertTokenizer.from_pretrained(BERT_MODEL)

def generateBertPath(bert_model):
    """Return the cache directory path for the given BERT model name.

    The directory name is the model name followed by '-cache', located
    under CACHE_PATH.
    """
    cache_dir_name = bert_model + '-cache'
    return os.path.join(CACHE_PATH, cache_dir_name)

# Cache directory for this model's tokenizer files, and the vocabulary file expected inside it
save_vocab_path = generateBertPath(BERT_MODEL)
vocab_file_path = os.path.join(save_vocab_path, 'vocab.txt')

# exist_ok=True creates the directory (and any missing parents) only when needed,
# avoiding the separate exists() check and the race between check and creation.
os.makedirs(save_vocab_path, exist_ok=True)

# Write the tokenizer files into the cache directory; the fast tokenizer below
# reads the 'vocab.txt' file from that same directory.
tokenizer.save_pretrained(save_vocab_path)

# Initialize BERT tokenizer
# NOTE(review): BERT_MODEL is the *cased* checkpoint, yet the text is lower-cased and
# accents are stripped here — confirm this normalization is intentional.
fast_tokenizer = BertWordPieceTokenizer(
    vocab_file_path,
    lowercase=True,
    strip_accents=True
)

In [5]:
"""
  Tokenizes an array of texts using BERT tokenizer
  returns an array of arrays containing ids of the vocab
"""
def tokenizeAndEncode(texts, tokenizer, chunk_size=256, maxlen=512):
    """Tokenize a batch of texts and return their vocabulary ids.

    Args:
        texts: iterable of strings to encode (list, tuple, numpy array, ...).
        tokenizer: object exposing `enable_truncation`, `enable_padding` and
            `encode_batch` (in this notebook, a BertWordPieceTokenizer).
        chunk_size: unused; kept so existing callers keep working.
        maxlen: maximum number of tokens per text, passed to
            `enable_truncation`.

    Returns:
        A numpy array built from the `ids` of every encoding, one row per text.
    """
    # These calls reconfigure the tokenizer object that was passed in.
    tokenizer.enable_truncation(max_length=maxlen)
    # NOTE(review): padding is enabled with its default settings, so rows are not
    # padded up to `maxlen` (the maxlen=15 smoke test in this notebook returns 10 ids).
    # Presumably this pads to the longest text in the batch with the default pad id —
    # confirm both match the model's fixed input length and the vocabulary's [PAD] id.
    tokenizer.enable_padding()

    # Materialize the input as a list so any iterable of strings is accepted.
    encodings = tokenizer.encode_batch(list(texts))

    return np.array([encoding.ids for encoding in encodings])

In [6]:
# Descargarmos el dataset desde el drive de la competencia
!wget -q -O train.csv https://drive.google.com/u/0/uc?id=1SvVbsYUpKphC3NuU4y7JDYsDJxYT61Yl&export=download
!wget -q -O test_santander.csv https://drive.google.com/u/0/uc?id=1bsV_URfRHy8LNLA1SKJ24hv0lRNVXMV4&export=download

In [7]:
# Read the competition CSV files and prepare the label encoding

# DATA_PATH is empty, so both files are resolved relative to the working directory
DATA_PATH = ''
TRAIN_CSV_DIR, TEST_CSV_DIR = (
    os.path.join(DATA_PATH, csv_name)
    for csv_name in ('train.csv', 'test_santander.csv')
)

# The training file is pipe-separated
train_data = pd.read_csv(TRAIN_CSV_DIR, sep='|')

# Extra hand-written questions for the least populated category
appendQuestions = [
    "correo electrónico inválido",
    "correo electrónico incorrecto",
    "el correo electronico es incorrecto",
    "el correo electronico no es correcto",
]

# Every extra question belongs to the same category
appendCategories = ["Cat_104"] * len(appendQuestions)

df_concat = pd.DataFrame({
    'Pregunta': appendQuestions,
    'Intencion': appendCategories,
})
train_data = pd.concat([train_data, df_concat], ignore_index=True)

# Show the end of the frame, where the appended rows land
print(train_data.tail(10))

test_data = pd.read_csv(TEST_CSV_DIR)

# Encode each category as an integer code; `labels` maps a code back to its category name
train_data['labels'], labels = pd.factorize(train_data.Intencion)
np.save('labels.npy', labels)

# Preview the training data
print(train_data.head())

# Preview the test data
print(test_data.head())

                                                Pregunta Intencion
20098            estoy necesitando una tarjeta de debito    Cat_39
20099   el monto del prestamo depende de los ingresos???   Cat_251
20100       quiero cancelar una compra puntual el cuotas   Cat_339
20101                               necesito pagar deuda   Cat_192
20102  teniendo otro hipotecario es posible aplicar p...   Cat_218
20103                               comisión descubierto    Cat_56
20104                        correo electrónico inválido   Cat_104
20105                      correo electrónico incorrecto   Cat_104
20106                el correo electronico es incorrecto   Cat_104
20107               el correo electronico no es correcto   Cat_104
                                            Pregunta Intencion  labels
0               como puedo trabajar en santander rio   Cat_102       0
1                pagar tarjeta visa querer reintegro   Cat_350       1
2                      pagar tarjeta naranja siste

In [8]:
# Smoke-test the tokenizer: encode the first training question (truncated to at
# most 15 tokens) and display its token ids.
tokenizeAndEncode([train_data.Pregunta.values[0]], fast_tokenizer, maxlen=15)[0]

array([    4,  1184,  1769,  3460,  1036, 18244, 30935,  1295, 25355,
           5])

In [9]:
# Maximum token length: passed as `maxlen` when encoding texts and as the
# sequence length argument when training the model.
MAX_LEN = 30

In [10]:
# Encode the test questions to token ids (truncated to at most MAX_LEN tokens)
test_data_encoded = tokenizeAndEncode(test_data.Pregunta.values, fast_tokenizer, maxlen=MAX_LEN) 
# Keep the test ids and save them, so predictions can be matched back to their rows later
processed_test_ids = test_data.id

np.save('test_ids.npy', processed_test_ids)
# Batched dataset of encoded test questions; passed to model.predict() after each fold
test_dataset = tf.data.Dataset.from_tensor_slices(test_data_encoded).batch(64)

In [11]:
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Model

"""
  Creates a keras model using BERT as a Input layer
"""
def createModel(transformer,
                op=None,
                max_len=512,
                categories=(),
                loss='sparse_categorical_crossentropy'):
    """Build and compile a Keras classifier on top of a BERT transformer.

    The hidden state at sequence position 0 of the transformer's first output
    is passed through dropout and a softmax layer with one unit per category.

    Args:
        transformer: callable Keras-compatible model applied to the token ids;
            its first output is indexed as (batch, position, features).
        op: optimizer passed to `model.compile`.
        max_len: length of the integer token-id input sequence.
        categories: sized collection of classes; only its length is used.
        loss: loss passed to `model.compile`.

    Returns:
        The compiled Keras model, tracking the 'acc' metric.

    Raises:
        ValueError: if `categories` is empty, since the output layer would
            have no units.
    """
    num_classes = len(categories)
    if num_classes == 0:
        raise ValueError('categories must contain at least one class')

    # Named `token_ids` rather than `input` to avoid shadowing the builtin.
    token_ids = Input(shape=(max_len,), dtype=tf.int32)
    # NOTE(review): only token ids are fed to the transformer (no attention mask),
    # so padded positions are presumably attended to — confirm this is acceptable.
    bert_output = transformer(token_ids)[0]
    # Keep only position 0 of every sequence (presumably the [CLS] token).
    hidden_state = bert_output[:, 0, :]
    dropped_state = Dropout(0.35)(hidden_state)
    output = Dense(num_classes, activation='softmax')(dropped_state)

    model = Model(inputs=token_ids, outputs=output)
    model.compile(op, loss=loss, metrics=['acc'])

    return model

In [22]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from transformers import TFBertModel
import matplotlib.pyplot as plt

"""
  Training the model using encoded data, early stops
  when val_acc stops increasing
"""
def trainModel(
    x_train,
    y_train,
    x_validate,
    y_validate,
    class_w,
    iteration,
    maxLen=30,
    epochs=75):
    """Fine-tune a freshly loaded BERT classifier on one train/validation split.

    Reads the notebook globals `train_data`, `strategy`, `BERT_MODEL`,
    `fast_tokenizer` and `AUTO`. Saves accuracy and loss curves as
    '<iteration>_acc_bert.png' and '<iteration>_loss_bert.png'.

    Args:
        x_train: training texts.
        y_train: integer labels for `x_train`.
        x_validate: validation texts.
        y_validate: integer labels for `x_validate`.
        class_w: sequence of class weights; position i is used as the weight
            of class index i.
        iteration: fold identifier, used as prefix of the saved plot files.
        maxLen: token length used for truncation and as the model input length.
        epochs: maximum number of epochs; early stopping may end training sooner.

    Returns:
        The trained Keras model.
    """
    categories = np.unique(train_data.labels.values)
    loss = 'sparse_categorical_crossentropy'
    batch_size = 64

    # Create the optimizer and the model inside the strategy scope so that
    # everything owning variables is built under the TPU strategy.
    with strategy.scope():
        optimizer = Adam(learning_rate=3e-5)
        bert_layer = TFBertModel.from_pretrained(BERT_MODEL, from_pt=True)
        model = createModel(bert_layer,
                            max_len=maxLen,
                            categories=categories,
                            op=optimizer,
                            loss=loss)
        model.summary()

    x_encoded = tokenizeAndEncode(x_train, fast_tokenizer, maxlen=maxLen)
    x_validate_encoded = tokenizeAndEncode(x_validate, fast_tokenizer, maxlen=maxLen)

    # NOTE(review): the training dataset is not shuffled while the validation
    # batches are — presumably the shuffle was meant for training; confirm.
    train_dataset = (
        tf.data.Dataset
        .from_tensor_slices((x_encoded, y_train))
        .batch(batch_size)
        .prefetch(AUTO)
    )

    valid_dataset = (
        tf.data.Dataset
        .from_tensor_slices((x_validate_encoded, y_validate))
        .batch(batch_size)
        .shuffle(2008)
        .prefetch(AUTO)
    )

    # Stop when validation accuracy has not improved for 8 epochs;
    # 'val_acc' matches the 'acc' metric registered in createModel.
    es = EarlyStopping(
        monitor='val_acc',
        mode='max',
        verbose=1,
        patience=8,
        restore_best_weights=True)

    # Model.fit accepts tf.data datasets directly; fit_generator is deprecated.
    history = model.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=epochs,
        callbacks=[es],
        class_weight=dict(enumerate(class_w))
    )

    prefix = 'bert'
    _saveHistoryPlot(history, 'acc', 'model accuracy', 'accuracy',
                     str(iteration) + '_acc_' + prefix + '.png')
    _saveHistoryPlot(history, 'loss', 'model loss', 'loss',
                     str(iteration) + '_loss_' + prefix + '.png')

    return model


def _saveHistoryPlot(history, metric, title, ylabel, path):
    """Plot the train/validation curves of one metric and save them to `path`."""
    fig, ax = plt.subplots()
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'val'], loc='upper left')
    fig.savefig(path)
    # Close the figure so repeated folds do not accumulate open figures.
    plt.close(fig)

In [23]:
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.utils import class_weight
from sklearn.metrics import balanced_accuracy_score

# Cross-validation settings
N_FOLDS = 10   # number of stratified folds
EPOCHS = 150   # upper bound on epochs per fold; early stopping usually ends sooner

x = train_data.Pregunta.values.astype(str)
y = train_data.labels.values.astype(int)

# One 'balanced' weight per class, ordered by the sorted class indices in `y`
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y),
    y=y)

# Keyword arguments: recent scikit-learn releases no longer accept
# `shuffle` and `random_state` positionally.
kfold = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=1)

scores = []        # balanced accuracy on the held-out part of every fold
predictions = []   # predictions on the test dataset from every fold's model

for iteration, (train_ix, test_ix) in enumerate(kfold.split(x, y)):
    # Re-initialize the TPU before every fold (presumably to release the
    # state left behind by the previous fold's model).
    tf.tpu.experimental.initialize_tpu_system(tpu)
    print('🚀 Starting kfold iteration: ' + str(iteration) + '/' + str(N_FOLDS-1))
    trainX, trainy = x[train_ix], y[train_ix]
    testX, testy = x[test_ix], y[test_ix]

    model = trainModel(
        trainX,
        trainy,
        testX,
        testy,
        class_weights,
        iteration,
        MAX_LEN,
        EPOCHS)

    # Score the fold's model on its held-out questions
    encoded_for_test = tokenizeAndEncode(testX, fast_tokenizer, maxlen=MAX_LEN)

    y_pred = model.predict(encoded_for_test, verbose=0)
    y_pred_max = np.argmax(y_pred, axis=1).tolist()

    print("Balanced Acc for: ")
    bacc = balanced_accuracy_score(testy, y_pred_max)
    print ("\n########## Balanced Acc: %0.8f ##########\n" % bacc )
    scores.append(bacc)
    # Keep this fold's predictions on the competition test set for later ensembling
    predictions.append(model.predict(test_dataset))

    # Drop the model before the next fold builds a new one
    del model
    gc.collect()






INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


🚀 Starting kfold iteration: 0/9



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "model_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_18 (InputLayer)        [(None, 30)]              0         
_________________________________________________________________
tf_bert_model_17 (TFBertMode ((None, 30, 768), (None,  109850880 
_________________________________________________________________
tf_op_layer_strided_slice_17 [(None, 768)]             0         
_________________________________________________________________
dropout_683 (Dropout)        (None, 768)               0         
_________________________________________________________________
dense_17 (Dense)             (None, 352)               270688    
Total params: 110,121,568
Trainable params: 110,121,568
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
















Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 00031: early stopping
Balanced Acc for: 

########## Balanced Acc: 0.83244222 ##########









INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


🚀 Starting kfold iteration: 1/9



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "model_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_19 (InputLayer)        [(None, 30)]              0         
_________________________________________________________________
tf_bert_model_18 (TFBertMode ((None, 30, 768), (None,  109850880 
_________________________________________________________________
tf_op_layer_strided_slice_18 [(None, 768)]             0         
_________________________________________________________________
dropout_721 (Dropout)        (None, 768)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 352)               270688    
Total params: 110,121,568
Trainable params: 110,121,568
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
















Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 00020: early stopping
Balanced Acc for: 

########## Balanced Acc: 0.81084146 ##########





INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


🚀 Starting kfold iteration: 2/9



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "model_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_20 (InputLayer)        [(None, 30)]              0         
_________________________________________________________________
tf_bert_model_19 (TFBertMode ((None, 30, 768), (None,  109850880 
_________________________________________________________________
tf_op_layer_strided_slice_19 [(None, 768)]             0         
_________________________________________________________________
dropout_759 (Dropout)        (None, 768)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 352)               270688    
Total params: 110,121,568
Trainable params: 110,121,568
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
















Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 00023: early stopping
Balanced Acc for: 

########## Balanced Acc: 0.83898266 ##########





INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


🚀 Starting kfold iteration: 3/9



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "model_20"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_21 (InputLayer)        [(None, 30)]              0         
_________________________________________________________________
tf_bert_model_20 (TFBertMode ((None, 30, 768), (None,  109850880 
_________________________________________________________________
tf_op_layer_strided_slice_20 [(None, 768)]             0         
_________________________________________________________________
dropout_797 (Dropout)        (None, 768)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 352)               270688    
Total params: 110,121,568
Trainable params: 110,121,568
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
















Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 00041: early stopping
Balanced Acc for: 

########## Balanced Acc: 0.85161745 ##########





INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


🚀 Starting kfold iteration: 4/9



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "model_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_22 (InputLayer)        [(None, 30)]              0         
_________________________________________________________________
tf_bert_model_21 (TFBertMode ((None, 30, 768), (None,  109850880 
_________________________________________________________________
tf_op_layer_strided_slice_21 [(None, 768)]             0         
_________________________________________________________________
dropout_835 (Dropout)        (None, 768)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 352)               270688    
Total params: 110,121,568
Trainable params: 110,121,568
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
















Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 00035: early stopping
Balanced Acc for: 

########## Balanced Acc: 0.83977804 ##########





INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


🚀 Starting kfold iteration: 5/9



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "model_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_23 (InputLayer)        [(None, 30)]              0         
_________________________________________________________________
tf_bert_model_22 (TFBertMode ((None, 30, 768), (None,  109850880 
_________________________________________________________________
tf_op_layer_strided_slice_22 [(None, 768)]             0         
_________________________________________________________________
dropout_873 (Dropout)        (None, 768)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 352)               270688    
Total params: 110,121,568
Trainable params: 110,121,568
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
















Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 00032: early stopping
Balanced Acc for: 

########## Balanced Acc: 0.84038546 ##########





INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


🚀 Starting kfold iteration: 6/9



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "model_23"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_24 (InputLayer)        [(None, 30)]              0         
_________________________________________________________________
tf_bert_model_23 (TFBertMode ((None, 30, 768), (None,  109850880 
_________________________________________________________________
tf_op_layer_strided_slice_23 [(None, 768)]             0         
_________________________________________________________________
dropout_911 (Dropout)        (None, 768)               0         
_________________________________________________________________
dense_23 (Dense)             (None, 352)               270688    
Total params: 110,121,568
Trainable params: 110,121,568
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
















Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 00044: early stopping
Balanced Acc for: 

########## Balanced Acc: 0.84562335 ##########





INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


🚀 Starting kfold iteration: 7/9



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "model_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_25 (InputLayer)        [(None, 30)]              0         
_________________________________________________________________
tf_bert_model_24 (TFBertMode ((None, 30, 768), (None,  109850880 
_________________________________________________________________
tf_op_layer_strided_slice_24 [(None, 768)]             0         
_________________________________________________________________
dropout_949 (Dropout)        (None, 768)               0         
_________________________________________________________________
dense_24 (Dense)             (None, 352)               270688    
Total params: 110,121,568
Trainable params: 110,121,568
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
















Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 00037: early stopping
Balanced Acc for: 

########## Balanced Acc: 0.82966169 ##########





INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


🚀 Starting kfold iteration: 8/9



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "model_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_26 (InputLayer)        [(None, 30)]              0         
_________________________________________________________________
tf_bert_model_25 (TFBertMode ((None, 30, 768), (None,  109850880 
_________________________________________________________________
tf_op_layer_strided_slice_25 [(None, 768)]             0         
_________________________________________________________________
dropout_987 (Dropout)        (None, 768)               0         
_________________________________________________________________
dense_25 (Dense)             (None, 352)               270688    
Total params: 110,121,568
Trainable params: 110,121,568
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
















Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 00023: early stopping
Balanced Acc for: 

########## Balanced Acc: 0.84444292 ##########





INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.23.5.226:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


🚀 Starting kfold iteration: 9/9



You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "model_26"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_27 (InputLayer)        [(None, 30)]              0         
_________________________________________________________________
tf_bert_model_26 (TFBertMode ((None, 30, 768), (None,  109850880 
_________________________________________________________________
tf_op_layer_strided_slice_26 [(None, 768)]             0         
_________________________________________________________________
dropout_1025 (Dropout)       (None, 768)               0         
_________________________________________________________________
dense_26 (Dense)             (None, 352)               270688    
Total params: 110,121,568
Trainable params: 110,121,568
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
















Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 00027: early stopping
Balanced Acc for: 

########## Balanced Acc: 0.83930396 ##########



<Figure size 432x288 with 0 Axes>

In [24]:
# Average the per-fold balanced accuracy scores collected in `scores`
summed = np.sum(scores, axis=0)
print ("\n########## Global Balanced Acc: %0.8f ##########\n" % (summed/len(scores)) )


########## Global Balanced Acc: 0.83730792 ##########



In [25]:
# Sum the per-fold test predictions element-wise and save the result.
# Note: this rebinds `summed`, which the previous cell used for the score total.
summed = np.sum(predictions, axis=0)
np.save('bert.npy', summed)

In [26]:
# Sanity check: reload the saved predictions and show the category label with
# the highest summed score for the first test row.
# NOTE(review): allow_pickle=True permits unpickling arbitrary objects and is
# presumably unnecessary for a plain numeric array — consider allow_pickle=False.
pred_summed = np.load('bert.npy', allow_pickle=True)
labels[np.argmax(pred_summed[0])]

'Cat_303'

In [27]:
# Download predictions, test ids and labels from the Colab runtime to the local machine

from google.colab import files
files.download('bert.npy')  # Summed predictions of all folds
files.download('labels.npy') # Mapping indexes to Cat_[Index]
files.download('test_ids.npy') # Mapping test id to test index

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>