In [1]:
from BERT import BERT
from BERT import mlm_custom_loss, nsp_custom_loss
import numpy as np
import tensorflow as tf

# Root folders for the training arrays and for the saved model / log files.
# NOTE: these are absolute, machine-specific Windows paths; adjust them when
# running the notebook on another machine.
DISERT_DATA_PATH = r'C:\Users\yunus\Desktop\DisERT Data'
CHECKPOINT_PATH = r'C:\Users\yunus\Desktop\Checkpoints\DisERT'

# Number of leading samples kept from each array (None keeps every row).
N_SAMPLES = 4000

# Raw f-strings (rf'...') keep the backslash path separator literal. In a
# non-raw f-string, '\X' and '\Y' are invalid escape sequences, which Python
# reports with a warning and plans to reject in a future version.
X_MLM = np.load(rf'{DISERT_DATA_PATH}\X_MLM.npy')[:N_SAMPLES]
Y_MLM = np.load(rf'{DISERT_DATA_PATH}\Y_MLM.npy')[:N_SAMPLES]
X_NSP = np.load(rf'{DISERT_DATA_PATH}\X_NSP.npy')[:N_SAMPLES]
Y_NSP = np.load(rf'{DISERT_DATA_PATH}\Y_NSP.npy')[:N_SAMPLES]

In [2]:
# Output locations derived from CHECKPOINT_PATH: the saved model and the
# base name of the per-epoch log file (CsvLogger appends '.txt' to it).
# Raw f-strings keep the backslash literal; '\m' and '\l' are invalid escape
# sequences in a non-raw string.
sCheckPointFilePath = rf'{CHECKPOINT_PATH}\model'
sCsvLogFilePath = rf'{CHECKPOINT_PATH}\log'

In [3]:
import time
import os

class CsvLogger(tf.keras.callbacks.Callback):
    """Keras callback that appends one delimited text row per epoch to a file.

    Each row contains the values of the ``logs`` dict received at the end of
    the epoch, extended with ``start``, ``end``, ``duration`` (wall-clock
    timestamps / seconds from ``time.time()``), ``epoch`` and
    ``learning_rate``. A header row with the key names is written first when
    the target file does not exist yet or is empty.

    Args:
        sFilePath: Path of the log file without extension; ``.txt`` is appended.
        sDelimeter: Separator placed between the values of a row.
    """

    def __init__(self, sFilePath, sDelimeter = ';'):
        # Let the Keras base class set up its own attributes first.
        super().__init__()
        self.sFilePath = f'{sFilePath}.txt'
        self.sDelimeter = sDelimeter

    def on_epoch_begin(self, batch, logs=None):
        """Remember when the epoch started.

        The first argument receives the epoch index; its name is kept as-is
        for compatibility with existing callers.
        """
        self.epoch_time_start = time.time()

    def on_epoch_end(self, epoch, logs=None):
        """Add timing / epoch / learning-rate entries to ``logs`` and append a row."""
        endTime = time.time()

        # A fresh dict per call: a mutable default argument would be shared
        # (and polluted) across calls because the entries below are added to it.
        if logs is None:
            logs = {}

        # These keys are written into the dict passed in by the caller, so
        # anything else holding the same dict sees them as well.
        logs['start'] = self.epoch_time_start
        logs['end'] = endTime
        logs['duration'] = endTime - self.epoch_time_start

        logs['epoch'] = epoch
        logs['learning_rate'] = self.model.optimizer.lr.numpy()

        # The header is needed for a new file and for a pre-existing empty one.
        bWriteHeader = (
            not os.path.exists(self.sFilePath)
            or os.path.getsize(self.sFilePath) == 0
        )

        # Single open in append mode for both the optional header and the row.
        with open(self.sFilePath, 'a') as f:
            if bWriteHeader:
                f.write(self.sDelimeter.join(str(key) for key in logs.keys()))
                f.write('\n')
            f.write(self.sDelimeter.join(str(value) for value in logs.values()))
            f.write('\n')
            
    
class ThresholdStopper(tf.keras.callbacks.Callback):
    """Keras callback that requests a training stop once the loss is low enough.

    Args:
        threshold: Training is flagged to stop as soon as the reported
            ``loss`` is less than or equal to this value.
    """

    def __init__(self, threshold):
        # Let the Keras base class set up its own attributes first.
        super().__init__()
        self.threshold = threshold

    def on_batch_end(self, batch, logs=None):
        """Set ``model.stop_training`` when ``logs['loss']`` reaches the threshold."""
        # `logs` may be None or may lack 'loss'; comparing None with a number
        # would raise TypeError, so such batches are simply skipped.
        loss = (logs or {}).get('loss')
        if loss is not None and loss <= self.threshold:
            self.model.stop_training = True
                
                
                
# Callback instances intended for `fit(callbacks=[...])`. All built-in
# callbacks below watch the training loss ('loss').

# Per-epoch text log; the file name is derived from sCsvLogFilePath.
oCsvLogger = CsvLogger(sFilePath=sCsvLogFilePath)

# Flags training to stop once the batch loss is <= 0.05.
oThresholdStopper = ThresholdStopper(threshold=0.05)

# Early stopping on the training loss with a patience of 20 epochs.
oEarlyStopper = tf.keras.callbacks.EarlyStopping(patience=20, monitor='loss')

# Learning-rate reduction on plateau: factor 0.8, patience 3, floor 1e-4.
oLearningRateReducer = tf.keras.callbacks.ReduceLROnPlateau(
    min_lr=1e-4,
    patience=3,
    factor=0.8,
    monitor='loss',
)

# Full-model checkpoint (not weights only), kept only for the lowest loss.
oDisERTCheckPoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=sCheckPointFilePath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    # NOTE(review): `save_traces` is not among the documented ModelCheckpoint
    # arguments; confirm it has an effect on the installed Keras version.
    save_traces=True,
)

In [4]:
# NOTE(review): this whole cell is commented out. It appears to be the
# from-scratch build / compile / fit / save of the model that the following
# cell reloads from sCheckPointFilePath — confirm, then either re-enable it
# behind a flag or remove it so the notebook runs top to bottom.
# oDisERT = BERT(
#     mlm_input_shape = (10, 129), 
#     nsp_input_shape = (1, 129),
#     nr_of_encoder_blocks = 4,
#     attention_key_dims = 32,
#     attention_nr_of_heads = 2,
#     attention_dense_dims = 128,
#     dropout_rate = 0.0
# )

# oDisERT.compile(
#     optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
#     loss = [mlm_custom_loss, nsp_custom_loss]
# )

# oDisERT.fit(
#     x = [X_MLM, X_NSP], 
#     y = [Y_MLM, Y_NSP], 
#     batch_size= 512,
#     epochs=2,
#     verbose=1,
#     # callbacks = [oDisERTCheckPoint, oCsvLogger, oLearningRateReducer, oEarlyStopper, oThresholdStopper]
# )

# oDisERT.save(sCheckPointFilePath)

# # oDisERT.summary()

# # tf.keras.utils.plot_model(oDisERT, show_shapes=True)

In [5]:
# Reload the saved model from sCheckPointFilePath; the two custom loss
# functions are passed by name so they can be resolved while loading.
oDisERT = tf.keras.models.load_model(
    sCheckPointFilePath,
    custom_objects=dict(
        mlm_custom_loss=mlm_custom_loss,
        nsp_custom_loss=nsp_custom_loss,
    ),
)

# Train for two more epochs on the MLM and NSP inputs/targets.
# The callbacks are currently disabled:
# callbacks = [oDisERTCheckPoint, oCsvLogger, oLearningRateReducer, oEarlyStopper, oThresholdStopper]
oDisERT.fit(
    x=[X_MLM, X_NSP],
    y=[Y_MLM, Y_NSP],
    epochs=2,
    batch_size=512,
    verbose=1,
)

# Write the model back to the same path it was loaded from.
oDisERT.save(sCheckPointFilePath)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)> and <tf.Variable 'mlm_classifier/kernel:0' shape=(129, 129) dtype=float32, numpy=
array([[-0.07809452, -0.0392647 ,  0.11604145, ..., -0.0823417 ,
         0.01284003,  0.13814363],
       [-0.11431655,  0.00238571,  0.07519951, ...,  0.0574014 ,
        -0.04757044,  0.04661798],
       [-0.09807503,  0.03060101, -0.02770466, ...,  0.00589302,
         0.14047253,  0.14759356],
       ...,
       [-0.123781



INFO:tensorflow:Assets written to: C:\Users\yunus\Desktop\Checkpoints\DisERT\model\assets


INFO:tensorflow:Assets written to: C:\Users\yunus\Desktop\Checkpoints\DisERT\model\assets
