# Imports

In [1]:
import pickle
import pandas as pd
import numpy as np
import tensorflow as tf
import h5py
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import f1_score, accuracy_score
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation, Convolution2D, Conv2D, LocallyConnected2D, MaxPooling2D
from tensorflow.keras.layers import AveragePooling2D, GlobalAveragePooling2D, SeparableConv2D
from tensorflow.keras.layers import BatchNormalization, Flatten, Dense, Dropout, Input, concatenate
from tensorflow.keras.layers import add, Add, ZeroPadding2D, GlobalMaxPooling2D, DepthwiseConv2D
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.losses import CategoricalCrossentropy
import gc

import sys
sys.path.append('C:/w266/cris/BERTVision')

from utils.model_zoo import *

tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[0], True)
tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[1], True)

# Load Data

In [2]:
with open('./data/train_embeddings_3_epoch.pkl', 'rb') as handle:
#with open('./data/train_embeddings_1_epoch.pkl', 'rb') as handle:
#with open('./data/train_embeddings_2_tenths_epochs.pkl', 'rb') as handle:
    train = pickle.load(handle)
    train = np.expand_dims(train, axis=1)

with open('./data/dev_embeddings_3_epoch.pkl', 'rb') as handle:
#with open('./data/dev_embeddings_1_epoch.pkl', 'rb') as handle:
#with open('./data/dev_embeddings_2_tenths_epochs.pkl', 'rb') as handle:
    dev = pickle.load(handle)
    dev = np.expand_dims(dev, axis=1)

print(f"Train shape: {train.shape}, Dev shape: {dev.shape}")


Train shape: (131911, 1, 1024, 26), Dev shape: (12227, 1, 1024, 26)


In [3]:
train_data = h5py.File(r'C:/w266/cris/BERTVision/data/squad_train.h5', 'r')

indices = np.array(eval(open('indices.txt', 'r').readline()))

train_input_start = np.array(train_data['input_start'], dtype = np.int32)[indices]
train_input_end = np.array(train_data['input_end'], dtype = np.int32)[indices]

answer_no_answer = np.where(train_input_start + train_input_end > 0, 0, 1)
answer_no_answer = to_categorical(answer_no_answer).astype(np.uint8)

with open('./data/dev_answers.pkl', 'rb') as handle:
    dev_answers = pickle.load(handle)
with open('./data/dev_qasids.pkl', 'rb') as handle:
    dev_qasids = pickle.load(handle)

del train_data, indices, train_input_start, train_input_end

In [4]:
gc.collect()

20

# Build Model <font color="Red">Tiny Tenney Linear</font>

In [5]:
def get_tiny_tenney(input_shape = (1, 1024, 26), gpu_device = "/gpu:1"):
    
    with tf.device(gpu_device):
        inp = layers.Input(input_shape, name = 'input_layer')
        X = BertConcat() (inp)
        X = tf.squeeze(X, axis = 1)
        X = layers.Dense(2)(X)
        model = Model(inputs = inp, outputs = X, name = 'BinaryClassification_Adapter_Tenney')

    return model

model_eval = get_tiny_tenney()
model_eval.summary()

Model: "BinaryClassification_Adapter_Tenney"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     [(None, 1, 1024, 26)]     0         
_________________________________________________________________
bert_concat (BertConcat)     (None, 1, 1024)           27        
_________________________________________________________________
tf_op_layer_Squeeze (TensorF [(None, 1024)]            0         
_________________________________________________________________
dense (Dense)                (None, 2)                 2050      
Total params: 2,077
Trainable params: 2,077
Non-trainable params: 0
_________________________________________________________________


# Build Model <font color="red">AdapterPooling Tenney</font>

In [56]:
def get_adapter_shared_skip_tenney_first(input_shape = (1, 1024, 26), gpu_device = "/gpu:1"):
    
    with tf.device(gpu_device):
        inp = layers.Input(input_shape, name = 'input_layer')
        inp_seq = inp[:,:,:,-1]
        X = BertConcat() (inp)
        X = tf.expand_dims(X, axis = -1, name ='expand_dims')
        X = AdapterPooler(386, shared_weights = True)(X)
        X = tf.reshape(X, (-1, X.shape[1], X.shape[2] * X.shape[3]))
        X = tf.concat([X, inp_seq], axis = 2)
        X = tf.squeeze(X, axis = 1)
        X = layers.Dense(2)(X)
        model = Model(inputs = inp, outputs = X, name = 'BinaryClassification_Adapter_Tenney')

    return model

model_eval = get_adapter_shared_skip_tenney_first()
model_eval.summary()

Model: "BinaryClassification_Adapter_Tenney"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_layer (InputLayer)        [(None, 1, 1024, 26) 0                                            
__________________________________________________________________________________________________
bert_concat_32 (BertConcat)     (None, 1, 1024)      27          input_layer[0][0]                
__________________________________________________________________________________________________
tf_op_layer_expand_dims_25 (Ten [(None, 1, 1024, 1)] 0           bert_concat_32[0][0]             
__________________________________________________________________________________________________
adapter_pooler_25 (AdapterPoole (None, 1, 386, 1)    395650      tf_op_layer_expand_dims_25[0][0] 
________________________________________________________________

# Build Model <font color="red">Xception Abbreviated</font>

In [None]:
def get_xception_abbreviated(input_shape = (1, 1024, 26), gpu_device = "/gpu:1"):

    with tf.device(gpu_device):

        # input image size
        input_img = layers.Input(shape = input_shape, dtype = tf.float32)

        # Block 1
        x = Conv2D(64, (1, 3), strides=(1, 3), use_bias=False) (input_img)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(128, (1, 3), use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        residual = Conv2D(512, (1, 1), strides=(1, 2), padding='same', use_bias=False)(x)
        residual = BatchNormalization()(residual)

        # Block 2
        x = SeparableConv2D(256, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = SeparableConv2D(512, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)

        # Block 2 Pool
        x = AveragePooling2D((1, 3), strides=(1, 2), padding='same')(x)
        x = layers.add([x, residual])

        # Fully Connected Layer
        x = GlobalAveragePooling2D()(x)

        x = layers.Dense(2, dtype = tf.float32, name = 'dense_2_final') (x)

        model = models.Model(input_img, x, name = 'Xception_BC')

    return model

model_eval = get_xception_abbreviated()
model_eval.summary()

# Build Model <font color="red">Xception (Full)</font>

In [10]:
def get_xception(input_shape = (1, 1024, 26), gpu_device = "/gpu:1"):

    with tf.device(gpu_device):

        # input image size
        input_img = layers.Input(shape = input_shape, dtype = tf.float32)

        # Block 1
        x = Conv2D(32, (1, 3), strides=(1, 3), use_bias=False) (input_img)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(64, (1, 3), use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        residual = Conv2D(128, (1, 1), strides=(1, 2), padding='same', use_bias=False)(x)
        residual = BatchNormalization()(residual)

        # Block 2
        x = SeparableConv2D(128, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = SeparableConv2D(128, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)

        # Block 2 Pool
        x = AveragePooling2D((1, 3), strides=(1, 2), padding='same')(x)
        x = layers.add([x, residual])

        residual = Conv2D(256, (1, 1), strides=(1, 2), padding='same', use_bias=False)(x)
        residual = BatchNormalization()(residual)

        # Block 3
        x = Activation('relu')(x)
        x = SeparableConv2D(256, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = SeparableConv2D(256, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)

        # Block 3 Pool
        x = AveragePooling2D((1, 3), strides=(1, 2), padding='same')(x)
        x = layers.add([x, residual])

        residual = Conv2D(728, (1, 1), strides=(1, 2), padding='same', use_bias=False)(x)
        residual = BatchNormalization()(residual)

        # Block 4
        x = Activation('relu')(x)
        x = SeparableConv2D(728, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = SeparableConv2D(728, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)

        x = AveragePooling2D((1, 3), strides=(1, 2), padding='same')(x)
        x = layers.add([x, residual])

        # Block 5 - 12
        for i in range(8):
            residual = x

            x = Activation('relu')(x)
            x = SeparableConv2D(728, (1, 3), padding='same', use_bias=False)(x)
            x = BatchNormalization()(x)
            x = Activation('relu')(x)
            x = SeparableConv2D(728, (1, 3), padding='same', use_bias=False)(x)
            x = BatchNormalization()(x)
            x = Activation('relu')(x)
            x = SeparableConv2D(728, (1, 3), padding='same', use_bias=False)(x)
            x = BatchNormalization()(x)

            x = layers.add([x, residual])

        residual = Conv2D(1024, (1, 1), strides=(1, 2), padding='same', use_bias=False)(x)
        residual = BatchNormalization()(residual)

        # Block 13
        x = Activation('relu')(x)
        x = SeparableConv2D(728, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = SeparableConv2D(1024, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)

        # Block 13 Pool
        x = AveragePooling2D((1, 3), strides=(1, 2), padding='same')(x)
        x = layers.add([x, residual])

        # Block 14
        x = SeparableConv2D(1536, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Block 14 part 2
        x = SeparableConv2D(2048, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
                
        # Fully Connected Layer
        x = GlobalAveragePooling2D()(x)

        x = layers.Dense(2, dtype = tf.float32, name = 'dense_2_final') (x)

        model = models.Model(input_img, x, name = 'Xception_BC')

    return model

model_eval = get_xception()
model_eval.summary()

Model: "Xception_BC"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1, 1024, 26) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 1, 341, 32)   2496        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 1, 341, 32)   128         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 1, 341, 32)   0           batch_normalization[0][0]        
________________________________________________________________________________________

# Build Model <font color="red">Xception Abbreviated CLS Residual</font>

In [None]:
def get_xception_abbreviated_clsresidual(input_shape = (1, 1024, 26), gpu_device = "/gpu:1"):

    with tf.device(gpu_device):

        # input image size
        input_img = layers.Input(shape = input_shape, dtype = tf.float32)

        # pull the last channel layer for residual connection layer
        inp_seq = input_img[:,:,:,-1]
        inp_seq = tf.squeeze(inp_seq, axis = 1)

        # Block 1
        x = Conv2D(64, (1, 3), strides=(1, 3), use_bias=False) (input_img)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(128, (1, 3), use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        residual = Conv2D(512, (1, 1), strides=(1, 2), padding='same', use_bias=False)(x)
        residual = BatchNormalization()(residual)

        # Block 2
        x = SeparableConv2D(256, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = SeparableConv2D(512, (1, 3), padding='same', use_bias=False)(x)
        x = BatchNormalization()(x)

        # Block 2 Pool
        x = AveragePooling2D((1, 3), strides=(1, 2), padding='same')(x)
        x = layers.add([x, residual])

        # Fully Connected Layer
        x = GlobalAveragePooling2D()(x)
        
        # add the skip level residual back to the last CLS token
        x = layers.concatenate([x, inp_seq])

        x = layers.Dense(2, dtype = tf.float32, name = 'dense_2_final') (x)

        model = models.Model(input_img, x, name = 'Xception_BC')

    return model

model_eval = get_xception_abbreviated_clsresidual()
model_eval.summary()

# Build Model <font color="Red">AdapterPooling MeanAvg</font>

In [40]:
def get_adapaterpooling_meanavg(input_shape = (1, 1024, 26), gpu_device = "/gpu:1"):
    
    with tf.device(gpu_device):
        inp = layers.Input(input_shape, name = 'input_layer')
        inp_seq = inp[:,:,:,-1]
        X = MeanConcat() (inp)
        X = tf.expand_dims(X, axis = -1, name ='expand_dims')
        X = AdapterPooler(386, shared_weights = True)(X)
        X = tf.reshape(X, (-1, X.shape[1], X.shape[2] * X.shape[3]))
        X = tf.concat([X, inp_seq], axis = 2)
        X = tf.squeeze(X, axis = 1)
        X = layers.Dense(2)(X)
        model = Model(inputs = inp, outputs = X, name = 'BinaryClassification_Adapter_Tenney')

    return model

model_eval = get_adapaterpooling_meanavg()
model_eval.summary()

Model: "BinaryClassification_Adapter_Tenney"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_layer (InputLayer)        [(None, 1, 1024, 26) 0                                            
__________________________________________________________________________________________________
mean_concat (MeanConcat)        (None, 1, 1024)      0           input_layer[0][0]                
__________________________________________________________________________________________________
tf_op_layer_expand_dims_23 (Ten [(None, 1, 1024, 1)] 0           mean_concat[0][0]                
__________________________________________________________________________________________________
adapter_pooler_23 (AdapterPoole (None, 1, 386, 1)    395650      tf_op_layer_expand_dims_23[0][0] 
________________________________________________________________

# Evaluate Model Performance Against DEV

In [11]:
def train_model(epochs = 1, model_name = "adapterpooling_tenney", model = None):
    
    if (not model):
        if model_name == "adapterpooling_tenney":
            model = get_adapter_shared_skip_tenney_first()
        elif model_name == "xception_abbreviated":
            model = get_xception_abbreviated()
        elif model_name == "xception":
            model = get_xception()
        elif model_name == "xception_abbreviated_clsresidual":
            model = get_xception_abbreviated_clsresidual()
        elif model_name == "get_adapaterpooling_meanavg":
            model = get_adapaterpooling_meanavg()
        elif model_name == "get_tiny_tenney":
            model = get_tiny_tenney()
        else:
            raise RuntimeError(f"no model found with name `{model_name}.")
    
    opt = Adam(lr = 1e-3, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-8)
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    model.compile(loss = loss, optimizer = opt, metrics = ['accuracy'])
    
    _ = model.fit(x = train, y = answer_no_answer, epochs = epochs, 
            batch_size = 64, verbose = True, shuffle = True)
    
    return model


In [None]:
MODEL_NAME = "xception"

print(f"Evaluating performance for {MODEL_NAME}...")
results = {'epoch':[], 'f1':[], 'em':[]}
model = None

for epochs in range(10):
    e = epochs + 1
    
    model = train_model(epochs = 1, model_name = MODEL_NAME, model = model)

    pred = model.predict(dev)
    pred = np.argmax(pred, axis = 1).astype(np.uint8)
    
    df = pd.DataFrame({'qas_id':dev_qasids, 'prediction':pred}).groupby(by='qas_id').agg({'prediction':'max'})
    #ans = pd.DataFrame(dev_answers, index =[0]).T
    #ans.columns = ['answer']
    df = df.merge(dev_answers, how='inner', left_index = True, right_index = True)

    f1 = f1_score(y_true = df.answer.values, y_pred = df.prediction.values)
    em = accuracy_score(y_true = df.answer.values, y_pred = df.prediction.values)

    results['epoch'].append(e)
    results['f1'].append(f1)
    results['em'].append(em)

    print(f"epoch [{e}] f1 score: {f1}")
    print(f"epoch [{e}] accuracy: {em}")
    gc.collect()


Evaluating performance for xception...
epoch [1] f1 score: 0.777610818933133
epoch [1] accuracy: 0.8005558830961004
epoch [2] f1 score: 0.7862272141410421
epoch [2] accuracy: 0.8044302198264971
epoch [3] f1 score: 0.7872733999260082
epoch [3] accuracy: 0.806283163480165
epoch [4] f1 score: 0.7915402298850575
epoch [4] accuracy: 0.809062578960667
epoch [5] f1 score: 0.7905392968893796
epoch [5] accuracy: 0.8083045565568938

## Export the performance dictionary for reporting

In [9]:
pd.DataFrame(results).to_dict()

{'epoch': {0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10},
 'f1': {0: 0.794728653793356,
  1: 0.7922723091076358,
  2: 0.7896631287494231,
  3: 0.7884562020164646,
  4: 0.7869795047760363,
  5: 0.786032689450223,
  6: 0.7853091585309159,
  7: 0.7853023255813953,
  8: 0.784518049869743,
  9: 0.7840655249441549},
 'em': {0: 0.8110839720373958,
  1: 0.8098206013644403,
  2: 0.8080518824223027,
  3: 0.8073780847300598,
  4: 0.8065358376147562,
  5: 0.8059462646340436,
  6: 0.8055251410763918,
  7: 0.8056093657879222,
  8: 0.8049355680956792,
  9: 0.8045986692495578}}