In [1]:
# ! pip install --upgrade tensorflow
# ! pip install --upgrade keras

In [18]:
import os
import pandas as pd
import numpy as np
import tempfile
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Layer, Input, Embedding, Bidirectional, LSTM, Dense, Dropout, concatenate, GlobalMaxPooling1D, GRU
from tensorflow.keras.utils import register_keras_serializable
import tensorflow.keras.backend as K
from sklearn.model_selection import train_test_split
from datetime import datetime
from sklearn.metrics import accuracy_score, classification_report
from sentence_transformers import SentenceTransformer

In [19]:
# Read the training and development splits from the working directory.
# Later cells use the 'premise', 'hypothesis' and 'label' columns of both frames.
TRAIN_CSV, DEV_CSV = 'train.csv', 'dev.csv'
data = pd.read_csv(TRAIN_CSV)
validation_data = pd.read_csv(DEV_CSV)

In [20]:
# # Convert text columns to strings and handle NaN values
# data['premise'] = data['premise'].astype(str).fillna('<NAN>')
# data['hypothesis'] = data['hypothesis'].astype(str).fillna('<NAN>')
# validation_data['premise'] = validation_data['premise'].astype(str).fillna('<NAN>')
# validation_data['hypothesis'] = validation_data['hypothesis'].astype(str).fillna('<NAN>')

In [24]:
@register_keras_serializable()
class AttentionLayer(Layer):
    """Attention-style re-weighting of a sequence along axis 1.

    For an input ``x`` the layer computes ``tanh(x . W + b)``, applies a
    softmax along axis 1 and multiplies ``x`` by the resulting weights.
    No reduction is performed, so the output shape equals the input shape.

    Args:
        my_custom_arg: Stored on the instance and round-tripped through
            ``get_config``; the computation itself does not read it.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, my_custom_arg=42, **kwargs):
        super().__init__(**kwargs)
        self.my_custom_arg = my_custom_arg

    def build(self, input_shape):
        """Create the projection ``W`` and the per-position bias ``b``."""
        feature_dim = input_shape[-1]
        # The bias holds one entry per position on axis 1, so its size is
        # tied to input_shape[1].
        steps = input_shape[1]
        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_dim, 1),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(steps, 1),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return ``x`` scaled by softmax-normalised scores along axis 1."""
        scores = K.tanh(K.dot(x, self.W) + self.b)
        weights = K.softmax(scores, axis=1)
        return x * weights

    def compute_output_shape(self, input_shape):
        """The layer only re-weights its input, so the shape is unchanged."""
        return input_shape

    def get_config(self):
        """Extend the base config with ``my_custom_arg`` for serialization."""
        base_config = super().get_config()
        return {**base_config, 'my_custom_arg': self.my_custom_arg}

In [22]:
# Pretrained SentenceTransformer used to embed the premise and hypothesis texts
SENTENCE_MODEL_NAME = 'all-MiniLM-L6-v2'
sentence_model = SentenceTransformer(SENTENCE_MODEL_NAME)
def generate_embeddings(dataframe, model):
    """Encode premise/hypothesis pairs into a single feature matrix.

    Args:
        dataframe: Frame with 'premise' and 'hypothesis' columns.
        model: Object exposing ``encode(list_of_str, show_progress_bar=...)``
            that returns a 2-D array with one row per input text.

    Returns:
        Array with one row per dataframe row: the premise embedding
        followed by the hypothesis embedding (concatenated on axis 1).
    """
    def _encode_column(column):
        # Missing cells arrive from read_csv as float NaN, which a text
        # encoder cannot handle; encode them as empty strings and coerce
        # every other value to str.
        texts = ['' if pd.isna(value) else str(value)
                 for value in dataframe[column].tolist()]
        return model.encode(texts, show_progress_bar=True)

    premise_embeddings = _encode_column('premise')
    hypothesis_embeddings = _encode_column('hypothesis')
    return np.concatenate((premise_embeddings, hypothesis_embeddings), axis=1)

In [23]:
# Targets: the 'label' column of each split
train_labels = data['label'].values
validation_labels = validation_data['label'].values

# Features: concatenated premise/hypothesis sentence embeddings for each split
train_embeddings, validation_embeddings = (
    generate_embeddings(split, sentence_model)
    for split in (data, validation_data)
)

Batches:   0%|          | 0/842 [00:00<?, ?it/s]

Batches:   0%|          | 0/842 [00:00<?, ?it/s]

Batches:   0%|          | 0/211 [00:00<?, ?it/s]

Batches:   0%|          | 0/211 [00:00<?, ?it/s]

In [49]:
# Model building
def create_model(embedding_dim, timesteps=1):
    """Build the Bi-GRU + attention binary classifier.

    The flat input vector is reshaped into ``timesteps`` steps, passed
    through a bidirectional GRU, re-weighted by ``AttentionLayer``,
    max-pooled over the step axis and classified by a small dense head
    ending in a single sigmoid unit.

    Args:
        embedding_dim: Length of each flat input vector.
        timesteps: Number of steps the input vector is split into. The
            default of 1 keeps the original single-step layout. With a
            single step the softmax inside ``AttentionLayer`` is taken over
            one position and the max-pooling reduces over one element, so
            both act as pass-through operations. When the input is a
            concatenation of equally sized parts (for example a premise
            embedding followed by a hypothesis embedding), passing the
            number of parts makes each part its own step.

    Returns:
        An uncompiled ``Model`` that maps inputs of shape
        ``(batch, embedding_dim)`` to a single sigmoid output per row.

    Raises:
        ValueError: If ``timesteps`` is smaller than 1 or does not evenly
            divide ``embedding_dim``.
    """
    # Validate before touching Keras so a bad split fails with a clear message.
    if timesteps < 1 or embedding_dim % timesteps != 0:
        raise ValueError(
            f"timesteps must be a positive divisor of embedding_dim; "
            f"got timesteps={timesteps}, embedding_dim={embedding_dim}"
        )

    input_shape = (embedding_dim,)
    input_layer = Input(shape=input_shape)

    # Bi-GRU with Attention
    # -1 lets Keras infer the per-step feature size (embedding_dim / timesteps).
    reshape_layer = tf.keras.layers.Reshape((timesteps, -1))(input_layer)
    bi_gru = Bidirectional(GRU(64, return_sequences=True))(reshape_layer)
    attention_output = AttentionLayer()(bi_gru)
    max_pooling = GlobalMaxPooling1D()(attention_output)

    # Classification layers
    dense_layer = Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(max_pooling)
    dropout_layer = Dropout(0.5)(dense_layer)
    predictions = Dense(1, activation='sigmoid')(dropout_layer)

    model = Model(inputs=input_layer, outputs=predictions)
    return model

In [57]:
# Each input row holds two sentence embeddings side by side (premise, hypothesis),
# so the model input is twice the encoder's embedding size.
embedding_dim = 2 * sentence_model.get_sentence_embedding_dimension()
model = create_model(embedding_dim)
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.00035),
    metrics=['accuracy'],
)

In [58]:
# Learning-rate schedule and early stopping.
# Both callbacks watch val_loss, the same quantity EarlyStopping already used,
# so a plateau means the same thing to each of them.
PATIENCE = 3

# Early stopping waits longer than the LR scheduler. With equal patience,
# training can stop around the same epoch the learning rate is first halved,
# leaving the reduced rate no epochs in which to help.
EARLY_STOPPING_PATIENCE = 2 * PATIENCE + 1

learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
                                            patience=PATIENCE,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.0000001)

# restore_best_weights puts the best-epoch weights back on the model when
# training is stopped, so code that keeps using `model` afterwards does not
# score the weights of a later, worse epoch.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=EARLY_STOPPING_PATIENCE,
                               restore_best_weights=True,
                               verbose=1)

In [63]:
# Build a timestamped checkpoint path inside a given directory
def create_unique_checkpoint_file(base_dir):
    """Return ``<base_dir>/model_checkpoint_<YYYYmmdd-HHMMSS>.keras``.

    The timestamp is the local time at the moment of the call, with
    one-second resolution. Only the path string is built; nothing is
    created on disk.
    """
    stamp = format(datetime.now(), "%Y%m%d-%H%M%S")
    return os.path.join(base_dir, 'model_checkpoint_' + stamp + '.keras')

In [64]:
# Checkpoints are written into a fresh temporary directory
temp_dir = tempfile.mkdtemp()

# Keep only the best full model (not just weights). The timestamped path is
# computed once here, so every improvement overwrites the same file.
model_checkpoint = ModelCheckpoint(
    create_unique_checkpoint_file(temp_dir),
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
)

callbacks_list = [learning_rate_reduction, early_stopping, model_checkpoint]

In [65]:
# Model Training
# Up to 50 epochs in batches of 32, validating on the dev split each epoch;
# the callbacks may lower the learning rate, stop early and save checkpoints.
fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_data=(validation_embeddings, validation_labels),
    callbacks=callbacks_list,
)
history = model.fit(train_embeddings, train_labels, **fit_options)

Epoch 1/50
[1m823/842[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.6281 - loss: 1.0329
Epoch 1: val_loss improved from inf to 0.62202, saving model to C:\Users\ttt\AppData\Local\Temp\tmp8rsxsj04\model_checkpoint_20240420-181030.keras
[1m842/842[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6287 - loss: 1.0275 - val_accuracy: 0.6629 - val_loss: 0.6220 - learning_rate: 3.5000e-04
Epoch 2/50
[1m842/842[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6844 - loss: 0.6057
Epoch 2: val_loss improved from 0.62202 to 0.60731, saving model to C:\Users\ttt\AppData\Local\Temp\tmp8rsxsj04\model_checkpoint_20240420-181030.keras
[1m842/842[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6844 - loss: 0.6057 - val_accuracy: 0.6730 - val_loss: 0.6073 - learning_rate: 3.5000e-04
Epoch 3/50
[1m813/842[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.6898

In [55]:
# model.save('GRUModel.keras')

In [66]:
# Reload the best checkpoint from the exact path ModelCheckpoint wrote to.
# The file lives in the temporary directory and its timestamped name changes
# on every run, so a hard-coded bare filename does not point at it.
# NOTE(review): the ValueError recorded in this cell's output was produced
# with the old hard-coded filename; re-run to confirm whether it persists.
best_model = load_model(
    model_checkpoint.filepath,
    custom_objects={'AttentionLayer': AttentionLayer},
    compile=False,
)

ValueError: A total of 1 objects could not be loaded. Example error message for object <GRUCell name=gru_cell, built=True>:

Layer 'gru_cell' expected 3 variables, but received 0 variables during loading. Expected: ['kernel', 'recurrent_kernel', 'bias']

List of objects that could not be loaded:
[<GRUCell name=gru_cell, built=True>]

In [None]:
# Score the dev split and threshold the sigmoid outputs at 0.5
predictions = model.predict(validation_embeddings)
predicted_labels = (predictions > 0.5).astype(int)

# Persist the hard 0/1 predictions as a one-column CSV
predictions_df = pd.DataFrame(predicted_labels, columns=['prediction'])
predictions_df.to_csv('predictions_B.csv', index=False)

# Compare against the ground-truth labels
accuracy, report = (
    metric(validation_labels, predicted_labels)
    for metric in (accuracy_score, classification_report)
)
print(accuracy, report, sep='\n')