<a href="https://colab.research.google.com/github/jahnavirishikesh/contradiction_detection/blob/main/contradiction_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Dense, concatenate, Dropout, GlobalMaxPooling1D, Dot

# Seed NumPy and TensorFlow so repeated runs start from the same random state.
seed_value = 42
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

# loading dataset (both CSV files are read from the current working directory)
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# training and testing data
# handling missing values: empty cells are replaced by '' in BOTH frames.
# A NaN left in a text column is a float, which the tokenizer cannot process,
# so the training frame gets the same treatment as the test frame.
X_train = train_data[['SENTENCE A', 'SENTENCE B']].fillna('')
y_train = train_data['label']
X_test = test_data[['Question', 'Answer 1', 'Answer 2']].fillna('')
y_test = test_data['label']

# tokenizing and padding data - to make sure inputs are of fixed length for feeding to neural network
max_sequence_length = 100
tokenizer = Tokenizer()

# Fit the vocabulary on every training sentence as its own text.
# The two columns are stacked end to end rather than added together:
# `series_a + series_b` joins each pair of strings with no separator, which
# fuses the last word of sentence A with the first word of sentence B.
training_texts = pd.concat(
    [X_train['SENTENCE A'], X_train['SENTENCE B']],
    ignore_index=True,
)
tokenizer.fit_on_texts(training_texts)

# Convert each sentence into a list of integer word indices.
X_train_seq_A = tokenizer.texts_to_sequences(X_train['SENTENCE A'])
X_train_seq_B = tokenizer.texts_to_sequences(X_train['SENTENCE B'])
X_test_seq_A = tokenizer.texts_to_sequences(X_test['Question'])
X_test_seq_B = tokenizer.texts_to_sequences(X_test['Answer 1'])

# Pad/truncate every sequence to exactly `max_sequence_length` entries.
X_train_padded_A = pad_sequences(X_train_seq_A, maxlen=max_sequence_length)
X_train_padded_B = pad_sequences(X_train_seq_B, maxlen=max_sequence_length)
X_test_padded_A = pad_sequences(X_test_seq_A, maxlen=max_sequence_length)
X_test_padded_B = pad_sequences(X_test_seq_B, maxlen=max_sequence_length)


# defining model architecture
# Index 0 is not assigned to any word by the tokenizer, hence the +1.
embedding_dim = 100
vocab_size = 1 + len(tokenizer.word_index)

# One input tensor per sentence of the pair, each a fixed-length index sequence.
input_A, input_B = [Input(shape=(max_sequence_length,)) for _ in range(2)]

# attention mechanism function - re-weights the LSTM outputs before they are pooled
def attention_mechanism(inputs):
    """Attach an attention sub-graph to `inputs` and return the resulting tensor.

    A softmax-activated Dense layer with `max_sequence_length` units produces
    the attention scores, and a Dot layer then contracts axis 1 of the scores
    with axis 1 of `inputs`.

    New Dense and Dot layers are created on every call, so separate calls do
    not share attention weights.

    NOTE(review): callers pass LSTM outputs built with return_sequences=True,
    so axis 1 is presumably the time-step axis - confirm.
    """
    scoring_layer = Dense(max_sequence_length, activation='softmax')
    weights = scoring_layer(inputs)
    return Dot(axes=[1, 1])([weights, inputs])

# embedding layer - a single lookup table shared by both sentences
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim)
embedded_A, embedded_B = [embedding_layer(branch) for branch in (input_A, input_B)]

# LSTM layer - one recurrent layer shared by both sentences; it returns the
# full output sequence because the attention step consumes every time step
lstm_layer = LSTM(64, return_sequences=True)
lstm_output_A, lstm_output_B = [lstm_layer(branch) for branch in (embedded_A, embedded_B)]

# applying attention mechanism to LSTM outputs (each call builds its own layers)
attention_A, attention_B = [
    attention_mechanism(branch) for branch in (lstm_output_A, lstm_output_B)
]

# Global max pooling - reduces each attended sequence to one vector per sample
pooled_A, pooled_B = [GlobalMaxPooling1D()(branch) for branch in (attention_A, attention_B)]

# adding dropout layers - to prevent overfitting
pooled_A, pooled_B = [Dropout(0.5)(branch) for branch in (pooled_A, pooled_B)]

# joining both sentence vectors, followed by another Dropout on the joint vector
concatenated_output = Dropout(0.5)(concatenate([pooled_A, pooled_B], axis=-1))

# dense layer - to extract features, then a single sigmoid unit as the output
dense_layer = Dense(64, activation='relu')(concatenated_output)
output = Dense(1, activation='sigmoid')(dense_layer)

# defining and compiling model
model = Model(inputs=[input_A, input_B], outputs=output)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

# training the model
epochs = 50
batch_size = 64

# 20% of the training samples are held out for validation during fitting.
fit_options = dict(
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.2,
    verbose=1,
)
model.fit(x=[X_train_padded_A, X_train_padded_B], y=y_train, **fit_options)

# evaluating the model
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(x=[X_test_padded_A, X_test_padded_B], y=y_test)
loss, accuracy = test_metrics
print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {accuracy:.4f}')


Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_7 (InputLayer)        [(None, 100)]                0         []                            
                                                                                                  
 input_8 (InputLayer)        [(None, 100)]                0         []                            
                                                                                                  
 embedding_3 (Embedding)     (None, 100, 100)             453900    ['input_7[0][0]',             
                                                                     'input_8[0][0]']             
                                                                                                  
 lstm_3 (LSTM)               (None, 100, 64)              42240     ['embedding_3[0][0]',   

In [None]:
# Sentence pair used as a quick manual check of the trained model
sentence_A = "The sky is red."
sentence_B = "The sky is blue."

# tokenize, then pad, each sentence with the tokenizer fitted on the training data
test_seq_A = tokenizer.texts_to_sequences([sentence_A])
test_padded_A = pad_sequences(test_seq_A, maxlen=max_sequence_length)
test_seq_B = tokenizer.texts_to_sequences([sentence_B])
test_padded_B = pad_sequences(test_seq_B, maxlen=max_sequence_length)

# making predictions
predictions = model.predict([test_padded_A, test_padded_B])

# interpreting the predictions: scores above 0.5 are reported as a contradiction
for row in predictions:
    score = row[0]
    print(f"Sentence A: '{sentence_A}'")
    print(f"Sentence B: '{sentence_B}'")
    print(f"Prediction (probability of contradiction): {score:.4f}")
    print("Prediction: Contradiction" if score > 0.5 else "Prediction: No Contradiction")
    print()

Sentence A: 'The sky is red.'
Sentence B: 'The sky is blue.'
Prediction (probability of contradiction): 0.9695
Prediction: Contradiction

