# Inference Notebook for Co-Attention Siamese Model

### Include Libraries

In [None]:
import re
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

2025-04-03 18:02:25.558159: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-03 18:02:25.568094: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-03 18:02:25.578928: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-03 18:02:25.582082: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-03 18:02:25.590547: I tensorflow/core/platform/cpu_feature_guar

### Setting Model Parameters

In [2]:
# Path of the CSV to run inference on. It is read with pandas below and must
# provide the 'Claim' and 'Evidence' columns.
# Alternative input, kept for quick switching (presumably the dev split):
# input_file_path = './training_data/training_data/ED/dev.csv'
input_file_path = './test_data/ED/test.csv'
# Saved Keras model file that is loaded for prediction.
model_file_path = 'CoAttentionSiameseDeepLearning.keras'
# Destination CSV path for the predictions (referenced by the export cell at
# the end of the notebook).
output_file = './predictions.csv'

# Decision cut-off: model outputs strictly greater than this value are mapped
# to label 1, everything else to 0.
# NOTE(review): presumably tuned on held-out data — confirm where 0.34 comes from.
threshold = 0.34

### Load and Pre-process the Data

In [3]:
# Load the inference data from the CSV at `input_file_path` into a DataFrame.
# Later cells read its 'Claim' and 'Evidence' columns.
data = pd.read_csv(input_file_path)

#### Pre-Process the Text

In [4]:
# Strip "[ref]" markers from both text columns, including the truncated
# forms "[ref" and "ref]".
_REF_MARKER = re.compile(r'\[ref\]|\[ref|ref\]')


def remove_ref(x):
    """Return `x` with every "[ref]", "[ref" and "ref]" occurrence removed."""
    return _REF_MARKER.sub('', x)


for _column in ('Claim', 'Evidence'):
    data[_column] = data[_column].apply(remove_ref)

#### Encode the Texts

In [5]:
from sentence_transformers import SentenceTransformer

# Load the sentence encoder
encoder_model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode the inference inputs (not a training set). Plain Python lists are
# passed instead of pandas Series: `encode` is documented for `str` /
# `list[str]`, and a list keeps the encoder's positional indexing independent
# of the DataFrame's index labels.
claim_embeddings = encoder_model.encode(data['Claim'].tolist())
evidence_embeddings = encoder_model.encode(data['Evidence'].tolist())

  from .autonotebook import tqdm as notebook_tqdm


### Load the Model

In [None]:
# Custom MultiHead Co-Attention layer
class MultiHeadCoAttention(layers.Layer):
    """Multi-head co-attention between a claim input and an evidence input.

    Each input is projected (without bias) to query/key/value tensors of width
    `attention_dim` and split into `num_heads` heads. Claim queries attend over
    evidence keys/values and evidence queries attend over claim keys/values
    using scaled dot-product attention with dropout on the attention weights.
    Each attended context is averaged over its sequence axis and projected back
    to the last-dimension size of the corresponding input.

    Args:
        attention_dim: Total width of the query/key/value projections. Must be
            divisible by `num_heads`.
        num_heads: Number of attention heads.
        dropout_rate: Dropout rate applied to the attention weights.
        **kwargs: Forwarded to `layers.Layer`.

    Call arguments:
        inputs: A pair `[claim, evidence]`. Both are treated as rank-3 tensors
            `(batch, seq_len, features)` (see `split_heads`).
        training: Forwarded to the dropout layer.

    Returns:
        A tuple `(claim_output, evidence_output)` with shapes
        `(batch, 1, claim_dim)` and `(batch, 1, evidence_dim)`.
    """

    def __init__(self, attention_dim=128, num_heads=4, dropout_rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.attention_dim = attention_dim
        self.num_heads = num_heads
        self.head_dim = attention_dim // num_heads
        self.dropout_rate = dropout_rate

        # Ensure the attention dimension is divisible by the number of heads
        assert attention_dim % num_heads == 0, "Attention dimension must be divisible by number of heads"

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        Declaring the hyperparameters explicitly keeps saving and reloading of
        models that contain this layer independent of any framework-side
        automatic config inference.
        """
        config = super().get_config()
        config.update({
            "attention_dim": self.attention_dim,
            "num_heads": self.num_heads,
            "dropout_rate": self.dropout_rate,
        })
        return config

    def build(self, input_shape):
        """Create the sub-layers from the `[claim_shape, evidence_shape]` pair."""
        # Ensure we have two inputs
        assert isinstance(input_shape, list) and len(input_shape) == 2, \
            "MultiHeadCoAttention expects a list of two input shapes: [claim, evidence]"

        self.claim_dim = input_shape[0][-1]
        self.evidence_dim = input_shape[1][-1]

        # Claim projections: one Dense yields Q, K and V concatenated on the last axis
        self.claim_projection = layers.Dense(self.attention_dim * 3, use_bias=False)

        # Evidence projections: same layout as the claim projection
        self.evidence_projection = layers.Dense(self.attention_dim * 3, use_bias=False)

        # Output projections back to each input's own feature size
        self.claim_output_projection = layers.Dense(self.claim_dim, use_bias=False)
        self.evidence_output_projection = layers.Dense(self.evidence_dim, use_bias=False)

        # Dropout layer (shared by both attention directions)
        self.dropout = layers.Dropout(self.dropout_rate)

        super().build(input_shape)

    def split_heads(self, x):
        """Reshape `(batch, seq_len, attention_dim)` to `(batch, num_heads, seq_len, head_dim)`.

        Batch size and sequence length are read dynamically from the tensor.
        """
        batch_size = tf.shape(x)[0]
        seq_len = tf.shape(x)[1]

        x = tf.reshape(x, [batch_size, seq_len, self.num_heads, self.head_dim])
        return tf.transpose(x, [0, 2, 1, 3])

    def call(self, inputs, training=None):
        """Apply co-attention in both directions and return the pooled outputs."""
        # Unpack inputs
        claim, evidence = inputs

        # Project the claim and split the result into query, key, value
        claim_proj = self.claim_projection(claim)
        claim_query, claim_key, claim_value = tf.split(claim_proj, 3, axis=-1)

        # Project the evidence and split the result into query, key, value
        evidence_proj = self.evidence_projection(evidence)
        evidence_query, evidence_key, evidence_value = tf.split(evidence_proj, 3, axis=-1)

        # Split heads
        claim_query_heads = self.split_heads(claim_query)
        claim_key_heads = self.split_heads(claim_key)
        claim_value_heads = self.split_heads(claim_value)

        evidence_query_heads = self.split_heads(evidence_query)
        evidence_key_heads = self.split_heads(evidence_key)
        evidence_value_heads = self.split_heads(evidence_value)

        # Claim attends to evidence. The sqrt(head_dim) scale is cast to the
        # scores' dtype so the division also works when the layer computes in
        # a dtype other than float32.
        claim_evidence_scores = tf.matmul(claim_query_heads, evidence_key_heads, transpose_b=True)
        claim_evidence_scores = claim_evidence_scores / tf.sqrt(
            tf.cast(self.head_dim, claim_evidence_scores.dtype)
        )

        claim_evidence_attention = tf.nn.softmax(claim_evidence_scores, axis=-1)
        claim_evidence_attention = self.dropout(claim_evidence_attention, training=training)

        claim_context = tf.matmul(claim_evidence_attention, evidence_value_heads)

        # Evidence attends to claim, with the same scaling
        evidence_claim_scores = tf.matmul(evidence_query_heads, claim_key_heads, transpose_b=True)
        evidence_claim_scores = evidence_claim_scores / tf.sqrt(
            tf.cast(self.head_dim, evidence_claim_scores.dtype)
        )

        evidence_claim_attention = tf.nn.softmax(evidence_claim_scores, axis=-1)
        evidence_claim_attention = self.dropout(evidence_claim_attention, training=training)

        evidence_context = tf.matmul(evidence_claim_attention, claim_value_heads)

        # Move the head axis back next to head_dim: (batch, seq_len, num_heads, head_dim)
        claim_context = tf.transpose(claim_context, [0, 2, 1, 3])
        evidence_context = tf.transpose(evidence_context, [0, 2, 1, 3])

        # Average-pool over the sequence dimension instead of forcing seq_len=1
        claim_context = tf.reduce_mean(claim_context, axis=1, keepdims=True)
        evidence_context = tf.reduce_mean(evidence_context, axis=1, keepdims=True)

        # Merge the heads: (batch, 1, num_heads, head_dim) -> (batch, 1, attention_dim)
        batch_size = tf.shape(claim)[0]
        claim_context = tf.reshape(claim_context, [batch_size, 1, self.attention_dim])
        evidence_context = tf.reshape(evidence_context, [batch_size, 1, self.attention_dim])

        # Project back to original dimensions using Dense layers
        claim_output = self.claim_output_projection(claim_context)
        evidence_output = self.evidence_output_projection(evidence_context)

        return claim_output, evidence_output

In [None]:
# Restore the saved model. The custom layer class is registered under its
# name so that Keras can deserialize it.
custom_layers = {'MultiHeadCoAttention': MultiHeadCoAttention}
model = tf.keras.models.load_model(model_file_path, custom_objects=custom_layers)

I0000 00:00:1743699756.935031   34163 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1743699756.936292   34163 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1743699756.936467   34163 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1743699756.937450   34163 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

### Predict the Results

In [8]:
# Run the loaded model on the paired claim/evidence embeddings. `results`
# holds the raw model outputs, which are thresholded in the next step.
results = model.predict([claim_embeddings, evidence_embeddings])

I0000 00:00:1743699757.776063   34363 service.cc:146] XLA service 0x7c7ff8003480 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743699757.776085   34363 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Ti Laptop GPU, Compute Capability 8.6
2025-04-03 18:02:37.786808: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-04-03 18:02:37.834348: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907
2025-04-03 18:02:37.854512: W external/local_xla/xla/service/gpu/nvptx_compiler.cc:762] The NVIDIA driver's CUDA version is 12.2 which is older than the ptxas CUDA version (12.3.107). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward

[1m102/147[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 1000us/step

I0000 00:00:1743699758.403518   34363 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step 


#### Threshold the Results

In [9]:
# Binarize the raw outputs: 1 where the output is strictly greater than
# `threshold`, otherwise 0.
threshold_results = (results > threshold).astype(int)

### Write the Results to CSV

In [13]:
# Wrap the binary predictions in a single-column DataFrame named 'prediction'
# and preview the first 10 rows.
results_df = pd.DataFrame(threshold_results, columns=['prediction'])
results_df.head(10)

Unnamed: 0,prediction
0,0
1,0
2,1
3,0
4,0
5,0
6,0
7,0
8,0
9,0


In [None]:
# NOTE(review): the export is currently disabled, so running this notebook does
# not write `output_file`. Uncomment the line below to save the predictions.
# results_df.to_csv(output_file, index=False)