# Siamese Training: Training the Model

#### General Steps to Follow

1. Importing Packages
2. Defining x_train, x_test, y_train, y_test
3. Building and training the siamese network
4. Model Evaluation

## 1) Importing Packages

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Flatten, Dense, Conv2D, MaxPooling2D, Layer,Concatenate, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

2025-05-23 17:00:11.267425: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748008811.328404  259461 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748008811.350266  259461 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748008811.475472  259461 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748008811.475511  259461 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748008811.475514  259461 computation_placer.cc:177] computation placer alr

In [53]:
tf.__version__

'2.19.0'

### ----------------------------------------------------------------------------------------------------------------------------------------------------------

## 2) Defining x_train, x_test, y_train, y_test

#### Loading the training and test data from "other data" folder

In [2]:
train_data = np.load("../other-data/train_data.npy", allow_pickle = True)
test_data = np.load("../other-data/test_data.npy"  , allow_pickle = True)

* x_train and x_test will contain pairs of the anchor image and the validation image(positive or negative image).
* y_train and y_test will contain the label of each pair:
  - 1 if the pairs are similar images.
  - 0 if the pairs are different images.

In [3]:
x_train = train_data[:,0:2]
y_train = train_data[:,2]
x_test = test_data[:,0:2]
y_test = test_data[:,2]

#### Reshaping the input

In [4]:
x1_train = x_train[:,0]                   #anchor images
x1_train = np.array(x1_train.tolist())    
x2_train = x_train[:,1]                   #validation images(positive/negative)
x2_train = np.array(x2_train.tolist())

x1_test = x_test[:,0]                    #anchor images
x1_test = np.array(x1_test.tolist())
x2_test = x_test[:,1]                    #validation images(positive/negative)
x2_test = np.array(x2_test.tolist())

y_train = tf.convert_to_tensor(y_train.tolist())
y_test = tf.convert_to_tensor(y_test.tolist())

2025-05-23 17:00:24.225076: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-05-23 17:00:24.225102: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:167] env: CUDA_VISIBLE_DEVICES="-1"
2025-05-23 17:00:24.225108: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:170] CUDA_VISIBLE_DEVICES is set to -1 - this hides all GPUs from CUDA
2025-05-23 17:00:24.225114: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:178] verbose logging is disabled. Rerun with verbose logging (usually --v=1 or --vmodule=cuda_diagnostics=1) to get more diagnostic output from this module
2025-05-23 17:00:24.225119: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:183] retrieving CUDA diagnostic information for host: amroPC
2025-05-23 17:00:24.225122: I external/local_xla/xla/stream_executor/cuda/cuda_d

#### Checking the shapes

In [5]:
print("Train Data:")
print("Shape of anchor images    : ", x1_train.shape)
print("Shape of validation images: ", x2_train.shape)
print("Shape of labels           : ", y_train.shape)

print("--------------------------------------------------")

print("Test Data:")
print("Shape of anchor images    : ", x1_test.shape)
print("Shape of validation images: ", x2_test.shape)
print("Shape of labels           : ", y_test.shape)

Train Data:
Shape of anchor images    :  (961, 105, 105, 3)
Shape of validation images:  (961, 105, 105, 3)
Shape of labels           :  (961,)
--------------------------------------------------
Test Data:
Shape of anchor images    :  (240, 105, 105, 3)
Shape of validation images:  (240, 105, 105, 3)
Shape of labels           :  (240,)


### ----------------------------------------------------------------------------------------------------------------------------------------------------------

## 3) Building and training the siamese network

### 3.1 Building the base of the network

In [6]:
inp_shape = [105,105,3]

In [7]:
def make_base_network():
    model = Sequential(
        [
            Input(shape = inp_shape, name = "input_image"),
            Conv2D(64, (10, 10), activation = 'relu'),
            MaxPooling2D(64, (2,2), padding = 'same'),
            
            Conv2D(128, (7, 7), activation = 'relu'),
            MaxPooling2D(64, (2,2), padding = 'same'),
            
            Conv2D(128, (4, 4), activation = 'relu'),
            MaxPooling2D(64, (2,2), padding = 'same'),
            
            Conv2D(256, (4, 4), activation = 'relu'),
            
            Flatten(),
            
            Dense(4096, activation = 'sigmoid')
        ], name = "BaseNetwork"
    )
    
    return model

In [8]:
base_model = make_base_network()

In [9]:
base_model.summary()

### --------------------------------------------------------------------------------

### 3.2 Building tthe L1Dist layer

In [10]:
class L1Dist(Layer):
    def __init__(self, **kwargs):
        super(L1Dist, self).__init__(**kwargs)

    def call(self, anchor, validation):
        return tf.abs(anchor - validation)

### --------------------------------------------------------------------------------

### 3.3 Defining the siamese model

In [11]:
def make_siamese_model():
    
    # Anchor input image to the network
    anc_image = Input(shape = inp_shape, name = "input_image")
    
    # Validation input image to the network
    validation_image = Input(shape = inp_shape, name = "Validation_image")
    
    # creating a base model
    base_model = make_base_network()
    
    # Encoding the anchor image
    anchor = base_model(anc_image)

    # Encoding the validation image
    validation = base_model(validation_image)
    
    # Using L1Dist Layer to calculate the L1 distance between the two encodings
    distance_layer = L1Dist()
    distance_layer._name = "distance_layer"
    distance = distance_layer(anchor, validation)

    
    # Defining the output layer
    output_layer = Dense(1, activation = 'linear')(distance)
    
    siamese_model = Model(inputs = [anc_image, validation_image], outputs = output_layer, name = "SiameseNetwork")
    
    return siamese_model

In [20]:
siamese_model = make_siamese_model()
siamese_model.summary()

### --------------------------------------------------------------------------------

### 3.4 Compiling and training the siamese model

In [21]:
siamese_model.compile(
    optimizer = Adam(learning_rate = 0.0001),
    loss = BinaryCrossentropy(from_logits = True)
)

In [22]:
siamese_model.fit([x1_train, x2_train], y_train, epochs = 10)

Epoch 1/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m230s[0m 7s/step - loss: 0.6128
Epoch 2/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 7s/step - loss: 0.4892
Epoch 3/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 7s/step - loss: 0.3685
Epoch 4/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 7s/step - loss: 0.3159
Epoch 5/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 7s/step - loss: 0.2467
Epoch 6/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 7s/step - loss: 0.2252
Epoch 7/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 7s/step - loss: 0.1961
Epoch 8/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 7s/step - loss: 0.1431
Epoch 9/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 7s/step - loss: 0.3555
Epoch 10/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 7s/step - loss: 0.1618

<keras.src.callbacks.history.History at 0x7883e2fd0520>

### ----------------------------------------------------------------------------------------------------------------------------------------------------------

## 4) Model Evaluation

In [39]:
def model_eval(y, y_hat):
    m = y.shape[0]
    
    y_hat = y_hat.numpy()
    for i in range(len(y_hat)):
        if(y_hat[i] >= 0.5):
            y_hat[i] = 1
        else:
            y_hat[i] = 0
    
    sum = 0
    for i in range(y.shape[0]):
        if int(y[i]) == int(y_hat[i]):
            sum+=1
    accuracy = 100*(sum/m)
    print("Accuracy =", accuracy)

#### Evaluation on training data

In [42]:
output1 = siamese_model.predict([x1_train, x2_train])

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 646ms/step


In [43]:
y_hat = tf.nn.sigmoid(output1)
y = y_train
model_eval(y, y_hat)

Accuracy = 92.7159209157128


  if int(y[i]) == int(y_hat[i]):


#### Evaluation on test data

In [44]:
output2 = siamese_model.predict([x1_test, x2_test])

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 668ms/step


In [45]:
y_hat = tf.nn.sigmoid(output2)
y = y_test
model_eval(y, y_hat)

Accuracy = 85.83333333333333


  if int(y[i]) == int(y_hat[i]):


### Saving the model

In [46]:
siamese_model.save("../weights/siamese_model.h5")



### Loading the model

In [48]:
model = tf.keras.models.load_model('../weights/siamese_model.h5', 
                                   custom_objects={'L1Dist':L1Dist, 'BinaryCrossentropy':tf.losses.BinaryCrossentropy})

