# Imports

If running in ITDC, TensorFlow may log messages about not finding CUDA drivers when it is imported. These can be ignored because we are not using a CUDA GPU.

In [3]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, Model

2024-09-29 02:40:01.430741: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-29 02:40:01.626681: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-29 02:40:01.703956: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-29 02:40:01.704004: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-29 02:40:01.705634: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

# Base model
To construct the Siamese neural network, we use a single pretrained [VGG16 model](https://arxiv.org/abs/1409.1556) as the feature extractor, applied to both input images so that the two branches share weights. All input images are 400x400 pixels with 3 color channels. To minimize computational cost, we freeze the pretrained CNN layers and train only the final comparison layers.

In [4]:
# Shared feature extractor: VGG16 with ImageNet weights and without its
# fully-connected classification head, sized for 400x400 RGB inputs.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(400, 400, 3),
)

# Freeze every pretrained layer so that only layers added on top of the base
# are updated during training.
for vgg_layer in base_model.layers:
    vgg_layer.trainable = False

2024-09-29 02:40:13.629272: I tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device_factory.cc:118] Created 1 TensorFlow NextPluggableDevices. Physical device type: XPU


# Siamese network construction

We extract features for both images, flatten them, and compute their difference. This is fed into the fully-connected comparison layers.

In [5]:
def build_comparison_model():
    """Assemble the Siamese comparison network on top of the shared base model.

    Both 400x400x3 inputs are passed through the same module-level
    ``base_model``, so the two branches share weights. The flattened feature
    maps are subtracted element-wise and the difference is fed through
    Dense(256, relu) -> Dense(128, relu) -> Dense(1, sigmoid).

    Returns:
        An uncompiled Keras ``Model`` that takes ``[image_1, image_2]`` and
        outputs one sigmoid score per pair.
    """
    image_shape = (400, 400, 3)

    # Build both branches stage by stage: inputs, shared feature extraction,
    # then flattening. Layers are created in the same order for each stage.
    branch_inputs = [layers.Input(shape=image_shape) for _ in range(2)]
    branch_features = [base_model(branch_input) for branch_input in branch_inputs]
    branch_vectors = [layers.Flatten()(feature_map) for feature_map in branch_features]

    # Signed element-wise difference: first branch minus second branch.
    hidden = layers.Subtract()(branch_vectors)

    # Fully-connected comparison head.
    for units in (256, 128):
        hidden = layers.Dense(units, activation='relu')(hidden)
    score = layers.Dense(1, activation='sigmoid')(hidden)

    return Model(inputs=branch_inputs, outputs=score)

In [6]:
# Instantiate the Siamese network and configure it for binary classification
# of image pairs (single sigmoid output).
comparison_model = build_comparison_model()
comparison_model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Loading the dataset
The model trains on 1000 labeled image pairs (2000 images in total): one correct pair (label 1) and one marginally incorrect pair (label 0) for each of 500 CSS sample rulesets. See `generate_data.py` for how the training data is generated. Note that each reference `i` has only two *distinct* associated images:

`(ref_i_right.png, ref_i_wrong.png), (ref_i_right.png, ref_i_right.png)`

So, for space efficiency, we only store `ref_i_right.png` and `ref_i_wrong.png` and construct the lists of image paths manually.

In [7]:
def load_image(image_path):
    """Read a PNG file and return it as a 400x400x3 float tensor.

    Args:
        image_path: Path of the PNG file to read.

    Returns:
        The decoded image, forced to 3 channels, resized to 400x400 and
        divided by 255.0.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (400, 400))
    return resized / 255.0

def load_pairs(image1_path, image2_path, label):
    """Load both images of a pair and attach the pair's label.

    Args:
        image1_path: Path of the first image of the pair.
        image2_path: Path of the second image of the pair.
        label: Label for the pair; returned unchanged.

    Returns:
        ``((image1, image2), label)``, i.e. a two-input feature tuple followed
        by its target.
    """
    first_image = load_image(image1_path)
    second_image = load_image(image2_path)
    image_pair = (first_image, second_image)
    return image_pair, label

In [11]:
# Number of CSS reference rulesets; files are numbered 1..NUM_REFERENCES.
NUM_REFERENCES = 500

# Every reference i contributes two pairs, in this order:
#   (ref_i_right.png, ref_i_right.png) -> label 1
#   (ref_i_right.png, ref_i_wrong.png) -> label 0
image_paths_1 = [
    f"dataset/ref_{i}_right.png"
    for i in range(1, NUM_REFERENCES + 1)
    for _ in range(2)
]
image_paths_2 = [
    f"dataset/ref_{i}_{either}.png"
    for i in range(1, NUM_REFERENCES + 1)
    for either in ["right", "wrong"]
]
labels = [1, 0] * NUM_REFERENCES

train_dataset = tf.data.Dataset.from_tensor_slices((image_paths_1, image_paths_2, labels))
train_dataset = (
    train_dataset
    # Model.fit does not shuffle tf.data datasets, so shuffle here. Shuffling
    # the path strings before decoding keeps the buffer cheap; a buffer
    # covering the whole dataset gives a full reshuffle on every epoch.
    .shuffle(buffer_size=len(labels))
    # Decode images in parallel instead of one at a time.
    .map(load_pairs, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    # Prepare the next batch while the current one is being trained on.
    .prefetch(tf.data.AUTOTUNE)
)

# Training

In [12]:
# Train for 10 epochs on the training pairs, then write the trained model to
# `siamese_model.h5`.
comparison_model.fit(x=train_dataset, epochs=10)
comparison_model.save(filepath='siamese_model.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


# Testing

In [23]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import numpy as np

# Load the file written by the training cell
# (`comparison_model.save('siamese_model.h5')`); this notebook never creates a
# file named 'siamese.h5'.
model = load_model('siamese_model.h5')

def preprocess_image(img_path):
    """Load an image file as a single-image batch for ``model.predict``.

    Args:
        img_path: Path of the image file to load.

    Returns:
        An array with a leading batch axis of size 1, holding the image
        loaded at 400x400 with its values divided by 255.0.
    """
    pil_image = image.load_img(img_path, target_size=(400, 400))
    pixels = image.img_to_array(pil_image)
    # Add the batch axis in front, then rescale.
    single_batch = np.expand_dims(pixels, axis=0)
    return single_batch / 255.0

# Paths of the two screenshots to compare.
image1_path, image2_path = "test_right_3.png", "test_wrong_3.png"

# Turn each file into a single-image batch.
image1, image2 = (preprocess_image(path) for path in (image1_path, image2_path))

# The model takes its two inputs as a list, in order.
input_pair = [image1, image2]

# Training used label 1 for (right, right) pairs and label 0 for
# (right, wrong) pairs, so a score close to 1 means the model considers the
# two images to match.
prediction = model.predict(input_pair)
print(f"Prediction: {prediction}")

Prediction: [[0.9223337]]
