In [15]:
import tensorflow.keras.backend as K
import tensorflow as tf
import cv2 as cv
import numpy as np
import utils.model_helper as mh
import os
import matplotlib.pyplot as plt

In [16]:
# Show the TensorFlow version this notebook was run against.
tf.__version__

'2.12.0'

In [17]:
# Directory holding the dataset images. Later cells split each file name on '_'
# and use the first part as the class label.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
DATASET_PATH = '/home/irizqy/ml_ws/bangkit-ws/data/bizz.it-sim_dataset'

In [18]:
# List every entry in the dataset directory and derive each entry's class label
# from the part of its name before the first underscore.
im_path_arr = os.listdir(DATASET_PATH)
im_classes_arr = np.asarray([name.split('_')[0] for name in im_path_arr])

In [19]:
# Sorted, de-duplicated class labels found in the dataset.
classes = np.unique(im_classes_arr)

# Map each class label to its position in `classes`.
dict_keys = dict(zip(classes.flatten(), range(classes.size)))

In [20]:
# For each class (in `classes` order), the positions in `im_path_arr` of its files.
grouped_im_path = [np.flatnonzero(im_classes_arr == label) for label in classes]

In [None]:
def _load_pair_image(file_name, size=(150, 150)):
    """Read one dataset image and return it as an RGB array scaled to [0, 1]."""
    im_path = os.path.join(DATASET_PATH, file_name)
    image = cv.imread(im_path)
    if image is None:
        # cv.imread reports an unreadable/missing file by returning None rather
        # than raising, which would otherwise surface as an opaque cvtColor error.
        raise ValueError(f'could not read image: {im_path}')
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    image = cv.resize(image, size)
    return image / 255


def make_train_test_pairs(ims_arr, percentage):
    """Build labelled image pairs and split them into train and test sets.

    For every file in `ims_arr` two pairs are produced:
      * (image, random image of the same class)       -> label 1
      * (image, random image of a different class)    -> label 0
    The same-class partner is drawn from the whole class group, so it may be
    the anchor image itself.

    Args:
        ims_arr: iterable of file names inside DATASET_PATH; the text before
            the first '_' must be a key of `dict_keys`.
        percentage: fraction (0..1) of the generated pairs moved to the test set.

    Returns:
        (train_pairs, train_labels, test_pairs, test_labels) as numpy arrays.
        Train pairs keep their generation order; test pairs are in random order.

    Raises:
        ValueError: if `percentage` is outside [0, 1], an image cannot be read,
            or no image of a different class exists for a negative pair.
    """
    if not 0 <= percentage <= 1:
        raise ValueError('percentage must be between 0 and 1')

    pairs = []
    labels = []

    for file in ims_arr:
        file_cls = file.split('_')[0]
        cls = dict_keys[file_cls]

        current_im = _load_pair_image(file)

        # Positive pair: a random image that shares the anchor's class.
        pos_idx = np.random.choice(grouped_im_path[cls], 1)[0]
        pairs.append((current_im, _load_pair_image(im_path_arr[pos_idx])))
        labels.append(1)

        # Negative pair: a random image from any other class.
        neg_candidates = np.where(im_classes_arr != file_cls)[0]
        if neg_candidates.size == 0:
            raise ValueError(
                f'no image of a different class available for {file!r}')
        neg_idx = np.random.choice(neg_candidates, 1)[0]
        pairs.append((current_im, _load_pair_image(im_path_arr[neg_idx])))
        labels.append(0)

    arr_length = len(pairs)
    num_of_data = int(percentage * arr_length)

    # Draw the test indices without replacement from *all* pairs. The previous
    # randint(arr_length - 1) could never select the last pair and raised once
    # only one element was left (e.g. percentage == 1).
    shuffled = np.random.permutation(arr_length)
    test_idx = shuffled[:num_of_data]
    # Sorting keeps the remaining train pairs in their original order.
    train_idx = np.sort(shuffled[num_of_data:])

    train_pairs = [pairs[i] for i in train_idx]
    train_labels = [labels[i] for i in train_idx]
    test_pairs = [pairs[i] for i in test_idx]
    test_labels = [labels[i] for i in test_idx]

    return np.asarray(train_pairs), np.asarray(train_labels), np.asarray(test_pairs), np.asarray(test_labels)

In [None]:
# Build pairs for every listed dataset file; the second argument (.2) is the
# fraction of pairs moved into the test split.
train_pairs, train_labels, test_pairs, test_labels = make_train_test_pairs(im_path_arr, .2)

In [None]:
# Preview training pairs side by side: each pair occupies two adjacent grid
# cells and the pair's label is shown as the title of the left image.
# `w` and `h` are not used in this cell; kept in case other cells read them.
w = 20
h = 15
fig = plt.figure(figsize=(8, 8))
columns = 8
rows = 8
# The grid holds rows*columns // 2 pairs. The previous range stopped one pair
# short (31 instead of 32) and did not guard against having fewer pairs.
num_pairs = min((columns * rows) // 2, len(train_pairs))
for index in range(num_pairs):
    left_slot = 2 * index + 1  # subplot positions are 1-based
    ax = fig.add_subplot(rows, columns, left_slot)
    ax.set_title(train_labels[index])
    ax.imshow(train_pairs[index][0], cmap='gray')
    ax.axis('off')
    ax = fig.add_subplot(rows, columns, left_slot + 1)
    ax.imshow(train_pairs[index][1], cmap='gray')
    ax.axis('off')


In [None]:
# Model input shape; matches the 150x150 three-channel images produced by
# make_train_test_pairs.
IMG_SHAPE = (150, 150, 3)
# Batch size and number of epochs passed to model.fit below.
BATCH_SIZE = 8
EPOCHS = 100

In [None]:
class SiameseModel:
    """Builder for a small convolutional embedding network.

    The constructor only records the configuration; `_build` assembles and
    returns the Keras model.
    """

    def __init__(self, input_shape, embedding_dim=224):
        """Store the input image shape and the size of the output embedding."""
        self.input_shape, self.embedding_dim = input_shape, embedding_dim

    def _build(self):
        """Assemble the network and return it as a `tf.keras.Model`.

        Architecture: two Conv2D(64, 3x3, relu) + MaxPooling2D stages (the
        first followed by Dropout(0.1)), then Flatten and a linear Dense layer
        with `embedding_dim` units.
        """
        layers = tf.keras.layers

        image_in = layers.Input(self.input_shape)

        # First convolutional stage, with light dropout.
        net = layers.Conv2D(64, (3, 3), activation='relu')(image_in)
        net = layers.MaxPooling2D()(net)
        net = layers.Dropout(.1)(net)

        # Second convolutional stage.
        net = layers.Conv2D(64, (3, 3), activation='relu')(net)
        net = layers.MaxPooling2D()(net)

        # Flatten the feature map and project it to the embedding (no activation).
        embedding = layers.Dense(self.embedding_dim)(layers.Flatten()(net))

        return tf.keras.Model(image_in, embedding)

In [None]:
# Build the shared feature extractor from a pre-trained ResNet50 backbone.
# (The SiameseModel-based alternative is left commented out below.)
# sm = SiameseModel(IMG_SHAPE)

# featureExtractor = sm._build()
# featureExtractor.summary()

# ResNet50 with ImageNet weights and without its classification head.
# NOTE(review): the pair images built earlier in this notebook are scaled to
# [0, 1]; Keras documents `resnet.preprocess_input` for these weights —
# confirm the scaling is intentional.
base_cnn =  tf.keras.applications.resnet.ResNet50(
    weights='imagenet', input_shape=IMG_SHAPE, include_top=False
)

# Use the output of the layer named 'conv5_block3_2_conv' as the feature map.
cnn_model = base_cnn.get_layer('conv5_block3_2_conv')

# Flatten that feature map and project it to a 128-unit ReLU embedding.
flatten = tf.keras.layers.Flatten()(cnn_model.output)
# dense_1 = tf.keras.layers.Dense(units=256, activation='relu')(flatten)
dense_2 = tf.keras.layers.Dense(units=128, activation='relu')(flatten)

featureExtractor = tf.keras.Model(base_cnn.input, dense_2)

# Freeze every layer except the last 15 of the assembled model.
for layer in featureExtractor.layers[:-15]:
    layer.trainable = False

In [None]:
# Print the layer-by-layer summary of the feature extractor.
featureExtractor.summary()

In [None]:
def euclidean_distance(vectors):
    """Return the row-wise Euclidean distance between two feature tensors.

    `vectors` is a pair (feats_A, feats_B). The squared differences are summed
    along axis 1 with the reduced axis kept, and the sum is clamped to at least
    K.epsilon() before the square root is taken.
    """
    feats_A, feats_B = vectors
    difference = feats_A - feats_B
    squared_distance = K.sum(K.square(difference), axis=1, keepdims=True)
    # Clamp before sqrt so the result never comes from an exact zero.
    clamped = K.maximum(squared_distance, K.epsilon())
    return K.sqrt(clamped)

In [None]:
def cosine_similarity(vectors):
    """Return the per-sample cosine similarity between two feature tensors.

    Args:
        vectors: pair (featsA, featsB) of tensors with matching shapes; the
            feature axis is axis 1 (same convention as euclidean_distance).

    Returns:
        Tensor with the feature axis reduced to size 1, holding
        dot(a, b) / (||a|| * ||b||) for each row.
    """
    (featsA, featsB) = vectors
    # Reduce along the feature axis only. Without `axis` the products were
    # summed over the whole batch, giving every sample the same numerator.
    dot_product = K.sum(featsA * featsB, axis=1, keepdims=True)
    norm_featsA = K.sqrt(K.sum(K.square(featsA), axis=1, keepdims=True))
    norm_featsB = K.sqrt(K.sum(K.square(featsB), axis=1, keepdims=True))
    # Clamp the denominator so an all-zero feature vector does not divide by zero.
    norm_product = K.maximum(norm_featsA * norm_featsB, K.epsilon())

    return dot_product / norm_product

In [None]:
# configure the siamese network
print("[INFO] building siamese network...")
imgA = tf.keras.layers.Input(shape=IMG_SHAPE)
imgB = tf.keras.layers.Input(shape=IMG_SHAPE)
featsA = featureExtractor(imgA)
featsB = featureExtractor(imgB)

In [None]:
# finally, construct the siamese network
distance = tf.keras.layers.Lambda(euclidean_distance)([featsA, featsB])
outputs = tf.keras.layers.Dense(1, activation="sigmoid")(distance)
model = tf.keras.Model(inputs=[imgA, imgB], outputs=outputs)

In [None]:
# Print the layer-by-layer summary of the full siamese model.
model.summary()

In [None]:
# NOTE(review): clear_session() runs here after `model` was already built in
# earlier cells; it is normally called before constructing a model — confirm
# this placement is intended.
tf.keras.backend.clear_session()
# compile the model
# Binary cross-entropy against the pair labels (1 = same class, 0 = different).
print("[INFO] compiling model...")
model.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.Adam(),
	metrics=["accuracy"])
# train the model
# Index 0 / 1 along axis 1 of the pair arrays select the first / second image
# of every pair; the test split is used as validation data.
print("[INFO] training model...")
history = model.fit(
	[train_pairs[:, 0], train_pairs[:, 1]], train_labels[:],
	validation_data=([test_pairs[:, 0], test_pairs[:, 1]], test_labels[:]),
	batch_size=BATCH_SIZE, 
	epochs=EPOCHS)

In [None]:
def plot_training(H):
    """Plot training/validation loss and accuracy from a training history.

    `H` must expose a `history` mapping containing the series "loss",
    "val_loss", "accuracy" and "val_accuracy". The curves are drawn on a new
    figure using the "ggplot" style; nothing is written to disk.
    """
    plt.style.use("ggplot")
    plt.figure()

    # (history key, legend label), drawn in this order.
    curves = (
        ("loss", "train_loss"),
        ("val_loss", "val_loss"),
        ("accuracy", "train_acc"),
        ("val_accuracy", "val_acc"),
    )
    for history_key, legend_label in curves:
        plt.plot(H.history[history_key], label=legend_label)

    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")

In [None]:
# Draw the loss/accuracy curves recorded by model.fit above.
plot_training(history)

In [None]:
# Persist the trained siamese model to disk.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
model.save('/home/irizqy/ml_ws/bangkit-ws/src/logo-detector/im_similar')

In [2]:
# Reload the model saved by the previous cell (same path), replacing `model`.
# NOTE(review): this cell's execution count (In [2]) is lower than the cells
# above, so it appears to have been run in a different kernel session; it needs
# `tf` to be imported first.
model = tf.keras.models.load_model('/home/irizqy/ml_ws/bangkit-ws/src/logo-detector/im_similar')

2023-06-08 00:38:55.141804: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-06-08 00:38:55.142394: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [13]:
# Score one pair of images with the loaded siamese model.
# NOTE(review): Image and ImageOps are imported but not used in this cell.
from PIL import Image, ImageOps

# Stays empty in this cell: the loop that would fill it is commented out below.
preds = []

# Names of all dataset files (same directory as DATASET_PATH).
test_data = os.listdir('/home/irizqy/ml_ws/bangkit-ws/data/bizz.it-sim_dataset')

# Candidate image paths; only path_im1_2 and path_im2_2 are used below.
path_im1_1 = '/home/irizqy/ml_ws/bangkit-ws/src/logo-detector/cropped-logo0.jpg'
path_im1_2 = '/home/irizqy/Downloads/yamie-panda.jpeg'

path_im2_1 = '/home/irizqy/ml_ws/bangkit-ws/data/bizz.it-sim_dataset/sabana_12.jpg'
path_im2_2 = '/home/irizqy/Downloads/Screenshot from 2023-06-08 00-18-31.png'

# mh.adjust_im lives in utils.model_helper (not shown here); presumably it loads
# the image and returns a model-ready batch of the given size — TODO confirm.
im1 = mh.adjust_im(path_im1_2, (150, 150))
im2 = mh.adjust_im(path_im2_2, (150, 150))

# Last expression of the cell: the model's output for this single pair.
model.predict((im1, im2))[0][0]

# Disabled: compare im1 against every dataset image and collect the scores in `preds`.
# for file in test_data:
#     im2 = cv.imread(os.path.join('/home/irizqy/ml_ws/bangkit-ws/data/bizz.it-sim_dataset', file))
#     print(file)
#     im2 = cv.resize(im2, (150, 150))
#     im2 = cv.cvtColor(im2, cv.COLOR_BGR2RGB)

#     im2 = im2 / 255

#     image2 = np.expand_dims(im2, 0)
#     pred = model.predict((im1, image2))[0][0]
#     preds.append(pred)

# preds = np.asarray(preds)



0.0017043775

In [None]:
# Report the index and value of the highest score in `preds`.
# NOTE(review): `preds` is only filled by the loop that is commented out in the
# previous cell; np.argmax raises ValueError on an empty sequence — confirm
# before re-running.
max_idx = np.argmax(preds)
print(max_idx, preds[max_idx])

In [None]:
# File name of the dataset entry at the best-scoring index.
test_data[max_idx]