In [10]:
import numpy as np
import tensorflow as tf
import os

from triplet_dataset_loader import *
from TL_class import SiameseTripletModel
from TL_siamese_network import generate_siamese_triplet_network
from top_accuracy_functions import *

In [11]:
database_path = "../databases/my-fruit-recognition"
folder_path_walk = True
split_ratio = (0.8, 0.1, 0.1)
image_size = (100,100)
batch_size = 32

auto = tf.data.AUTOTUNE

learning_rate = 0.0001
steps_per_epoch = 50
validation_steps = 10
epochs = 5

# Model Creation & Training

In [12]:
train_dataset, val_dataset, test_dataset = create_dataset(database_path, split_ratio, image_size, batch_size, folder_path_walk)

triplet_siamese_network = generate_siamese_triplet_network(image_size)

triplet_siamese_model = SiameseTripletModel(triplet_siamese_network)

triplet_siamese_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate))
triplet_siamese_model.summary()

Label names: ['Banana', 'Apple/Apple A', 'Kiwi B', 'Apple/Apple F', 'guava B', 'Tomatoes', 'Mango', 'Persimmon', 'Kiwi C', 'Apple/Apple C', 'Apple/Apple B', 'Peach', 'Pitaya', 'muskmelon', 'Apple/Apple E', 'Carambola', 'Pomegranate', 'Plum', 'kiwi A', 'Orange', 'Apple/Apple D', 'Pear', 'guava A']
number of labels: 23
Processing label: Banana, Number of images: 3027
Processing label: Apple/Apple A, Number of images: 692
Processing label: Kiwi B, Number of images: 1067
Processing label: Apple/Apple F, Number of images: 2030
Processing label: guava B, Number of images: 1626
Processing label: Tomatoes, Number of images: 2171
Processing label: Mango, Number of images: 4154
Processing label: Persimmon, Number of images: 2072
Processing label: Kiwi C, Number of images: 1025
Processing label: Apple/Apple C, Number of images: 1002
Processing label: Apple/Apple B, Number of images: 740
Processing label: Peach, Number of images: 2629
Processing label: Pitaya, Number of images: 2501
Processing lab

In [13]:
triplet_siamese_model.load_weights("../triplet_siamese_model_2_23_epochs.weights.h5")

In [14]:
# history = triplet_siamese_model.fit(
#     train_dataset,
#     validation_data=val_dataset,
#     steps_per_epoch=steps_per_epoch,
#     validation_steps=validation_steps,
#     epochs=epochs
# )

In [15]:
# triplet_siamese_model.save_weights('5epoch_model.weights.h5')

# Extracting embedding layers

In [16]:
embedding_model = triplet_siamese_network.get_layer("Embedding")

# Check if the embedding model is correctly extracted
# print(embedding_model.summary())

# Get reference images and their embeddings

In [17]:
# Define the base path for the test images
reference_test_folder = "../databases/my-fruit-recognition"

# Initialize an empty dictionary to store reference image embeddings with labels
reference_embeddings_dict = {}
label_names = []

# Loop through each subfolder in the base_test_folder
for root, dirs, files in os.walk(reference_test_folder):
  # Skip hidden folders (".DS_Store")
  files = [f for f in files if f != ".DS_Store"]

  # Check if there are files in the subfolder
  if files:
    files_sorted = sorted(files)
    reference_image_path = os.path.join(root, files_sorted[0])

    # Get relative path from base_test_folder
    label_name = os.path.relpath(root, reference_test_folder)
    label_names.append(label_name)

    mapF = MapFunction(image_size)
    preprocessed_reference_image = mapF.decode_and_resize(reference_image_path)
    reference_image_tensor = tf.expand_dims(preprocessed_reference_image, axis=0)
    embedding = embedding_model.predict(reference_image_tensor)

    # Convert the embedding to a hashable type (e.g., tuple or string)
    hashable_embedding = tuple(embedding.flatten())  # Convert to a tuple

    # Store the hashable embedding and relative path label in the dictionary
    reference_embeddings_dict[hashable_embedding] = label_name

print("Class names:", label_names)
print("Number of Reference Embeddings:", len(reference_embeddings_dict))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 626ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

# Get query images and predict their embeddings

In [None]:
# Initialize an empty dictionary to store test embeddings with labels
test_embeddings_dict = {}

# Generate embeddings for the test images and pair them with labels
for image_path, label in test_dataset.items():
    # Preprocess the image using the MapFunction
    preprocessed_image = mapF.decode_and_resize(image_path)

    # Convert the preprocessed image to a tensor and add batch dimension
    image_tensor = tf.expand_dims(preprocessed_image, axis=0)

    # Generate embedding for the image using the embedding model
    embedding = embedding_model.predict(image_tensor)

    # Convert embedding numpy array to tuple for hashable key
    embedding_tuple = tuple(embedding.flatten())

    # Store the embedding tuple and label directly in the dictionary
    test_embeddings_dict[embedding_tuple] = label

# Print the number of test embeddings generated
print("Number of Test Embeddings:", len(test_embeddings_dict))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35

In [None]:
print(test_embeddings_dict.values())

In [None]:
def calculate_top3_accuracy(test_data, reference_data):
    total_tests = len(test_data)
    correct_top3_count = 0

    for test_embedding, test_label in test_data.items():
        test_embedding_np = np.array(test_embedding)  # Convert tuple back to numpy array
        print(type(list(reference_data.keys())[0]))
        closest_3_embeddings = top3_accuracy(test_embedding_np, list(reference_data.keys()))

        closest_3_labels = [reference_data[embedding] for embedding in closest_3_embeddings]

        # print(test_label)
        # for label in closest_3_labels:
        #     print(label)
        # print(correct_top3_count)
        if test_label in closest_3_labels:
            correct_top3_count += 1
        # print(correct_top3_count)

    top3_accuracy_value = correct_top3_count / total_tests * 100
    return top3_accuracy_value



def top3_accuracy(vector_anchor, list_of_vectors):
    # if len(vector_anchor).shape > 1:
    #     vector_anchor = vector_anchor.flatten()
    distances = [(vector, euclidean_distance(vector_anchor, vector)) for vector in list_of_vectors]
    distances.sort(key=lambda x: x[1])
    closest_3_vecs = [item[0] for item in distances[:3]]
    return closest_3_vecs

# Calculate top 3 accuracy for the test dataset

In [None]:
top3_acc = calculate_top3_accuracy(test_embeddings_dict, reference_embeddings_dict)
print(f"Top-3 Accuracy: {top3_acc:.2f}%")

## calculate accuracies for 1 image

In [None]:
query_image_path = "../databases/my-fruit-recognition/Banana/Banana01.png"
query_image = mapF.decode_and_resize(query_image_path)

query_image = tf.expand_dims(query_image, axis=0)  # Add batch dimension
query_embedding = embedding_model.predict(query_image)

## Top 1 accuracy

In [None]:
closest_vector = top1_accuracy(query_embedding, first_embeddings)
closest_image_index = np.argmin([euclidean_distance(closest_vector, vec) for vec in first_embeddings])

In [None]:
show_top1_accuracy(first_image_paths, closest_image_index, query_image_path, mapF)

## Top 3 accuracy

In [None]:
closest_3_vectors = top3_accuracy(query_embedding, first_embeddings)

closest_3_indices = [np.argmin([euclidean_distance(vec, ref_vec) for ref_vec in first_embeddings]) for vec in closest_3_vectors]

In [None]:
show_top3_accuracy(first_image_paths, closest_3_indices, query_image_path, mapF)