In [1]:
# import the necessary packages
import tensorflow as tf
import numpy as np
import random
import os





# Define parametes

In [20]:


# model input image size
IMAGE_SIZE = (100,100)

# batch size and the buffer size

BATCH_SIZE = 256
BUFFER_SIZE = BATCH_SIZE * 2

# define autotune
AUTO = tf.data.AUTOTUNE

# define the training parameters
LEARNING_RATE = 0.0001
STEPS_PER_EPOCH = 20
VALIDATION_STEPS = 10
EPOCHS = 5

# define the path to save the model
OUTPUT_PATH = "output"
MODEL_PATH = os.path.join(OUTPUT_PATH, "siamese_network")
OUTPUT_IMAGE_PATH = os.path.join(OUTPUT_PATH, "output_image.png")

epochs = 10
batch_size = 128
margin = 1 

In [3]:
class MapFunction():
	def __init__(self, imageSize):
		# define the image width and height
		self.imageSize = imageSize
	def decode_and_resize(self, imagePath):
		# read and decode the image path
		image = tf.io.read_file(imagePath)
		image = tf.image.decode_jpeg(image, channels=3)
		# convert the image data type from uint8 to float32 and then resize
		# the image to the set image size
		image = tf.image.convert_image_dtype(image, dtype=tf.float32)
		image = tf.image.resize(image, self.imageSize)
		# return the image
		return image
	def __call__(self, pair, label):
		positive, negative=pair
		positive = self.decode_and_resize(positive)
		negative = self.decode_and_resize(negative)
		return ( positive, negative), label

# PairGenerator

In [4]:
class PairGenerator:
    def __init__(self, datasetPath):
        self.fruitNames = list()  # path to dir with fruits
        for folderName in os.listdir(datasetPath):
            absoluteFolderName = os.path.join(datasetPath, folderName)
            numImages = len(os.listdir(absoluteFolderName))
            if numImages > 1:
                self.fruitNames.append(absoluteFolderName)
        self.allFruit = self.generate_all_fruit_dict()
    def generate_all_fruit_dict(self):
        allFruit = dict()
        
        for fruitName in self.fruitNames:
            imageNames = os.listdir(fruitName) # all names of photo one fruit
            fruitPhotos = [
                os.path.join(fruitName, imageName) for imageName in imageNames
            ]
            allFruit[fruitName] = fruitPhotos
        return allFruit #all path photo in dict
    def get_next_element(self):
        i=0
        while True:
            i=i+1
                        
            
            imageNames = random.choice(self.fruitNames)
            temporaryNames = self.fruitNames.copy()
            temporaryNames.remove(imageNames)
            negativeNames = random.choice(temporaryNames)

            imagePhoto = random.choice(self.allFruit[imageNames])
            positivePhoto = random.choice(self.allFruit[imageNames])
            negativePhoto = random.choice(self.allFruit[negativeNames])

            yield ((imagePhoto, positivePhoto), 1) 
            yield ((imagePhoto, negativePhoto), 0)
            

            
            

# Creating model

In [21]:
input = tf.keras.layers.Input(shape=(100, 100, 3))
x = tf.keras.layers.BatchNormalization()(input)
x = tf.keras.layers.Conv2D(4, (5, 5), activation="tanh")(x)
x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
x = tf.keras.layers.Conv2D(16, (5, 5), activation="tanh")(x)
x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
x = tf.keras.layers.Flatten()(x)

x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dense(100, activation="tanh")(x)
embedding_network = tf.keras.Model(input, x)

input_1 = tf.keras.layers.Input((100, 100, 3))
input_2 = tf.keras.layers.Input((100, 100, 3))

# Calculate Distance

In [22]:

def euclidean_distance(vects):
    """Find the Euclidean distance between two vectors.

    Arguments:
        vects: List containing two tensors of same length.

    Returns:
        Tensor containing euclidean distance
        (as floating point value) between vectors.
    """

    x, y = vects
    sum_square = tf.keras.backend.sum(tf.keras.backend.square(x - y), axis=1, keepdims=True)
    return tf.keras.backend.sqrt(tf.keras.backend.maximum(sum_square, tf.keras.backend.epsilon()))

In [23]:
tower_1 = embedding_network(input_1)
tower_2 = embedding_network(input_2)

merge_layer = tf.keras.layers.Lambda(euclidean_distance, output_shape=(1,))(
    [tower_1, tower_2]
)
normal_layer = tf.keras.layers.BatchNormalization()(merge_layer)
output_layer = tf.keras.layers.Dense(1, activation="sigmoid")(normal_layer)
siamese = tf.keras.Model(inputs=[input_1, input_2], outputs=output_layer)

# Define the contrastive loss


In [24]:
def loss(margin=1):
    """Provides 'contrastive_loss' an enclosing scope with variable 'margin'.

    Arguments:
        margin: Integer, defines the baseline for distance for which pairs
                should be classified as dissimilar. - (default is 1).

    Returns:
        'contrastive_loss' function with data ('margin') attached.
    """

    def contrastive_loss(y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)


        square_pred = tf.keras.backend.square(y_pred)
        margin_square = tf.keras.backend.square(tf.keras.backend.maximum(margin - y_pred, 0))
        return tf.keras.backend.mean((1 - y_true) * square_pred + (y_true) * margin_square)


    return contrastive_loss

In [25]:
siamese.compile(loss=loss(margin=margin), optimizer="RMSprop", metrics=["accuracy"])
siamese.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_5 (InputLayer)        [(None, 100, 100, 3)]        0         []                            
                                                                                                  
 input_6 (InputLayer)        [(None, 100, 100, 3)]        0         []                            
                                                                                                  
 model_2 (Functional)        (None, 100)                  807408    ['input_5[0][0]',             
                                                                     'input_6[0][0]']             
                                                                                                  
 lambda_1 (Lambda)           (None, 1)                    0         ['model_2[0][0]',       

TEST


In [26]:
import random
import os

class MapFunction():
    def __init__(self, imageSize):
        self.imageSize = imageSize

    def decode_and_resize(self, imagePath):
        image = tf.io.read_file(imagePath)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        image = tf.image.resize(image, self.imageSize)
        return image

    def __call__(self, pair, label):
        positive, negative = pair
        positive = self.decode_and_resize(positive)
        negative = self.decode_and_resize(negative)
        return (positive, negative), label


import os
import random

class PairGenerator1:
    def __init__(self, datasetPath, split_ratio=(0.7, 0.2, 0.1)):
        self.datasetPath = datasetPath
        self.split_ratio = split_ratio
        self.label_names = self._get_label_names()
        self.label_images = self._generate_label_images_dict()
        self.train_images, self.val_images, self.test_images = self._split_label_images()

    def _get_label_names(self):
        label_names = []
        for folder_name in os.listdir(self.datasetPath):
            folder_path = os.path.join(self.datasetPath, folder_name)
            if os.path.isdir(folder_path):
                subfolders = [os.path.join(folder_name, subfolder) for subfolder in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, subfolder))]
                if subfolders:
                    label_names.extend(subfolders)
                else:
                    label_names.append(folder_name)
        return label_names

    def _generate_label_images_dict(self):
        label_images = {}
        for label_name in self.label_names:
            label_path = os.path.join(self.datasetPath, label_name)
            if os.path.isdir(label_path):
                image_files = [os.path.join(label_path, imageName) for imageName in os.listdir(label_path) if os.path.isfile(os.path.join(label_path, imageName))]
            else:
                image_files = [os.path.join(self.datasetPath, label_name, imageName) for imageName in os.listdir(os.path.join(self.datasetPath, label_name)) if os.path.isfile(os.path.join(self.datasetPath, label_name, imageName))]
            label_images[label_name] = image_files
        return label_images

    def _split_label_images(self):
        train_images = []
        val_images = []
        test_images = []

        for label, images in self.label_images.items():
            random.shuffle(images)
            num_train = int(len(images) * self.split_ratio[0])
            num_val = int(len(images) * self.split_ratio[1])

            train_images.extend(images[:num_train])
            val_images.extend(images[num_train:num_train + num_val])
            test_images.extend(images[num_train + num_val:])

        return train_images, val_images, test_images

    def _get_pair(self, image_set):
        while True:
            positive_image = random.choice(image_set)
            negative_image = random.choice(image_set)

            label_positive = os.path.dirname(positive_image)
            label_negative = os.path.dirname(negative_image)

            if label_positive == label_negative:
                yield (positive_image, negative_image), 1
            else:
                yield (positive_image, negative_image), 0

    def get_train_element(self):
        return self._get_pair(self.train_images)

    def get_val_element(self):
        return self._get_pair(self.val_images)

    def get_test_element(self):
        return self._get_pair(self.test_images)





def create_dataset(path, img_size, batch_size):
    pair_generator = PairGenerator1(path)
    image_processor = MapFunction(img_size)

    train_dataset = tf.data.Dataset.from_generator(pair_generator.get_train_element,
                                                   output_signature=((tf.TensorSpec(shape=(), dtype=tf.string),
                                                                      tf.TensorSpec(shape=(), dtype=tf.string)),
                                                                     tf.TensorSpec(shape=(), dtype=tf.int32)))

    val_dataset = tf.data.Dataset.from_generator(pair_generator.get_val_element,
                                                 output_signature=((tf.TensorSpec(shape=(), dtype=tf.string),
                                                                    tf.TensorSpec(shape=(), dtype=tf.string)),
                                                                   tf.TensorSpec(shape=(), dtype=tf.int32)))

    test_dataset = tf.data.Dataset.from_generator(pair_generator.get_test_element,
                                                  output_signature=((tf.TensorSpec(shape=(), dtype=tf.string),
                                                                     tf.TensorSpec(shape=(), dtype=tf.string)),
                                                                    tf.TensorSpec(shape=(), dtype=tf.int32)))

    train_dataset = train_dataset.map(image_processor).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    val_dataset = val_dataset.map(image_processor).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    test_dataset = test_dataset.map(image_processor).batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return train_dataset, val_dataset, test_dataset



In [27]:
# dataset_path="/kaggle/input/fruit-recognition"
dataset_path = "../Training"
# dataset_path="C:/Users/tokar/Downloads/archive"
dataset_path=r"C:\Users\tokar\Downloads\archive"

train_data, val_data, test_data = create_dataset(dataset_path, (100, 100), 32)

In [12]:
label_names = []
for folder_name  in os.listdir(dataset_path):
    folder_path = os.path.join(dataset_path, folder_name)
    if os.path.isdir(folder_path):
        subfolders = [os.path.join(folder_name, subfolder) for subfolder in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, subfolder))]
        if subfolders:
            label_names.extend(subfolders)
        else:
            label_names.append(folder_name)

            
print(label_names)



['Apple\\Apple_A', 'Apple\\Apple_B', 'Apple\\Apple_C', 'Apple\\Apple_D', 'Apple\\Apple_E', 'Apple\\Apple_F', 'Apple\\Total_Number_of_Apples', 'Banana', 'Carambola', 'Guava\\guava A', 'Guava\\guava B', 'Guava\\Guava total', 'Guava\\guava total final', 'Kiwi\\kiwi A', 'Kiwi\\Kiwi B', 'Kiwi\\Kiwi C', 'Kiwi\\Total Number of Kiwi fruit', 'Mango', 'muskmelon', 'Orange', 'Peach', 'Pear', 'Persimmon', 'Pitaya', 'Plum', 'Pomegranate', 'Tomatoes']


In [13]:
label_images = {}
for label_name in label_names:
    image_dir = os.path.join(dataset_path, label_name)
    image_files = []
    for imageName in os.listdir(image_dir):
        imagePath = os.path.join(image_dir, imageName)
        if os.path.isfile(imagePath):
            image_files.append(imagePath)
    label_images[label_name] = image_files# print(label_images)

    


In [14]:
import matplotlib.pyplot as plt

# def show_image_pair(pair):
#     plt.figure(figsize=(8, 4))
#     for i, image in enumerate(pair):
#         plt.subplot(1, 2, i + 1)
#         plt.imshow(image.numpy())
#         plt.axis("off")
#     plt.show()

# # Przykładowe wyświetlenie par obrazów
# for image_pair, label in train_data.take(3):  # Weź pierwsze 3 pary z danych treningowych
#     show_image_pair(image_pair)
#     print("Label:", label.numpy())

for key, val in label_images.items():
    print(key)
    x
len(label_images["Peach"])

Apple\Apple_A
Apple\Apple_B
Apple\Apple_C
Apple\Apple_D
Apple\Apple_E
Apple\Apple_F
Apple\Total_Number_of_Apples
Banana
Carambola
Guava\guava A
Guava\guava B
Guava\Guava total
Guava\guava total final
Kiwi\kiwi A
Kiwi\Kiwi B
Kiwi\Kiwi C
Kiwi\Total Number of Kiwi fruit
Mango
muskmelon
Orange
Peach
Pear
Persimmon
Pitaya
Plum
Pomegranate
Tomatoes


2629

KeyboardInterrupt: 

In [29]:
history = siamese.fit(
    train_data,
    validation_data=val_data,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=VALIDATION_STEPS,
    epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
siamese.save_weights("/kaggle/working/fruitrecognition.weights.h5", overwrite=True)

FileNotFoundError: [Errno 2] Unable to create file (unable to open file: name = '/kaggle/working/fruitrecognition.weights.h5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 302)

In [None]:
# first=tf.keras.Model(inputs=[input_1, input_2], outputs=output_layer)
# first.load_weights(
#    "/kaggle/working/360.weights.h5"
# )