In [5]:
import os, sys
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import random
import matplotlib.pyplot as plt

In [6]:
class Pair(object):
    """Build positive/negative sample pairs for training a siamese network.

    The constructor takes ``data = (x, y)`` where ``x`` is a sequence of
    samples (in this notebook: image file paths) and ``y`` the matching class
    labels. Both are stored as NumPy arrays on ``self.x`` / ``self.y``.
    """

    def __init__(self,data):
        x, y = data
        self.x, self.y = np.array(x), np.array(y)

    def decode_img(self, img):
        """Decode JPEG-encoded bytes into a 3-channel float32 image tensor.

        Args:
            img: scalar string tensor holding the encoded JPEG bytes.

        Returns:
            The decoded image converted to ``tf.float32`` with
            ``tf.image.convert_image_dtype``.
        """
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.convert_image_dtype(img, tf.float32)
        return img

    def get_pairs(self):
        """Return the generated pairs as three aligned ``tf.data.Dataset`` objects.

        Returns:
            Tuple ``(element_1, element_2, labels)``: the first members of the
            pairs, the second members, and the 1/0 same-class labels, all in
            the same order.
        """
        pairs, labels = self.makePairs(len(np.unique(self.y)))
        element_1 = tf.data.Dataset.from_tensor_slices(pairs[:, 0])
        element_2 = tf.data.Dataset.from_tensor_slices(pairs[:, 1])
        labels = tf.data.Dataset.from_tensor_slices(labels)
        return (element_1, element_2, labels)

    def makePairs(self, num_classes=None):
        """Create one positive and one negative pair for every sample.

        For each sample, a partner is drawn at random from the same class
        (label 1) and another from a randomly chosen different class
        (label 0). The positive partner may be the sample itself.

        Args:
            num_classes: kept for backward compatibility only. The classes are
                derived from the labels actually present in ``self.y``, so the
                labels do not have to be the contiguous range
                ``0..num_classes-1``.

        Returns:
            Tuple ``(pairs, labels)`` of NumPy arrays. ``pairs`` has
            ``2 * len(x)`` rows of ``[sample, partner]`` and ``labels`` the
            matching 1/0 values, alternating positive then negative.

        Raises:
            ValueError: if ``x`` and ``y`` differ in length, or if fewer than
                two distinct classes are present (no negative pair exists).
        """
        x, y = self.x, self.y
        if len(x) != len(y):
            raise ValueError(
                f"x and y must have the same length, got {len(x)} and {len(y)}"
            )

        classes = np.unique(y)
        if len(classes) < 2:
            # The original rejection loop would spin forever in this case.
            raise ValueError("At least two distinct classes are required to build negative pairs")

        # Key the lookups by the label value itself instead of by position, so
        # labels such as 1..5 or non-contiguous ids work as well as 0..n-1.
        indices_by_class = {label: np.flatnonzero(y == label) for label in classes}
        other_classes = {
            label: [candidate for candidate in classes if candidate != label]
            for label in classes
        }

        pairs = []
        labels = []

        for idx1 in range(len(x)):
            x1 = x[idx1]
            label1 = y[idx1]

            # Positive pair: partner drawn from the same class.
            idx2 = random.choice(indices_by_class[label1])
            pairs.append([x1, x[idx2]])
            labels.append(1)

            # Negative pair: partner drawn from a different, randomly chosen class.
            label2 = random.choice(other_classes[label1])
            idx2 = random.choice(indices_by_class[label2])
            pairs.append([x1, x[idx2]])
            labels.append(0)

        return np.array(pairs), np.array(labels)

class Augment(object):
    """Namespace of random image-augmentation helpers.

    Every helper takes an image and returns an augmented copy. They are
    declared as static methods so they can be used both through the class
    (``Augment.rotate_img``, e.g. as a ``Dataset.map`` callback) and through
    an instance (``Augment().rotate_img``).

    Each call builds a fresh Keras preprocessing layer and applies it with
    ``training=True`` so the random transform is active outside ``fit()``.
    """

    @staticmethod
    def rotate_img(img):
        """Rotate by a random angle; factor 0.2 is a fraction of a full turn."""
        img = tf.keras.layers.RandomRotation(0.2)(img, training=True)
        return img

    @staticmethod
    def zoom_img(img):
        """Apply a random zoom with factor 0.5."""
        img = tf.keras.layers.RandomZoom(0.5)(img, training=True)
        return img

    @staticmethod
    def shift_img(img):
        """Translate randomly by up to 50% of the height and width."""
        # Keras has no `RandomShift` layer; `RandomTranslation` is the layer
        # that performs random shifts.
        img = tf.keras.layers.RandomTranslation(
            height_factor=0.5, width_factor=0.5
        )(img, training=True)
        return img

    @staticmethod
    def flip_img(img):
        """Flip randomly using the layer's default flip mode."""
        img = tf.keras.layers.RandomFlip()(img, training=True)
        return img

    @staticmethod
    def shear_img(img):
        """Apply a random shear of intensity 0.2 to a channels-last image."""
        # random_shear defaults to a channels-first layout (row_axis=1,
        # col_axis=2, channel_axis=0); images in this notebook are decoded as
        # (height, width, channels), so the axes are given explicitly.
        # NOTE(review): random_shear works on NumPy arrays; it presumably will
        # not accept symbolic tensors inside `Dataset.map` - confirm before use.
        img = tf.keras.preprocessing.image.random_shear(
            img, 0.2, row_axis=0, col_axis=1, channel_axis=2
        )
        return img



In [7]:
def generate_data_for_siamese(DATA_DIR = "C:\\Users\\aeternum\\Documents\\GitHub\\Gesture-Recognition\\my_robot\\python_scripts\\data", augmentation_config = None):
    """Build paired image datasets for training a siamese network.

    Images are read from ``<DATA_DIR>/gestures/<label>/<file>``, where each
    sub-folder name is an integer class label. ``Pair`` is used to create one
    positive and one negative pair per image.

    Args:
        DATA_DIR: directory that contains the ``gestures`` folder.
        augmentation_config: optional mapping. For every key with a truthy
            value among ``rotation_range``, ``width_shift_range``,
            ``height_shift_range``, ``zoom_range``, ``horizontal_flip`` (or the
            legacy spelling ``flip_horizontal``) and ``shear_range``, an
            augmented copy of all pairs is appended. ``None`` (the default)
            disables augmentation and returns the plain pairs.

    Returns:
        Tuple ``(element_set_1, element_set_2, pair_labels)`` of aligned
        ``tf.data.Dataset`` objects: 224x224 RGB images for both pair members
        and the pair label one-hot encoded with depth 2.

    Raises:
        ValueError: if a class folder name is not an integer, or if no images
            are found.
    """
    # os.path.join keeps the path valid on every platform; the previous
    # hard-coded "\\" separator only worked on Windows.
    IMAGE_DIR = os.path.join(DATA_DIR, 'gestures')

    images = []
    labels = []

    def augment_pairs(image_data_1,image_data_2,labels,augmentation_config):
        """Append one augmented copy of the pairs per enabled augmentation."""
        # (accepted config keys, augmentation callback); width and height
        # shifts share the same callback and each contributes its own copy.
        augmenters = (
            (("rotation_range",), Augment.rotate_img),
            (("width_shift_range",), Augment.shift_img),
            (("height_shift_range",), Augment.shift_img),
            (("zoom_range",), Augment.zoom_img),
            (("horizontal_flip", "flip_horizontal"), Augment.flip_img),
            # NOTE(review): Augment.shear_img relies on a NumPy-based Keras
            # helper; confirm it works inside Dataset.map before enabling.
            (("shear_range",), Augment.shear_img),
        )

        augmented_image_data_1 = image_data_1
        augmented_image_data_2 = image_data_2
        augmented_labels = labels

        for keys, augment in augmenters:
            if not any(augmentation_config.get(key) for key in keys):
                continue
            # Labels are repeated unchanged so the three datasets stay aligned.
            augmented_image_data_1 = augmented_image_data_1.concatenate(image_data_1.map(augment))
            augmented_image_data_2 = augmented_image_data_2.concatenate(image_data_2.map(augment))
            augmented_labels = augmented_labels.concatenate(labels)

        return (augmented_image_data_1,augmented_image_data_2,augmented_labels)

    def decode_img(img):
        """Read a JPEG file from disk and resize it to 224x224."""
        img = tf.io.read_file(img)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, (224, 224))
        return img

    # Sorted listing makes the sample order independent of the file system.
    for folder in sorted(os.listdir(IMAGE_DIR)):
        folder_path = os.path.join(IMAGE_DIR, folder)
        # Skip stray files and hidden folders such as `.ipynb_checkpoints`,
        # which would otherwise crash the int() conversion below.
        if folder.startswith('.') or not os.path.isdir(folder_path):
            continue
        class_label = int(folder)
        for image in sorted(os.listdir(folder_path)):
            images.append(os.path.join(folder_path, image))
            labels.append(class_label)

    if not images:
        raise ValueError(f'No images found under {IMAGE_DIR}')

    pair_generator = Pair((images, labels))
    element_set_1, element_set_2, pair_labels =  pair_generator.get_pairs()

    # Turn file paths into image tensors and labels into one-hot vectors.
    element_set_1 = element_set_1.map(decode_img)
    element_set_2 = element_set_2.map(decode_img)
    pair_labels = pair_labels.map(lambda x: tf.one_hot(x, 2))

    if augmentation_config:
        element_set_1, element_set_2, pair_labels = augment_pairs(
            element_set_1, element_set_2, pair_labels, augmentation_config
        )

    return (element_set_1, element_set_2, pair_labels)

In [8]:
# Build the siamese datasets from the function's default DATA_DIR.
# i1 / i2 receive the two image datasets of each pair, l the pair-label dataset.
i1,i2,l = generate_data_for_siamese()

In [9]:
# Instantiate the augmentation helper class.
# NOTE(review): `z` is not used in any cell visible here - confirm it is still needed.
z = Augment()

In [16]:
def generate_data_for_classifier(DATA_DIR, batch_size = 32, target_size = (128,128), augmentation_config = None):
    """Create an augmenting image iterator for training the gesture classifier.

    Images are read from ``<DATA_DIR>/gestures``, with one sub-folder per
    class, via ``ImageDataGenerator.flow_from_directory``.

    Args:
        DATA_DIR: directory that contains the ``gestures`` folder.
        batch_size: number of images per yielded batch.
        target_size: ``(height, width)`` every image is resized to.
        augmentation_config: optional mapping overriding any of
            ``rotation_range``, ``width_shift_range``, ``height_shift_range``,
            ``shear_range``, ``zoom_range`` and ``horizontal_flip``. Missing
            keys fall back to the defaults below; ``None`` uses all defaults.

    Returns:
        The directory iterator returned by ``flow_from_directory``, configured
        with ``class_mode='categorical'``.
    """
    # Defaults live inside the function so no mutable object is shared between
    # calls, and a partial config no longer raises KeyError.
    default_config = {
        "rotation_range":30,
        "width_shift_range":0.2,
        "height_shift_range":0.2,
        "shear_range":0.2,
        "zoom_range":0.5,
        "horizontal_flip":False
    }
    config = {**default_config, **(augmentation_config or {})}

    IMAGE_DIR = os.path.join(DATA_DIR, 'gestures')

    image_generator = ImageDataGenerator(
        rescale=1./255,  # scale pixel values by 1/255
        rotation_range=config['rotation_range'],
        width_shift_range=config['width_shift_range'],
        height_shift_range=config['height_shift_range'],
        shear_range=config['shear_range'],
        zoom_range=config['zoom_range'],
        horizontal_flip=config['horizontal_flip'],
        )
    image_data = image_generator.flow_from_directory(
        IMAGE_DIR,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
    )

    return image_data

In [17]:
# Load the classifier training iterator with the default batch size, target size and augmentation.
# NOTE(review): the absolute path is specific to one machine; a configurable DATA_DIR would make this cell portable.
data = generate_data_for_classifier("C:/Users/aeternum/Documents/GitHub/Gesture-Recognition/my_robot/python_scripts/data")

Found 100 images belonging to 5 classes.


In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D

In [20]:
# Start with an empty Sequential model; layers are added in the following cell.
model = Sequential()

In [21]:
# Convolutional front end; input_shape matches the classifier generator's default target_size of (128, 128) with 3 channels.
# NOTE: re-running this cell without re-creating `model` appends the layers a second time.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128,128,3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))

In [22]:
# Sanity check: print each layer's output shape to verify the spatial dimensions.
for layer in model.layers:
    print(layer.output_shape)

(None, 126, 126, 32)
(None, 63, 63, 32)
(None, 61, 61, 64)
