This notebook was done by Felix Hagenbrock (it contains some duplicate code with the neural network notebooks)

**Loading images and labels**

In [26]:
import os
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from collections import Counter
import math
from sklearn.model_selection import train_test_split

def count_classes(data_dir):
    labels = []
    for label in os.listdir(data_dir):
        for image_file in os.listdir(os.path.join(data_dir, label)):
            labels.append(label)
    return Counter(labels)

def load_data(data_dir):
    images, labels = [], []
    for label in os.listdir(data_dir):
        for image_file in os.listdir(os.path.join(data_dir, label)):
            img = image.load_img(os.path.join(data_dir, label, image_file), target_size=(256, 256))
            img_tensor = image.img_to_array(img)
            img_tensor = np.expand_dims(img_tensor, axis=0)
            img_tensor /= 255.  # normalize to [0,1] range

            images.append(img_tensor)
            labels.extend([label] * len(img_tensor))

    return np.concatenate(images, axis=0), np.array(labels)

def augment_data(images, labels, class_counts):
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)

    augmented_images, augmented_labels = [], []
    max_images = max(class_counts.values())
    for img_tensor, label in zip(images, labels):
        aug_images = [img_tensor]
        augmentation_factor = max_images / class_counts[label]
        for batch in datagen.flow(np.expand_dims(img_tensor, axis=0), batch_size=1):
            aug_images.append(np.squeeze(batch, axis=0))
            if len(aug_images) >= math.ceil(20 * augmentation_factor):  # original + augmented images
                break
        aug_images = np.stack(aug_images, axis=0)  # stack images along a new axis
        augmented_images.append(aug_images)
        augmented_labels.extend([label] * len(aug_images))

    return np.concatenate(augmented_images, axis=0), np.array(augmented_labels)

# Load all data without augmentation
images, labels = load_data('ds')

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42, stratify=labels)

class_counts = count_classes('ds')
# Augment the training data
# X_train, y_train = augment_data(X_train, y_train, class_counts)

print(X_train.shape, y_train.shape)
print(Counter(y_train))
print(X_test.shape, X_test.shape)
print(Counter(y_test))

(536, 256, 256, 3) (536,)
Counter({'Palm': 299, 'Fist': 163, 'Thumb': 74})
(134, 256, 256, 3) (134, 256, 256, 3)
Counter({'Palm': 75, 'Fist': 41, 'Thumb': 18})


In [36]:
print(y_train)
print(y_test)


['Thumb' 'Thumb' 'Thumb' ... 'Palm' 'Palm' 'Palm']
['Fist' 'Fist' 'Palm' 'Fist' 'Fist' 'Palm' 'Palm' 'Palm' 'Palm' 'Thumb'
 'Fist' 'Fist' 'Thumb' 'Fist' 'Fist' 'Thumb' 'Fist' 'Thumb' 'Palm' 'Thumb'
 'Thumb' 'Palm' 'Palm' 'Palm' 'Palm' 'Palm' 'Palm' 'Fist' 'Fist' 'Palm'
 'Fist' 'Palm' 'Palm' 'Palm' 'Palm' 'Fist' 'Palm' 'Palm' 'Fist' 'Palm'
 'Palm' 'Palm' 'Thumb' 'Palm' 'Fist' 'Palm' 'Palm' 'Thumb' 'Thumb' 'Thumb'
 'Palm' 'Palm' 'Fist' 'Palm' 'Palm' 'Thumb' 'Palm' 'Fist' 'Palm' 'Fist'
 'Palm' 'Palm' 'Palm' 'Fist' 'Palm' 'Fist' 'Palm' 'Palm' 'Fist' 'Palm'
 'Fist' 'Palm' 'Thumb' 'Thumb' 'Palm' 'Palm' 'Palm' 'Fist' 'Fist' 'Fist'
 'Fist' 'Palm' 'Palm' 'Palm' 'Palm' 'Fist' 'Fist' 'Palm' 'Palm' 'Palm'
 'Thumb' 'Palm' 'Fist' 'Fist' 'Palm' 'Palm' 'Palm' 'Palm' 'Palm' 'Palm'
 'Fist' 'Palm' 'Palm' 'Palm' 'Thumb' 'Palm' 'Palm' 'Fist' 'Fist' 'Fist'
 'Palm' 'Palm' 'Fist' 'Palm' 'Fist' 'Palm' 'Thumb' 'Palm' 'Palm' 'Palm'
 'Fist' 'Fist' 'Palm' 'Palm' 'Thumb' 'Palm' 'Palm' 'Palm' 'Thumb' 'Fist'
 'Palm' 

**DS split**

In [27]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, stratify=y_train)

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
y_val = le.transform(y_val)



**Model teaching**

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
import tensorflow as tf
from keras.callbacks import EarlyStopping
from keras import backend as K
from keras.layers import Conv2D, MaxPooling2D, Dropout

K.clear_session(free_memory=True)

tf.random.set_seed(42)


model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25)) 
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25)) 
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5)) 
model.add(Dense(3, activation='softmax'))

#tf.keras.utils.plot_model(model)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

model.fit(X_train, y_train, epochs=1000, batch_size=32, validation_data=(X_val, y_val), callbacks=[early_stopping])





  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 683ms/step - accuracy: 0.4572 - loss: 5.4429 - val_accuracy: 0.7290 - val_loss: 0.9967
Epoch 2/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 684ms/step - accuracy: 0.5802 - loss: 0.9605 - val_accuracy: 0.7383 - val_loss: 0.8788
Epoch 3/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 722ms/step - accuracy: 0.6742 - loss: 0.8533 - val_accuracy: 0.7009 - val_loss: 0.7130
Epoch 4/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 666ms/step - accuracy: 0.7167 - loss: 0.6528 - val_accuracy: 0.7103 - val_loss: 0.5208
Epoch 5/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 676ms/step - accuracy: 0.7689 - loss: 0.5976 - val_accuracy: 0.7944 - val_loss: 0.4307
Epoch 6/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 687ms/step - accuracy: 0.8358 - loss: 0.3822 - val_accuracy: 0.9439 - val_loss: 0.3182
Epoch 7/100

<keras.src.callbacks.history.History at 0x2102cd58c10>

**Predict and evaluate**

In [39]:
from sklearn.metrics import accuracy_score, classification_report
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
accuracy = accuracy_score(y_test, y_pred)
unique, counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique, counts)))

unique, counts = np.unique(y_test, return_counts=True)
print(dict(zip(unique, counts)))

print(f"Accuracy: {accuracy}")
print(classification_report(y_test, y_pred, target_names=le.classes_))



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 128ms/step
{0: 130, 1: 239, 2: 59}
{0: 41, 1: 75, 2: 18}
Accuracy: 0.9701492537313433
              precision    recall  f1-score   support

        Fist       0.93      0.98      0.95        41
        Palm       0.99      1.00      0.99        75
       Thumb       1.00      0.83      0.91        18

    accuracy                           0.97       134
   macro avg       0.97      0.94      0.95       134
weighted avg       0.97      0.97      0.97       134



Save the Model

In [36]:
# Evaluate the model on the test set
_, accuracy = model.evaluate(X_test, y_test, verbose=0)

# Get the number of Conv2D and Dense layers in the model
num_conv_layers = len([layer for layer in model.layers if type(layer) is Conv2D])
num_dense_layers = len([layer for layer in model.layers if type(layer) is Dense])

# Create the filename
filename = f"cnn_{num_conv_layers}_conv_{num_dense_layers}_dense_with_dropout_without_augmentation{accuracy:.2f}_accuracy.keras"

# Save the model
model.save(f"./saved_models/{filename}")

Load a model

In [38]:
import tensorflow as tf
from tensorflow.keras.models import load_model
filename = "cnn_3_conv_2_dense_with_dropout_without_augmentation_test_set0.96_accuracy.keras"
model = tf.keras.models.load_model(f"./saved_models/{filename}")
model.summary()

Solution for second pipeline with conv layers:


Convolution Net

First Try:

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(3, activation='softmax'))

20 Minimum Augmentation 256x256, 96% accuracy -> saved this one

2nd try with dropout:

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))  # Dropout after first pooling layer
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))  # Dropout after second pooling layer
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))  # Dropout after third pooling layer
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))  # Dropout before the final layer
model.add(Dense(3, activation='softmax'))

20 Minimum Augmentation 256x256, 98% -> saved

Same Model but without augmentated test_data:
97% accuracy -> saved

3rd try without augmenting any data, same model as in 2nd try:
92.5% accuracy -> saved