In [None]:
#!/usr/bin/env python
# coding: utf-8

# # Gesture UI Recognition
# 
# Goal: Create a model that identifies hand gestures from images
# 
# This code closely follows the structure and libraries used in the Pokemon classification example

# Silence TensorFlow's native (C++) log output; level '3' is the most restrictive setting.
# This is set here, before the cell that imports tensorflow, so it is already in place at import time.

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
import tensorflow as tf
import numpy as np

import pandas as pd

In [None]:
# Environment check: TensorFlow build, then the GPUs it can see
# (an empty list means training will run on the CPU).
print(tf.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)

In [None]:
# Student ID; passed as `seed` to every image_dataset_from_directory call below
# so the training/validation file split is the same for each image size.
STUDENT_ID = 385433

In [None]:
# ## Setting Up the Dataset
# 
# Starting with a smaller image size (128x128) for faster training, just like the Pokemon example

# Size every image is resized to when loaded (passed as `image_size`)
IMG_SIZE = (128, 128)
# Number of images per batch produced by the dataset loaders
batch_size = 32

# For HAGRID dataset: root folder handed to image_dataset_from_directory
# (presumably one sub-folder per gesture class -- TODO confirm the on-disk layout)
train_dir = "HAGRID_dataset"

In [None]:
# Training split: 70% of the images (the remaining 30% is held out below
# and divided into validation and test).
train_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=train_dir,
    subset='training',
    validation_split=0.3,
    seed=STUDENT_ID,           # same seed as the hold-out loader so the two subsets are disjoint
    shuffle=True,
    label_mode='categorical',  # one-hot labels, matching CategoricalCrossentropy
    image_size=IMG_SIZE,
    batch_size=batch_size,
)

In [None]:
# Hold-out data (30% of all images); it is divided below into
# validation (~1/3 of the hold-out batches) and test (the remaining ~2/3).
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    image_size=IMG_SIZE,
    validation_split=0.3,
    subset='validation',
    label_mode='categorical',
    seed=STUDENT_ID,
    batch_size=batch_size
)

# With shuffle=True the loader reshuffles on every pass, so calling take()/skip()
# directly on it would cut validation and test from *different* orderings and
# the two sets could share images. Cache the hold-out and read it once in full:
# from then on every pass replays the same batches in the same order.
holdout_dataset = validation_dataset.cache()
holdout_batches = sum(1 for _ in holdout_dataset)  # full pass fills the cache and counts batches

# Split the frozen hold-out into validation and test, as in the Pokemon example
val_batches = holdout_batches // 3
test_dataset = holdout_dataset.skip(val_batches)
validation_dataset = holdout_dataset.take(val_batches)

In [None]:
# Number of gesture classes = size of the output layer of every model below.
# NOTE: read class_names before train_dataset is re-assigned to its cache()/prefetch()
# wrapper further down; the wrapped dataset is a new object (presumably without
# the class_names attribute -- re-running this cell out of order would then fail).
num_classes = len(train_dataset.class_names)

In [None]:
# Class names in label-index order; used later to title preview images,
# label the confusion-matrix axes and name predictions on custom images.
class_names = train_dataset.class_names
print(f"Found {num_classes} classes: {class_names}")

In [None]:
# Let tf.data pick the prefetch buffer size at runtime
AUTOTUNE = tf.data.AUTOTUNE

# Cache each split after its first pass and overlap loading with training.
train_dataset, validation_dataset, test_dataset = (
    split.cache().prefetch(buffer_size=AUTOTUNE)
    for split in (train_dataset, validation_dataset, test_dataset)
)


In [None]:

# Basic dataset info: printing the dataset object shows its representation
# (for tf.data datasets this includes the element spec of image and label batches)
print(train_dataset)


In [None]:

from tensorflow.keras import layers


In [None]:
import matplotlib.pyplot as plt

# Preview the first nine images of one training batch in a 3x3 grid,
# each titled with the class name decoded from its one-hot label.
plt.figure(figsize=(10, 10))
for batch_images, batch_labels in train_dataset.take(1):
    class_ids = np.argmax(batch_labels, axis=1)
    for slot in range(9):
        plt.subplot(3, 3, slot + 1)
        # Pixels are floats in 0-255; cast to uint8 so imshow renders them as-is
        plt.imshow(batch_images[slot].numpy().astype("uint8"))
        plt.title(class_names[class_ids[slot]])
        plt.axis("off")
plt.show()

In [None]:
# Model 1: baseline CNN -- two conv/pool stages followed by a dense classifier.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(*IMG_SIZE, 3)),
    layers.Rescaling(1./255),  # scale 0-255 pixels into 0-1
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),  # class probabilities
])

In [None]:
# The output layer already applies softmax, so the loss receives probabilities
# (from_logits=False) and one-hot labels.
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

In [None]:
# Print the baseline CNN's layers, output shapes and parameter counts
model.summary()

In [None]:
# Early stopping: end training once val_loss has not improved for 5 epochs
# in a row, then roll the model back to the weights of its best epoch.
callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

In [None]:
# Train the baseline CNN; 100 epochs is only an upper bound because
# the early-stopping callback normally ends the run sooner.
history = model.fit(
    train_dataset,
    epochs=100,
    validation_data=validation_dataset,
    callbacks=[callback],
)



In [None]:
# ## Model 2: CNN with Data Augmentation
#
# Augmentation block reused by the next CNN models: random horizontal flips
# and random rotations (factor 0.1), following the Pokemon example.
data_augmentation_layers = tf.keras.Sequential()
data_augmentation_layers.add(layers.RandomFlip("horizontal"))
data_augmentation_layers.add(layers.RandomRotation(0.1))

In [None]:
# Model 2: the baseline CNN with the augmentation block placed right after the input.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(*IMG_SIZE, 3)),
    data_augmentation_layers,
    layers.Rescaling(1./255),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])

In [None]:
# Same training setup as the baseline: Adam, categorical cross-entropy on
# softmax probabilities, accuracy as the reported metric.
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

In [None]:
# Train the augmented CNN, reusing the early-stopping callback defined for the baseline.
history_aug = model.fit(
    train_dataset,
    epochs=100,
    validation_data=validation_dataset,
    callbacks=[callback],
)

In [None]:
# ## Model 3: CNN with Data Augmentation and Dropout
#
# Model 2 plus two Dropout(0.2) layers (after the first pooling stage and
# after Flatten) to further combat overfitting, as in the Pokemon example.

model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(*IMG_SIZE, 3)),
    data_augmentation_layers,
    layers.Rescaling(1./255),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(0.2),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])

model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

# Fresh early-stopping callback (same settings as before); it is also the one
# picked up by the grayscale model's fit() call further down.
callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

history_dropout = model.fit(
    train_dataset,
    epochs=100,
    validation_data=validation_dataset,
    callbacks=[callback],
)

In [None]:
# ## Model 4: Grayscale Images
# 
# Let's create a dataset with grayscale images for comparison
# This is an addition to the Pokemon example which only used RGB


# Load grayscale images (single channel); same 70/30 seeded split as the RGB data
train_dataset_gray = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    image_size=IMG_SIZE,
    validation_split=0.3,
    subset='training',
    label_mode='categorical',
    seed=STUDENT_ID,
    batch_size=batch_size,
    color_mode='grayscale'
)

validation_dataset_gray = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    image_size=IMG_SIZE,
    validation_split=0.3,
    subset='validation',
    label_mode='categorical',
    seed=STUDENT_ID,
    batch_size=batch_size,
    color_mode='grayscale'
)

# With shuffle=True the loader reshuffles on every pass, so take()/skip() applied
# directly to it would cut validation and test from different orderings and the
# two sets could share images. Cache the hold-out and read it once in full so
# that every later pass replays the same batches in the same order.
holdout_dataset_gray = validation_dataset_gray.cache()
holdout_batches_gray = sum(1 for _ in holdout_dataset_gray)  # fills the cache, counts batches

# Split the frozen hold-out: ~1/3 of its batches for validation, the rest for test
val_batches = holdout_batches_gray // 3
test_dataset_gray = holdout_dataset_gray.skip(val_batches)
validation_dataset_gray = holdout_dataset_gray.take(val_batches)

# Optimize loading (validation/test are already served from the hold-out cache)
train_dataset_gray = train_dataset_gray.cache().prefetch(buffer_size=AUTOTUNE)
validation_dataset_gray = validation_dataset_gray.prefetch(buffer_size=AUTOTUNE)
test_dataset_gray = test_dataset_gray.prefetch(buffer_size=AUTOTUNE)

def expand_grayscale(images, labels):
    """Copy the single grayscale channel three times along the last axis.

    The models below are built for 3-channel input, so each grayscale batch is
    widened to three identical channels. Labels are passed through untouched.
    """
    return tf.repeat(images, repeats=3, axis=-1), labels


# Apply the channel expansion to every grayscale split
train_dataset_gray = train_dataset_gray.map(expand_grayscale)
validation_dataset_gray = validation_dataset_gray.map(expand_grayscale)
test_dataset_gray = test_dataset_gray.map(expand_grayscale)


In [None]:
# Show the first five grayscale training images side by side with their class names
plt.figure(figsize=(10, 5))
for batch_images, batch_labels in train_dataset_gray.take(1):
    class_ids = np.argmax(batch_labels, axis=1)
    for slot in range(5):
        plt.subplot(1, 5, slot + 1)
        # Three identical channels render as a gray image
        plt.imshow(batch_images[slot].numpy().astype("uint8"))
        plt.title(class_names[class_ids[slot]])
        plt.axis("off")
plt.show()


In [None]:
# Model 4: same architecture as the augmented CNN, trained on the grayscale
# data (already expanded to three channels, hence the unchanged input shape).
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(*IMG_SIZE, 3)),
    data_augmentation_layers,
    layers.Rescaling(1./255),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])


In [None]:
# Same optimizer, loss and metric as the RGB CNN models
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)


In [None]:
# Train on the grayscale splits; `callback` is the EarlyStopping instance
# created in the Model 3 cell.
history_gray = model.fit(
    train_dataset_gray,
    epochs=100,
    validation_data=validation_dataset_gray,
    callbacks=[callback],
)


In [None]:
# ## Transfer Learning
# 
# Following the Pokemon example, we'll use transfer learning with a pre-trained model

# VGG16 requires images of at least 32x32
# Let's use 224x224 which is more standard for VGG16
IMG_SIZE_TL = (224, 224)

# Reload datasets with new image size (same seed, so the same 70/30 file split)
train_dataset_tl = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    image_size=IMG_SIZE_TL,
    validation_split=0.3,
    subset='training',
    label_mode='categorical',
    seed=STUDENT_ID,
    batch_size=batch_size
)

validation_dataset_tl = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    image_size=IMG_SIZE_TL,
    validation_split=0.3,
    subset='validation',
    label_mode='categorical',
    seed=STUDENT_ID,
    batch_size=batch_size
)

# With shuffle=True the loader reshuffles on every pass, so take()/skip() applied
# directly to it would cut validation and test from different orderings and the
# two sets could share images. Cache the hold-out and read it once in full so
# that every later pass replays the same batches in the same order.
holdout_dataset_tl = validation_dataset_tl.cache()
holdout_batches_tl = sum(1 for _ in holdout_dataset_tl)  # fills the cache, counts batches

# Split the frozen hold-out: ~1/3 of its batches for validation, the rest for test
val_batches = holdout_batches_tl // 3
test_dataset_tl = holdout_dataset_tl.skip(val_batches)
validation_dataset_tl = holdout_dataset_tl.take(val_batches)

# Optimize loading (validation/test are already served from the hold-out cache)
train_dataset_tl = train_dataset_tl.cache().prefetch(buffer_size=AUTOTUNE)
validation_dataset_tl = validation_dataset_tl.prefetch(buffer_size=AUTOTUNE)
test_dataset_tl = test_dataset_tl.prefetch(buffer_size=AUTOTUNE)


In [None]:
# VGG16 convolutional base with ImageNet weights (same as the Pokemon example);
# include_top=False drops the original classifier so our own head can be attached.
base_model = tf.keras.applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(*IMG_SIZE_TL, 3),
)

# Freeze the base so only the new classification head is trained
base_model.trainable = False


In [None]:

# Transfer-learning model: augmentation -> VGG16 preprocessing -> frozen VGG16
# base -> global average pooling -> linear classifier.
inputs = tf.keras.Input(shape=(*IMG_SIZE_TL, 3))

# Augmentation block for the 224x224 models (reused by the dropout variant below)
data_augmentation_layers_tl = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
])
augmented = data_augmentation_layers_tl(inputs)

# Apply the input preprocessing that ships with VGG16
preprocessed = tf.keras.applications.vgg16.preprocess_input(augmented)

# training=False keeps the base in inference mode
features = base_model(preprocessed, training=False)

# Classification head; outputs are raw logits (no softmax), following the Pokemon example
pooled = layers.GlobalAveragePooling2D()(features)
outputs = layers.Dense(num_classes)(pooled)

model = tf.keras.Model(inputs, outputs)


In [None]:

# The head outputs logits, so the loss applies softmax itself (from_logits=True)
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)


In [None]:

# Print the transfer-learning model's layers and parameter counts
# (the VGG16 base was frozen above, so its weights are reported as non-trainable)
model.summary()

In [None]:

# Early stopping for the transfer-learning run: stop after 5 epochs without a
# val_loss improvement and restore the best epoch's weights.
callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)


In [None]:

# Train the classification head on the 224x224 data (the VGG16 base stays frozen)
history_tl = model.fit(
    train_dataset_tl,
    epochs=100,
    validation_data=validation_dataset_tl,
    callbacks=[callback],
)


In [None]:
# ## Transfer Learning with Dropout
# Continuing to follow the Pokemon example: the same pipeline as the previous
# transfer-learning model with Dropout(0.2) inserted before the classifier.

inputs = tf.keras.Input(shape=(*IMG_SIZE_TL, 3))

# Reuse the 224x224 augmentation block and the frozen VGG16 base
augmented = data_augmentation_layers_tl(inputs)
preprocessed = tf.keras.applications.vgg16.preprocess_input(augmented)
features = base_model(preprocessed, training=False)

# Classification head with dropout; outputs are raw logits (no softmax)
pooled = layers.GlobalAveragePooling2D()(features)
regularized = layers.Dropout(0.2)(pooled)
outputs = layers.Dense(num_classes)(regularized)

model = tf.keras.Model(inputs, outputs)


In [None]:
# Logit outputs -> from_logits=True, as for the previous transfer-learning model
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)


In [None]:
# Print the dropout variant's layers and parameter counts
model.summary()


In [None]:
# New early-stopping callback for this run (5 epochs of patience on val_loss,
# best weights restored at the end).
callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)


In [None]:
# Train the dropout variant on the same 224x224 splits
history_tl_dropout = model.fit(
    train_dataset_tl,
    epochs=100,
    validation_data=validation_dataset_tl,
    callbacks=[callback],
)

In [1]:
# ## Transfer Learning with Larger Images
# 
# Following the Pokemon example, which found that 299x299 images gave the best results

# In[44]:

IMG_SIZE_LARGE = (299, 299)

# Reload datasets with larger image size (same seed, so the same 70/30 file split)
train_dataset_large = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    image_size=IMG_SIZE_LARGE,
    validation_split=0.3,
    subset='training',
    label_mode='categorical',
    seed=STUDENT_ID,
    batch_size=batch_size
)

validation_dataset_large = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    image_size=IMG_SIZE_LARGE,
    validation_split=0.3,
    subset='validation',
    label_mode='categorical',
    seed=STUDENT_ID,
    batch_size=batch_size
)

# With shuffle=True the loader reshuffles on every pass, so take()/skip() applied
# directly to it would cut validation and test from different orderings and the
# two sets could share images -- which would inflate the test accuracy reported
# below. Cache the hold-out and read it once in full so that every later pass
# replays the same batches in the same order.
holdout_dataset_large = validation_dataset_large.cache()
holdout_batches_large = sum(1 for _ in holdout_dataset_large)  # fills the cache, counts batches

# Split the frozen hold-out: ~1/3 of its batches for validation, the rest for test
val_batches = holdout_batches_large // 3
test_dataset_large = holdout_dataset_large.skip(val_batches)
validation_dataset_large = holdout_dataset_large.take(val_batches)

# Optimize loading (validation/test are already served from the hold-out cache)
train_dataset_large = train_dataset_large.cache().prefetch(buffer_size=AUTOTUNE)
validation_dataset_large = validation_dataset_large.prefetch(buffer_size=AUTOTUNE)
test_dataset_large = test_dataset_large.prefetch(buffer_size=AUTOTUNE)

# In[45]:

# Separate frozen VGG16 base built for the 299x299 input size
base_model_large = tf.keras.applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(*IMG_SIZE_LARGE, 3),
)
base_model_large.trainable = False

# Same pipeline as the 224x224 model: augmentation -> VGG16 preprocessing ->
# frozen base -> global average pooling -> linear classifier.
inputs = tf.keras.Input(shape=(*IMG_SIZE_LARGE, 3))

data_augmentation_layers_large = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
])
augmented = data_augmentation_layers_large(inputs)

preprocessed = tf.keras.applications.vgg16.preprocess_input(augmented)

# training=False keeps the base in inference mode
features = base_model_large(preprocessed, training=False)

# Classification head; outputs are raw logits (no softmax)
pooled = layers.GlobalAveragePooling2D()(features)
outputs = layers.Dense(num_classes)(pooled)

model = tf.keras.Model(inputs, outputs)

# In[45]:

# Logit outputs -> from_logits=True
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

model.summary()

# This run uses a longer patience (8 epochs) than the earlier models (5)
callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=8,
    monitor='val_loss',
)

history_large = model.fit(
    train_dataset_large,
    epochs=100,
    validation_data=validation_dataset_large,
    callbacks=[callback],
)

# ## Evaluation
# 
# Let's evaluate our best model on the test set

# In[49]:

# `model` is the 299x299 VGG16 model, i.e. simply the last one trained above.
# Wrap it with a Softmax layer so predictions come out as probabilities
# rather than logits.
probability_model = tf.keras.Sequential()
probability_model.add(model)
probability_model.add(tf.keras.layers.Softmax())

# Evaluate on the held-out 299x299 test batches
test_loss, test_accuracy = model.evaluate(test_dataset_large)
print(f"Test accuracy: {test_accuracy:.4f}")

# ## Confusion Matrix
# Let's create a confusion matrix to understand per-class performance

# In[51]:

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Collect predicted and true class indices batch by batch and join them once
# at the end (avoids re-allocating a growing array on every iteration).
predicted_batches = []
true_batches = []
for x, y in test_dataset_large:
    # Inference mode: augmentation layers are inactive. argmax over the logits
    # gives the same class as argmax over the softmax probabilities.
    logits = model(x, training=False)
    predicted_batches.append(tf.argmax(logits, axis=1).numpy())
    true_batches.append(tf.argmax(y, axis=1).numpy())

empty_ids = np.array([], dtype=np.int64)
predictions = np.concatenate(predicted_batches) if predicted_batches else empty_ids
true_labels = np.concatenate(true_batches) if true_batches else empty_ids

# Pass the full label set explicitly: without it the matrix only contains the
# classes that actually occur in the test batches, and its rows/columns would
# no longer line up with the class_names tick labels below.
cm = confusion_matrix(true_labels, predictions, labels=np.arange(len(class_names)))

# Plot confusion matrix
plt.figure(figsize=(10, 8))
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names, rotation=45, ha='right')
plt.yticks(tick_marks, class_names)

# Annotate every cell with its count; switch to white text on dark cells
thresh = cm.max() / 2.
for i, j in np.ndindex(cm.shape):
    plt.text(j, i, format(cm[i, j], 'd'),
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")

plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.tight_layout()
plt.show()

# ## Testing on Custom Images
# 
# Let's test our model on some custom images of gestures
# This part is specific to the project requirements

# In[ ]:

from PIL import Image

# Replace these with paths to your own gesture images.
# Files that cannot be opened or processed are reported by the loop below
# (it prints an error and moves on to the next path).
custom_image_paths = [
    "gesture1.jpg",
    "gesture2.jpg",
    "gesture3.jpg",
    "gesture4.jpg"
]

# In[ ]:

# Size the subplot grid from the number of images instead of assuming four
n_cols = 2
n_rows = max(1, -(-len(custom_image_paths) // n_cols))  # ceiling division

plt.figure(figsize=(15, 10))

for i, image_path in enumerate(custom_image_paths):
    try:
        # Open inside a context manager so the file handle is always released
        with Image.open(image_path) as source_image:
            # The model expects 3-channel input; RGBA, grayscale or palette
            # files would otherwise produce an array with the wrong channel count.
            # PIL's resize takes (width, height), IMG_SIZE_LARGE is (height, width).
            img = source_image.convert("RGB").resize(
                (IMG_SIZE_LARGE[1], IMG_SIZE_LARGE[0])
            )

        # float32 pixels in 0-255 plus a leading batch dimension
        img_array = np.asarray(img, dtype=np.float32)[np.newaxis, ...]

        # Use a dedicated name so the confusion-matrix `predictions` array is not overwritten
        probabilities = probability_model.predict(img_array)[0]
        predicted_class = int(np.argmax(probabilities))
        confidence = probabilities[predicted_class]

        # Display image and prediction
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(img)
        plt.title(f"Predicted: {class_names[predicted_class]}\nConfidence: {confidence:.2f}")
        plt.axis("off")
    except Exception as e:
        # Best effort: report the failing image and continue with the rest
        print(f"Error processing image {image_path}: {e}")

plt.tight_layout()
plt.show()

# ## Summary of Results
# 
# Let's create a summary table to compare all our models, following the assignment requirements

# In[ ]:

# (label, History) pairs in training order. Every column below is derived from
# this single list, so the rows cannot drift out of step with each other.
trained_runs = [
    ("Basic CNN", history),
    ("CNN with Data Augmentation", history_aug),
    ("CNN with Augmentation & Dropout", history_dropout),
    ("CNN with Grayscale Images", history_gray),
    ("Transfer Learning (VGG16, 224x224)", history_tl),
    ("Transfer Learning with Dropout", history_tl_dropout),
    ("Transfer Learning (VGG16, 299x299)", history_large),
]

model_results = {
    "Model": [label for label, _ in trained_runs],
    # Best value reached in any epoch of the run
    "Training Accuracy": [max(run.history['accuracy']) for _, run in trained_runs],
    "Validation Accuracy": [max(run.history['val_accuracy']) for _, run in trained_runs],
    # Only the last model was evaluated on its test set
    "Test Accuracy": ["-"] * (len(trained_runs) - 1) + [test_accuracy],
    "Training Time (epochs)": [len(run.history['accuracy']) for _, run in trained_runs],
    # `model` was re-assigned for every experiment, so model.count_params() would
    # report the last model's size on every row. Each History object keeps a
    # reference to the model it was recorded for; count that one instead.
    "Parameters": [run.model.count_params() for _, run in trained_runs],
}

# Create DataFrame and display
results_df = pd.DataFrame(model_results)
print(results_df)

# Save results to CSV for the paper
results_df.to_csv("model_comparison_results.csv", index=False)

# ## Save the Best Model

# In[ ]:

# Save the model for future use. `model` is the last model trained above
# (VGG16 at 299x299), not one selected by comparing results.
# The explicit ".keras" extension selects the native Keras format; recent Keras
# versions reject a save path without a recognised extension.
model.save("best_gesture_recognition_model.keras")

print("Training and evaluation complete!")

NameError: name 'tf' is not defined