# CNN-based Keypoint Classification

This notebook trains a CNN (Convolutional Neural Network) model for hand gesture keypoint classification.
Unlike the original MLP-based approach, this model uses Conv1D layers to process the keypoint coordinates as a sequence,
followed by dense layers for classification.

## Overview
1. **Data Preprocessing**: Load and preprocess keypoint data
2. **Model Building**: Create a CNN model using Conv1D layers
3. **Training**: Train the model with early stopping and checkpointing
4. **Evaluation**: Evaluate model performance with confusion matrix
5. **Export**: Convert to TensorFlow Lite format for deployment

In [None]:
import csv

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Seed passed as `random_state` to train_test_split so the train/test partition is reproducible.
RANDOM_SEED = 42

# Specify each path

In [None]:
# Training data CSV: column 0 is the class label, columns 1-42 are the keypoint coordinates.
dataset = 'model/cnn_keypoint_classifier/keypoint.csv'
# Native Keras model file: written by the training checkpoint, then reloaded for evaluation and export.
model_save_path = 'model/cnn_keypoint_classifier/cnn_keypoint_classifier.keras'
# Additional HDF5 copy of the trained model.
model_hdf5_path = 'model/cnn_keypoint_classifier/cnn_keypoint_classifier.hdf5'
# Destination of the converted TensorFlow Lite model.
tflite_save_path = 'model/cnn_keypoint_classifier/cnn_keypoint_classifier.tflite'

# Set number of classes

In [None]:
# Number of gesture classes; sets the width of the model's softmax output layer.
# Labels in the dataset are expected to be integers in [0, NUM_CLASSES) — TODO confirm against keypoint.csv.
NUM_CLASSES = 4

# Dataset reading

In [None]:
# Load the 42 coordinate columns (21 keypoints x 2 values) as float32 features;
# column 0 is skipped here because it is read separately as the class label.
X_dataset = np.loadtxt(
    dataset,
    delimiter=',',
    dtype='float32',
    usecols=list(range(1, 2 * 21 + 1)),
)

In [None]:
# Load the integer class label of every sample from the first CSV column.
y_dataset = np.loadtxt(
    dataset,
    delimiter=',',
    dtype='int32',
    usecols=0,
)

In [None]:
# Keep 75% of the samples for training and hold out the rest; the fixed seed
# makes the partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_dataset,
    y_dataset,
    train_size=0.75,
    random_state=RANDOM_SEED,
)

# Reshape data for CNN

The Conv1D layer expects input shape (batch_size, sequence_length, features).
We reshape the 42 features (21 keypoints × 2 coordinates) into (21, 2) representing 21 keypoints with x, y coordinates.

In [None]:
# Conv1D consumes (samples, steps, channels): unflatten each 42-value row
# into 21 keypoints with 2 coordinates each.
X_train_cnn = np.reshape(X_train, (-1, 21, 2))
X_test_cnn = np.reshape(X_test, (-1, 21, 2))

print(f"Training data shape: {X_train_cnn.shape}")
print(f"Test data shape: {X_test_cnn.shape}")

# Model building

We build a CNN model using Conv1D layers to process the keypoint sequence.
The architecture consists of:
- Conv1D layers to extract local features from keypoint sequences
- BatchNormalization for stable training
- MaxPooling1D for dimensionality reduction
- Dense layers for final classification

In [None]:
# Short alias to keep the layer stack readable.
keras_layers = tf.keras.layers

# 1D CNN over the 21-keypoint sequence (2 channels: x and y), ending in a
# softmax over the gesture classes. Shapes in the comments exclude the batch axis.
model = tf.keras.models.Sequential([
    keras_layers.Input(shape=(21, 2)),

    # Convolution block 1: (21, 2) -> (21, 32) -> pooled to (10, 32)
    keras_layers.Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'),
    keras_layers.BatchNormalization(),
    keras_layers.MaxPooling1D(pool_size=2),
    keras_layers.Dropout(rate=0.2),

    # Convolution block 2: (10, 32) -> (10, 64) -> pooled to (5, 64)
    keras_layers.Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'),
    keras_layers.BatchNormalization(),
    keras_layers.MaxPooling1D(pool_size=2),
    keras_layers.Dropout(rate=0.3),

    # Classifier head: flatten (5, 64) -> 320, then two hidden dense layers
    keras_layers.Flatten(),
    keras_layers.Dense(units=64, activation='relu'),
    keras_layers.Dropout(rate=0.4),
    keras_layers.Dense(units=32, activation='relu'),
    keras_layers.Dense(units=NUM_CLASSES, activation='softmax'),
])

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

In [None]:
# Model checkpoint callback
# Checkpoint callback: persist the full model (architecture + weights), but only
# when val_loss improves, so the file at model_save_path always holds the best
# epoch rather than whatever the final epoch happened to produce.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    model_save_path,
    monitor='val_loss',
    verbose=1,
    save_weights_only=False,
    save_best_only=True,
)
# Early-stopping callback: stop once val_loss has not improved for 20 epochs and
# roll the in-memory model back to the best epoch, keeping it consistent with
# the checkpoint on disk.
es_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    verbose=1,
    restore_best_weights=True,
)

In [None]:
# Model compilation
model.compile(
    optimizer='adam',
    # Sparse variant: the targets are integer class ids (loaded as int32), not one-hot vectors.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Model training

In [None]:
# Train the CNN. `epochs` is only an upper bound: es_callback ends training once
# the monitored validation metric stops improving, and cp_callback writes the
# model to disk during training.
# NOTE(review): the held-out test split doubles as validation data here, so
# metrics reported on it afterwards are not an independent estimate.
model.fit(
    X_train_cnn,
    y_train,
    epochs=1000,
    batch_size=128,
    validation_data=(X_test_cnn, y_test),
    callbacks=[cp_callback, es_callback]
)

In [None]:
# Score the trained model on the held-out split; yields the loss and the accuracy metric.
val_loss, val_acc = model.evaluate(
    x=X_test_cnn,
    y=y_test,
    batch_size=128,
)

In [None]:
# Reload the model from the checkpoint file so the following steps run against
# exactly what was persisted to disk.
model = tf.keras.models.load_model(model_save_path)

In [None]:
# Smoke test: classify the first test sample, wrapped in a batch of one.
single_sample_batch = np.array([X_test_cnn[0]])
predict_result = model.predict(single_sample_batch)

class_probabilities = np.squeeze(predict_result)
print(class_probabilities)             # per-class softmax scores
print(np.argmax(class_probabilities))  # index of the most likely class

# Confusion matrix

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

def print_confusion_matrix(y_true, y_pred, report=True):
    """Plot a confusion-matrix heatmap and optionally print a classification report.

    Args:
        y_true: Ground-truth class labels, one per sample.
        y_pred: Predicted class labels, aligned with ``y_true``.
        report: When true, also print scikit-learn's classification report.
    """
    # Build the axes from every label seen in either array: a class that is
    # predicted but never occurs in y_true would otherwise be dropped from the
    # matrix while still showing up in the classification report.
    labels = sorted(set(y_true) | set(y_pred))
    cmx_data = confusion_matrix(y_true, y_pred, labels=labels)

    df_cmx = pd.DataFrame(cmx_data, index=labels, columns=labels)

    _, ax = plt.subplots(figsize=(7, 6))
    # Draw on the axes created above instead of relying on the implicit current axes.
    sns.heatmap(df_cmx, annot=True, fmt='g', square=False, ax=ax)
    # Explicit y-limits spanning all rows, top to bottom (presumably a guard
    # against matplotlib versions that clip the outer heatmap rows).
    ax.set_ylim(len(labels), 0)
    plt.show()

    if report:
        print('Classification Report')
        print(classification_report(y_true, y_pred))

# Predict class probabilities for the whole test split, reduce each row to its
# most likely class, and compare the result against the true labels.
Y_pred = model.predict(X_test_cnn)
y_pred = Y_pred.argmax(axis=1)

print_confusion_matrix(y_test, y_pred)

# Convert the model to TensorFlow Lite format

In [None]:
# Persist the model twice: first as the native Keras file (the same path the
# checkpoint uses), then as an HDF5 copy for legacy compatibility.
for export_path in (model_save_path, model_hdf5_path):
    model.save(export_path)

In [None]:
# Transform model (quantization)

converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Optimize.DEFAULT turns on the converter's default post-training optimizations.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quantized_model = converter.convert()

# Write through a context manager so the file is flushed and closed
# deterministically before anything else opens it.
with open(tflite_save_path, 'wb') as tflite_file:
    tflite_file.write(tflite_quantized_model)

# Inference test with TFLite model

In [None]:
# Load the exported TFLite model from disk and allocate its tensors, which has
# to happen before tensors can be set or the model invoked.
interpreter = tf.lite.Interpreter(model_path=tflite_save_path)
interpreter.allocate_tensors()

In [None]:
# Fetch the input/output tensor metadata; the 'index' field of each entry is
# what set_tensor/get_tensor use to address the tensor.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Dump the metadata for inspection.
print("Input details:", input_details)
print("Output details:", output_details)

In [None]:
# Feed the first test sample, wrapped in a batch of one, into the model's input tensor.
input_tensor_index = input_details[0]['index']
sample_batch = np.array([X_test_cnn[0]])
interpreter.set_tensor(input_tensor_index, sample_batch)

In [None]:
%%time
# Inference implementation: run the interpreter on the previously set input
# tensor, then read the result back from the first output tensor.
interpreter.invoke()
tflite_results = interpreter.get_tensor(output_details[0]['index'])

In [None]:
# Show the TFLite per-class scores and the index of the winning class.
tflite_scores = np.squeeze(tflite_results)
print(tflite_scores)
print(np.argmax(tflite_scores))