In [None]:
import numpy as np

# Load the training feature matrix and the matching gesture labels from disk.
X = np.load('../train_data/train_feature_matrix.npy')
y = np.load("../train_data/train_gesture_labels.npy")

# Trailing expression so the cell displays the feature-matrix shape when run;
# without it the cell produces no output at all.
X.shape

(7500, 128)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv1D, MaxPooling1D, Flatten, 
                                    Dense, Dropout, BatchNormalization)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Kept next to its only use; every other import lives in the block above.
from sklearn.feature_selection import mutual_info_classif

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
SEED = 42            # shared by the split, the MI estimator and the NumPy/TF RNGs
NUM_CLASSES = 6      # width of the one-hot labels and of the softmax layer
SEQ_LEN = 64         # length of the axis the Conv1D layers slide over
MAX_CHANNELS = 6     # cap: at most SEQ_LEN * MAX_CHANNELS = 384 features are kept
VAL_FRACTION = 0.2   # tail share of the training rows that drives the callbacks

np.random.seed(SEED)
tf.random.set_seed(SEED)

# ---------------------------------------------------------------------------
# Load data
# ---------------------------------------------------------------------------
X = np.load('../train_data/train_feature_matrix.npy')
y = np.load("../train_data/train_gesture_labels.npy")

# Verify data
print(f"X shape: {X.shape}, y shape: {y.shape}")
if X.ndim != 2 or y.ndim != 1 or X.shape[0] != y.shape[0]:
    raise ValueError(
        "Expected X of shape (n_samples, n_features) and y of shape "
        f"(n_samples,), got {X.shape} and {y.shape}"
    )
if y.min() < 0 or y.max() >= NUM_CLASSES:
    raise ValueError(
        f"Labels must lie in [0, {NUM_CLASSES - 1}], "
        f"got range [{y.min()}, {y.max()}]"
    )
print("Class distribution:", np.bincount(y))

# ---------------------------------------------------------------------------
# Train-test split
# ---------------------------------------------------------------------------
# Split row indices *before* fitting anything, so the scaler and the feature
# ranking below never see the held-out rows or their labels.
train_idx, test_idx, y_train_labels, y_test_labels = train_test_split(
    np.arange(X.shape[0]), y, test_size=0.2, random_state=SEED, stratify=y
)

# ---------------------------------------------------------------------------
# Preprocessing
# ---------------------------------------------------------------------------
X = np.nan_to_num(X)
scaler = StandardScaler()
scaler.fit(X[train_idx])            # statistics come from training rows only
X_scaled = scaler.transform(X)

# Feature selection by mutual information.
# The number of kept features must be an exact multiple of SEQ_LEN; otherwise
# reshape(-1, SEQ_LEN, channels) would silently merge several samples into one
# and the row count would no longer match the labels.
n_features = X_scaled.shape[1]
n_channels = min(MAX_CHANNELS, n_features // SEQ_LEN)
if n_channels < 1:
    raise ValueError(
        f"Need at least {SEQ_LEN} features to build a ({SEQ_LEN}, channels) "
        f"input, but X has only {n_features}"
    )
n_selected = SEQ_LEN * n_channels

print("Selecting best features...")
mi_scores = mutual_info_classif(
    X_scaled[train_idx], y_train_labels, random_state=SEED
)
# NOTE(review): columns are laid out in ascending MI order, so neighbouring
# positions in the (SEQ_LEN, n_channels) grid are not necessarily related
# features — confirm this layout is what the Conv1D stack should see.
top_features_idx = np.argsort(mi_scores)[-n_selected:]
X_selected = X_scaled[:, top_features_idx]

# Reshape to (samples, SEQ_LEN, n_channels); one row in, one sample out.
X_reshaped = X_selected.reshape(-1, SEQ_LEN, n_channels)
assert X_reshaped.shape[0] == y.shape[0], "reshape changed the number of samples"

X_train, X_test = X_reshaped[train_idx], X_reshaped[test_idx]
y_train = tf.keras.utils.to_categorical(y_train_labels, NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test_labels, NUM_CLASSES)


# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
def build_optimized_cnn(input_shape=(64, 6), num_classes=6, learning_rate=0.00075):
    """Build and compile the three-block 1D CNN classifier.

    Args:
        input_shape: ``(timesteps, channels)`` of one sample. The three
            ``MaxPooling1D(2)`` layers each halve the first axis.
        num_classes: Number of units in the final softmax layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss
        and tracking accuracy.
    """
    model = Sequential([
        # Explicit Input object instead of an `input_shape=` kwarg on the
        # first layer.
        tf.keras.Input(shape=input_shape),

        # Conv Block 1
        Conv1D(64, 5, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(2),
        Dropout(0.2),

        # Conv Block 2
        Conv1D(128, 5, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(2),
        Dropout(0.3),

        # Conv Block 3
        Conv1D(256, 3, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(2),
        Dropout(0.4),

        Flatten(),

        # Dense head
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),

        Dense(num_classes, activation='softmax')
    ])

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


# Size the network from the data actually produced above rather than from
# hard-coded dimensions.
model = build_optimized_cnn(input_shape=X_train.shape[1:], num_classes=NUM_CLASSES)
model.summary()

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
callbacks = [
    EarlyStopping(patience=12, monitor='val_accuracy',
                  restore_best_weights=True, min_delta=0.001),
    ReduceLROnPlateau(factor=0.2, patience=6, min_lr=1e-6)
]

# Early stopping and LR scheduling are driven by a slice of the *training*
# rows. Using the test split here would pick the checkpoint that happens to
# score best on it, and the accuracy reported below would be optimistic.
history = model.fit(
    X_train, y_train,
    epochs=80,
    batch_size=64,
    validation_split=VAL_FRACTION,
    callbacks=callbacks,
    verbose=1
)

# ---------------------------------------------------------------------------
# Evaluation on the untouched test split
# ---------------------------------------------------------------------------
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Accuracy: {test_acc:.4f}")

# ---------------------------------------------------------------------------
# Plot training history
# ---------------------------------------------------------------------------
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
for ax, metric, label in zip(axes, ('accuracy', 'loss'), ('Accuracy', 'Loss')):
    ax.plot(history.history[metric], label=f'Train {label}')
    ax.plot(history.history[f'val_{metric}'], label=f'Val {label}')
    ax.set(title=f'Model {label}', xlabel='Epoch', ylabel=label)
    ax.legend()
fig.tight_layout()
plt.show()

In [31]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the
# feature matrix, printed as plain text to help spot suspicious values.
import pandas as pd

print("Feature statistics:", pd.DataFrame(X).describe(), sep="\n")

Feature statistics:
               0            1            2            3            4    \
count  7500.000000  7500.000000  7500.000000  7500.000000  7500.000000   
mean      0.036374    38.155156     2.302027     0.764887     0.035192   
std       0.013591     2.828549     0.000407     0.035307     0.014225   
min       0.005167    29.296875     2.298264     0.607000     0.006414   
25%       0.027108    35.937500     2.301877     0.741000     0.025549   
50%       0.033248    37.890625     2.302128     0.765000     0.031604   
75%       0.042682    40.625000     2.302296     0.791000     0.040492   
max       0.126178    44.531250     2.302575     0.865000     0.139137   

               5            6            7            8            9    ...  \
count  7500.000000  7500.000000  7500.000000  7500.000000  7500.000000  ...   
mean     33.004740     2.301947     0.651065     0.029273    24.975729  ...   
std       2.872149     0.000456     0.043059     0.013923     4.261392  ... 