# New Training

In [None]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Paths
LABELS_PATH = "/home/haggenmueller/asl_detection/machine_learning/models/lstm/label_to_index.json"
DATA_DIR = "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/keypoints_npy"
MODEL_PATH = "/home/haggenmueller/asl_detection/machine_learning/models/lstm/best_lstm_model.pth"

# Parameters
SEQUENCE_LENGTH = 102  # Number of frames per sequence
INPUT_SIZE = 300  # Number of extracted keypoints (adjusted based on feature extraction)
BATCH_SIZE = 32
EPOCHS = 200
LR = 0.0001
PATIENCE = 5  # Early stopping patience

# Load labels
with open(LABELS_PATH, "r") as f:
    label_to_index = json.load(f)
    index_to_label = {v: k for k, v in label_to_index.items()}
    NUM_CLASSES = len(label_to_index)

# Dataset class
class ASLDataset(Dataset):
    def __init__(self, data_dir, labels_map, sequence_length):
        self.data_dir = data_dir
        self.sequence_length = sequence_length
        self.samples = []
        
        for file in os.listdir(data_dir):
            if file.endswith(".npy"):
                filename_parts = file.split("_")
                label_name = filename_parts[1]  # Extract label from filename structure
                if label_name in labels_map:
                    self.samples.append((os.path.join(data_dir, file), labels_map[label_name]))
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        file_path, label = self.samples[idx]
        keypoints = np.load(file_path)
        
        # Ignore face keypoints (first 1872 values)
        keypoints = keypoints[:, 468 * 4:]  # 4 values per keypoint (x, y, z, visibility)
        
        # Ensure all sequences are of the same length
        if keypoints.shape[0] < self.sequence_length:
            pad = np.zeros((self.sequence_length - keypoints.shape[0], keypoints.shape[1]))
            keypoints = np.vstack((keypoints, pad))
        else:
            keypoints = keypoints[:self.sequence_length]
        
        return torch.tensor(keypoints, dtype=torch.float32), torch.tensor(label, dtype=torch.long)
    
# LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.batch_norm = nn.BatchNorm1d(hidden_size)  # Batch Normalization
        self.dropout = nn.Dropout(0.3)  # Dropout to prevent overfitting
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.batch_norm(out[:, -1, :])  # Apply Batch Normalization
        out = self.dropout(out)  # Apply Dropout
        out = self.fc(out)
        return out

# Load data
dataset = ASLDataset(DATA_DIR, label_to_index, SEQUENCE_LENGTH)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Initialize model
model = LSTMModel(input_size=INPUT_SIZE, hidden_size=512, num_layers=3, num_classes=NUM_CLASSES)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Apply Xavier Initialization
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight.data)
    elif isinstance(m, nn.LSTM):
        for name, param in m.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param.data)

model.apply(init_weights)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

# Early stopping
best_loss = float("inf")
stopping_counter = 0

# Training loop
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for keypoints, labels in dataloader:
        keypoints, labels = keypoints.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(keypoints)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
    
    avg_loss = total_loss / len(dataloader)
    accuracy = 100 * correct / float(total)
    print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
    
    # Adjust learning rate
    scheduler.step(avg_loss)
    
    # Save best model if it improves
    if avg_loss < best_loss:
        best_loss = avg_loss
        stopping_counter = 0
        torch.save(model.state_dict(), MODEL_PATH)
        print(f"✅ Best model saved at epoch {epoch+1} with loss {avg_loss:.4f}")
    else:
        stopping_counter += 1
        if stopping_counter >= PATIENCE:
            print("Early stopping triggered.")
            break

print("Training completed! Best model saved at:", MODEL_PATH)

Epoch [1/200], Loss: 5.3631, Accuracy: 0.67%
✅ Best model saved at epoch 1 with loss 5.3631
Epoch [2/200], Loss: 5.3167, Accuracy: 0.96%
✅ Best model saved at epoch 2 with loss 5.3167
Epoch [3/200], Loss: 5.2880, Accuracy: 1.26%
✅ Best model saved at epoch 3 with loss 5.2880
Epoch [4/200], Loss: 5.1410, Accuracy: 1.50%
✅ Best model saved at epoch 4 with loss 5.1410
Epoch [5/200], Loss: 5.0037, Accuracy: 1.95%
✅ Best model saved at epoch 5 with loss 5.0037
Epoch [6/200], Loss: 4.9226, Accuracy: 2.66%
✅ Best model saved at epoch 6 with loss 4.9226
Epoch [7/200], Loss: 4.8310, Accuracy: 3.59%
✅ Best model saved at epoch 7 with loss 4.8310
Epoch [8/200], Loss: 4.6754, Accuracy: 4.68%
✅ Best model saved at epoch 8 with loss 4.6754
Epoch [9/200], Loss: 4.4711, Accuracy: 6.59%
✅ Best model saved at epoch 9 with loss 4.4711
Epoch [10/200], Loss: 4.3216, Accuracy: 8.04%
✅ Best model saved at epoch 10 with loss 4.3216
Epoch [11/200], Loss: 4.2373, Accuracy: 9.46%
✅ Best model saved at epoch 11 w

In [29]:
# Model evaluation
def evaluate_model(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for keypoints, labels in dataloader:
            keypoints, labels = keypoints.to(device), labels.to(device)
            outputs = model(keypoints)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

evaluate_model(model, dataloader)

Test Accuracy: 99.20%


99.19856459330144