In [2]:
import os
import glob
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
from sklearn.preprocessing import LabelEncoder

In [3]:
# Set folder path containing JSON files
folder_path = "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/keypoints" 

# List all JSON files in the folder
json_files = glob.glob(os.path.join(folder_path, "*.json"))

# Function to get maximum length for a part within keypoints of one file
def get_max_length(keypoints, part):
    lengths = []
    for kp in keypoints:
        if kp.get(part) is not None:
            lengths.append(np.array(kp[part]).flatten().shape[0])
    return max(lengths) if lengths else 0

# Compute global expected lengths for each part across all files
parts = ['pose', 'face', 'left_hand', 'right_hand']
global_expected = {part: 0 for part in parts}
all_data = []
all_labels = []
for file in json_files:
    with open(file, "r") as f:
        data = json.load(f)
    all_data.append(data)
    all_labels.append(data["gloss"])
    for part in parts:
        max_len = get_max_length(data["keypoints"], part)
        if max_len > global_expected[part]:
            global_expected[part] = max_len

print("Global expected lengths:", global_expected)
print("Number of files loaded:", len(all_data))

Global expected lengths: {'pose': 0, 'face': 0, 'left_hand': 0, 'right_hand': 0}
Number of files loaded: 0


In [4]:
# Extract features from keypoints using global expected lengths
def extract_features(keypoints, expected_lengths):
    features = []
    for kp in keypoints:
        frame_features = []
        for part in parts:
            # Check if the part exists and is non-empty
            if kp.get(part) is not None and len(kp[part]) > 0:
                vals = np.array(kp[part]).flatten().tolist()
                frame_features.extend(vals)
            else:
                frame_features.extend([0] * expected_lengths[part])
        features.append(frame_features)
    return np.array(features)

# Process each file to get a list of feature tensors (timesteps x feature_dim)
feature_list = []
filtered_labels = []
for data in all_data:
    if not data["keypoints"]:
        continue
    features = extract_features(data["keypoints"], global_expected)
    if features.size == 0:
        continue
    tensor_feat = torch.tensor(features, dtype=torch.float32)
    feature_list.append(tensor_feat)
    filtered_labels.append(data["gloss"])

# Pad sequences (batch, seq, feature)
X_tensor = pad_sequence(feature_list, batch_first=True)
print("Padded features shape:", X_tensor.shape)

# Normalize features (using overall mean and std)
mean = X_tensor.mean()
std = X_tensor.std() + 1e-5  # avoid division by zero
X_tensor = (X_tensor - mean) / std
print("Feature tensor normalized")

RuntimeError: received an empty list of sequences

In [None]:
# Encode labels from filtered files
le = LabelEncoder()
labels_encoded = le.fit_transform(filtered_labels)
y_tensor = torch.tensor(labels_encoded, dtype=torch.long)
print("Encoded labels:", labels_encoded)

In [None]:
class LSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        out, _ = self.lstm(x)  # out: (batch, timesteps, hidden_size)
        # Use the output from the last timestep
        out = out[:, -1, :]
        out = self.dropout(out)
        out = self.fc(out)
        return out

input_size = X_tensor.shape[2]
hidden_size = 64
num_layers = 2
num_classes = len(le.classes_)
model = LSTMClassifier(input_size, hidden_size, num_layers, num_classes)
print(model)

In [None]:
criterion = nn.CrossEntropyLoss()
# Use a higher learning rate to see faster changes (adjust as needed)
optimizer = optim.Adam(model.parameters(), lr=0.01)
num_epochs = 30

model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_tensor)
    loss = criterion(outputs, y_tensor)
    loss.backward()
    optimizer.step()

# Calculate accuracy
    _, predicted = torch.max(outputs, 1)
    correct = (predicted == y_tensor).sum().item()
    accuracy = correct / y_tensor.size(0)
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy*100:.2f}%")

# Save the model
torch.save(model.state_dict(), "lstm_model.pth")

In [None]:
model.eval()
with torch.no_grad():
    pred = model(X_tensor)
    predicted_classes = torch.argmax(pred, dim=1).numpy()
    predicted_labels = le.inverse_transform(predicted_classes)
    print("Predicted labels:", predicted_labels)