In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import os
from sklearn.metrics import precision_score
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from src.models.lstm import ExerciseLSTM
from src.data.exercise_data import ExerciseDataset
import numpy as np

# Load the landmark sequences (X) and their labels (y) from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# keep it only as long as these files come from a trusted source.
X = np.load(
    r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\landmarks_data.npy',
    allow_pickle=True,
)
y = np.load(
    r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\labels_data.npy',
    allow_pickle=True,
)

print(f"Loaded X shape: {X.shape}")
print(f"Loaded y shape: {y.shape}")

# Collapse a column vector of labels, shape (n, 1), into a flat (n,) array.
if y.shape[1:] == (1,):
    y = y.ravel()

print(f"Loaded X shape: {X.shape}")
print(f"Loaded y shape: {y.shape}")

# Keep only the samples whose label belongs to the selected subset.
labels_to_keep = [0, 1, 4, 7, 9]
mask = np.isin(y, labels_to_keep)
X_filtered, y_filtered = X[mask], y[mask]

print(f"Filtered X shape: {X_filtered.shape}")
print(f"Filtered y shape: {y_filtered.shape}")
print(f"Unique labels in filtered data: {np.unique(y_filtered)}")

# Re-index the kept labels to the contiguous range 0..len(labels_to_keep)-1,
# following their order in labels_to_keep.
label_mapping = {kept: position for position, kept in enumerate(labels_to_keep)}
y_mapped = np.fromiter(
    (label_mapping[value] for value in y_filtered),
    dtype=np.int64,
    count=len(y_filtered),
)
print(f"Mapped labels: {np.unique(y_mapped)}")

# Stratified 80/20 train-test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_filtered,
    y_mapped,
    test_size=0.2,
    random_state=42,
    stratify=y_mapped,
)

train_dataset = ExerciseDataset(X_train, y_train)
test_dataset = ExerciseDataset(X_test, y_test)

# Both loaders share every option except shuffling.
batch_size = 32
loader_options = {"batch_size": batch_size, "num_workers": 4, "pin_memory": True}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Hyperparameter grid: every combination of the three lists below is one
# configuration, and each configuration is trained for num_epochs epochs.
hidden_sizes = [128, 256, 512]
learning_rates = [0.01, 0.001, 0.0001]
num_layers = [1]
num_epochs = 20

# CSV file holding one row per finished configuration, so that an
# interrupted search can be resumed.
results_file = "hyperparameter_tuning_progress.csv"


def load_previous_results(path):
    """Read the results table saved by an earlier run.

    Args:
        path: Location of the results CSV.

    Returns:
        The saved table as a DataFrame. A missing file, or a zero-byte file
        (for example left behind by a run interrupted while saving), yields
        an empty DataFrame instead of raising, so the search simply starts
        from scratch.
    """
    try:
        return pd.read_csv(path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return pd.DataFrame()


# Load existing results; an empty table produces an empty list of records.
results_df = load_previous_results(results_file)
results = results_df.to_dict("records")

# Helper to check if hyperparameters have already been processed
def is_completed(hidden_size, lr, layers):
    """Tell whether a configuration already has a record in ``results``.

    Reads the module-level ``results`` list, whose records are dicts with
    the keys ``"hidden_size"``, ``"learning_rate"`` and ``"num_layers"``.

    Args:
        hidden_size: Hidden size of the configuration.
        lr: Learning rate of the configuration.
        layers: Number of layers of the configuration.

    Returns:
        True if some record matches all three values, False otherwise.
    """
    import math  # local import keeps this block self-contained

    def same_learning_rate(stored):
        # Records reloaded from CSV carry a learning rate that went through
        # a text round trip, so compare with a tolerance rather than with
        # exact float equality. Non-numeric values never match.
        try:
            return math.isclose(float(stored), lr, rel_tol=1e-9)
        except (TypeError, ValueError):
            return False

    return any(
        res["hidden_size"] == hidden_size
        and res["num_layers"] == layers
        and same_learning_rate(res["learning_rate"])
        for res in results
    )

def collect_predictions(model, loader, device):
    """Run ``model`` over ``loader`` in eval mode and gather its predictions.

    Args:
        model: Network to evaluate. It is switched to eval mode and left
            there; the caller re-enables training mode when needed.
        loader: Iterable yielding ``(sequences, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A ``(labels, predictions)`` pair of flat lists of Python ints,
        aligned by position. The prediction is the index of the largest
        output along dimension 1.
    """
    gathered_labels = []
    gathered_predictions = []
    model.eval()
    with torch.no_grad():
        for sequences, labels in loader:
            sequences, labels = sequences.to(device), labels.to(device)
            predicted = model(sequences).argmax(dim=1)
            gathered_labels.extend(labels.cpu().tolist())
            gathered_predictions.extend(predicted.cpu().tolist())
    return gathered_labels, gathered_predictions


# Grid search: train one model per (hidden_size, learning_rate, num_layers)
# combination, skipping combinations that already have a saved record.
# NOTE(review): every configuration is scored on test_loader, so the test
# split doubles as the model-selection set; consider a separate validation
# split before reporting final numbers.
for hidden_size in hidden_sizes:
    for lr in learning_rates:
        for layers in num_layers:
            # Skip if this configuration is already processed
            if is_completed(hidden_size, lr, layers):
                print(f"Skipping already completed: hidden_size={hidden_size}, learning_rate={lr}, num_layers={layers}")
                continue

            print(f"Processing: hidden_size={hidden_size}, learning_rate={lr}, num_layers={layers}")

            # Define model. The first argument (132) equals the last
            # dimension of X -- presumably the per-frame feature size;
            # confirm against ExerciseLSTM.
            model = ExerciseLSTM(132, hidden_size, layers, len(labels_to_keep))
            model.to(device)

            # Define criterion and optimizer
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)

            # Learning curves, one entry per epoch
            epoch_losses = []
            epoch_accuracies = []
            epoch_precisions = []

            for epoch in range(num_epochs):
                # collect_predictions leaves the model in eval mode, so
                # training mode is re-enabled at the start of every epoch.
                model.train()
                total_loss = 0
                correct_train = 0
                total_train = 0

                for sequences, labels in train_loader:
                    sequences, labels = sequences.to(device), labels.to(device)

                    # Forward pass
                    outputs = model(sequences)
                    loss = criterion(outputs, labels)

                    # Backward pass
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    # Track loss
                    total_loss += loss.item()

                    # Track accuracy during training
                    predicted = outputs.argmax(dim=1)
                    total_train += labels.size(0)
                    correct_train += (predicted == labels).sum().item()

                # Mean batch loss and training accuracy (percent) for this epoch
                train_accuracy = 100 * correct_train / total_train
                epoch_losses.append(total_loss / len(train_loader))
                epoch_accuracies.append(train_accuracy)

                # Weighted precision on the test split after this epoch.
                # zero_division=0 keeps the value scikit-learn would use
                # anyway for a class that is never predicted, without
                # emitting a warning on every such epoch.
                all_labels, all_predictions = collect_predictions(model, test_loader, device)
                precision = precision_score(
                    all_labels, all_predictions, average='weighted', zero_division=0
                )
                # Store a plain float: a NumPy scalar inside a list would be
                # written to the CSV as "np.float64(...)" under NumPy 2.
                epoch_precisions.append(float(precision))

            # Evaluate final test accuracy (percent)
            final_labels, final_predictions = collect_predictions(model, test_loader, device)
            total = len(final_labels)
            correct = sum(
                guess == truth for guess, truth in zip(final_predictions, final_labels)
            )

            test_accuracy = 100 * correct / total
            result = {
                "hidden_size": hidden_size,
                "learning_rate": lr,
                "num_layers": layers,
                "test_accuracy": test_accuracy,
                "epoch_losses": epoch_losses,
                "epoch_accuracies": epoch_accuracies,
                "epoch_precisions": epoch_precisions,
            }
            results.append(result)

            print(f"Final Test Accuracy: {test_accuracy:.2f}%\n")

            # Save results after each iteration. Write to a temporary file
            # and swap it in, so an interruption during the write cannot
            # leave a truncated checkpoint behind.
            results_df = pd.DataFrame(results)
            tmp_results_file = results_file + ".tmp"
            results_df.to_csv(tmp_results_file, index=False)
            os.replace(tmp_results_file, results_file)


Loaded X shape: (761, 300, 132)
Loaded y shape: (761, 1)
Loaded X shape: (761, 300, 132)
Loaded y shape: (761,)
Filtered X shape: (227, 300, 132)
Filtered y shape: (227,)
Unique labels in filtered data: [0 1 4 7 9]
Mapped labels: [0 1 2 3 4]
Using device: cuda
Skipping already completed: hidden_size=128, learning_rate=0.01, num_layers=1
Skipping already completed: hidden_size=128, learning_rate=0.001, num_layers=1
Skipping already completed: hidden_size=128, learning_rate=0.0001, num_layers=1
Skipping already completed: hidden_size=256, learning_rate=0.01, num_layers=1
Skipping already completed: hidden_size=256, learning_rate=0.001, num_layers=1
Skipping already completed: hidden_size=256, learning_rate=0.0001, num_layers=1
Skipping already completed: hidden_size=512, learning_rate=0.01, num_layers=1
Skipping already completed: hidden_size=512, learning_rate=0.001, num_layers=1
Skipping already completed: hidden_size=512, learning_rate=0.0001, num_layers=1
