In [1]:
import os
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

from src.data.exercise_data import ExerciseDataset

# Load the data
# Both .npy files live in one directory. It can be overridden with the
# GYMALYZE_DATA_DIR environment variable; the fallback is the path that was
# previously hard-coded in both np.load calls.
DATA_DIR = Path(os.environ.get(
    "GYMALYZE_DATA_DIR",
    r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data',
))

# NOTE(review): allow_pickle=True makes np.load unpickle object arrays, which
# can execute arbitrary code. Only load files from a trusted source, and drop
# the flag if the arrays turn out to be plain numeric arrays.
X = np.load(DATA_DIR / 'landmarks_data.npy', allow_pickle=True)
y = np.load(DATA_DIR / 'labels_data.npy', allow_pickle=True)

print(f"Loaded X shape: {X.shape}")
print(f"Loaded y shape: {y.shape}")

# Labels stored as a column vector (N, 1) are flattened to (N,) so that the
# boolean mask built below is one-dimensional and indexes X along axis 0.
if y.ndim == 2 and y.shape[1] == 1:
    y = y.reshape(-1)
    print(f"Flattened y shape: {y.shape}")

# Filter labels to keep
labels_to_keep = [0, 1, 4, 7, 9]
mask = np.isin(y, labels_to_keep)
X_filtered = X[mask]
y_filtered = y[mask]

print(f"Filtered X shape: {X_filtered.shape}")
print(f"Filtered y shape: {y_filtered.shape}")
print(f"Unique labels in filtered data: {np.unique(y_filtered)}")

# Map labels to sequential integers (position in labels_to_keep becomes the new id)
label_mapping = {original_label: new_label for new_label, original_label in enumerate(labels_to_keep)}
y_mapped = np.array([label_mapping[label] for label in y_filtered], dtype=np.int64)
print(f"Mapped labels: {np.unique(y_mapped)}")

# Split data into training and testing (stratified so each split keeps the class balance)
X_train, X_test, y_train, y_test = train_test_split(
    X_filtered, y_mapped, test_size=0.2, random_state=42, stratify=y_mapped)

print(f"Training data shape: {X_train.shape}, Testing data shape: {X_test.shape}")

# ExerciseDataset is imported from src.data.exercise_data rather than defined here.

# Prepare DataLoaders
batch_size = 32
train_dataset = ExerciseDataset(X_train, y_train)
test_dataset = ExerciseDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define Autoencoder Model
class PoseAutoencoder(nn.Module):
    """Frame-wise fully connected autoencoder for pose sequences.

    Every frame of a sequence is encoded and decoded independently by the
    same two-layer MLPs; no information is shared between frames.

    Args:
        input_dim: Number of features in one frame.
        hidden_dim: Width of the hidden layer in both encoder and decoder.
        latent_dim: Size of the bottleneck representation.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )

    def forward(self, x):
        """Reconstruct a batch of sequences.

        Args:
            x: Tensor of shape (batch, seq_length, input_dim).

        Returns:
            Reconstruction with the same shape as ``x``.
        """
        n_sequences, n_frames, n_features = x.size()
        # reshape (not view) so non-contiguous inputs, e.g. transposed or
        # sliced tensors, are accepted instead of raising a RuntimeError.
        frames = x.reshape(n_sequences * n_frames, n_features)
        encoded = self.encoder(frames)
        decoded = self.decoder(encoded)
        # Restore the (batch, seq_length, input_dim) layout.
        return decoded.reshape(n_sequences, n_frames, n_features)

# Model parameters
# NOTE(review): in the cells shown here, `sequence_length` and this
# module-level `epochs` are not read anywhere; the tuning cell's training
# function sets its own local `epochs`. Confirm whether they are still needed.
sequence_length = 300  # Frames per sequence (matches X.shape[1] printed by the load step)
input_size = 132       # Features per frame; 132 = 33 * 4, presumably 33 landmarks with 4 values each (the earlier "* 3" note would give 99) - TODO confirm
hidden_size = 256      # Hidden layer size
latent_size = 64       # Latent space size
epochs = 100




Loaded X shape: (761, 300, 132)
Loaded y shape: (761, 1)
Loaded X shape: (761, 300, 132)
Loaded y shape: (761,)
Filtered X shape: (227, 300, 132)
Filtered y shape: (227,)
Unique labels in filtered data: [0 1 4 7 9]
Mapped labels: [0 1 2 3 4]
Training data shape: (181, 300, 132), Testing data shape: (46, 300, 132)


In [9]:
import csv
import itertools
import torch.nn as nn
from torch.utils.data import DataLoader

# Assuming ExerciseDataset, PoseAutoencoder, and datasets (train_dataset, test_dataset) are already defined

# Destination file for the incremental hyperparameter-tuning log
csv_file = "autoencoder_hyperparameter_tuning_live.csv"

# Column order of the log; also used as the DictWriter field names
header = ["hidden_size", "latent_size", "learning_rate", "batch_size", "test_loss", "epoch_losses"]

# Exclusive-create ('x') fails if the file is already there, so the header row
# is written only the first time; on later runs the existing log is kept and
# new rows are appended to it.
try:
    file = open(csv_file, mode='x', newline='', encoding='utf-8')
except FileExistsError:
    pass
else:
    with file:
        writer = csv.DictWriter(file, fieldnames=header)
        writer.writeheader()

# Function to append results to CSV
def append_result_to_csv(result):
    """Append a single tuning result as one row of the log file.

    Args:
        result: Mapping whose keys are the column names listed in ``header``.
            With ``csv.DictWriter`` defaults, a key outside ``header`` raises
            ``ValueError`` and a missing key is written as an empty field.
    """
    with open(csv_file, mode='a', newline='', encoding='utf-8') as log:
        csv.DictWriter(log, fieldnames=header).writerow(result)

# Function to check if a configuration is already in the CSV
def config_exists_in_csv(hidden_size, latent_size, lr, batch_size, path=None):
    """Report whether a hyperparameter combination is already logged.

    Rows whose key columns are missing or cannot be parsed as numbers (for
    example a row truncated by an interrupted write) are skipped instead of
    aborting the whole tuning run; such a configuration counts as not done.

    Args:
        hidden_size: Hidden layer width to look for.
        latent_size: Latent dimension to look for.
        lr: Learning rate to look for.
        batch_size: Batch size to look for.
        path: CSV file to scan. Defaults to the module-level ``csv_file``.

    Returns:
        True if some row matches all four values, otherwise False.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If the file's header row lacks one of the key columns.
    """
    wanted = (hidden_size, latent_size, lr, batch_size)
    source = csv_file if path is None else path
    with open(source, mode='r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            try:
                recorded = (
                    int(row["hidden_size"]),
                    int(row["latent_size"]),
                    float(row["learning_rate"]),
                    int(row["batch_size"]),
                )
            except (TypeError, ValueError):
                # Short rows give None (TypeError); junk text gives ValueError.
                continue
            if recorded == wanted:
                return True
    return False

# Search space for the grid search
hidden_sizes = [128, 256, 512, 1024, 2048]
latent_sizes = [32, 64, 128]
learning_rates = [0.001, 0.0005, 0.0001]
batch_sizes = [32, 64]

# Full Cartesian product as a list of (hidden, latent, lr, batch) tuples;
# the right-most factor varies fastest, as with itertools.product.
param_combinations = [
    (hidden, latent, rate, batch)
    for hidden in hidden_sizes
    for latent in latent_sizes
    for rate in learning_rates
    for batch in batch_sizes
]

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Training function
def train_autoencoder_with_losses(hidden_size, latent_size, lr, batch_size, epochs=300):
    """Train a PoseAutoencoder on ``train_dataset`` and score it on ``test_dataset``.

    Losses are per-sample means: each batch's MSE is weighted by the number of
    sequences in that batch. An unweighted mean of batch means over-weights a
    smaller final batch, which would make the reported loss depend on
    ``batch_size`` and therefore not comparable across tuning runs.

    Args:
        hidden_size: Hidden layer width of the autoencoder.
        latent_size: Bottleneck size of the autoencoder.
        lr: Learning rate for the Adam optimizer.
        batch_size: Batch size for both the training and the test loader.
        epochs: Number of passes over the training set (default 300).

    Returns:
        Tuple ``(test_loss, epoch_losses)``: the mean reconstruction MSE on the
        test set, and a list with the mean training MSE of every epoch. A mean
        over zero samples is reported as NaN.
    """
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    autoencoder = PoseAutoencoder(input_size, hidden_size, latent_size).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=lr)
    epoch_losses = []

    autoencoder.train()
    for _epoch in range(epochs):
        loss_sum = 0.0
        samples_seen = 0
        for sequences, _ in train_loader:
            sequences = sequences.to(device, non_blocking=True)
            optimizer.zero_grad()
            reconstruction = autoencoder(sequences)
            loss = criterion(reconstruction, sequences)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; scale back by batch size so
            # the epoch figure is a mean over samples, not over batches.
            n_in_batch = sequences.size(0)
            loss_sum += loss.item() * n_in_batch
            samples_seen += n_in_batch
        epoch_losses.append(loss_sum / samples_seen if samples_seen else float("nan"))

    autoencoder.eval()
    test_sum = 0.0
    test_seen = 0
    with torch.no_grad():
        for sequences, _ in test_loader:
            sequences = sequences.to(device, non_blocking=True)
            reconstruction = autoencoder(sequences)
            loss = criterion(reconstruction, sequences)
            n_in_batch = sequences.size(0)
            test_sum += loss.item() * n_in_batch
            test_seen += n_in_batch
    test_loss = test_sum / test_seen if test_seen else float("nan")

    return test_loss, epoch_losses

# Grid search driver: train every configuration not yet present in the CSV
# log and append its result right away, so an interrupted run can resume.
for hidden_size, latent_size, lr, batch_size in param_combinations:
    if not config_exists_in_csv(hidden_size, latent_size, lr, batch_size):
        print(f"Training with Hidden Size: {hidden_size}, Latent Size: {latent_size}, Learning Rate: {lr}, Batch Size: {batch_size}")
        test_loss, epoch_losses = train_autoencoder_with_losses(hidden_size, latent_size, lr, batch_size)
        print(f"Test Loss: {test_loss}")

        # Persist this configuration's outcome before moving on
        result = dict(
            hidden_size=hidden_size,
            latent_size=latent_size,
            learning_rate=lr,
            batch_size=batch_size,
            test_loss=test_loss,
            epoch_losses=epoch_losses,
        )
        append_result_to_csv(result)
    else:
        print(f"Skipping already completed config: Hidden Size={hidden_size}, Latent Size={latent_size}, LR={lr}, Batch Size={batch_size}")

print(f"Results saved incrementally to {csv_file}")


Skipping already completed config: Hidden Size=128, Latent Size=32, LR=0.001, Batch Size=32
Skipping already completed config: Hidden Size=128, Latent Size=32, LR=0.001, Batch Size=64
Skipping already completed config: Hidden Size=128, Latent Size=32, LR=0.0005, Batch Size=32
Skipping already completed config: Hidden Size=128, Latent Size=32, LR=0.0005, Batch Size=64
Skipping already completed config: Hidden Size=128, Latent Size=32, LR=0.0001, Batch Size=32
Skipping already completed config: Hidden Size=128, Latent Size=32, LR=0.0001, Batch Size=64
Skipping already completed config: Hidden Size=128, Latent Size=64, LR=0.001, Batch Size=32
Skipping already completed config: Hidden Size=128, Latent Size=64, LR=0.001, Batch Size=64
Skipping already completed config: Hidden Size=128, Latent Size=64, LR=0.0005, Batch Size=32
Skipping already completed config: Hidden Size=128, Latent Size=64, LR=0.0005, Batch Size=64
Skipping already completed config: Hidden Size=128, Latent Size=64, LR=0.0