In [None]:
# Initialize KFold
from sklearn.model_selection import KFold
from imblearn.over_sampling import SMOTE
from torch.utils.data import TensorDataset, DataLoader

# Five shuffled folds over the users; the fixed seed makes the partition reproducible
kf = KFold(n_splits=5, shuffle=True, random_state=SEED)

# kf.split yields (train, validation) arrays of POSITIONS into unique_users,
# not the user IDs themselves
kf_splits = list(kf.split(unique_users))

fold_results = []  # training history returned by fit(), one entry per fold

for fold_no, (train_users_fold, val_users_fold) in enumerate(kf_splits, start=1):
    print(f"Processing Fold {fold_no}")

    # Translate the positional indices into the actual user IDs of this fold
    # (assumes unique_users supports array indexing, e.g. a NumPy array — TODO confirm)
    train_user_ids = unique_users[train_users_fold]
    val_user_ids = unique_users[val_users_fold]

    # Select rows by user ID. Both filters must use the ID arrays: filtering on the
    # raw positional indices would pick the wrong users whenever IDs != positions.
    df_fold_train = df[df['sample_index'].isin(train_user_ids)]
    df_fold_val = df[df['sample_index'].isin(val_user_ids)]

    # Build windowed sequences separately for each side of the split
    X_train_fold, y_train_fold = build_sequences(df_fold_train, WINDOW_SIZE, STRIDE)
    X_val_fold, y_val_fold = build_sequences(df_fold_val, WINDOW_SIZE, STRIDE)

    # SMOTE expects 2-D input, so collapse every window into one flat feature vector
    X_train_fold_flat = X_train_fold.reshape(X_train_fold.shape[0], -1)

    # Oversample the training windows only; the validation windows stay untouched
    smote = SMOTE(random_state=SEED)
    X_train_fold_resampled, y_train_fold_resampled = smote.fit_resample(X_train_fold_flat, y_train_fold)

    # Restore the (windows, WINDOW_SIZE, features) layout after resampling
    X_train_fold_resampled = X_train_fold_resampled.reshape(
        X_train_fold_resampled.shape[0], WINDOW_SIZE, X_train_fold.shape[2]
    )

    # Wrap the NumPy arrays as PyTorch datasets
    train_ds_fold = TensorDataset(torch.from_numpy(X_train_fold_resampled), torch.from_numpy(y_train_fold_resampled))
    val_ds_fold   = TensorDataset(torch.from_numpy(X_val_fold), torch.from_numpy(y_val_fold))

    # Only the training loader is shuffled; validation keeps its original order
    train_loader_fold = make_loader(train_ds_fold, batch_size=BATCH_SIZE, shuffle=True, drop_last=False)
    val_loader_fold   = make_loader(val_ds_fold, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)

    input_shape = X_train_fold_resampled.shape
    # NOTE(review): derived from the labels present in this fold's training data;
    # presumably this should match the length of WEIGHTS — confirm
    num_classes = len(np.unique(y_train_fold_resampled))

    # Fresh, untrained model for every fold so no weights carry over between folds
    fold_model = RecurrentClassifier(
        input_size=input_shape[-1],
        hidden_size=128,
        num_layers=2,
        num_classes=num_classes,
        dropout_rate=0.1,
        bidirectional=False,
        rnn_type='GRU'
    ).to(device)

    # Per-fold optimizer, gradient scaler (active on CUDA only) and weighted loss
    fold_optimizer = torch.optim.AdamW(fold_model.parameters(), lr=1e-3, weight_decay=0)
    fold_scaler = torch.amp.GradScaler(enabled=(device.type == 'cuda'))
    fold_criterion = nn.CrossEntropyLoss(weight=WEIGHTS)

    # Train model for the current fold
    print("  Training model for current fold...")
    trained_fold_model, fold_training_history = fit(
        model=fold_model,
        train_loader=train_loader_fold,
        val_loader=val_loader_fold,
        epochs=EPOCHS,
        criterion=fold_criterion,
        optimizer=fold_optimizer,
        scaler=fold_scaler,
        device=device,
        writer=None,  # no TensorBoard logging for individual folds to avoid clutter
        verbose=50,  # print less frequently during fold training
        experiment_name=f"fold_{fold_no}_rnn",  # experiment name carries the fold number
        patience=PATIENCE
    )

    # Keep the training history of this fold for later aggregation
    fold_results.append(fold_training_history)

    print(f"  Finished training for Fold {fold_no}")
    print("-" * 30)

# After the loop, you can aggregate the results from fold_results

Processing Fold 1
  Training model for current fold...
Training 500 epochs...


: 

: 

In [None]:
import numpy as np

# Gather the last recorded validation loss and F1 of every fold.
# NOTE(review): this takes the final history entry, not the best one; if fit()
# stops early and restores an earlier checkpoint the two may differ — confirm.
all_val_loss, all_val_f1 = [], []
for history in fold_results:
    all_val_loss.append(history['val_loss'][-1])
    all_val_f1.append(history['val_f1'][-1])

# Unweighted mean over the folds
average_val_loss = np.mean(all_val_loss)
average_val_f1 = np.mean(all_val_f1)

print(
    f"Average Validation Loss across folds: {average_val_loss:.4f}",
    f"Average Validation F1 Score across folds: {average_val_f1:.4f}",
    sep="\n",
)

Average Validation Loss across folds: 0.7079
Average Validation F1 Score across folds: 0.5624


In [None]:
def grid_search_cv(train_loader, val_loader, input_size, num_classes, device):
    """Exhaustively search a fixed hyperparameter grid on one train/validation split.

    For every combination a fresh unidirectional GRU ``RecurrentClassifier`` is
    trained with ``fit`` (150 epochs, patience 40) and then scored with
    ``validate_one_epoch``. The combination with the highest validation F1 wins;
    on ties the combination that was tried first is kept.

    Note: despite the ``_cv`` suffix no cross-validation is performed here; every
    combination is scored on the single ``val_loader`` that is passed in.

    Args:
        train_loader: Loader handed to ``fit`` as training data.
        val_loader: Loader handed to ``fit`` and ``validate_one_epoch``.
        input_size: Forwarded to ``RecurrentClassifier(input_size=...)``.
        num_classes: Forwarded to ``RecurrentClassifier(num_classes=...)``.
        device: Device the model is moved to; ``device.type == 'cuda'`` enables
            the gradient scaler.

    Returns:
        dict: The best combination with the keys ``learning_rate``,
        ``hidden_size``, ``num_layers``, ``dropout_rate``, ``l1_lambda`` and
        ``l2_lambda``.
    """
    import itertools  # local import so the function does not rely on another cell

    learning_rates = [1e-3, 1e-4]
    hidden_sizes = [128, 256]
    hidden_layers = [1, 2]
    dropout_rates = [0.1, 0.2]
    l1_lambdas = [0, 1e-4]
    l2_lambdas = [0, 1e-4]

    # Cartesian product in the same order as nested loops would produce:
    # the learning rate varies slowest, the L2 coefficient fastest.
    combinations = list(itertools.product(
        learning_rates, hidden_sizes, hidden_layers, dropout_rates, l1_lambdas, l2_lambdas
    ))
    total_combinations = len(combinations)

    # Start below every reachable score so the first finished run always becomes
    # the incumbent; an all-zero F1 search then still returns usable parameters
    # instead of an empty dict.
    best_f1 = float('-inf')
    best_params = {}

    # The loss function is shared by all combinations
    criterion = nn.CrossEntropyLoss(weight=WEIGHTS)

    print(f"Starting Grid Search with {total_combinations} combinations...")
    for run_no, (lr, hs, hl, dr, l1, l2) in enumerate(combinations, start=1):
        print(f"\nCombination no.{run_no}/{total_combinations}")
        print(f"\nTraining with params: LR={lr}, HS={hs}, HL={hl}, DR={dr}, L1={l1}, L2={l2}")

        # Fresh model for each combination so no weights carry over between runs
        model = RecurrentClassifier(
            input_size=input_size,
            hidden_size=hs,
            num_layers=hl,
            num_classes=num_classes,
            dropout_rate=dr,
            bidirectional=False,  # architecture is not part of the grid
            rnn_type='GRU'        # architecture is not part of the grid
        ).to(device)

        # Optimizer and gradient scaler for the current combination.
        # NOTE(review): l2 is used as AdamW weight_decay AND passed to fit() as
        # l2_lambda; if fit() adds its own L2 penalty the regularization is
        # applied twice — confirm against fit().
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=l2)
        scaler = torch.amp.GradScaler(enabled=(device.type == 'cuda'))

        # Fixed epoch budget and patience for every grid-search run
        trained_model, _ = fit(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            epochs=150,
            criterion=criterion,
            optimizer=optimizer,
            scaler=scaler,
            device=device,
            writer=None,
            verbose=10,
            experiment_name=f"grid_search_lr{lr}_hs{hs}_hl_{hl}_dr{dr}_l1{l1}_l2{l2}",
            patience=40,
            l1_lambda=l1,
            l2_lambda=l2
        )

        # Score the model returned by fit() on the validation set
        val_loss, val_f1 = validate_one_epoch(
            trained_model, val_loader, criterion, device
        )

        print(f"  Validation F1 Score: {val_f1:.4f}")

        # Strict comparison: on ties the earlier combination stays the best
        if val_f1 > best_f1:
            best_f1 = val_f1
            best_params = {
                'learning_rate': lr,
                'hidden_size': hs,
                'num_layers': hl,
                'dropout_rate': dr,
                'l1_lambda': l1,
                'l2_lambda': l2
            }
            print(f"  New best F1 found: {best_f1:.4f} with params {best_params}")

    print("\nGrid Search Finished.")
    print(f"Best Validation F1 Score: {best_f1:.4f}")
    print(f"Best Parameters: {best_params}")

    return best_params

# Run the grid search. train_loader, val_loader, input_shape, num_classes and
# device must already exist in the notebook namespace; the model input size is
# the last dimension of input_shape.
best_hyperparameters = grid_search_cv(
    train_loader=train_loader,
    val_loader=val_loader,
    input_size=input_shape[-1],
    num_classes=num_classes,
    device=device,
)

print("\nBest hyperparameters found by grid search:", best_hyperparameters, sep="\n")

In [None]:
# Load the test data
test_df = pd.read_csv("pirate_pain_test.csv")

# The test set must be preprocessed with the objects saved to disk earlier:
# only transform() is called on them here, nothing is re-fitted on test data.
# NOTE(security): pickle.load can execute arbitrary code — only load pickle
# files that were produced by this project.
import pickle

with open('label_encoders.pkl', 'rb') as encoders_file:
    label_encoders = pickle.load(encoders_file)

# Apply the stored categorical encoding, one encoder per column, as float32.
# NOTE(review): a recorded run of this cell failed with
# "ValueError: invalid literal for int() with base 10: 'two'". Presumably the
# pickled encoders were fitted on columns that had already been converted to
# integers, while the raw test CSV still holds strings — confirm how
# label_encoders.pkl is created.
for col in number_cols:
    test_df[col] = label_encoders[col].transform(test_df[col]).astype(np.float32)

print("‚úÖ Categorical features encoded using training data mappings")

# Joint columns to normalize: joint_00 ... joint_30
joint_cols = ["joint_" + str(i).zfill(2) for i in range(31)]
test_df[joint_cols] = test_df[joint_cols].astype(np.float32)

with open('minmax_scaler.pkl', 'rb') as scaler_file:
    minmax_scaler = pickle.load(scaler_file)

# transform() only: calling fit_transform() here would compute the scaling
# statistics from the test data
test_df[joint_cols] = minmax_scaler.transform(test_df[joint_cols])

print("‚úÖ Test data normalized using training data statistics")
print(f"Using scaler with Min: {minmax_scaler.data_min_[:5]}")
print(f"Using scaler with Max: {minmax_scaler.data_max_[:5]}")

X_test = build_test_sequences(test_df)

# The sequences are cast to float32 (not float64) before being wrapped as tensors
X_test = X_test.astype(np.float32)

test_ds = TensorDataset(torch.from_numpy(X_test))
# The test loader must NOT shuffle: predictions are later paired with
# sample_index by position, so the batch order has to follow the dataset order.
test_loader = make_loader(test_ds, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)

print(f"‚úÖ Test sequences created with shape: {X_test.shape}, dtype: {X_test.dtype}")

ValueError: invalid literal for int() with base 10: 'two'

In [None]:
# Sanity check: report the dtypes at every stage of the test pipeline
print("Data type verification:")

categorical_dtypes = test_df[number_cols].dtypes.unique()
print(f"test_df categorical columns dtype: {categorical_dtypes}")

joint_dtypes = test_df[joint_cols].dtypes.unique()
print(f"test_df joint columns dtype: {joint_dtypes}")

print(f"X_test dtype: {X_test.dtype}")

# Pull a single batch from the test loader and inspect its first tensor
sample_batch = next(iter(test_loader))
batch_dtype = sample_batch[0].dtype
print(f"Test loader batch dtype: {batch_dtype}")
print("\n‚úÖ All dtypes should be float32 for model compatibility")

In [None]:
# Switch the model to evaluation mode before running inference
best_model.eval()

# Predicted class index of every test sequence, in loader order
predictions = []
with torch.no_grad():
    for batch in test_loader:
        # The first element of each batch is the feature tensor
        features = batch[0].to(device)
        logits = best_model(features)
        # The index of the largest output along dim 1 is the predicted class
        predicted = torch.argmax(logits, dim=1)
        predictions.extend(predicted.cpu().numpy())

In [None]:
from datetime import datetime

# Map predicted class indices back to label strings: 0 -> no_pain,
# 1 -> low_pain, anything else -> high_pain.
# NOTE(review): this order must match the label encoding used for training — confirm.
predicted_pains = [
    "no_pain" if pred == 0 else "low_pain" if pred == 1 else "high_pain"
    for pred in predictions
]

# One row per test sequence; sample_index is simply the position 0..N-1
predictions_csv = pd.DataFrame({
    'sample_index': np.arange(len(test_ds)),
    'label': predicted_pains,
})

# Despite its name, today_date holds the timestamped output file name
today_date = f"predictions_{datetime.now().strftime('%Y%m%d_%H%M')}.csv"
predictions_csv.to_csv(today_date, index=False)