In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
def load_prepared_data(data_dir='/Users/akashmurali/Documents/capstone/project/capture24/preprocessed', schema='WillettsSpecific2018'):
    """Load the four preprocessed ``.npy`` arrays from ``data_dir``.

    Args:
        data_dir: Directory holding ``X.npy``, ``Y_<schema>.npy``, ``T.npy``
            and ``P.npy``.
        schema: Name of the label schema; selects which ``Y_<schema>.npy``
            file is read.

    Returns:
        Tuple ``(X, Y, T, P)`` of the arrays exactly as stored on disk.
    """
    print(f"Loading prepared data from: {data_dir}")

    def _read(filename, **load_kwargs):
        # Resolve the file inside data_dir and hand any options to np.load
        return np.load(os.path.join(data_dir, filename), **load_kwargs)

    X = _read('X.npy')
    # NOTE: allow_pickle=True can execute arbitrary code embedded in the file;
    # only point this at arrays you produced yourself.
    Y = _read(f'Y_{schema}.npy', allow_pickle=True)
    T = _read('T.npy', allow_pickle=True)
    P = _read('P.npy', allow_pickle=True)

    print("\nLoaded data:")
    print(f"  X shape: {X.shape}")
    print(f"  Y shape: {Y.shape}")
    participant_count = len(np.unique(P))
    print(f"  Number of participants: {participant_count}")

    return X, Y, T, P


# Usage: load the four arrays into notebook-level variables for the later cells.
# Relies on the default data_dir baked into load_prepared_data, so the path must
# exist on the machine running this notebook.
X, Y, T, P = load_prepared_data(schema='WillettsSpecific2018')

Loading prepared data from: /Users/akashmurali/Documents/capstone/project/capture24/preprocessed

Loaded data:
  X shape: (934762, 1000, 3)
  Y shape: (934762,)
  Number of participants: 151


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
from tqdm import tqdm
from sklearn.metrics import classification_report, f1_score

# Dataset class
class ActivityDataset(Dataset):
    """Torch dataset pairing each window in ``X`` with an integer-encoded label.

    Args:
        X: Array-like of windows, converted to a float tensor.
        Y: Sequence of raw labels, one per window.
        label_encoder: Optional ``{label: index}`` mapping to reuse. When
            omitted, a mapping is built from the sorted unique values of ``Y``.

    Attributes set on the instance: ``X``, ``Y``, ``label_encoder`` and the
    inverse mapping ``idx_to_label``.
    """

    def __init__(self, X, Y, label_encoder=None):
        self.X = torch.FloatTensor(X)

        # Build a fresh mapping only when the caller did not supply one
        if label_encoder is not None:
            self.label_encoder = label_encoder
        else:
            classes = np.unique(Y)
            self.label_encoder = dict(zip(classes, range(len(classes))))

        # A label missing from the mapping raises KeyError here
        mapping = self.label_encoder
        encoded = [mapping[raw] for raw in Y]
        self.Y = torch.LongTensor(encoded)
        self.idx_to_label = {index: raw for raw, index in mapping.items()}

    def __len__(self):
        """Number of windows in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(window, encoded_label)`` pair at position ``idx``."""
        window = self.X[idx]
        target = self.Y[idx]
        return window, target


# Simple 1-layer LSTM
class SimpleLSTM(nn.Module):
    """Single-layer LSTM classifier: LSTM -> dropout -> linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_classes: Number of output logits.
        dropout: Drop probability applied to the final hidden state before
            the linear layer.
    """

    def __init__(self, input_size=3, hidden_size=256, num_classes=10, dropout=0.3):
        super().__init__()

        # The LSTM's own dropout is fixed at 0 for this single-layer network;
        # regularisation comes from the separate nn.Dropout below.
        recurrent_kwargs = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
            dropout=0,
        )
        self.lstm = nn.LSTM(**recurrent_kwargs)

        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to (batch, num_classes) logits."""
        # Only the final hidden state is needed; per-step outputs and the
        # cell state are discarded.
        _, (hidden, _) = self.lstm(x)
        final_state = hidden[-1]  # (batch, hidden_size)
        return self.fc(self.dropout(final_state))  # (batch, num_classes)


# Training function
def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train; switched to training mode.
        train_loader: Iterable of ``(inputs, labels)`` batches supporting ``len``.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimiser updating ``model``'s parameters.
        device: Device the batches are moved to.

    Returns:
        ``(mean_batch_loss, accuracy_percent)`` where the loss is the sum of
        per-batch losses divided by the number of batches.
    """
    model.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0

    for batch_x, batch_y in tqdm(train_loader, desc="Training"):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Standard step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        guesses = torch.max(logits, 1).indices
        n_seen += batch_y.size(0)
        n_correct += (guesses == batch_y).sum().item()

    mean_loss = running_loss / len(train_loader)
    accuracy = 100 * n_correct / n_seen
    return mean_loss, accuracy


# Validation function
def validate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without updating weights.

    Args:
        model: Network to evaluate; switched to eval mode.
        val_loader: Iterable of ``(inputs, labels)`` batches supporting ``len``.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_batch_loss, accuracy_percent, macro_f1)``. The loss is the sum
        of per-batch losses divided by the number of batches; the F1 score is
        computed over every prediction collected during the pass.
    """
    model.eval()
    loss_sum = 0
    hits = 0
    seen = 0
    pred_log = []
    label_log = []

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for batch_x, batch_y in tqdm(val_loader, desc="Validation"):
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            loss_sum += criterion(logits, batch_y).item()

            guesses = torch.max(logits, 1).indices
            seen += batch_y.size(0)
            hits += (guesses == batch_y).sum().item()

            # Keep CPU copies so the F1 score can be computed at the end
            pred_log += guesses.cpu().tolist()
            label_log += batch_y.cpu().tolist()

    macro_f1 = f1_score(label_log, pred_log, average='macro')

    return loss_sum / len(val_loader), 100 * hits / seen, macro_f1


# Main training function
def train_lstm(X, Y, P, n_participants=100, test_pids=None, epochs=50,
               batch_size=64, hidden_size=256, lr=0.0005, train_fraction=0.8,
               seed=42, checkpoint_path=None):
    """Train a SimpleLSTM on the first N participants and test on held-out ones.

    Windows belonging to participants ``P001`` .. ``P<n_participants>`` form
    the training pool, which is split at window level into train/validation
    subsets. The checkpoint with the best validation accuracy is reloaded and
    scored on the test participants.

    NOTE: because the train/validation split is made over windows, windows from
    one participant can land in both subsets; validation scores may therefore
    be optimistic compared with the held-out-participant test score.

    Args:
        X: Array of windows, indexable by a boolean mask.
        Y: Array of raw labels aligned with ``X``; entries equal to ``'nan'``
            or missing are dropped.
        P: Array of participant IDs aligned with ``X`` (e.g. ``'P001'``).
        n_participants: Number of leading participant IDs used for training.
        test_pids: Participant IDs to test on. Defaults to ``P101`` .. ``P149``.
            Any ID that is also a training ID is removed to avoid leakage.
        epochs: Number of training epochs (must be >= 1).
        batch_size: Batch size for all data loaders.
        hidden_size: LSTM hidden width.
        lr: Adam learning rate.
        train_fraction: Share of the training pool used for fitting; the rest
            is used for validation.
        seed: Seed for the train/validation split generator.
        checkpoint_path: Where the best weights are stored. Defaults to
            ``lstm_<n_participants>p.pth``.

    Returns:
        ``(model, test_acc, test_f1)`` with the best checkpoint loaded.

    Raises:
        ValueError: If ``epochs`` or ``train_fraction`` is out of range, or if
            the selection leaves the train, validation or test set empty.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")
    if not 0 < train_fraction < 1:
        raise ValueError("train_fraction must be strictly between 0 and 1")
    if checkpoint_path is None:
        checkpoint_path = f'lstm_{n_participants}p.pth'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Device: {device}")

    # Participant selection
    train_pids = [f'P{i:03d}' for i in range(1, n_participants + 1)]
    if test_pids is None:
        test_pids = [f'P{i:03d}' for i in range(101, 150)]  # P101-P149
    else:
        test_pids = list(test_pids)

    # Never evaluate on a participant the model was trained on
    train_pid_set = set(train_pids)
    overlapping = [pid for pid in test_pids if pid in train_pid_set]
    if overlapping:
        print(f"Warning: removing {len(overlapping)} test participants that are also training participants")
        test_pids = [pid for pid in test_pids if pid not in train_pid_set]

    train_mask = np.isin(P, train_pids)
    test_mask = np.isin(P, test_pids)

    # Remove NaN labels
    has_label = (Y != 'nan') & (~pd.isna(Y))
    valid_train = train_mask & has_label
    valid_test = test_mask & has_label

    X_train = X[valid_train]
    Y_train = Y[valid_train]
    X_test = X[valid_test]
    Y_test = Y[valid_test]

    if len(X_train) == 0:
        raise ValueError("No labelled training windows found for the requested participants")
    if len(X_test) == 0:
        raise ValueError("No labelled test windows found for the requested participants")

    # Report the participants actually present rather than the requested counts
    n_train_found = len(np.unique(P[valid_train]))
    n_test_found = len(np.unique(P[valid_test]))
    print(f"\nData split:")
    print(f"  Train: {len(X_train):,} windows from {n_train_found} participants")
    print(f"  Test: {len(X_test):,} windows from {n_test_found} participants")

    # Create datasets; the test set reuses the training label mapping
    full_train_dataset = ActivityDataset(X_train, Y_train)
    test_dataset = ActivityDataset(X_test, Y_test, label_encoder=full_train_dataset.label_encoder)

    # Train/val split
    train_size = int(train_fraction * len(full_train_dataset))
    val_size = len(full_train_dataset) - train_size
    if train_size == 0 or val_size == 0:
        raise ValueError("Training pool is too small to create non-empty train and validation subsets")
    train_subset, val_subset = random_split(
        full_train_dataset, [train_size, val_size],
        generator=torch.Generator().manual_seed(seed)
    )

    # DataLoaders
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Model
    num_classes = len(full_train_dataset.label_encoder)
    model = SimpleLSTM(input_size=3, hidden_size=hidden_size, num_classes=num_classes)
    model = model.to(device)

    print(f"\nModel: {sum(p.numel() for p in model.parameters()):,} parameters")

    # Training setup
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Start below any achievable accuracy so the first epoch always writes a
    # checkpoint; otherwise a run stuck at 0% would load a missing or stale file.
    best_val_acc = float('-inf')

    print("\n" + "="*70)
    print("Training LSTM")
    print("="*70)

    for epoch in range(1, epochs + 1):
        print(f"\nEpoch {epoch}/{epochs}")

        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc, val_f1 = validate(model, val_loader, criterion, device)

        print(f"Train: Loss={train_loss:.4f}, Acc={train_acc:.2f}%")
        print(f"Val: Loss={val_loss:.4f}, Acc={val_acc:.2f}%, F1={val_f1:.4f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f"✓ Saved best model")

    # Final test
    print("\n" + "="*70)
    print("Test Evaluation")
    print("="*70)

    # map_location keeps the reload working regardless of where it was saved
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    test_loss, test_acc, test_f1 = validate(model, test_loader, criterion, device)

    print(f"\nTest Results:")
    print(f"  Accuracy: {test_acc:.2f}%")
    print(f"  F1-Score: {test_f1:.4f}")

    return model, test_acc, test_f1


# Run training
# train_lstm references `pd` when filtering missing labels, so pandas must be
# imported before the call below; it is also imported in the first cell.
import pandas as pd

model, test_acc, test_f1 = train_lstm(X, Y, P, n_participants=100)

Device: cpu

Data split:
  Train: 63,445 windows from 10 participants
  Test: 61,592 windows from 10 participants

Model: 69,386 parameters

Training LSTM

Epoch 1/20


Training: 100%|██████████| 794/794 [05:17<00:00,  2.50it/s]
Validation: 100%|██████████| 199/199 [00:45<00:00,  4.41it/s]


Train: Loss=1.4716, Acc=47.13%
Val: Loss=1.5263, Acc=39.88%, F1=0.0831
✓ Saved best model

Epoch 2/20


Training: 100%|██████████| 794/794 [05:09<00:00,  2.57it/s]
Validation: 100%|██████████| 199/199 [00:43<00:00,  4.52it/s]


Train: Loss=1.4170, Acc=49.64%
Val: Loss=1.4552, Acc=44.98%, F1=0.1118
✓ Saved best model

Epoch 3/20


Training: 100%|██████████| 794/794 [05:15<00:00,  2.52it/s]
Validation: 100%|██████████| 199/199 [00:39<00:00,  5.04it/s]


Train: Loss=1.3726, Acc=51.50%
Val: Loss=1.3533, Acc=52.49%, F1=0.1255
✓ Saved best model

Epoch 4/20


Training: 100%|██████████| 794/794 [05:09<00:00,  2.56it/s]
Validation: 100%|██████████| 199/199 [00:37<00:00,  5.30it/s]


Train: Loss=1.3558, Acc=52.21%
Val: Loss=1.3158, Acc=53.07%, F1=0.1229
✓ Saved best model

Epoch 5/20


Training: 100%|██████████| 794/794 [05:16<00:00,  2.51it/s]
Validation: 100%|██████████| 199/199 [00:36<00:00,  5.38it/s]


Train: Loss=1.3957, Acc=50.75%
Val: Loss=1.5260, Acc=40.30%, F1=0.1254

Epoch 6/20


Training: 100%|██████████| 794/794 [05:18<00:00,  2.49it/s]
Validation: 100%|██████████| 199/199 [00:40<00:00,  4.92it/s]


Train: Loss=1.4201, Acc=49.96%
Val: Loss=1.4059, Acc=51.07%, F1=0.1178

Epoch 7/20


Training: 100%|██████████| 794/794 [05:17<00:00,  2.50it/s]
Validation: 100%|██████████| 199/199 [00:37<00:00,  5.37it/s]


Train: Loss=1.3820, Acc=51.35%
Val: Loss=1.3210, Acc=53.27%, F1=0.1236
✓ Saved best model

Epoch 8/20


Training: 100%|██████████| 794/794 [05:22<00:00,  2.47it/s]
Validation: 100%|██████████| 199/199 [00:31<00:00,  6.30it/s]


Train: Loss=1.3538, Acc=52.20%
Val: Loss=1.3434, Acc=52.93%, F1=0.1227

Epoch 9/20


Training: 100%|██████████| 794/794 [05:10<00:00,  2.56it/s]
Validation: 100%|██████████| 199/199 [00:37<00:00,  5.30it/s]


Train: Loss=1.3682, Acc=52.32%
Val: Loss=1.3582, Acc=54.00%, F1=0.1471
✓ Saved best model

Epoch 10/20


Training: 100%|██████████| 794/794 [05:11<00:00,  2.55it/s]
Validation: 100%|██████████| 199/199 [00:38<00:00,  5.21it/s]


Train: Loss=1.4102, Acc=50.33%
Val: Loss=1.3854, Acc=51.87%, F1=0.1212

Epoch 11/20


Training: 100%|██████████| 794/794 [05:08<00:00,  2.57it/s]
Validation: 100%|██████████| 199/199 [00:41<00:00,  4.80it/s]


Train: Loss=1.3619, Acc=52.00%
Val: Loss=1.3185, Acc=53.87%, F1=0.1303

Epoch 12/20


Training: 100%|██████████| 794/794 [05:16<00:00,  2.51it/s]
Validation: 100%|██████████| 199/199 [00:37<00:00,  5.36it/s]


Train: Loss=1.3575, Acc=52.42%
Val: Loss=1.3523, Acc=54.92%, F1=0.1352
✓ Saved best model

Epoch 13/20


Training: 100%|██████████| 794/794 [05:10<00:00,  2.55it/s]
Validation: 100%|██████████| 199/199 [00:36<00:00,  5.51it/s]


Train: Loss=1.3599, Acc=52.31%
Val: Loss=1.3486, Acc=51.90%, F1=0.1203

Epoch 14/20


Training: 100%|██████████| 794/794 [05:10<00:00,  2.55it/s]
Validation: 100%|██████████| 199/199 [00:35<00:00,  5.57it/s]


Train: Loss=1.3345, Acc=51.90%
Val: Loss=1.2497, Acc=56.62%, F1=0.1763
✓ Saved best model

Epoch 15/20


Training: 100%|██████████| 794/794 [05:11<00:00,  2.55it/s]
Validation: 100%|██████████| 199/199 [00:42<00:00,  4.67it/s]


Train: Loss=1.2583, Acc=54.71%
Val: Loss=1.2174, Acc=55.37%, F1=0.1871

Epoch 16/20


Training: 100%|██████████| 794/794 [05:14<00:00,  2.52it/s]
Validation: 100%|██████████| 199/199 [00:40<00:00,  4.95it/s]


Train: Loss=1.1995, Acc=56.75%
Val: Loss=1.1307, Acc=58.55%, F1=0.2217
✓ Saved best model

Epoch 17/20


Training: 100%|██████████| 794/794 [05:13<00:00,  2.53it/s]
Validation: 100%|██████████| 199/199 [00:40<00:00,  4.92it/s]


Train: Loss=1.1529, Acc=57.05%
Val: Loss=1.1448, Acc=57.34%, F1=0.2193

Epoch 18/20


Training: 100%|██████████| 794/794 [05:16<00:00,  2.51it/s]
Validation: 100%|██████████| 199/199 [00:38<00:00,  5.10it/s]


Train: Loss=1.0897, Acc=60.38%
Val: Loss=1.0839, Acc=60.42%, F1=0.2435
✓ Saved best model

Epoch 19/20


Training: 100%|██████████| 794/794 [07:21<00:00,  1.80it/s]
Validation: 100%|██████████| 199/199 [01:08<00:00,  2.92it/s]


Train: Loss=1.0389, Acc=62.29%
Val: Loss=0.9787, Acc=64.61%, F1=0.3104
✓ Saved best model

Epoch 20/20


Training: 100%|██████████| 794/794 [08:04<00:00,  1.64it/s]
Validation: 100%|██████████| 199/199 [05:26<00:00,  1.64s/it]  


Train: Loss=0.9841, Acc=65.21%
Val: Loss=0.9720, Acc=64.94%, F1=0.3541
✓ Saved best model

Test Evaluation


Validation: 100%|██████████| 963/963 [03:37<00:00,  4.44it/s]



Test Results:
  Accuracy: 55.17%
  F1-Score: 0.2371
