In [None]:
# -*- coding: utf-8 -*-
"""unified_transition_transformer.py
Unified Model Comparison with Transformer Encoder Backbone
Compare three models on 41 pre-augmented transition datasets
(1 original + 40 transition datasets)
"""

# Mount Google Drive only when running inside Colab. Outside Colab the
# `google.colab` package does not exist; skipping the mount keeps this module
# importable (e.g. to reuse the model classes) instead of failing at import.
try:
    from google.colab import drive
except ImportError:
    drive = None
else:
    drive.mount('/content/drive')

import os, random, time, copy, json
import numpy as np
from typing import Tuple, Dict, List
from dataclasses import dataclass

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# ========================
# Config & Reproducibility
# ========================
SEED = 42

# Seed every random number generator used below: Python, NumPy, and PyTorch
# (CPU generator plus all CUDA devices).
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed,
                  torch.cuda.manual_seed_all):
    _seed_rng(SEED)
del _seed_rng

# Disable cuDNN autotuning and request deterministic kernels for repeatability.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Resolved once at import time; used as the default for Config.device.
_DEFAULT_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


@dataclass
class Config:
    """Experiment settings: paths, optimisation, model sizes, and runtime."""

    # --- Paths ---
    data_dir: str = "/content/drive/MyDrive/AI_data/TPA2/wisdm_transition_datasets"
    save_dir: str = "/content/drive/MyDrive/AI_data/TPA2"

    # --- Optimisation ---
    epochs: int = 100
    batch_size: int = 128
    lr: float = 1e-4
    weight_decay: float = 1e-4
    grad_clip: float = 1.0          # max gradient norm passed to clip_grad_norm_
    label_smoothing: float = 0.05

    # --- Early stopping / validation split ---
    patience: int = 20              # epochs without improvement before stopping
    min_delta: float = 0.0001       # minimum val-accuracy gain counted as improvement
    val_split: float = 0.2          # fraction of the training set held out for validation

    # --- Shared feature width ---
    d_model: int = 128

    # --- Transformer encoder ---
    num_layers: int = 2
    n_heads: int = 4
    ff_dim: int = 256
    dropout: float = 0.1

    # --- TPA pooling head ---
    tpa_num_prototypes: int = 16
    tpa_heads: int = 4
    tpa_dropout: float = 0.1
    tpa_temperature: float = 0.07
    tpa_topk_ratio: float = 0.25

    # --- Runtime ---
    device: str = _DEFAULT_DEVICE
    num_workers: int = 2


cfg = Config()

# ========================
# Dataset Class
# ========================
class PreloadedDataset(Dataset):
    """In-memory dataset built from already-loaded NumPy feature/label arrays."""

    def __init__(self, X: np.ndarray, y: np.ndarray):
        super().__init__()
        self.X = torch.from_numpy(X).float()

        # Labels whose minimum is >= 1 are treated as 1-based (e.g. 1-6) and
        # shifted down by one to become 0-based (0-5); otherwise kept as-is.
        zero_based = y - 1 if y.min() >= 1 else y
        self.y = torch.from_numpy(zero_based).long()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# ========================
# Data Loading Functions
# ========================
def load_dataset(base_dir: str, dataset_name: str):
    """
    Load one pre-augmented dataset from disk.

    Reads X_train.npy, y_train.npy, X_test.npy and y_test.npy from
    ``<base_dir>/<dataset_name>`` and wraps each split in a PreloadedDataset.

    Args:
        base_dir: base directory containing all datasets
        dataset_name: sub-directory name, e.g. "ORIGINAL" or
            "STANDING_TO_SITTING_10pct"
    Returns:
        (train_dataset, test_dataset)
    """
    dataset_dir = os.path.join(base_dir, dataset_name)

    print(f"\nLoading {dataset_name}...")
    print(f"  Path: {dataset_dir}")

    # Read the arrays in a fixed order: train features/labels, then test.
    arrays = {
        stem: np.load(os.path.join(dataset_dir, f"{stem}.npy"))
        for stem in ("X_train", "y_train", "X_test", "y_test")
    }

    print(f"  Train: {arrays['X_train'].shape}, Test: {arrays['X_test'].shape}")

    train_dataset = PreloadedDataset(arrays["X_train"], arrays["y_train"])
    test_dataset = PreloadedDataset(arrays["X_test"], arrays["y_test"])
    return train_dataset, test_dataset

# ========================
# Transformer Backbone Components
# ========================
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding added to a batch-first sequence.

    Even feature indices hold sin(position * freq) and odd indices hold
    cos(position * freq). The table is precomputed for ``max_len`` positions
    and stored as a non-trainable buffer.

    Args:
        d_model: feature width of the sequence (even or odd).
        max_len: number of positions precomputed; inputs must not be longer.
        dropout: dropout probability applied after adding the encoding.
    """
    def __init__(self, d_model: int, max_len: int = 5000, dropout: float = 0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Create positional encoding matrix
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        angles = position * div_term  # [max_len, ceil(d_model / 2)]

        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd (cosine) column than even
        # (sine) columns, so trim the angles to avoid a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        pe = pe.unsqueeze(0)  # [1, max_len, d_model]
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        Args:
            x: [B, T, D] with T <= max_len
        Returns:
            [B, T, D] - input plus positional encoding, after dropout
        """
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)

class TransformerBackbone(nn.Module):
    """
    Lightweight Transformer encoder backbone for batch-first sequences.

    Pipeline: linear projection of the channel (last) dimension to d_model,
    sinusoidal positional encoding, a stack of pre-LayerNorm Transformer
    encoder layers with GELU activation, and a final LayerNorm.

    Args:
        in_channels: number of input features per time step (last input dim).
        d_model: feature width used throughout the encoder.
        num_layers: number of encoder layers.
        n_heads: attention heads per encoder layer.
        ff_dim: hidden width of each layer's feed-forward network.
        dropout: dropout probability (positional encoding and encoder layers).
        max_seq_len: number of positions precomputed by the positional
            encoding; input sequences must not be longer than this.
    """
    def __init__(self,
                 in_channels: int = 3,
                 d_model: int = 128,
                 num_layers: int = 2,
                 n_heads: int = 4,
                 ff_dim: int = 256,
                 dropout: float = 0.1,
                 max_seq_len: int = 200):
        super().__init__()

        self.d_model = d_model

        # Input projection over the last dimension: [B, T, C] -> [B, T, D]
        self.input_projection = nn.Linear(in_channels, d_model)

        # Positional encoding
        self.pos_encoder = PositionalEncoding(d_model, max_seq_len, dropout)

        # Transformer Encoder layers
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=ff_dim,
            dropout=dropout,
            activation='gelu',
            batch_first=True,
            norm_first=True  # Pre-LN for better stability
        )

        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers
        )

        # Output normalization
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x):
        """
        Args:
            x: [B, T, C] - channel-last sensor windows (time on dim 1)
        Returns:
            [B, T, D] - encoded sequence
        Raises:
            ValueError: if the last dimension of ``x`` is not ``in_channels``
                (e.g. a channel-first [B, C, T] tensor was passed).
        """
        expected = self.input_projection.in_features
        if x.size(-1) != expected:
            raise ValueError(
                f"TransformerBackbone expects input shaped [B, T, {expected}] "
                f"(channel-last), got {tuple(x.shape)}"
            )

        # Project to d_model: [B, T, C] -> [B, T, D]
        x = self.input_projection(x)

        # Add positional encoding: [B, T, D]
        x = self.pos_encoder(x)

        # Transformer encoding: [B, T, D]
        x = self.transformer_encoder(x)

        # Final normalization: [B, T, D]
        x = self.norm(x)

        return x

# ========================
# GAP Model with Transformer
# ========================
class GAPModel(nn.Module):
    """Baseline classifier: Transformer backbone, mean over time, linear head."""

    def __init__(self,
                 in_channels: int = 3,
                 d_model: int = 128,
                 num_layers: int = 2,
                 n_heads: int = 4,
                 ff_dim: int = 256,
                 dropout: float = 0.1,
                 num_classes: int = 6):
        super().__init__()
        backbone_kwargs = dict(
            in_channels=in_channels,
            d_model=d_model,
            num_layers=num_layers,
            n_heads=n_heads,
            ff_dim=ff_dim,
            dropout=dropout,
        )
        self.backbone = TransformerBackbone(**backbone_kwargs)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Return class logits [B, num_classes] for a batch of input windows."""
        # Backbone output is [B, T, D]; averaging dim 1 pools over time -> [B, D]
        sequence = self.backbone(x)
        return self.fc(sequence.mean(dim=1))

# ========================
# Pure-TPA with Transformer
# ========================
class ProductionTPA(nn.Module):
    """Pure TPA pooling head.

    Learnable prototype vectors act as queries that attend over the input
    sequence using multi-head, cosine-similarity attention scaled by
    ``1 / temperature``. The per-prototype summaries are averaged and passed
    through a small MLP to give one vector per sample.

    Note: ``topk_ratio`` is stored on the module but is not used in forward().
    """

    def __init__(self, dim, num_prototypes=16, heads=4, dropout=0.1,
                 temperature=0.07, topk_ratio=0.25):
        super().__init__()
        assert dim % heads == 0

        self.dim, self.heads = dim, heads
        self.head_dim = dim // heads
        self.num_prototypes = num_prototypes
        self.temperature = temperature
        self.topk_ratio = topk_ratio

        # Learnable prototypes, initialised with small Gaussian noise
        self.proto = nn.Parameter(0.02 * torch.randn(num_prototypes, dim))

        self.pre_norm = nn.LayerNorm(dim)

        # Bias-free query / key / value projections (created in q, k, v order)
        for proj_name in ("q_proj", "k_proj", "v_proj"):
            setattr(self, proj_name, nn.Linear(dim, dim, bias=False))

        self.fuse = nn.Sequential(
            nn.Linear(dim, dim),
            nn.SiLU(),
            nn.Dropout(dropout),
            nn.Linear(dim, dim)
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Pool a sequence ``x`` of shape [B, T, D] into a vector [B, D]."""
        batch, seq_len, width = x.shape
        n_proto = self.num_prototypes

        def to_heads(t, length):
            # [B, length, D] -> [B, heads, length, head_dim]
            return t.view(batch, length, self.heads, self.head_dim).transpose(1, 2)

        tokens = self.pre_norm(x)

        keys = to_heads(self.k_proj(tokens), seq_len)
        values = to_heads(self.v_proj(tokens), seq_len)
        # The same projected prototypes are shared by every sample in the batch
        queries = to_heads(
            self.q_proj(self.proto).unsqueeze(0).expand(batch, -1, -1), n_proto
        )

        # Unit-normalise so the dot product is a cosine similarity
        queries = F.normalize(queries, dim=-1)
        keys = F.normalize(keys, dim=-1)

        similarity = torch.matmul(queries, keys.transpose(-2, -1)) / self.temperature
        weights = F.softmax(similarity, dim=-1)
        weights = self.dropout(torch.nan_to_num(weights, nan=0.0))

        # [B, heads, P, head_dim] -> [B, P, D]
        summaries = torch.matmul(weights, values)
        summaries = summaries.transpose(1, 2).contiguous().view(batch, n_proto, width)

        # Average over prototypes, then fuse
        return self.fuse(summaries.mean(dim=1))

class TPAModel(nn.Module):
    """Classifier: Transformer backbone, ProductionTPA pooling, linear head.

    Args:
        in_channels, d_model, num_layers, n_heads, ff_dim, dropout:
            forwarded to TransformerBackbone.
        num_classes: number of output logits.
        tpa_config: optional dict with keys 'num_prototypes', 'heads',
            'dropout', 'temperature', 'topk_ratio'. Missing keys (or passing
            None) fall back to the defaults below instead of raising.
    """
    def __init__(self,
                 in_channels: int = 3,
                 d_model: int = 128,
                 num_layers: int = 2,
                 n_heads: int = 4,
                 ff_dim: int = 256,
                 dropout: float = 0.1,
                 num_classes: int = 6,
                 tpa_config=None):
        super().__init__()

        # The signature defaults tpa_config to None, so it must be usable
        # without a config; fill in any missing setting with a default.
        tpa_defaults = {
            'num_prototypes': 16,
            'heads': 4,
            'dropout': 0.1,
            'temperature': 0.07,
            'topk_ratio': 0.25,
        }
        tpa_config = {**tpa_defaults, **(tpa_config or {})}

        self.backbone = TransformerBackbone(
            in_channels=in_channels,
            d_model=d_model,
            num_layers=num_layers,
            n_heads=n_heads,
            ff_dim=ff_dim,
            dropout=dropout
        )
        self.tpa = ProductionTPA(
            dim=d_model,
            num_prototypes=tpa_config['num_prototypes'],
            heads=tpa_config['heads'],
            dropout=tpa_config['dropout'],
            temperature=tpa_config['temperature'],
            topk_ratio=tpa_config['topk_ratio']
        )
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Return class logits [B, num_classes] for a batch of input windows."""
        features = self.backbone(x)  # [B, T, D]
        z = self.tpa(features)  # [B, D]
        logits = self.classifier(z)
        return logits

# ========================
# Gated-TPA with Transformer
# ========================
class GatedTPAModel(nn.Module):
    """Gated-TPA: per-feature gated blend of TPA pooling and average pooling.

    A sigmoid gate computed from the concatenated TPA and GAP vectors mixes
    the two pooled representations before the linear classifier.

    Args:
        in_channels, d_model, num_layers, n_heads, ff_dim, dropout:
            forwarded to TransformerBackbone.
        num_classes: number of output logits.
        tpa_config: optional dict with keys 'num_prototypes', 'heads',
            'dropout', 'temperature', 'topk_ratio'. Missing keys (or passing
            None) fall back to the defaults below instead of raising.
    """
    def __init__(self,
                 in_channels: int = 3,
                 d_model: int = 128,
                 num_layers: int = 2,
                 n_heads: int = 4,
                 ff_dim: int = 256,
                 dropout: float = 0.1,
                 num_classes: int = 6,
                 tpa_config=None):
        super().__init__()

        # The signature defaults tpa_config to None, so it must be usable
        # without a config; fill in any missing setting with a default.
        tpa_defaults = {
            'num_prototypes': 16,
            'heads': 4,
            'dropout': 0.1,
            'temperature': 0.07,
            'topk_ratio': 0.25,
        }
        tpa_config = {**tpa_defaults, **(tpa_config or {})}

        self.backbone = TransformerBackbone(
            in_channels=in_channels,
            d_model=d_model,
            num_layers=num_layers,
            n_heads=n_heads,
            ff_dim=ff_dim,
            dropout=dropout
        )
        self.tpa = ProductionTPA(
            dim=d_model,
            num_prototypes=tpa_config['num_prototypes'],
            heads=tpa_config['heads'],
            dropout=tpa_config['dropout'],
            temperature=tpa_config['temperature'],
            topk_ratio=tpa_config['topk_ratio']
        )

        # Gating mechanism: [B, 2D] -> per-feature weights in (0, 1), [B, D]
        self.gate = nn.Sequential(
            nn.Linear(d_model * 2, d_model),
            nn.Sigmoid()
        )

        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Return class logits [B, num_classes] for a batch of input windows."""
        features = self.backbone(x)  # [B, T, D]

        # TPA path
        z_tpa = self.tpa(features)  # [B, D]

        # GAP path
        z_gap = features.mean(dim=1)  # [B, D]

        # Gating
        gate_input = torch.cat([z_tpa, z_gap], dim=-1)  # [B, 2D]
        alpha = self.gate(gate_input)  # [B, D]

        # Combine: alpha weights the TPA vector, (1 - alpha) the GAP vector
        z = alpha * z_tpa + (1 - alpha) * z_gap  # [B, D]

        logits = self.classifier(z)
        return logits

# ========================
# Training & Evaluation
# ========================
def train_one_epoch(model, loader, opt, cfg: Config):
    """Run one training pass over ``loader``.

    Uses label-smoothed cross-entropy and gradient-norm clipping. Batches
    whose loss is NaN/Inf are skipped and excluded from the statistics.

    Returns:
        dict with the sample-weighted mean "loss" and the "acc" over the
        batches that were used (both 0 if no batch was used).
    """
    model.train()
    loss_fn = nn.CrossEntropyLoss(label_smoothing=cfg.label_smoothing)
    running_loss = 0
    n_correct = 0
    n_seen = 0

    for inputs, targets in loader:
        inputs = inputs.to(cfg.device)
        targets = targets.to(cfg.device)

        opt.zero_grad()
        outputs = model(inputs)
        batch_loss = loss_fn(outputs, targets)

        # A non-finite loss would poison the weights; drop the batch instead.
        if not torch.isfinite(batch_loss):
            print("[Warning] NaN/Inf loss detected, skipping batch")
            continue

        batch_loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), cfg.grad_clip)
        opt.step()

        with torch.no_grad():
            batch_size = targets.size(0)
            n_correct += (outputs.argmax(dim=-1) == targets).sum().item()
            n_seen += batch_size
            running_loss += batch_loss.item() * batch_size

    if n_seen > 0:
        return {"loss": running_loss / n_seen, "acc": n_correct / n_seen}
    return {"loss": 0, "acc": 0}

@torch.no_grad()
def evaluate(model, loader, cfg: Config):
    """Evaluate ``model`` on ``loader`` in eval mode.

    Returns:
        (accuracy, macro-averaged F1) over all samples in the loader.
    """
    model.eval()
    targets, predictions = [], []

    for batch_x, batch_y in loader:
        batch_x = batch_x.to(cfg.device)
        batch_y = batch_y.to(cfg.device)
        batch_pred = model(batch_x).argmax(dim=-1)
        predictions.append(batch_pred.cpu().numpy())
        targets.append(batch_y.cpu().numpy())

    y_true = np.concatenate(targets)
    y_pred = np.concatenate(predictions)

    return accuracy_score(y_true, y_pred), f1_score(y_true, y_pred, average='macro')

def train_model(model, train_loader, val_loader, cfg: Config, model_name: str):
    """Train ``model`` with AdamW and early stopping on validation accuracy.

    The weights of the best validation epoch are restored before returning.
    An epoch counts as an improvement only if validation accuracy exceeds the
    best so far by more than ``cfg.min_delta``; training stops after
    ``cfg.patience`` consecutive epochs without improvement.

    Returns:
        the best validation accuracy observed.
    """
    print(f"\n[Training {model_name}]")

    optimizer = torch.optim.AdamW(
        model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay
    )

    top_val_acc = 0.0
    top_state = None
    stale_epochs = 0

    for epoch in range(1, cfg.epochs + 1):
        train_stats = train_one_epoch(model, train_loader, optimizer, cfg)
        val_acc, val_f1 = evaluate(model, val_loader, cfg)

        improved = val_acc > top_val_acc + cfg.min_delta
        if improved:
            top_val_acc = val_acc
            # Snapshot a copy; the live state_dict keeps changing during training
            top_state = copy.deepcopy(model.state_dict())
            stale_epochs = 0
        else:
            stale_epochs += 1

        if epoch % 10 == 0:
            print(f"  Epoch {epoch:3d}: Train Acc={train_stats['acc']:.4f}, Val Acc={val_acc:.4f}, F1={val_f1:.4f}")

        if stale_epochs >= cfg.patience:
            print(f"  Early stopping at epoch {epoch}")
            break

    if top_state:
        model.load_state_dict(top_state)

    print(f"  Best Val Acc: {top_val_acc:.4f}")
    return top_val_acc

def create_model(model_name: str, cfg: Config):
    """Build the model called ``model_name`` ("GAP", "TPA" or "Gated-TPA").

    The model is moved to ``cfg.device`` and cast to float32.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    tpa_config = {
        'num_prototypes': cfg.tpa_num_prototypes,
        'heads': cfg.tpa_heads,
        'dropout': cfg.tpa_dropout,
        'temperature': cfg.tpa_temperature,
        'topk_ratio': cfg.tpa_topk_ratio
    }

    # Backbone hyperparameters shared by all three model variants
    backbone_args = dict(
        d_model=cfg.d_model,
        num_layers=cfg.num_layers,
        n_heads=cfg.n_heads,
        ff_dim=cfg.ff_dim,
        dropout=cfg.dropout,
    )

    if model_name == "GAP":
        net = GAPModel(**backbone_args)
    elif model_name in ("TPA", "Gated-TPA"):
        model_cls = TPAModel if model_name == "TPA" else GatedTPAModel
        net = model_cls(tpa_config=tpa_config, **backbone_args)
    else:
        raise ValueError(f"Unknown model: {model_name}")

    return net.to(cfg.device).float()

# ========================
# Main Experiment
# ========================
def run_experiment(dataset_name: str, cfg: Config):
    """Run the complete experiment for one dataset.

    Loads the dataset, carves a stratified validation split out of the
    training set, then trains and tests GAP, TPA and Gated-TPA in turn.

    Args:
        dataset_name: sub-directory of ``cfg.data_dir`` to load.
        cfg: experiment configuration.
    Returns:
        list with one result dict per model, with keys 'Model', 'Dataset',
        'Val_Accuracy', 'Test_Accuracy' and 'Test_F1_Score'.
    """

    print(f"\n{'='*80}")
    print(f"EXPERIMENT: {dataset_name}")
    print(f"{'='*80}")

    # Load data
    train_dataset, test_dataset = load_dataset(cfg.data_dir, dataset_name)

    # Split train into train/val using indices
    n_total = len(train_dataset)
    indices = np.arange(n_total)

    # Get labels for stratification
    y_labels = train_dataset.y.numpy()

    train_indices, val_indices = train_test_split(
        indices,
        test_size=cfg.val_split,
        random_state=SEED,
        stratify=y_labels
    )

    # Create subsets using Subset
    from torch.utils.data import Subset
    train_subset = Subset(train_dataset, train_indices)
    val_subset = Subset(train_dataset, val_indices)

    # Create data loaders; `g` drives the shuffling order of the train loader
    g = torch.Generator(device='cpu').manual_seed(SEED)
    train_loader = DataLoader(train_subset, cfg.batch_size, shuffle=True,
                              num_workers=cfg.num_workers, generator=g)
    val_loader = DataLoader(val_subset, cfg.batch_size, num_workers=cfg.num_workers)
    test_loader = DataLoader(test_dataset, cfg.batch_size, num_workers=cfg.num_workers)

    print(f"\nDataset splits:")
    print(f"  Train: {len(train_subset)}, Val: {len(val_subset)}, Test: {len(test_dataset)}")

    # Train and evaluate all models
    results = []
    model_names = ["GAP", "TPA", "Gated-TPA"]

    for model_name in model_names:
        # Reset seed for each model
        random.seed(SEED)
        np.random.seed(SEED)
        torch.manual_seed(SEED)
        # The loader's generator is separate from the global RNG. Without
        # re-seeding it, its state carries over from the previous model and
        # each model would train on a different sequence of shuffled batches.
        g.manual_seed(SEED)

        # Create and train model
        model = create_model(model_name, cfg)
        best_val_acc = train_model(model, train_loader, val_loader, cfg, model_name)

        # Evaluate on test set
        test_acc, test_f1 = evaluate(model, test_loader, cfg)

        print(f"\n[{model_name} Results]")
        print(f"  Val Acc: {best_val_acc:.4f}")
        print(f"  Test Acc: {test_acc:.4f}, F1: {test_f1:.4f}")

        results.append({
            'Model': model_name,
            'Dataset': dataset_name,
            'Val_Accuracy': float(best_val_acc),
            'Test_Accuracy': float(test_acc),
            'Test_F1_Score': float(test_f1)
        })

    return results

# ========================
# Run All Experiments
# ========================
def _save_results(all_results, datasets, cfg, json_path):
    """Write the experiment metadata and all results collected so far to JSON."""
    results_dict = {
        'experiment_info': {
            'date': time.strftime('%Y-%m-%d %H:%M:%S'),
            'models': ['GAP', 'TPA', 'Gated-TPA'],
            'backbone': 'Transformer Encoder',
            'total_datasets': len(datasets),
            'datasets': datasets,
            'config': {
                'epochs': cfg.epochs,
                'batch_size': cfg.batch_size,
                'lr': cfg.lr,
                'd_model': cfg.d_model,
                'num_layers': cfg.num_layers,
                'n_heads': cfg.n_heads,
                'ff_dim': cfg.ff_dim,
                'dropout': cfg.dropout,
                'tpa_num_prototypes': cfg.tpa_num_prototypes,
                'tpa_heads': cfg.tpa_heads
            }
        },
        'results': all_results
    }

    os.makedirs(cfg.save_dir, exist_ok=True)
    with open(json_path, 'w') as f:
        json.dump(results_dict, f, indent=2)


if __name__ == "__main__":
    # Build the dataset list first so the banner reports the real count.
    datasets = ["SITTING_TO_STANDING_40pct"]

    transitions = [
        "STANDING_TO_WALKING",
        "WALKING_TO_STANDING",
        "WALKING_TO_JOGGING",
        "JOGGING_TO_WALKING",
        "WALKING_TO_UPSTAIRS",
        "WALKING_TO_DOWNSTAIRS",
        "UPSTAIRS_TO_WALKING",
        "DOWNSTAIRS_TO_WALKING"
    ]

    # Add the 10%, 20%, 30% and 40% mixes for every transition
    mix_pcts = [10, 20, 30, 40]
    datasets.extend(
        f"{transition}_{pct}pct"
        for transition in transitions
        for pct in mix_pcts
    )

    print("\n" + "="*80)
    print("UNIFIED MODEL COMPARISON: GAP vs TPA vs Gated-TPA")
    print("WITH TRANSFORMER ENCODER BACKBONE")
    print(f"Testing on {len(datasets)} Datasets")
    print("="*80)

    print(f"\nTotal datasets to test: {len(datasets)}")
    print(f"  - transitions: {len(transitions)} x {len(mix_pcts)} mix ratios (+1 fixed dataset)")

    all_results = []
    json_path = os.path.join(cfg.save_dir, "unified_transformer_41datasets_results.json")

    # Run experiments. Results are written after every dataset so that a
    # crash or a manual interrupt of this long run does not discard the
    # datasets that already finished.
    interrupted = False
    try:
        for i, dataset_name in enumerate(datasets, 1):
            print(f"\n[Progress: {i}/{len(datasets)}]")
            results = run_experiment(dataset_name, cfg)
            all_results.extend(results)
            _save_results(all_results, datasets, cfg, json_path)
    except KeyboardInterrupt:
        interrupted = True
        print("\n[Interrupted] Stopping early; keeping results of completed datasets.")

    # Save all results
    print(f"\n{'='*80}")
    print("SAVING RESULTS")
    print(f"{'='*80}")

    _save_results(all_results, datasets, cfg, json_path)

    print(f"\nResults saved to: {json_path}")

    # Print summary
    completed = len({r['Dataset'] for r in all_results})
    print(f"\n{'='*80}")
    print("SUMMARY")
    print(f"{'='*80}")
    print(f"Total experiments: {len(all_results)}")
    print(f"Total datasets tested: {completed}")
    print(f"Models compared: 3 (GAP, TPA, Gated-TPA)")

    # Calculate average performance per model
    print(f"\n{'='*80}")
    print("AVERAGE PERFORMANCE (All Datasets)")
    print(f"{'='*80}")

    for model_name in ['GAP', 'TPA', 'Gated-TPA']:
        model_results = [r for r in all_results if r['Model'] == model_name]
        if not model_results:
            # Nothing finished for this model; avoid averaging an empty list.
            print(f"{model_name:12s}: no results")
            continue
        avg_acc = np.mean([r['Test_Accuracy'] for r in model_results])
        avg_f1 = np.mean([r['Test_F1_Score'] for r in model_results])
        print(f"{model_name:12s}: Acc={avg_acc:.4f}, F1={avg_f1:.4f}")

    print(f"\n{'='*80}")
    print("EXPERIMENT INTERRUPTED" if interrupted else "EXPERIMENT COMPLETE")
    print(f"{'='*80}")

Mounted at /content/drive

UNIFIED MODEL COMPARISON: GAP vs TPA vs Gated-TPA
WITH TRANSFORMER ENCODER BACKBONE
Testing on 41 Datasets (1 Original + 40 Transition)

Total datasets to test: 33
  - transitions: 33

[Progress: 1/33]

EXPERIMENT: SITTING_TO_STANDING_40pct

Loading SITTING_TO_STANDING_40pct...
  Path: /content/drive/MyDrive/AI_data/TPA2/wisdm_transition_datasets/SITTING_TO_STANDING_40pct
  Train: (24156, 200, 3), Test: (6040, 200, 3)

Dataset splits:
  Train: 19324, Val: 4832, Test: 6040





[Training GAP]
  Epoch  10: Train Acc=0.9063, Val Acc=0.8971, F1=0.8393
  Epoch  20: Train Acc=0.9486, Val Acc=0.9361, F1=0.9063
  Epoch  30: Train Acc=0.9635, Val Acc=0.9433, F1=0.9167
  Epoch  40: Train Acc=0.9716, Val Acc=0.9489, F1=0.9221
  Epoch  50: Train Acc=0.9777, Val Acc=0.9625, F1=0.9440
  Epoch  60: Train Acc=0.9803, Val Acc=0.9592, F1=0.9396
  Epoch  70: Train Acc=0.9835, Val Acc=0.9623, F1=0.9433
  Epoch  80: Train Acc=0.9865, Val Acc=0.9563, F1=0.9306
  Early stopping at epoch 87
  Best Val Acc: 0.9654

[GAP Results]
  Val Acc: 0.9654
  Test Acc: 0.9674, F1: 0.9490

[Training TPA]




  Epoch  10: Train Acc=0.9100, Val Acc=0.9120, F1=0.8632
  Epoch  20: Train Acc=0.9488, Val Acc=0.9371, F1=0.9032
  Epoch  30: Train Acc=0.9650, Val Acc=0.9557, F1=0.9319
  Epoch  40: Train Acc=0.9719, Val Acc=0.9545, F1=0.9293
  Epoch  50: Train Acc=0.9775, Val Acc=0.9572, F1=0.9343
  Epoch  60: Train Acc=0.9815, Val Acc=0.9656, F1=0.9499
  Epoch  70: Train Acc=0.9836, Val Acc=0.9526, F1=0.9275
  Epoch  80: Train Acc=0.9869, Val Acc=0.9638, F1=0.9448
  Early stopping at epoch 88
  Best Val Acc: 0.9706

[TPA Results]
  Val Acc: 0.9706
  Test Acc: 0.9732, F1: 0.9566

[Training Gated-TPA]




  Epoch  10: Train Acc=0.9124, Val Acc=0.9025, F1=0.8504
  Epoch  20: Train Acc=0.9446, Val Acc=0.9427, F1=0.9122
  Epoch  30: Train Acc=0.9623, Val Acc=0.9356, F1=0.9029
  Epoch  40: Train Acc=0.9732, Val Acc=0.9570, F1=0.9365
  Epoch  50: Train Acc=0.9777, Val Acc=0.9594, F1=0.9366
  Epoch  60: Train Acc=0.9829, Val Acc=0.9690, F1=0.9522
  Epoch  70: Train Acc=0.9872, Val Acc=0.9644, F1=0.9482
  Epoch  80: Train Acc=0.9881, Val Acc=0.9659, F1=0.9495
  Early stopping at epoch 85
  Best Val Acc: 0.9754

[Gated-TPA Results]
  Val Acc: 0.9754
  Test Acc: 0.9724, F1: 0.9557

[Progress: 2/33]

EXPERIMENT: STANDING_TO_WALKING_10pct

Loading STANDING_TO_WALKING_10pct...
  Path: /content/drive/MyDrive/AI_data/TPA2/wisdm_transition_datasets/STANDING_TO_WALKING_10pct
  Train: (24156, 200, 3), Test: (6040, 200, 3)

Dataset splits:
  Train: 19324, Val: 4832, Test: 6040

[Training GAP]




  Epoch  10: Train Acc=0.9108, Val Acc=0.9100, F1=0.8700
  Epoch  20: Train Acc=0.9502, Val Acc=0.9325, F1=0.9069
  Epoch  30: Train Acc=0.9645, Val Acc=0.9404, F1=0.9151
  Epoch  40: Train Acc=0.9712, Val Acc=0.9582, F1=0.9416
  Epoch  50: Train Acc=0.9754, Val Acc=0.9580, F1=0.9416
  Epoch  60: Train Acc=0.9810, Val Acc=0.9644, F1=0.9505
  Epoch  70: Train Acc=0.9825, Val Acc=0.9636, F1=0.9491
  Epoch  80: Train Acc=0.9869, Val Acc=0.9698, F1=0.9571
  Epoch  90: Train Acc=0.9892, Val Acc=0.9663, F1=0.9526
  Epoch 100: Train Acc=0.9903, Val Acc=0.9634, F1=0.9509
  Best Val Acc: 0.9708

[GAP Results]
  Val Acc: 0.9708
  Test Acc: 0.9697, F1: 0.9550

[Training TPA]




  Epoch  10: Train Acc=0.9178, Val Acc=0.9133, F1=0.8724
  Epoch  20: Train Acc=0.9532, Val Acc=0.9538, F1=0.9350
  Epoch  30: Train Acc=0.9676, Val Acc=0.9580, F1=0.9425
  Epoch  40: Train Acc=0.9750, Val Acc=0.9663, F1=0.9501
  Epoch  50: Train Acc=0.9818, Val Acc=0.9758, F1=0.9660
  Epoch  60: Train Acc=0.9840, Val Acc=0.9706, F1=0.9559
  Epoch  70: Train Acc=0.9864, Val Acc=0.9776, F1=0.9673
  Epoch  80: Train Acc=0.9880, Val Acc=0.9785, F1=0.9690
  Epoch  90: Train Acc=0.9899, Val Acc=0.9779, F1=0.9697
  Epoch 100: Train Acc=0.9923, Val Acc=0.9832, F1=0.9763
  Best Val Acc: 0.9834

[TPA Results]
  Val Acc: 0.9834
  Test Acc: 0.9763, F1: 0.9646

[Training Gated-TPA]




  Epoch  10: Train Acc=0.9125, Val Acc=0.9176, F1=0.8721
  Epoch  20: Train Acc=0.9492, Val Acc=0.9259, F1=0.8970
  Epoch  30: Train Acc=0.9621, Val Acc=0.9348, F1=0.9117
  Epoch  40: Train Acc=0.9726, Val Acc=0.9567, F1=0.9395
  Epoch  50: Train Acc=0.9792, Val Acc=0.9683, F1=0.9540
  Epoch  60: Train Acc=0.9831, Val Acc=0.9716, F1=0.9605
  Epoch  70: Train Acc=0.9853, Val Acc=0.9764, F1=0.9661
  Epoch  80: Train Acc=0.9871, Val Acc=0.9733, F1=0.9596
  Epoch  90: Train Acc=0.9901, Val Acc=0.9799, F1=0.9714
  Epoch 100: Train Acc=0.9917, Val Acc=0.9756, F1=0.9641
  Best Val Acc: 0.9805

[Gated-TPA Results]
  Val Acc: 0.9805
  Test Acc: 0.9763, F1: 0.9654

[Progress: 3/33]

EXPERIMENT: STANDING_TO_WALKING_20pct

Loading STANDING_TO_WALKING_20pct...
  Path: /content/drive/MyDrive/AI_data/TPA2/wisdm_transition_datasets/STANDING_TO_WALKING_20pct
  Train: (24156, 200, 3), Test: (6040, 200, 3)

Dataset splits:
  Train: 19324, Val: 4832, Test: 6040

[Training GAP]




  Epoch  10: Train Acc=0.9096, Val Acc=0.9062, F1=0.8668
  Epoch  20: Train Acc=0.9482, Val Acc=0.9272, F1=0.8980
  Epoch  30: Train Acc=0.9619, Val Acc=0.9427, F1=0.9172
  Epoch  40: Train Acc=0.9712, Val Acc=0.9507, F1=0.9286
  Epoch  50: Train Acc=0.9757, Val Acc=0.9553, F1=0.9346
  Epoch  60: Train Acc=0.9812, Val Acc=0.9609, F1=0.9421
  Epoch  70: Train Acc=0.9837, Val Acc=0.9607, F1=0.9416
  Epoch  80: Train Acc=0.9843, Val Acc=0.9615, F1=0.9449
  Epoch  90: Train Acc=0.9869, Val Acc=0.9656, F1=0.9512
  Epoch 100: Train Acc=0.9895, Val Acc=0.9692, F1=0.9559
  Best Val Acc: 0.9692

[GAP Results]
  Val Acc: 0.9692
  Test Acc: 0.9672, F1: 0.9542

[Training TPA]




  Epoch  10: Train Acc=0.9217, Val Acc=0.9218, F1=0.8823
  Epoch  20: Train Acc=0.9555, Val Acc=0.9487, F1=0.9243
  Epoch  30: Train Acc=0.9662, Val Acc=0.9576, F1=0.9362
  Epoch  40: Train Acc=0.9739, Val Acc=0.9698, F1=0.9544
  Epoch  50: Train Acc=0.9782, Val Acc=0.9731, F1=0.9593
  Epoch  60: Train Acc=0.9835, Val Acc=0.9721, F1=0.9585
  Epoch  70: Train Acc=0.9867, Val Acc=0.9760, F1=0.9646
  Epoch  80: Train Acc=0.9893, Val Acc=0.9768, F1=0.9660
  Epoch  90: Train Acc=0.9909, Val Acc=0.9770, F1=0.9669
  Early stopping at epoch 93
  Best Val Acc: 0.9785

[TPA Results]
  Val Acc: 0.9785
  Test Acc: 0.9737, F1: 0.9633

[Training Gated-TPA]




  Epoch  10: Train Acc=0.9068, Val Acc=0.9114, F1=0.8604
  Epoch  20: Train Acc=0.9441, Val Acc=0.9317, F1=0.8964
  Epoch  30: Train Acc=0.9618, Val Acc=0.9478, F1=0.9241
  Epoch  40: Train Acc=0.9711, Val Acc=0.9505, F1=0.9298
  Epoch  50: Train Acc=0.9794, Val Acc=0.9675, F1=0.9529
  Epoch  60: Train Acc=0.9815, Val Acc=0.9596, F1=0.9436
  Epoch  70: Train Acc=0.9851, Val Acc=0.9654, F1=0.9496
  Early stopping at epoch 78
  Best Val Acc: 0.9737

[Gated-TPA Results]
  Val Acc: 0.9737
  Test Acc: 0.9724, F1: 0.9617

[Progress: 4/33]

EXPERIMENT: STANDING_TO_WALKING_30pct

Loading STANDING_TO_WALKING_30pct...
  Path: /content/drive/MyDrive/AI_data/TPA2/wisdm_transition_datasets/STANDING_TO_WALKING_30pct
  Train: (24156, 200, 3), Test: (6040, 200, 3)

Dataset splits:
  Train: 19324, Val: 4832, Test: 6040

[Training GAP]




  Epoch  10: Train Acc=0.9045, Val Acc=0.9178, F1=0.8819
  Epoch  20: Train Acc=0.9431, Val Acc=0.9431, F1=0.9192
  Epoch  30: Train Acc=0.9595, Val Acc=0.9530, F1=0.9323
  Epoch  40: Train Acc=0.9692, Val Acc=0.9520, F1=0.9270
  Epoch  50: Train Acc=0.9725, Val Acc=0.9586, F1=0.9370
  Epoch  60: Train Acc=0.9789, Val Acc=0.9576, F1=0.9359
  Epoch  70: Train Acc=0.9816, Val Acc=0.9570, F1=0.9372
  Epoch  80: Train Acc=0.9852, Val Acc=0.9567, F1=0.9376
  Epoch  90: Train Acc=0.9883, Val Acc=0.9692, F1=0.9529
  Epoch 100: Train Acc=0.9897, Val Acc=0.9665, F1=0.9475
  Best Val Acc: 0.9733

[GAP Results]
  Val Acc: 0.9733
  Test Acc: 0.9671, F1: 0.9543

[Training TPA]




  Epoch  10: Train Acc=0.9175, Val Acc=0.9259, F1=0.8889
  Epoch  20: Train Acc=0.9539, Val Acc=0.9377, F1=0.9113
  Epoch  30: Train Acc=0.9682, Val Acc=0.9543, F1=0.9347
  Epoch  40: Train Acc=0.9747, Val Acc=0.9661, F1=0.9497
  Epoch  50: Train Acc=0.9805, Val Acc=0.9719, F1=0.9582
  Epoch  60: Train Acc=0.9843, Val Acc=0.9752, F1=0.9611
  Epoch  70: Train Acc=0.9865, Val Acc=0.9785, F1=0.9664
  Epoch  80: Train Acc=0.9887, Val Acc=0.9793, F1=0.9677
  Epoch  90: Train Acc=0.9902, Val Acc=0.9723, F1=0.9553
  Epoch 100: Train Acc=0.9927, Val Acc=0.9781, F1=0.9658
  Best Val Acc: 0.9822

[TPA Results]
  Val Acc: 0.9822
  Test Acc: 0.9810, F1: 0.9727

[Training Gated-TPA]




  Epoch  10: Train Acc=0.9044, Val Acc=0.9096, F1=0.8591
  Epoch  20: Train Acc=0.9458, Val Acc=0.9269, F1=0.8994
  Epoch  30: Train Acc=0.9650, Val Acc=0.9501, F1=0.9254
  Epoch  40: Train Acc=0.9733, Val Acc=0.9590, F1=0.9426
  Epoch  50: Train Acc=0.9790, Val Acc=0.9690, F1=0.9549
  Epoch  60: Train Acc=0.9823, Val Acc=0.9683, F1=0.9529
  Epoch  70: Train Acc=0.9870, Val Acc=0.9725, F1=0.9593
  Epoch  80: Train Acc=0.9879, Val Acc=0.9646, F1=0.9490
  Epoch  90: Train Acc=0.9886, Val Acc=0.9787, F1=0.9670
  Epoch 100: Train Acc=0.9913, Val Acc=0.9781, F1=0.9677
  Best Val Acc: 0.9818

[Gated-TPA Results]
  Val Acc: 0.9818
  Test Acc: 0.9783, F1: 0.9693

[Progress: 5/33]

EXPERIMENT: STANDING_TO_WALKING_40pct

Loading STANDING_TO_WALKING_40pct...
  Path: /content/drive/MyDrive/AI_data/TPA2/wisdm_transition_datasets/STANDING_TO_WALKING_40pct
  Train: (24156, 200, 3), Test: (6040, 200, 3)

Dataset splits:
  Train: 19324, Val: 4832, Test: 6040

[Training GAP]




  Epoch  10: Train Acc=0.9013, Val Acc=0.9073, F1=0.8674
  Epoch  20: Train Acc=0.9418, Val Acc=0.9259, F1=0.9001
  Epoch  30: Train Acc=0.9566, Val Acc=0.9222, F1=0.9027
  Epoch  40: Train Acc=0.9668, Val Acc=0.9588, F1=0.9424
  Epoch  50: Train Acc=0.9702, Val Acc=0.9559, F1=0.9400
  Epoch  60: Train Acc=0.9764, Val Acc=0.9642, F1=0.9508


KeyboardInterrupt: 