# In [None]:
### INTEGRATED HAR BASELINE MODELS WITH TACL-NET ###

import os
import sys
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import seaborn as sns
import math
from collections import deque, Counter
import warnings
import time
import json
import pickle
import urllib.request
import zipfile
warnings.filterwarnings('ignore')

# Select the compute device once at import time: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


# 0. HAR DATA LOADER (Enhanced from original)
class UCIHARDataLoader:
    """Load the UCI Human Activity Recognition dataset, with a synthetic fallback.

    The loader optionally downloads and extracts the dataset archive into
    ``data_dir`` and reads the pre-computed feature files from the ``train``
    and ``test`` sub-directories. If anything goes wrong while reading the
    real files, a synthetic dataset with the same layout is generated instead.
    """

    def __init__(self, data_dir="./HAR_data", download=True):
        """Remember the data location and optionally fetch the dataset.

        Args:
            data_dir: Directory that holds (or will hold) the dataset.
            download: When true, call ``download_and_extract`` immediately.
        """
        self.data_dir = data_dir
        self.dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00240/UCI%20HAR%20Dataset.zip"
        self.activities = ['WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'SITTING', 'STANDING', 'LAYING']
        # Filled in by load_data_from_files; defined here so the attribute
        # always exists, even when the synthetic fallback is used.
        self.feature_names = []

        if download:
            self.download_and_extract()

    @staticmethod
    def _remove_quietly(path):
        """Delete ``path`` if possible; a missing or locked file is not an error here."""
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup only: the caller already reports the real failure.
            pass

    def download_and_extract(self):
        """Download and extract UCI HAR dataset.

        Failures are reported on stdout rather than raised. A partially
        downloaded or corrupt archive is deleted so that the next call starts
        from a clean download instead of failing on the same broken file.
        """
        os.makedirs(self.data_dir, exist_ok=True)
        zip_path = os.path.join(self.data_dir, "UCI_HAR_Dataset.zip")
        extract_path = os.path.join(self.data_dir, "UCI HAR Dataset")

        if os.path.exists(extract_path):
            return

        if not os.path.exists(zip_path):
            print("Downloading UCI HAR Dataset...")
            try:
                urllib.request.urlretrieve(self.dataset_url, zip_path)
                print("Downloaded UCI HAR Dataset")
            except Exception as e:
                # An interrupted transfer can leave a truncated file behind.
                self._remove_quietly(zip_path)
                print(f"Failed to download: {e}")
                print("Please download manually from: https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones")
                return

        print("Extracting dataset...")
        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(self.data_dir)
            print("Dataset extracted successfully")
        except zipfile.BadZipFile as e:
            # The archive itself is unusable; drop it so a retry re-downloads.
            self._remove_quietly(zip_path)
            print(f"Failed to extract: {e}")
        except Exception as e:
            print(f"Failed to extract: {e}")

    def _read_feature_names(self, dataset_path, n_features):
        """Return the names listed in ``features.txt`` or generic ``feature_i`` names.

        The generic names are used when the file is missing, unreadable, or
        contains a line without a second whitespace-separated token.
        """
        default_names = [f"feature_{i}" for i in range(n_features)]
        features_path = os.path.join(dataset_path, "features.txt")
        if not os.path.exists(features_path):
            return default_names
        try:
            with open(features_path, 'r') as f:
                return [line.strip().split()[1] for line in f]
        except (OSError, ValueError, IndexError):
            return default_names

    def load_data_from_files(self, base_path):
        """Load data from UCI HAR dataset files.

        Args:
            base_path: Directory that either is the dataset root or contains
                it as ``UCI HAR Dataset`` / ``UCI_HAR_Dataset``.

        Returns:
            ``(X_train, y_train, X_test, y_test, subject_train, subject_test)``
            with labels shifted to start at 0. If reading the real files
            fails for any reason, the result of ``create_synthetic_har_data``
            is returned instead.
        """
        try:
            # Try different possible directory structures
            possible_paths = [
                os.path.join(base_path, "UCI HAR Dataset"),
                os.path.join(base_path, "UCI_HAR_Dataset"),
                base_path
            ]

            dataset_path = None
            for path in possible_paths:
                train_path = os.path.join(path, "train")
                test_path = os.path.join(path, "test")
                if os.path.exists(train_path) and os.path.exists(test_path):
                    dataset_path = path
                    break

            if dataset_path is None:
                raise FileNotFoundError("Could not find UCI HAR dataset structure")

            print(f"Loading data from: {dataset_path}")

            # Load training data
            X_train = np.loadtxt(os.path.join(dataset_path, "train", "X_train.txt"))
            y_train = np.loadtxt(os.path.join(dataset_path, "train", "y_train.txt"), dtype=int)
            subject_train = np.loadtxt(os.path.join(dataset_path, "train", "subject_train.txt"), dtype=int)

            # Load test data
            X_test = np.loadtxt(os.path.join(dataset_path, "test", "X_test.txt"))
            y_test = np.loadtxt(os.path.join(dataset_path, "test", "y_test.txt"), dtype=int)
            subject_test = np.loadtxt(os.path.join(dataset_path, "test", "subject_test.txt"), dtype=int)

            # Convert labels to 0-indexed
            y_train = y_train - 1
            y_test = y_test - 1

            # Load feature names (optional)
            self.feature_names = self._read_feature_names(dataset_path, X_train.shape[1])

            print(f"Data loaded successfully:")
            print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
            print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")
            print(f"Number of features: {X_train.shape[1]}")
            print(f"Number of subjects: train={len(np.unique(subject_train))}, test={len(np.unique(subject_test))}")
            print(f"Activities: {self.activities}")
            print(f"Train class distribution: {np.bincount(y_train)}")
            print(f"Test class distribution: {np.bincount(y_test)}")

            return X_train, y_train, X_test, y_test, subject_train, subject_test

        except Exception as e:
            # Deliberate best-effort: experiments still run on synthetic data.
            print(f"Error loading real UCI HAR data: {e}")
            print("Creating synthetic HAR-like data...")
            return self.create_synthetic_har_data()

    def create_synthetic_har_data(self):
        """Create synthetic HAR-like data for testing.

        Seeds NumPy's global random generator with 42, then draws an equal
        number of samples per class (``7352 // 6`` for training and
        ``2947 // 6`` for testing) with 561 features each.

        Returns:
            ``(X_train, y_train, X_test, y_test, subject_train, subject_test)``
            where the subject arrays are random integers in ``[1, 30]``.
        """
        print("Generating synthetic HAR data...")
        np.random.seed(42)

        n_features = 561
        n_train = 7352
        n_test = 2947
        n_classes = 6

        # (base frequency, movement pattern) per class index, in label order.
        class_profiles = [
            (2.0, "periodic"),   # WALKING
            (1.5, "upward"),     # WALKING_UPSTAIRS
            (2.5, "downward"),   # WALKING_DOWNSTAIRS
            (0.1, "static"),     # SITTING
            (0.2, "static"),     # STANDING
            (0.05, "static"),    # LAYING
        ]

        n_train_class = n_train // n_classes
        n_test_class = n_test // n_classes

        X_train = []
        y_train = []
        X_test = []
        y_test = []

        for class_idx, (base_freq, movement_pattern) in enumerate(class_profiles):
            # Generate training samples
            for _ in range(n_train_class):
                X_train.append(self.generate_har_features(n_features, base_freq, movement_pattern))
                y_train.append(class_idx)

            # Generate test samples
            for _ in range(n_test_class):
                X_test.append(self.generate_har_features(n_features, base_freq, movement_pattern))
                y_test.append(class_idx)

        X_train = np.array(X_train)
        y_train = np.array(y_train)
        X_test = np.array(X_test)
        y_test = np.array(y_test)

        # Create dummy subject arrays
        subject_train = np.random.randint(1, 31, len(y_train))
        subject_test = np.random.randint(1, 31, len(y_test))

        print(f"Generated synthetic HAR data:")
        print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
        print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")

        return X_train, y_train, X_test, y_test, subject_train, subject_test

    def generate_har_features(self, n_features, base_freq, movement_pattern):
        """Generate one synthetic feature vector for the given activity profile.

        The first third of the vector depends on ``movement_pattern``
        (``"static"``, ``"periodic"``, ``"upward"``; anything else is treated
        as downward), the second third is a signed exponential draw scaled by
        ``base_freq``, and the rest is Gaussian noise scaled by ``base_freq``
        (or 0.1 for static activities).

        Returns:
            A 1-D NumPy array of length ``n_features``.
        """
        features = []
        first_cut = n_features // 3
        second_cut = 2 * n_features // 3

        # Values are drawn one at a time, in index order, so that results for a
        # given seed do not change.
        for i in range(n_features):
            if i < first_cut:  # Time domain features
                if movement_pattern == "static":
                    value = np.random.normal(0, 0.1)
                elif movement_pattern == "periodic":
                    value = np.sin(base_freq * np.random.uniform(0, 2*np.pi)) + np.random.normal(0, 0.2)
                elif movement_pattern == "upward":
                    value = abs(np.sin(base_freq * np.random.uniform(0, 2*np.pi))) + np.random.normal(0.2, 0.15)
                else:  # downward
                    value = -abs(np.sin(base_freq * np.random.uniform(0, 2*np.pi))) + np.random.normal(-0.2, 0.15)
            elif i < second_cut:  # Frequency domain features
                value = np.random.exponential(1/(base_freq + 0.1)) * np.random.choice([-1, 1])
            else:  # Statistical features
                value = np.random.normal(0, 1) * (base_freq if movement_pattern != "static" else 0.1)

            features.append(value)

        return np.array(features)

    def load_data(self, custom_dir=None):
        """Main method to load HAR data from ``custom_dir`` or, if not given, ``data_dir``."""
        return self.load_data_from_files(custom_dir if custom_dir else self.data_dir)


# 1. SAM OPTIMIZER FROM TACL-NET
class SAM(torch.optim.Optimizer):
    """Sharpness-Aware Minimization wrapper around a base optimizer.

    Each update is split in two: ``first_step`` moves the weights along the
    scaled gradient direction, and ``second_step`` restores the saved weights
    and lets the wrapped optimizer apply the gradients computed at the
    perturbed point.
    """

    def __init__(self, params, base_optimizer, rho=0.05, adaptive=False, **kwargs):
        """Build the wrapper.

        Args:
            params: Parameters (or parameter groups) to optimise.
            base_optimizer: Optimizer class, instantiated with ``**kwargs``.
            rho: Size of the weight perturbation; must be non-negative.
            adaptive: When true, scale the perturbation element-wise by the
                weights.
            **kwargs: Forwarded to ``base_optimizer`` and stored as defaults.
        """
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        sam_defaults = {"rho": rho, "adaptive": adaptive}
        sam_defaults.update(kwargs)
        super().__init__(params, sam_defaults)

        # Share one list of param groups between the wrapper and the base optimizer.
        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups
        self.defaults.update(self.base_optimizer.defaults)

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        """Save the current weights and move them by the scaled gradient."""
        norm = self._grad_norm()
        for group in self.param_groups:
            step_scale = group["rho"] / (norm + 1e-12)

            for param in group["params"]:
                if param.grad is None:
                    continue
                # Keep a copy so second_step can undo the perturbation.
                self.state[param]["old_p"] = param.data.clone()
                weighting = torch.pow(param, 2) if group["adaptive"] else 1.0
                perturbation = weighting * param.grad * step_scale.to(param)
                param.add_(perturbation)

        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        """Restore the saved weights, then run the base optimizer's step."""
        for group in self.param_groups:
            for param in group["params"]:
                if param.grad is None:
                    continue
                param.data = self.state[param]["old_p"]

        self.base_optimizer.step()
        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        """Run both steps; ``closure`` must recompute the loss and gradients."""
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        # The closure must build a graph even though step() runs under no_grad.
        closure = torch.enable_grad()(closure)

        self.first_step(zero_grad=True)
        closure()
        self.second_step()

    def _grad_norm(self):
        """Return the L2 norm over the gradients of all parameters that have one."""
        # Gather everything on one device in case parameters are spread out.
        shared_device = self.param_groups[0]["params"][0].device
        per_param_norms = []
        for group in self.param_groups:
            for param in group["params"]:
                if param.grad is None:
                    continue
                weighting = torch.abs(param) if group["adaptive"] else 1.0
                per_param_norms.append(
                    (weighting * param.grad).norm(dtype=torch.float32).to(shared_device)
                )
        return torch.norm(torch.stack(per_param_norms), dtype=torch.float32)

    def load_state_dict(self, state_dict):
        """Load state and re-link the base optimizer to the restored param groups."""
        super().load_state_dict(state_dict)
        self.base_optimizer.param_groups = self.param_groups


# 2. TACL-NET COMPONENTS
class BalancedNoiseRobustLoss(nn.Module):
    """Cross-entropy blended with the entropy of temperature-scaled predictions.

    The result is ``alpha * CE + (1 - alpha) * H`` where ``H`` is the mean
    entropy of ``softmax(preds / temp)``.
    """

    def __init__(self, alpha=0.8, temp=0.3):
        super().__init__()
        self.alpha = alpha
        self.temp = temp
        self.base_loss = nn.CrossEntropyLoss()

    def forward(self, preds, targets, conf_scores=None):
        """Return the blended loss; ``conf_scores`` is accepted but not used."""
        tempered = F.softmax(preds / self.temp, dim=1)
        ce_term = self.base_loss(preds, targets)
        # The small constant keeps log() finite when a probability is zero.
        weighted_log = tempered * torch.log(tempered + 1e-8)
        entropy_term = -torch.mean(weighted_log.sum(dim=1))
        return self.alpha * ce_term + (1 - self.alpha) * entropy_term

class EnhancedMLPEncoder(nn.Module):
    """Three-stage MLP mapping a flat feature vector to a latent vector.

    Each stage is Linear -> BatchNorm1d -> ReLU; the first two stages are
    followed by dropout (0.3, then 0.2).
    """

    def __init__(self, input_dim=561, latent_dim=128):
        super().__init__()

        # Built in forward order so parameter names and initialisation order
        # follow the layer sequence.
        stages = [
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, latent_dim),
            nn.BatchNorm1d(latent_dim),
            nn.ReLU(),
        ]
        self.feature_extractor = nn.Sequential(*stages)

    def forward(self, x):
        """Encode ``x`` and return the latent activations."""
        latent = self.feature_extractor(x)
        return latent

class ActivityAttention(nn.Module):
    """Element-wise gating of a latent vector.

    A two-layer network (Linear -> Tanh -> Linear -> Sigmoid) produces one
    gate in (0, 1) per latent dimension; the input is multiplied by it.
    """

    def __init__(self, latent_dim):
        super().__init__()
        half_dim = latent_dim // 2
        gate_layers = [
            nn.Linear(latent_dim, half_dim),
            nn.Tanh(),
            nn.Linear(half_dim, latent_dim),
            nn.Sigmoid(),
        ]
        self.attention = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return ``x`` scaled element-wise by its learned gates."""
        gates = self.attention(x)
        gated = x * gates
        return gated


# 3. MODEL DEFINITIONS
class CNN1D_HAR_Enhanced(nn.Module):
    """1D CNN classifier over a flat feature vector.

    The input of shape ``(batch, input_size)`` is treated as a single-channel
    signal, passed through three Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d
    stages, flattened, and classified by a three-layer MLP.

    Args:
        input_size: Number of features per sample.
        num_classes: Number of output logits.
        num_channels: Channel count of the first convolution; the next two
            stages use 2x and 4x this value.
    """

    def __init__(self, input_size, num_classes=6, num_channels=64):
        super(CNN1D_HAR_Enhanced, self).__init__()

        # Adapted for HAR signals (561 features)
        self.conv1 = nn.Conv1d(1, num_channels, kernel_size=9, padding=4)
        self.bn1 = nn.BatchNorm1d(num_channels)
        self.pool1 = nn.MaxPool1d(3, stride=2)

        self.conv2 = nn.Conv1d(num_channels, num_channels*2, kernel_size=7, padding=3)
        self.bn2 = nn.BatchNorm1d(num_channels*2)
        self.pool2 = nn.MaxPool1d(3, stride=2)

        self.conv3 = nn.Conv1d(num_channels*2, num_channels*4, kernel_size=5, padding=2)
        self.bn3 = nn.BatchNorm1d(num_channels*4)
        self.pool3 = nn.MaxPool1d(3, stride=2)

        conv_output_size = self._get_conv_output_size(input_size)

        self.fc1 = nn.Linear(conv_output_size, 256)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(128, num_classes)

        self.relu = nn.ReLU()

    def input_reshape(self, x):
        """Insert a channel axis: ``(batch, features)`` -> ``(batch, 1, features)``.

        This is a regular method rather than a lambda stored on the instance,
        because a lambda attribute prevents the module object from being
        pickled.
        """
        return x.unsqueeze(1)

    def _get_conv_output_size(self, input_size):
        """Return the flattened size of the conv stack's output for one sample."""
        # Only the shape matters here, so skip autograd bookkeeping.
        with torch.no_grad():
            x = torch.zeros(1, 1, input_size)
            x = self.pool1(F.relu(self.conv1(x)))
            x = self.pool2(F.relu(self.conv2(x)))
            x = self.pool3(F.relu(self.conv3(x)))
        return x.numel()

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self.input_reshape(x)

        x = self.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.pool2(x)
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.pool3(x)

        # Flatten channels and positions for the fully connected head.
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.relu(self.fc2(x))
        x = self.dropout2(x)
        logits = self.fc3(x)

        return logits

class BiLSTM_HAR_Enhanced(nn.Module):
    """Bidirectional LSTM with attention pooling over a chunked feature vector.

    The flat input is forced to width 561 (truncated or zero-padded), viewed
    as 33 steps of 17 features, run through a bidirectional LSTM, pooled with
    learned attention weights, and classified by an MLP head.
    """

    def __init__(self, input_size, hidden_size=128, num_layers=2, num_classes=6):
        super(BiLSTM_HAR_Enhanced, self).__init__()

        # Stored for reference only; the layout below is fixed at 33 x 17 = 561.
        self.input_size = input_size
        self.seq_len = 33
        self.feature_per_step = 17

        # PyTorch only applies inter-layer dropout when there are 2+ layers.
        inter_layer_dropout = 0.3 if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=self.feature_per_step,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=inter_layer_dropout
        )

        # Forward and backward states are concatenated.
        lstm_output_size = hidden_size * 2

        # Scores one scalar per time step.
        self.attention = nn.Sequential(
            nn.Linear(lstm_output_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1)
        )

        self.classifier = nn.Sequential(
            nn.Linear(lstm_output_size, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of flat feature vectors."""
        n_rows = x.shape[0]
        n_cols = x.shape[1]

        # Force the width to exactly 561: drop extra columns or append zeros.
        if n_cols > 561:
            x = x[:, :561]
        elif n_cols < 561:
            filler = torch.zeros(n_rows, 561 - n_cols, device=x.device)
            x = torch.cat([x, filler], dim=1)

        # (batch, 561) -> (batch, 33, 17)
        steps = x.view(n_rows, self.seq_len, self.feature_per_step)

        # (batch, seq_len, hidden_size * 2)
        hidden_states, _ = self.lstm(steps)

        # One weight per time step, normalised across the sequence.
        step_scores = self.attention(hidden_states).squeeze(-1)
        step_weights = F.softmax(step_scores, dim=1)

        # Attention-weighted sum of the LSTM outputs.
        context = torch.sum(hidden_states * step_weights.unsqueeze(-1), dim=1)

        return self.classifier(context)

class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding for sequence-first inputs.

    A table of shape ``(max_len, 1, d_model)`` is stored as the buffer ``pe``;
    even columns hold sines and odd columns hold cosines.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=0.1)

        positions = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        frequencies = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = positions * frequencies

        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even columns.
        cosine_angles = angles[:, :-1] if d_model % 2 == 1 else angles
        table[:, 1::2] = torch.cos(cosine_angles)

        # Add a singleton batch axis so the table broadcasts over the batch.
        self.register_buffer('pe', table.unsqueeze(1))

    def forward(self, x):
        """Add the encoding to ``x`` (sequence on dim 0) and apply dropout."""
        n_positions = x.size(0)
        encoded = x + self.pe[:n_positions, :]
        return self.dropout(encoded)

class TransformerHAR_Enhanced(nn.Module):
    """Transformer-encoder classifier over a chunked feature vector.

    The flat input is forced to width 561 (truncated or zero-padded), viewed
    as 33 steps of 17 features, projected to ``d_model``, given positional
    encodings, encoded, mean-pooled over the sequence, and classified.
    """

    def __init__(self, input_size, d_model=128, nhead=8, num_layers=3, num_classes=6):
        super(TransformerHAR_Enhanced, self).__init__()

        # Stored for reference only; the layout below is fixed at 33 x 17 = 561.
        self.input_size = input_size
        self.seq_len = 33
        self.feature_per_step = 17

        # Multi-head attention needs d_model divisible by nhead: round down.
        if d_model % nhead != 0:
            d_model = nhead * (d_model // nhead)
        self.d_model = d_model

        # Per-step features -> model width.
        self.input_projection = nn.Linear(self.feature_per_step, d_model)

        # A little headroom beyond the fixed sequence length.
        self.pos_encoder = PositionalEncoding(d_model, max_len=self.seq_len + 10)

        layer_template = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=d_model * 4,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(layer_template, num_layers=num_layers)

        head_width = d_model // 2
        self.classifier = nn.Sequential(
            nn.Linear(d_model, head_width),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(head_width, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of flat feature vectors."""
        n_rows = x.shape[0]
        n_cols = x.shape[1]

        # Force the width to exactly 561: drop extra columns or append zeros.
        if n_cols > 561:
            x = x[:, :561]
        elif n_cols < 561:
            filler = torch.zeros(n_rows, 561 - n_cols, device=x.device)
            x = torch.cat([x, filler], dim=1)

        # (batch, 561) -> (batch, 33, 17) -> (batch, 33, d_model)
        tokens = self.input_projection(
            x.view(n_rows, self.seq_len, self.feature_per_step)
        )

        # The positional encoder indexes positions on dim 0, so go
        # sequence-first for that call and back to batch-first afterwards.
        tokens = self.pos_encoder(tokens.transpose(0, 1)).transpose(0, 1)

        encoded = self.transformer(tokens)  # (batch, seq_len, d_model)

        # Mean over the sequence axis.
        pooled = torch.mean(encoded, dim=1)

        return self.classifier(pooled)

class TACL_HAR(nn.Module):
    """TACL-Net classifier: encoder, attention gate, bottleneck and two heads.

    ``forward`` returns a dict with class ``logits`` and per-sample
    ``conf_scores``. The loss module is exposed as ``loss_fn``.
    """

    def __init__(self, input_dim=561, n_classes=6, latent_dim=128):
        super().__init__()
        bottleneck_dim = latent_dim // 2

        self.encoder = EnhancedMLPEncoder(input_dim, latent_dim)
        self.activity_attention = ActivityAttention(latent_dim)

        self.bottleneck = nn.Sequential(
            nn.Linear(latent_dim, bottleneck_dim),
            nn.ReLU(),
            nn.Dropout(0.4)
        )

        # Class logits.
        self.classifier = nn.Linear(bottleneck_dim, n_classes)

        # One scalar in (0, 1) per sample.
        self.confidence = nn.Sequential(
            nn.Linear(bottleneck_dim, 32),
            nn.Tanh(),
            nn.Dropout(0.2),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

        self.loss_fn = BalancedNoiseRobustLoss()

    def forward(self, x):
        """Return ``{'logits': ..., 'conf_scores': ...}`` for the batch ``x``."""
        features = self.bottleneck(self.activity_attention(self.encoder(x)))

        logits = self.classifier(features)
        # Map the sigmoid output from (0, 1) to the range 0.7 .. 1.0.
        raw_confidence = self.confidence(features).squeeze(-1)
        conf_scores = raw_confidence * 0.3 + 0.7

        return {'logits': logits, 'conf_scores': conf_scores}


# 4. ENHANCED TRAINING LOGGER
class EnhancedTrainingLogger:
    """Collect per-epoch training metrics and print them periodically.

    Every call to ``log_epoch`` is stored in ``epoch_logs``; a line is printed
    for epoch 1 and for every ``log_every``-th epoch.
    """

    def __init__(self, log_every=10):
        """Create a logger that prints every ``log_every`` epochs."""
        self.log_every = log_every
        self.epoch_logs = []

    @staticmethod
    def _format_metric(value):
        """Format a metric with three decimals, or ``n/a`` when it is missing."""
        return "n/a" if value is None else f"{value:.3f}"

    def log_epoch(self, epoch, loss, train_acc, val_acc, train_f1=None, val_f1=None,
                  train_precision=None, val_precision=None, train_recall=None, val_recall=None, lr=None):
        """Record one epoch's metrics and print them when the epoch is due.

        Optional metric pairs are printed only when the training value is
        given. A missing validation value is shown as ``n/a`` instead of
        raising a formatting error.
        """
        log_entry = {
            'epoch': epoch,
            'loss': loss,
            'train_acc': train_acc,
            'val_acc': val_acc,
            'train_f1': train_f1,
            'val_f1': val_f1,
            'train_precision': train_precision,
            'val_precision': val_precision,
            'train_recall': train_recall,
            'val_recall': val_recall,
            'lr': lr
        }
        self.epoch_logs.append(log_entry)

        # A non-positive interval means "only the first epoch" rather than a
        # division-by-zero error.
        is_due = epoch == 1 or (self.log_every > 0 and epoch % self.log_every == 0)
        if not is_due:
            return

        log_msg = f"Epoch {epoch:3d}: Loss={loss:.4f}, TrainAcc={train_acc:.2f}%, ValAcc={val_acc:.2f}%"
        optional_pairs = (
            ("F1", train_f1, val_f1),
            ("P", train_precision, val_precision),
            ("R", train_recall, val_recall),
        )
        for tag, train_value, val_value in optional_pairs:
            if train_value is not None:
                log_msg += (
                    f", Train{tag}={self._format_metric(train_value)}"
                    f", Val{tag}={self._format_metric(val_value)}"
                )
        if lr is not None:
            log_msg += f", LR={lr:.6f}"
        print(log_msg)

    def get_best_epoch(self):
        """Return the stored log entry with the highest ``val_acc``, or ``None`` if empty."""
        if not self.epoch_logs:
            return None
        return max(self.epoch_logs, key=lambda entry: entry['val_acc'])


# 5. UTILITY FUNCTIONS
def add_realistic_label_noise(y, subjects, noise_rate=0.1, n_classes=6):
    """Flip a fraction of labels to a plausibly confused activity class.

    Args:
        y: 1-D array of integer class labels.
        subjects: Unused; kept for interface compatibility.
        noise_rate: Fraction of samples to corrupt. ``int(noise_rate * len(y))``
            samples are selected; when that is zero or negative nothing
            changes, and it is capped at ``len(y)``.
        n_classes: Unused; kept for interface compatibility.

    Returns:
        ``(y_noisy, noise_mask)`` where ``y_noisy`` is a modified copy of ``y``
        and ``noise_mask`` is a boolean array that is true exactly where a
        label was replaced.
    """
    y_noisy = y.copy()
    n_samples = len(y)
    noise_mask = np.zeros(n_samples, dtype=bool)

    # Small rates on small datasets can round down to zero selected samples.
    n_noisy = min(int(noise_rate * n_samples), n_samples)
    if noise_rate == 0 or n_noisy <= 0:
        return y_noisy, noise_mask

    # Activity confusion matrix (based on realistic confusions)
    confusion_patterns = {
        0: [1, 2],      # WALKING -> stairs activities
        1: [0, 2],      # WALKING_UPSTAIRS -> walking activities
        2: [0, 1],      # WALKING_DOWNSTAIRS -> walking activities
        3: [4],         # SITTING -> STANDING
        4: [3],         # STANDING -> SITTING
        5: [3]          # LAYING -> SITTING (sensor orientation)
    }

    # Select samples for noise injection
    noisy_indices = np.random.choice(n_samples, n_noisy, replace=False)

    for idx in noisy_indices:
        possible_confusions = confusion_patterns.get(y[idx])
        if possible_confusions is None:
            # No confusion rule for this class: leave the label untouched and
            # do not report it as noisy.
            continue
        y_noisy[idx] = np.random.choice(possible_confusions)
        noise_mask[idx] = True

    return y_noisy, noise_mask

def calculate_metrics(y_true, y_pred):
    """Return ``(accuracy, f1, precision, recall)``; all but accuracy are weighted averages."""
    averaging = {'average': 'weighted'}
    return (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, **averaging),
        precision_score(y_true, y_pred, **averaging),
        recall_score(y_true, y_pred, **averaging),
    )


# 6. TRAINING FUNCTIONS
def train_tacl_one_epoch(model, data_loader, optimizer, epoch, device):
    """Train a TACL-Net model for one epoch with the two-step SAM update.

    From epoch 9 onward, labels of samples the model is highly confident about
    are replaced by the model's own prediction for the loss computation only.
    Reported metrics are always computed against the labels supplied by the
    loader, so they are not inflated by pseudo-labels, and the loader's label
    tensor is never modified in place.

    Args:
        model: Module whose forward returns a dict with ``'logits'`` and
            ``'conf_scores'`` and which exposes ``loss_fn``.
        data_loader: Iterable of ``(features, labels)`` batches.
        optimizer: Optimizer exposing ``first_step`` / ``second_step``.
        epoch: 1-based epoch number; controls when pseudo-labeling starts.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy_percent, f1, precision, recall)``.
    """
    model.train()
    total_loss = 0
    all_preds = []
    all_targets = []

    for batch_X, batch_y in data_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

        # Labels used for the loss; starts as the given labels.
        train_y = batch_y

        # Pseudo-labeling for high-confidence predictions
        if epoch > 8:
            with torch.no_grad():
                outputs = model(batch_X)
                high_conf_mask = (outputs['conf_scores'] > 0.9) & \
                               (F.softmax(outputs['logits'], dim=1).max(1)[0] > 0.85)
                if high_conf_mask.any():
                    pseudo_labels = outputs['logits'].argmax(-1)
                    # Work on a copy so the original labels stay available
                    # for the metrics below.
                    train_y = batch_y.clone()
                    train_y[high_conf_mask] = pseudo_labels[high_conf_mask]

        def closure():
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = model.loss_fn(outputs['logits'], train_y, outputs['conf_scores'])
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            return loss

        # First pass: gradients at the current weights, then perturb.
        loss = closure()
        optimizer.first_step(zero_grad=True)

        # Second pass: gradients at the perturbed weights, then the real update.
        closure()
        optimizer.second_step(zero_grad=True)

        total_loss += loss.item()

        # Calculate predictions for metrics
        with torch.no_grad():
            outputs = model(batch_X)
            _, predicted = torch.max(outputs['logits'], 1)
            all_preds.extend(predicted.cpu().numpy())
            all_targets.extend(batch_y.cpu().numpy())

    # Calculate comprehensive metrics
    accuracy, f1, precision, recall = calculate_metrics(all_targets, all_preds)

    return total_loss / len(data_loader), accuracy * 100, f1, precision, recall

def train_baseline_one_epoch(model, data_loader, optimizer, device):
    """Run one cross-entropy training epoch for a model that returns raw logits.

    Returns:
        ``(mean_loss, accuracy_percent, f1, precision, recall)`` computed from
        the predictions made during the training passes.
    """
    model.train()
    running_loss = 0
    epoch_preds = []
    epoch_targets = []

    for features, labels in data_loader:
        features = features.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(features)
        batch_loss = F.cross_entropy(logits, labels)
        batch_loss.backward()
        # Cap the global gradient norm before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        running_loss += batch_loss.item()

        predicted = torch.max(logits.data, 1)[1]
        epoch_preds.extend(predicted.cpu().numpy())
        epoch_targets.extend(labels.cpu().numpy())

    accuracy, f1, precision, recall = calculate_metrics(epoch_targets, epoch_preds)
    mean_loss = running_loss / len(data_loader)

    return mean_loss, accuracy * 100, f1, precision, recall

def evaluate_model(model, data_loader, device, model_type='baseline'):
    """Evaluate a model on a loader without gradient tracking.

    For ``model_type == 'tacl'`` the model's own ``conf_scores`` are used as
    confidence; for every other type the confidence is the maximum softmax
    probability.

    Returns:
        ``(accuracy_percent, f1, precision, recall, mean_confidence,
        predictions, targets)``.
    """
    model.eval()
    predictions = []
    targets = []
    confidences = []
    uses_tacl_outputs = model_type == 'tacl'

    with torch.no_grad():
        for features, labels in data_loader:
            features = features.to(device)
            labels = labels.to(device)

            outputs = model(features)
            if uses_tacl_outputs:
                logits = outputs['logits']
                conf_scores = outputs['conf_scores']
            else:
                logits = outputs
                # Confidence = probability assigned to the predicted class.
                conf_scores = torch.max(torch.softmax(logits, dim=1), 1)[0]

            predicted = torch.max(logits, 1)[1]
            predictions.extend(predicted.cpu().numpy())
            targets.extend(labels.cpu().numpy())
            confidences.extend(conf_scores.cpu().numpy())

    accuracy, f1, precision, recall = calculate_metrics(targets, predictions)
    mean_confidence = np.mean(confidences)

    return accuracy * 100, f1, precision, recall, mean_confidence, predictions, targets

def train_model_enhanced(model, train_loader, val_loader, device, model_type='baseline',
                        num_epochs=100, patience=15, verbose=True):
    """Train a model with early stopping and restore its best weights.

    Args:
        model: Module to train; it is moved to ``device``.
        train_loader: Loader of training batches.
        val_loader: Loader of validation batches.
        device: Device to train on.
        model_type: ``'tacl'`` selects SAM over AdamW with cosine annealing.
            ``'cnn'`` and ``'lstm'`` use Adam with lr 0.001; any other value
            uses Adam with lr 0.0005. Non-TACL models use ReduceLROnPlateau,
            stepped on the training loss.
        num_epochs: Maximum number of epochs.
        patience: Number of epochs without validation-accuracy improvement
            before stopping.
        verbose: When true, metrics are recorded and periodically printed.
            When false, no per-epoch logs are recorded, so ``'logs'`` is empty
            and ``'best_epoch'`` is ``None`` in the returned info.

    Returns:
        ``(model, best_val_acc, info)`` where ``info`` holds
        ``'training_time'``, ``'best_epoch'``, ``'total_epochs'`` and ``'logs'``.
    """

    logger = EnhancedTrainingLogger(log_every=20)

    # Setup optimizer based on model type
    if model_type == 'tacl':
        optimizer = SAM(model.parameters(), torch.optim.AdamW, rho=0.05, lr=1e-3, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer.base_optimizer, T_max=40, eta_min=1e-6)
    else:
        if model_type == 'cnn' or model_type == 'lstm':
            optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
        else:  # transformer
            optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-4)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=7, factor=0.5)

    model.to(device)
    best_val_acc = 0
    patience_counter = 0
    best_model_state = None
    epochs_run = 0  # stays 0 when num_epochs is 0, instead of leaving `epoch` unbound
    start_time = time.time()

    for epoch in range(num_epochs):
        epochs_run = epoch + 1

        # Training phase
        if model_type == 'tacl':
            train_loss, train_acc, train_f1, train_precision, train_recall = train_tacl_one_epoch(
                model, train_loader, optimizer, epoch + 1, device
            )
        else:
            train_loss, train_acc, train_f1, train_precision, train_recall = train_baseline_one_epoch(
                model, train_loader, optimizer, device
            )

        # Validation phase
        val_acc, val_f1, val_precision, val_recall, val_conf, _, _ = evaluate_model(
            model, val_loader, device, model_type
        )

        # Learning rate scheduling (read the rate before the scheduler changes it)
        current_lr = optimizer.param_groups[0]['lr'] if model_type != 'tacl' else optimizer.base_optimizer.param_groups[0]['lr']

        if model_type == 'tacl':
            scheduler.step()
        else:
            scheduler.step(train_loss)

        # Log comprehensive metrics
        if verbose:
            logger.log_epoch(
                epoch + 1, train_loss, train_acc, val_acc,
                train_f1, val_f1, train_precision, val_precision,
                train_recall, val_recall, current_lr
            )

        # Early stopping
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
            # state_dict() values share storage with the live parameters, so a
            # shallow dict copy would be overwritten by later training. Clone
            # every tensor to keep a real snapshot of the best weights.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1

        if patience_counter >= patience:
            if verbose:
                print(f"Early stopping at epoch {epoch + 1}")
            break

    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    training_time = time.time() - start_time
    best_epoch_info = logger.get_best_epoch()

    return model, best_val_acc, {
        'training_time': training_time,
        'best_epoch': best_epoch_info,
        'total_epochs': epochs_run,
        'logs': logger.epoch_logs
    }

def get_detailed_evaluation(model, test_loader, device, class_labels, model_type='baseline'):
    """Evaluate ``model`` on ``test_loader`` and attach a per-class text report.

    The metrics come from ``evaluate_model``; its 7-tuple is unpacked in the
    order accuracy, F1, precision, recall, confidence, predictions, targets.
    A ``classification_report`` (4 decimal places) is then rendered with
    ``class_labels`` as the row names.

    Returns:
        dict with keys ``test_acc``, ``test_f1``, ``test_precision``,
        ``test_recall``, ``confidence``, ``report``, ``predictions`` and
        ``targets``.
    """
    (accuracy, f1, precision, recall,
     confidence, predictions, targets) = evaluate_model(model, test_loader, device, model_type)

    text_report = classification_report(
        targets,
        predictions,
        target_names=class_labels,
        digits=4,
    )

    evaluation = dict(
        test_acc=accuracy,
        test_f1=f1,
        test_precision=precision,
        test_recall=recall,
        confidence=confidence,
        report=text_report,
        predictions=predictions,
        targets=targets,
    )
    return evaluation


# 7. MAIN EXPERIMENT FUNCTION
def run_integrated_experiments(X_train, y_train, X_test, y_test, device,
                              class_labels, noise_level=0.0, verbose=True):
    """Train and test the four models (CNN, LSTM, Transformer, TACL-Net) once.

    Pipeline, in order:
      1. optionally corrupt the training labels via ``add_realistic_label_noise``
         (a dummy all-ones subject vector is supplied),
      2. standardize features with a scaler fitted on the training set only,
      3. carve a stratified 20% validation split out of the (possibly noisy)
         training set -- validation labels therefore carry the same noise,
      4. train every model with ``train_model_enhanced`` and score it on the
         untouched test labels with ``get_detailed_evaluation``.

    Args:
        X_train, y_train: training features and integer labels.
        X_test, y_test: test features and integer labels.
        device: torch device handed to the training/evaluation helpers.
        class_labels: class names used in the classification report.
        noise_level: label-noise level; ``0`` keeps the labels as they are.
        verbose: print progress and per-model summaries.

    Returns:
        ``(results, models)`` -- both dicts keyed by model display name.
    """
    banner = '=' * 80
    if verbose:
        print(f"\n{banner}")
        print(f"INTEGRATED HAR EXPERIMENT - NOISE LEVEL: {noise_level}")
        print(banner)

    # Label corruption (training labels only; the test labels stay clean)
    if noise_level > 0:
        placeholder_subjects = np.ones(len(y_train), dtype=int)  # no real subject ids here
        y_train_noisy, noise_mask = add_realistic_label_noise(y_train, placeholder_subjects, noise_level)
        if verbose:
            print(f"Added noise to {noise_mask.sum()}/{len(y_train)} training samples")
    else:
        y_train_noisy = y_train.copy()
        noise_mask = np.zeros(len(y_train), dtype=bool)

    # Feature standardization: statistics come from the training set only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Hold out a stratified validation set from the training data
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train_scaled, y_train_noisy,
        test_size=0.2, random_state=42, stratify=y_train_noisy,
    )

    def as_loader(features, labels, shuffle):
        """Wrap numpy arrays in a TensorDataset-backed DataLoader (batch size 128)."""
        dataset = TensorDataset(torch.FloatTensor(features), torch.LongTensor(labels))
        return DataLoader(dataset, batch_size=128, shuffle=shuffle)

    train_loader = as_loader(X_fit, y_fit, True)
    val_loader = as_loader(X_val, y_val, False)
    test_loader = as_loader(X_test_scaled, y_test, False)

    input_size = X_train.shape[1]
    num_classes = len(np.unique(y_train))

    # (display name, model instance, model_type tag understood by the trainers)
    model_configs = [
        ('1D-CNN', CNN1D_HAR_Enhanced(input_size, num_classes), 'cnn'),
        ('Bi-LSTM', BiLSTM_HAR_Enhanced(input_size, num_classes=num_classes), 'lstm'),
        ('Transformer', TransformerHAR_Enhanced(input_size, num_classes=num_classes), 'transformer'),
        ('TACL-Net', TACL_HAR(input_dim=input_size, n_classes=num_classes, latent_dim=128), 'tacl')
    ]

    copied_metrics = ('test_acc', 'test_f1', 'test_precision', 'test_recall', 'confidence', 'report')
    results, models = {}, {}
    section_rule = '-' * 70

    for model_name, model, model_type in model_configs:
        if verbose:
            print(f"\n{section_rule}")
            print(f"Training {model_name}...")
            print(section_rule)

        trained_model, val_acc, training_info = train_model_enhanced(
            model, train_loader, val_loader, device, model_type, verbose=verbose
        )
        test_results = get_detailed_evaluation(
            trained_model, test_loader, device, class_labels, model_type
        )

        entry = {'val_acc': val_acc}
        entry.update({key: test_results[key] for key in copied_metrics})
        entry['training_info'] = training_info
        entry['noise_level'] = noise_level
        results[model_name] = entry
        models[model_name] = trained_model

        if not verbose:
            continue

        print(f"\n{model_name} Results:")
        print(f"  Validation Accuracy: {val_acc:.2f}%")
        print(f"  Test Accuracy:      {test_results['test_acc']:.2f}%")
        print(f"  Test F1-Score:      {test_results['test_f1']:.4f}")
        print(f"  Test Precision:     {test_results['test_precision']:.4f}")
        print(f"  Test Recall:        {test_results['test_recall']:.4f}")
        print(f"  Mean Confidence:    {test_results['confidence']:.3f}")
        print(f"  Training Time:      {training_info['training_time']:.1f}s")
        print(f"  Total Epochs:       {training_info['total_epochs']}")

    return results, models

def run_noise_robustness_integrated(X_train, y_train, X_test, y_test, device, class_labels,
                                    verbose=True, noise_levels=None):
    """Run the four-model experiment once per label-noise level.

    Args:
        X_train, y_train, X_test, y_test: data forwarded unchanged to
            ``run_integrated_experiments`` for every noise level.
        device: torch device forwarded to ``run_integrated_experiments``.
        class_labels: class names forwarded to ``run_integrated_experiments``.
        verbose: print banners and a one-line summary per model and level.
        noise_levels: iterable of noise levels to sweep. ``None`` (default)
            uses the original sweep ``0.0, 0.1, ..., 0.6``.

    Returns:
        dict mapping each noise level to the per-model results dict returned
        by ``run_integrated_experiments``. The trained models are not kept.
    """
    if noise_levels is None:
        noise_levels = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
    else:
        noise_levels = list(noise_levels)

    all_results = {}

    if verbose:
        print("="*80)
        print("COMPREHENSIVE NOISE ROBUSTNESS EXPERIMENTS (ALL 4 MODELS)")
        print("="*80)

    for noise_level in noise_levels:
        if verbose:
            print(f"\n{'='*20} NOISE LEVEL: {noise_level:.1f} {'='*20}")

        # Only the metrics are retained; the trained models are discarded.
        results, _ = run_integrated_experiments(
            X_train, y_train, X_test, y_test, device,
            class_labels, noise_level, verbose=verbose
        )
        all_results[noise_level] = results

        # Print summary for this noise level
        if verbose:
            print(f"\nSUMMARY FOR NOISE LEVEL {noise_level:.1f}:")
            print("-" * 60)
            for model_name, metrics in results.items():
                print(f"{model_name:12}: Acc={metrics['test_acc']:6.2f}%, "
                      f"F1={metrics['test_f1']:.3f}, P={metrics['test_precision']:.3f}, "
                      f"R={metrics['test_recall']:.3f}, Conf={metrics['confidence']:.3f}")

    return all_results

def print_comprehensive_comparison(all_results, verbose=True):
    """Print accuracy/F1 tables and model rankings across noise levels.

    Args:
        all_results: ``{noise_level: {model_name: metrics}}`` where each
            ``metrics`` dict provides ``test_acc``, ``test_f1`` and
            ``training_info['training_time']``.
        verbose: when false nothing is printed and the function returns
            immediately.

    Notes:
        * Table headers are generated from the noise levels actually present,
          so every data column has a matching header.
        * The table's "Robustness" and "F1 Drop" columns compare the highest
          noise level against the lowest one in the sweep.
        * The "Noise Robustness" ranking compares noise 0.3 against noise 0.0;
          when either level is missing, the highest / lowest available level
          is used instead.
        * A clean accuracy of zero yields a robustness of 0.0 rather than a
          division error.
    """
    if not verbose or not all_results:
        return

    noise_levels = sorted(all_results)
    clean_level = 0.0 if 0.0 in all_results else noise_levels[0]
    noisy_level = 0.3 if 0.3 in all_results else noise_levels[-1]
    model_names = list(all_results[clean_level])

    def mean_training_time(model_name):
        """Average training time of one model over every noise level."""
        return np.mean([all_results[level][model_name]['training_info']['training_time']
                        for level in noise_levels])

    def retention(noisy_value, clean_value):
        """Percentage of the clean score retained under noise (0.0 if clean is 0)."""
        return (noisy_value / clean_value) * 100 if clean_value else 0.0

    # One header cell per noise level, same width as the data cells below.
    level_labels = ['Clean (0.0)' if level == 0 else f'Noise {level:.1f}'
                    for level in noise_levels]
    level_header = ''.join(f" {label:>11} |" for label in level_labels)
    acc_header = f"{'Model':<12} |{level_header} {'Avg Time(s)':>11} | {'Robustness':>10}"
    f1_header = f"{'Model':<12} |{level_header} {'F1 Drop':>11}"
    table_rule = "-" * max(120, len(acc_header))
    banner = "=" * max(120, len(acc_header))

    print("\n" + banner)
    print("COMPREHENSIVE COMPARISON: ALL FOUR MODELS ACROSS NOISE LEVELS")
    print(banner)

    # Accuracy comparison
    print("\nACCURACY COMPARISON:")
    print(table_rule)
    print(acc_header)
    print(table_rule)
    for model_name in model_names:
        accuracies = [all_results[level][model_name]['test_acc'] for level in noise_levels]
        row = f"{model_name:<12} |"
        row += ''.join(f" {acc:>10.2f}% |" for acc in accuracies)
        row += f" {mean_training_time(model_name):>10.1f}s |"
        # Highest-noise accuracy as a percentage of the lowest-noise accuracy
        row += f" {retention(accuracies[-1], accuracies[0]):>9.1f}%"
        print(row)

    # F1-Score comparison
    print("\nF1-SCORE COMPARISON:")
    print(table_rule)
    print(f1_header)
    print(table_rule)
    for model_name in model_names:
        f1_scores = [all_results[level][model_name]['test_f1'] for level in noise_levels]
        row = f"{model_name:<12} |"
        row += ''.join(f" {f1:>11.4f} |" for f1 in f1_scores)
        row += f" {f1_scores[0] - f1_scores[-1]:>11.4f}"
        print(row)

    # Model rankings
    print("\nMODEL RANKINGS:")
    print("-" * 60)

    clean_ranking = sorted(
        model_names,
        key=lambda name: all_results[clean_level][name]['test_acc'],
        reverse=True,
    )
    print(f"Clean Performance:  {' > '.join(clean_ranking)}")

    robustness_scores = {
        name: retention(all_results[noisy_level][name]['test_acc'],
                        all_results[clean_level][name]['test_acc'])
        for name in model_names
    }
    robustness_ranking = sorted(model_names, key=robustness_scores.get, reverse=True)
    print(f"Noise Robustness:   {' > '.join(robustness_ranking)}")

    efficiency_scores = {name: mean_training_time(name) for name in model_names}
    efficiency_ranking = sorted(model_names, key=efficiency_scores.get)
    print(f"Training Efficiency: {' > '.join(efficiency_ranking)} (fastest to slowest)")

def generate_model_recommendations(all_results, verbose=True):
    """Compute per-model summary metrics used for model comparison.

    Nothing is printed. The original implementation built these metrics and
    then discarded them; they are now returned so callers can use them.

    Args:
        all_results: ``{noise_level: {model_name: metrics}}`` where each
            ``metrics`` dict provides ``test_acc``, ``test_f1``,
            ``confidence`` and ``training_info`` (``training_time``,
            ``total_epochs``).
        verbose: retained for backward compatibility; it has no effect
            because the function does not print.

    Returns:
        dict mapping model name to a dict with ``clean_acc``, ``clean_f1``,
        ``noisy_acc``, ``noisy_f1``, ``robustness_acc``, ``robustness_f1``
        (noisy score as a percentage of the clean score; 0.0 when the clean
        score is zero), ``avg_time``, ``avg_epochs``, ``clean_conf`` and
        ``noisy_conf``. An empty ``all_results`` yields ``{}``.

        "Clean" is noise level 0.0 and "noisy" is 0.3; when either level is
        absent, the lowest / highest available level is used instead.
    """
    if not all_results:
        return {}

    noise_levels = sorted(all_results)
    clean_level = 0.0 if 0.0 in all_results else noise_levels[0]
    noisy_level = 0.3 if 0.3 in all_results else noise_levels[-1]
    model_names = list(all_results[clean_level])

    def retention(noisy_value, clean_value):
        """Percentage of the clean score retained under noise (0.0 if clean is 0)."""
        return (noisy_value / clean_value) * 100 if clean_value else 0.0

    model_metrics = {}
    for model_name in model_names:
        clean = all_results[clean_level][model_name]
        noisy = all_results[noisy_level][model_name]
        training_runs = [all_results[level][model_name]['training_info']
                         for level in noise_levels]

        model_metrics[model_name] = {
            'clean_acc': clean['test_acc'],
            'clean_f1': clean['test_f1'],
            'noisy_acc': noisy['test_acc'],
            'noisy_f1': noisy['test_f1'],
            'robustness_acc': retention(noisy['test_acc'], clean['test_acc']),
            'robustness_f1': retention(noisy['test_f1'], clean['test_f1']),
            'avg_time': np.mean([run['training_time'] for run in training_runs]),
            'avg_epochs': np.mean([run['total_epochs'] for run in training_runs]),
            'clean_conf': clean['confidence'],
            'noisy_conf': noisy['confidence']
        }

    return model_metrics

def create_comprehensive_plots(all_results, save_prefix="har_four_models", verbose=True):
    """Draw the 11-panel comparison figure and then the training-curves figure.

    Panels (3x4 grid, slot 12 left empty): accuracy, F1, precision, recall and
    confidence versus noise level; average training time; clean-data accuracy
    and F1 bars; robustness bars; accuracy and F1 heatmaps; epochs trained
    versus noise level.

    Args:
        all_results: ``{noise_level: {model_name: metrics}}`` as produced by
            the noise-robustness sweep.
        save_prefix: filename prefix for the saved PNGs. When falsy, neither
            figure is written to disk (previously the training-curves figure
            was still saved as ``"None_training_curves.png"``).
        verbose: print saved paths and error messages.

    Notes:
        "Clean" is noise level 0.0 and the robustness reference is 0.3; when
        either level is absent, the lowest / highest available level is used.
        Line and bar colours cycle through a four-colour palette, so more
        than four models no longer raise ``IndexError``. Any exception is
        caught and, when ``verbose``, reported instead of propagated.
    """
    try:
        noise_levels = sorted(all_results.keys())
        clean_level = 0.0 if 0.0 in all_results else noise_levels[0]
        noisy_level = 0.3 if 0.3 in all_results else noise_levels[-1]
        model_names = list(all_results[clean_level].keys())

        # Set up the plotting style
        plt.style.use('default')
        palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']  # Blue, Orange, Green, Red
        model_colors = [palette[i % len(palette)] for i in range(len(model_names))]

        def result_field(key):
            """Accessor for a top-level metric of one result entry."""
            return lambda entry: entry[key]

        def training_field(key):
            """Accessor for a field of an entry's ``training_info``."""
            return lambda entry: entry['training_info'][key]

        def series(model_name, getter):
            """Values of one metric for one model, ordered by noise level."""
            return [getter(all_results[noise][model_name]) for noise in noise_levels]

        def line_panel(position, getter, marker, ylabel, title):
            """One 'metric vs noise level' panel with a line per model."""
            plt.subplot(3, 4, position)
            for model_name, color in zip(model_names, model_colors):
                plt.plot(noise_levels, series(model_name, getter), marker=marker,
                         label=model_name, linewidth=2.5, markersize=8, color=color)
            plt.xlabel('Noise Level', fontsize=12)
            plt.ylabel(ylabel, fontsize=12)
            plt.title(title, fontsize=14, fontweight='bold')
            plt.legend(fontsize=10)
            plt.grid(True, alpha=0.3)

        # Create comprehensive figure with 11 subplots (slot 12 stays empty)
        plt.figure(figsize=(24, 18))

        # 1-5. Metric vs noise level
        line_panel(1, result_field('test_acc'), 'o', 'Test Accuracy (%)', 'Test Accuracy vs Label Noise')
        line_panel(2, result_field('test_f1'), 's', 'Test F1-Score', 'F1-Score vs Label Noise')
        line_panel(3, result_field('test_precision'), '^', 'Test Precision', 'Precision vs Label Noise')
        line_panel(4, result_field('test_recall'), 'v', 'Test Recall', 'Recall vs Label Noise')
        line_panel(5, result_field('confidence'), 'd', 'Mean Confidence', 'Model Confidence vs Label Noise')

        # 6. Training Time Comparison
        plt.subplot(3, 4, 6)
        avg_times = [np.mean(series(model_name, training_field('training_time')))
                     for model_name in model_names]

        bars = plt.bar(model_names, avg_times, alpha=0.7, color=model_colors)
        plt.ylabel('Average Training Time (s)', fontsize=12)
        plt.title('Average Training Time Comparison', fontsize=14, fontweight='bold')
        plt.xticks(rotation=45)

        for bar, time_val in zip(bars, avg_times):
            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + max(avg_times)*0.01,
                    f'{time_val:.1f}s', ha='center', va='bottom', fontsize=10)

        # 7. Clean Dataset Performance
        plt.subplot(3, 4, 7)
        clean_accs = [all_results[clean_level][model]['test_acc'] for model in model_names]
        clean_f1s = [all_results[clean_level][model]['test_f1'] for model in model_names]

        x_pos = np.arange(len(model_names))
        width = 0.35

        plt.bar(x_pos - width/2, clean_accs, width, label='Accuracy (%)', alpha=0.7)
        plt.bar(x_pos + width/2, [f1*100 for f1 in clean_f1s], width, label='F1-Score (x100)', alpha=0.7)

        plt.xlabel('Model', fontsize=12)
        plt.ylabel('Performance', fontsize=12)
        plt.title('Clean Dataset Performance', fontsize=14, fontweight='bold')
        plt.xticks(x_pos, model_names, rotation=45)
        plt.legend()

        # 8. Robustness Scores (noisy accuracy as a percentage of clean accuracy)
        plt.subplot(3, 4, 8)
        robustness_scores = []
        for model_name in model_names:
            clean_acc = all_results[clean_level][model_name]['test_acc']
            noisy_acc = all_results[noisy_level][model_name]['test_acc']
            robustness_scores.append((noisy_acc / clean_acc) * 100 if clean_acc else 0.0)

        bars = plt.bar(model_names, robustness_scores, alpha=0.7, color=model_colors)
        plt.ylabel('Robustness Score (%)', fontsize=12)
        # Renders as "(30%)" for the default 0.3 reference level
        plt.title(f'Robustness to High Noise ({noisy_level:.0%})', fontsize=14, fontweight='bold')
        plt.xticks(rotation=45)

        for bar, score in zip(bars, robustness_scores):
            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
                    f'{score:.1f}%', ha='center', va='bottom', fontsize=10)

        noise_tick_labels = [f'{n:.1f}' for n in noise_levels]

        # 9. Accuracy Heatmap
        plt.subplot(3, 4, 9)
        accuracy_matrix = [series(model_name, result_field('test_acc')) for model_name in model_names]
        sns.heatmap(accuracy_matrix, xticklabels=noise_tick_labels,
                   yticklabels=model_names, annot=True, fmt='.1f', cmap='RdYlBu_r', cbar_kws={'label': 'Accuracy (%)'})
        plt.title('Accuracy Heatmap', fontsize=14, fontweight='bold')
        plt.xlabel('Noise Level', fontsize=12)

        # 10. F1-Score Heatmap
        plt.subplot(3, 4, 10)
        f1_matrix = [series(model_name, result_field('test_f1')) for model_name in model_names]
        sns.heatmap(f1_matrix, xticklabels=noise_tick_labels,
                   yticklabels=model_names, annot=True, fmt='.3f', cmap='RdYlBu_r', cbar_kws={'label': 'F1-Score'})
        plt.title('F1-Score Heatmap', fontsize=14, fontweight='bold')
        plt.xlabel('Noise Level', fontsize=12)

        # 11. Epochs to Convergence
        line_panel(11, training_field('total_epochs'), 'o', 'Epochs to Convergence', 'Training Convergence')

        plt.tight_layout(pad=3.0)

        # Save the plot
        if save_prefix:
            save_path = f"{save_prefix}_comprehensive_analysis.png"
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            if verbose:
                print(f"Comprehensive plot saved to: {save_path}")

        plt.show()

        # Create additional training curves plot; skip saving when no prefix was given
        curves_path = f"{save_prefix}_training_curves.png" if save_prefix else None
        create_training_curves_plot(all_results, curves_path, verbose)

    except ImportError:
        if verbose:
            print("Matplotlib/Seaborn not available. Skipping comprehensive plots.")
    except Exception as e:
        if verbose:
            print(f"Error creating comprehensive plots: {e}")

def create_training_curves_plot(all_results, save_path=None, verbose=True):
    """Plot per-epoch training curves in a (model x noise level) grid.

    Each cell shows train/validation accuracy and F1 (x100) on the left axis
    and the training loss on a twin right axis; the best epoch, when
    recorded, is marked in orange. Only the top-left cell carries a legend.

    Args:
        all_results: ``{noise_level: {model_name: metrics}}`` where
            ``metrics['training_info']`` holds ``logs`` (list of per-epoch
            dicts) and ``best_epoch`` (dict or a falsy value).
        save_path: PNG path to write; falsy skips saving.
        verbose: print the saved path or the error message.

    Notes:
        ``squeeze=False`` keeps ``axes`` two-dimensional for every grid
        shape, so a sweep with a single noise level (or a single model) can
        be indexed as ``axes[i, j]``. Model names come from noise level 0.0
        when present, otherwise from the lowest available level. Any
        exception is caught and, when ``verbose``, reported.
    """
    try:
        noise_levels = sorted(all_results.keys())
        clean_level = 0.0 if 0.0 in all_results else noise_levels[0]
        model_names = list(all_results[clean_level].keys())

        _, axes = plt.subplots(len(model_names), len(noise_levels),
                               figsize=(20, 16), sharey=True, squeeze=False)

        for i, model_name in enumerate(model_names):
            for j, noise_level in enumerate(noise_levels):
                ax = axes[i, j]

                training_info = all_results[noise_level][model_name]['training_info']
                training_logs = training_info['logs']

                epochs = [log['epoch'] for log in training_logs]
                train_accs = [log['train_acc'] for log in training_logs]
                val_accs = [log['val_acc'] for log in training_logs]
                losses = [log['loss'] for log in training_logs]
                # Missing/None F1 values are drawn as 0
                train_f1s = [log['train_f1'] if log['train_f1'] else 0 for log in training_logs]
                val_f1s = [log['val_f1'] if log['val_f1'] else 0 for log in training_logs]

                # Plot accuracy and F1 curves (left axis) and loss (right axis)
                ax2 = ax.twinx()
                line1 = ax.plot(epochs, train_accs, 'b-', label='Train Acc', linewidth=2, alpha=0.7)
                line2 = ax.plot(epochs, val_accs, 'g-', label='Val Acc', linewidth=2)
                line3 = ax.plot(epochs, [f1*100 for f1 in train_f1s], 'b--', label='Train F1 (x100)', linewidth=1.5, alpha=0.7)
                line4 = ax.plot(epochs, [f1*100 for f1 in val_f1s], 'g--', label='Val F1 (x100)', linewidth=1.5)
                line5 = ax2.plot(epochs, losses, 'r:', label='Loss', linewidth=2, alpha=0.8)

                ax.set_xlabel('Epoch', fontsize=10)
                ax.set_ylabel('Accuracy (%) / F1 (x100)', color='black', fontsize=10)
                ax2.set_ylabel('Loss', color='red', fontsize=10)
                ax.set_title(f'{model_name} - Noise {noise_level:.1f}', fontsize=11)

                # Mark best epoch
                best_epoch_info = training_info['best_epoch']
                if best_epoch_info:
                    best_epoch = best_epoch_info['epoch']
                    best_val_acc = best_epoch_info['val_acc']
                    ax.axvline(x=best_epoch, color='orange', linestyle=':', alpha=0.7, linewidth=2)
                    ax.plot(best_epoch, best_val_acc, 'o', color='orange', markersize=8)

                ax.grid(True, alpha=0.3)

                # Combined legend for both axes, shown on the first subplot only
                if i == 0 and j == 0:
                    lines = line1 + line2 + line3 + line4 + line5
                    labels = [line.get_label() for line in lines]
                    ax.legend(lines, labels, loc='lower right', fontsize=8)

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            if verbose:
                print(f"Training curves plot saved to: {save_path}")

        plt.show()

    except Exception as e:
        if verbose:
            print(f"Error creating training curves plot: {e}")

def print_classification_reports(all_results, noise_level=0.0, verbose=True):
    """Print every model's stored classification report for one noise level.

    Args:
        all_results: ``{noise_level: {model_name: metrics}}``; each
            ``metrics`` dict must hold the text under ``'report'``.
        noise_level: key of the sweep entry to print.
        verbose: when false the function prints nothing.
    """
    if verbose:
        print(f"\nDETAILED CLASSIFICATION REPORTS (Noise Level: {noise_level})\n{'=' * 80}")

        for name, entry in all_results[noise_level].items():
            print(f"\n{name} Classification Report:\n{'-' * 50}")
            print(entry['report'])

def save_comprehensive_results(all_results, models=None, save_dir="./har_four_models_results", verbose=True):
    """Persist experiment results (and optionally model weights) to ``save_dir``.

    Written, in this order:
      * ``comprehensive_results.pkl`` -- pickle of ``all_results``,
      * ``results_summary.json`` -- headline metrics per noise level and model,
      * ``<model>_model.pth`` -- ``state_dict`` of each entry in ``models``
        (only when ``models`` is non-empty and noise level 0.0 is in
        ``all_results``),
      * ``training_logs_<model>_noise_<level>.csv`` -- per-epoch logs.

    File names use the model name lower-cased with ``-`` replaced by ``_``.
    Saving is best effort: any exception is caught and, when ``verbose``,
    reported; nothing is raised to the caller.
    """
    def file_stem(model_name):
        # e.g. "TACL-Net" -> "tacl_net"
        return model_name.lower().replace('-', '_')

    try:
        os.makedirs(save_dir, exist_ok=True)

        # Full results as a pickle
        pickle_path = os.path.join(save_dir, "comprehensive_results.pkl")
        with open(pickle_path, "wb") as handle:
            pickle.dump(all_results, handle)

        # Headline metrics as JSON
        summary = {}
        for level, per_model in all_results.items():
            level_summary = {}
            summary[f"noise_{level}"] = level_summary
            for name, entry in per_model.items():
                level_summary[name] = {
                    'test_accuracy': entry['test_acc'],
                    'test_f1': entry['test_f1'],
                    'test_precision': entry['test_precision'],
                    'test_recall': entry['test_recall'],
                    'validation_accuracy': entry['val_acc'],
                    'confidence': entry['confidence'],
                    'training_time': entry['training_info']['training_time'],
                    'total_epochs': entry['training_info']['total_epochs'],
                    'best_epoch': (entry['training_info']['best_epoch']['epoch']
                                   if entry['training_info']['best_epoch'] else None)
                }

        json_path = os.path.join(save_dir, "results_summary.json")
        with open(json_path, "w") as handle:
            json.dump(summary, handle, indent=2)

        # Model weights (clean-data models supplied by the caller)
        if models and 0.0 in all_results:
            for name, network in models.items():
                weights_path = os.path.join(save_dir, f"{file_stem(name)}_model.pth")
                torch.save(network.state_dict(), weights_path)
                if verbose:
                    print(f"Saved {name} model to: {weights_path}")

        # Per-epoch training logs, one CSV per (model, noise level)
        for level, per_model in all_results.items():
            for name, entry in per_model.items():
                csv_path = os.path.join(save_dir, f"training_logs_{file_stem(name)}_noise_{level}.csv")
                pd.DataFrame(entry['training_info']['logs']).to_csv(csv_path, index=False)

        if verbose:
            print(f"All comprehensive results saved to: {save_dir}")

    except Exception as e:
        if verbose:
            print(f"Error saving results: {e}")

def create_synthetic_har_data():
    """Build a reproducible stand-in for the UCI HAR feature matrices.

    Seeds NumPy's global RNG with 42, then draws standard-normal features
    (7352 train / 2947 test rows, 561 columns) and uniform integer labels in
    ``[0, 6)``. Each class then gets its own random offset vector (standard
    normal scaled by 0.5) added to its rows in both splits, so the classes
    are separable.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` as NumPy arrays.
    """
    print("Creating synthetic HAR data for testing...")

    # Same sizes as the real dataset: 561 features, 6 activities
    np.random.seed(42)
    train_size, test_size = 7352, 2947
    feature_count, class_count = 561, 6

    # Draw order matters for reproducibility: train X, train y, test X, test y
    X_train = np.random.randn(train_size, feature_count)
    y_train = np.random.randint(0, class_count, train_size)
    X_test = np.random.randn(test_size, feature_count)
    y_test = np.random.randint(0, class_count, test_size)

    # Shift every class by one shared offset so train and test agree
    for label in range(class_count):
        class_offset = np.random.randn(feature_count) * 0.5
        X_train[y_train == label] += class_offset
        X_test[y_test == label] += class_offset

    print("Generated synthetic data:")
    for caption, value in (
        ("X_train shape", X_train.shape),
        ("X_test shape", X_test.shape),
        ("Classes", np.unique(y_train)),
    ):
        print(f"{caption}: {value}")

    return X_train, y_train, X_test, y_test


# 8. MAIN EXECUTION FUNCTIONS
def main_integrated_experiment():
    """Interactive entry point for the four-model HAR experiment.

    Loads the UCI HAR dataset (falling back to synthetic data when loading
    fails), asks which experiment to run and dispatches:

      * ``1`` -- clean data only,
      * ``2`` -- noise-robustness sweep with comparison output and plots,
      * anything else (default) -- sweep, reports, optional plots and
        optional saving of results and models.

    Prompts fall back to their defaults when ``input`` is unavailable
    (e.g. a non-interactive run).

    Returns:
        ``(results_by_noise_level, models)``. ``models`` is ``None`` for
        option 2 and for the default option when saving is declined.
    """

    def ask(prompt, default):
        """Return the stripped, lower-cased answer, or ``default`` if input fails."""
        try:
            return input(prompt).strip().lower()
        except Exception:
            # Non-interactive environments raise here; Ctrl-C is not swallowed.
            return default

    print("HAR Integrated Baseline Models + TACL-Net Experiment")
    print("=" * 70)

    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"\nUsing device: {device}")

    # Initialize data loader
    data_loader = UCIHARDataLoader(data_dir="/content", download=True)

    # Load data
    try:
        X_train, y_train, X_test, y_test, subject_train, subject_test = data_loader.load_data()
        print("Successfully loaded UCI HAR dataset")
    except Exception:
        print("Failed to load UCI HAR dataset. Using synthetic data for demonstration.")
        X_train, y_train, X_test, y_test = create_synthetic_har_data()

    print(f"\nHAR Dataset Summary:")
    print(f"Training samples: {X_train.shape[0]}")
    print(f"Test samples: {X_test.shape[0]}")
    print(f"Features: {X_train.shape[1]}")
    print(f"Classes: {len(np.unique(y_train))}")

    class_labels = data_loader.activities

    print("\nExperiment Options:")
    print("1. Clean data only (all 4 models)")
    print("2. Noise robustness analysis (all noise levels)")
    print("3. Both clean and noise analysis")

    choice = ask("Choose experiment type (1, 2, or 3, default=3): ", '3')

    if choice == '1':
        # Clean data only
        print("\n" + "="*80)
        print("CLEAN HAR DATA EXPERIMENT (ALL 4 MODELS)")
        print("="*80)

        results, models = run_integrated_experiments(
            X_train, y_train, X_test, y_test, device, class_labels,
            noise_level=0.0, verbose=True
        )

        print_classification_reports({0.0: results}, 0.0)

        return {0.0: results}, models

    if choice == '2':
        # Noise robustness only
        all_results = run_noise_robustness_integrated(
            X_train, y_train, X_test, y_test, device, class_labels
        )

        print_comprehensive_comparison(all_results)
        generate_model_recommendations(all_results)
        create_comprehensive_plots(all_results)

        return all_results, None

    # Both clean and noise analysis (default for any other answer)
    print("\n" + "="*80)
    print("COMPREHENSIVE FOUR-MODEL HAR ANALYSIS")
    print("="*80)

    # Run noise robustness analysis
    all_results = run_noise_robustness_integrated(
        X_train, y_train, X_test, y_test, device, class_labels
    )

    # Print classification reports for clean data
    print_classification_reports(all_results, 0.0)

    # Comprehensive analysis
    print_comprehensive_comparison(all_results)
    generate_model_recommendations(all_results)

    # Create visualizations
    if ask("\nGenerate comprehensive plots? (y/n, default=y): ", 'y') != 'n':
        create_comprehensive_plots(all_results)

    # Save results. clean_models is bound up front so the final return is
    # valid even when the user declines to save.
    clean_models = None
    if ask("Save comprehensive results and models? (y/n, default=y): ", 'y') != 'n':
        if 0.0 in all_results:
            # NOTE: the sweep does not return trained models, so the clean
            # experiment is re-run here; the saved weights come from this
            # fresh run, not from the run that produced all_results[0.0].
            _, clean_models = run_integrated_experiments(
                X_train, y_train, X_test, y_test, device, class_labels,
                noise_level=0.0, verbose=False
            )
        save_comprehensive_results(all_results, clean_models)

    return all_results, clean_models

def quick_demo():
    """Run one clean (noise_level=0.0) experiment on synthetic data and print a summary.

    Builds the synthetic train/test split, runs the integrated experiment
    once in verbose mode, prints one summary line per model (accuracy, F1,
    training time), and finishes with a list of framework features.
    Returns nothing; all output goes to stdout.
    """
    banner = "HAR Integrated Models (CNN + LSTM + Transformer + TACL-Net) - Quick Demo"
    print(banner)
    print("=" * 75)

    # Synthetic data stands in for the real dataset in the demo
    X_train, y_train, X_test, y_test = create_synthetic_har_data()
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    class_labels = [
        'WALKING',
        'WALKING_UPSTAIRS',
        'WALKING_DOWNSTAIRS',
        'SITTING',
        'STANDING',
        'LAYING',
    ]

    print("\nRunning quick integrated experiment...")

    # Single pass without label noise; the returned models are not needed here
    results, _models = run_integrated_experiments(
        X_train, y_train, X_test, y_test, device, class_labels,
        noise_level=0.0, verbose=True
    )

    print("\nQuick Demo Results Summary:")
    print("-" * 50)
    for name, stats in results.items():
        accuracy = stats['test_acc']
        f1 = stats['test_f1']
        elapsed = stats['training_info']['training_time']
        print(f"{name:12}: Acc={accuracy:6.2f}%, F1={f1:.4f}, Time={elapsed:5.1f}s")

    print("\nThis integrated framework provides:")
    highlights = (
        "All 4 models: 1D-CNN, Bi-LSTM, Transformer, TACL-Net",
        "Comprehensive metrics: Accuracy, F1, Precision, Recall, Confidence",
        "Enhanced logging with epoch-by-epoch tracking",
        "Noise robustness analysis across multiple noise levels",
        "Detailed visualizations and comparison plots",
        "Complete result saving and model persistence",
        "Realistic label noise patterns for HAR activities",
    )
    for item in highlights:
        print(f"• {item}")

if __name__ == "__main__":
    # Script entry point: show the menu, read the user's choice, and dispatch
    # to the quick demo (2), the synthetic-data noise study (3), or the full
    # integrated experiment (anything else, i.e. the advertised default 1).
    print("HAR Integrated Baseline Models + TACL-Net")
    print("="*50)
    print("1. Run full integrated experiment")
    print("2. Show quick demo")
    print("3. Run with synthetic data only")

    # Pre-set to the advertised default so the error handler below can always
    # inspect `choice`, even if reading the prompt fails.
    choice = '1'

    try:
        try:
            choice = input("Choose option (1, 2, or 3, default=1): ").strip()
        except EOFError:
            # stdin is closed / non-interactive: honour "default=1" instead of
            # reporting an execution error and diverting to the demo.
            pass

        if choice == '2':
            quick_demo()
        elif choice == '3':
            # Run with synthetic data
            print("\n" + "="*60)
            print("RUNNING WITH SYNTHETIC HAR DATA")
            print("="*60)

            X_train, y_train, X_test, y_test = create_synthetic_har_data()
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            class_labels = ['WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS',
                          'SITTING', 'STANDING', 'LAYING']

            # Run noise robustness analysis
            all_results = run_noise_robustness_integrated(
                X_train, y_train, X_test, y_test, device, class_labels
            )

            print_comprehensive_comparison(all_results)
            generate_model_recommendations(all_results)
            create_comprehensive_plots(all_results)

        else:
            # Bound at module scope, as in the original script
            results, models = main_integrated_experiment()

    except KeyboardInterrupt:
        print("\nExperiment interrupted by user.")
    except Exception as e:
        print(f"Error during execution: {e}")
        if choice == '2':
            # The quick demo is the fallback itself; running it a second time
            # would only repeat the same work and the same failure. Re-raise
            # so the original traceback is surfaced instead.
            raise
        # Fallback to synthetic data demo
        print("\nFalling back to synthetic data demo...")
        quick_demo()