## __Check first before starting__

In [1]:
import os

# Change the working directory to the project root
# Working_directory = os.path.normpath("C:/Users/james/OneDrive/文件/Continual_Learning")
Working_directory = os.path.normpath("/mnt/mydisk/Continual_Learning_JL/Continual_Learning/")
os.chdir(Working_directory)
print(f"Working directory: {os.getcwd()}")

Working directory: /mnt/mydisk/Continual_Learning_JL/Continual_Learning


## __All imports__

In [2]:
# Operating system and file management
import os
import shutil
import contextlib
import traceback
import gc
import glob, copy
from collections import defaultdict
import subprocess
import time
import re

# Jupyter notebook widgets and display
import ipywidgets as widgets
from IPython.display import display

# Data manipulation and analysis
import pandas as pd
import numpy as np

# Plotting and visualization
import matplotlib.pyplot as plt
from mpl_interactions import zoom_factory, panhandler

# Machine learning and preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pickle
from ta import trend, momentum, volatility, volume

# Mathematical and scientific computing
import math
from scipy.ndimage import gaussian_filter1d

# Type hinting
from typing import Callable, Tuple

# Deep learning with PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

## __Dataset Setup & Preprocessing__

### 📥 Step 1 — Set dataset path and load features & labels
We will load the 561-dimensional features and original activity labels for both training and testing.

In [3]:
# Set working directory manually before running this cell if needed
BASE_DIR = "Class_Incremental_CL/HAR_CIL/har_dataset"
TRAIN_DIR = os.path.join(BASE_DIR, "train")
TEST_DIR = os.path.join(BASE_DIR, "test")

# 活動對應（原始標籤）
activity_label_map = {
    1: "WALKING",
    2: "WALKING_UPSTAIRS",
    3: "WALKING_DOWNSTAIRS",
    4: "SITTING",
    5: "STANDING",
    6: "LAYING"
}

# Load feature and label data
X_train = np.loadtxt(os.path.join(TRAIN_DIR, "X_train.txt"))
y_train = np.loadtxt(os.path.join(TRAIN_DIR, "y_train.txt")).astype(int)
X_test = np.loadtxt(os.path.join(TEST_DIR, "X_test.txt"))
y_test = np.loadtxt(os.path.join(TEST_DIR, "y_test.txt")).astype(int)

print("Train shape:", X_train.shape, y_train.shape)
print("Test shape:", X_test.shape, y_test.shape)

Train shape: (7352, 561) (7352,)
Test shape: (2947, 561) (2947,)


### 🧩 Step 2 — Define period label groups and final class remapping
We will predefine the class mappings for each period to ensure consistency across the experiment.

In [4]:
# Period-to-original-label mapping
period_label_map = {
    1: [4, 5],           # SITTING, STANDING
    2: [4, 5, 1, 2, 3],  # + WALKING variants (merged)
    3: [4, 5, 1, 2, 3, 6],  # + LAYING
    4: [4, 5, 1, 2, 3, 6]   # same classes, but with walking variants separated
}

# Consistent label remapping across all periods (final label index)
final_class_map = {
    "SITTING": 0,
    "STANDING": 1,
    "WALKING": 2,
    "LAYING": 3,
    "WALKING_UPSTAIRS": 4,
    "WALKING_DOWNSTAIRS": 5
}

# Reverse mapping for readability
label_name_from_id = {v: k for k, v in final_class_map.items()}
activity_label_map = {
    1: "WALKING",
    2: "WALKING_UPSTAIRS",
    3: "WALKING_DOWNSTAIRS",
    4: "SITTING",
    5: "STANDING",
    6: "LAYING"
}


### 🧪 Step 3 — Filter and remap datasets for each period
We will build training and testing splits for each period using consistent class indices.

In [5]:
def map_label(label, period):
    name = activity_label_map[label]
    if period < 4:
        if name.startswith("WALKING"):
            return final_class_map["WALKING"]
        else:
            return final_class_map[name]
    else:
        return final_class_map[name]

def get_period_dataset(X, y, period):
    allowed_labels = period_label_map[period]
    mask = np.isin(y, allowed_labels)
    Xp = X[mask]
    yp = np.array([map_label(label, period) for label in y[mask]])
    return Xp, yp


### 📊 Step 4 — Print artistic class distribution for each period
We show each period's label map and class statistics to confirm correctness.

In [6]:
def print_class_distribution(y, var_name: str, label_map: dict) -> None:
    y = np.array(y).flatten()
    unique, counts = np.unique(y, return_counts=True)
    total = counts.sum()
    print(f"\n📦 Class Distribution for {var_name}")
    for i, c in zip(unique, counts):
        print(f"  ├─ Label {i:<2} ({label_map[i]:<20}) → {c:>5} samples ({(c/total)*100:>5.2f}%)")


In [7]:
period_datasets = {}

for period in range(1, 5):
    Xp_train, yp_train = get_period_dataset(X_train, y_train, period)
    Xp_test, yp_test = get_period_dataset(X_test, y_test, period)

    period_datasets[period] = {
        "train": (Xp_train, yp_train),
        "test": (Xp_test, yp_test)
    }

    print_class_distribution(yp_train, f"Period {period} (Train)", label_name_from_id)
    print_class_distribution(yp_test, f"Period {period} (Test)", label_name_from_id)



📦 Class Distribution for Period 1 (Train)
  ├─ Label 0  (SITTING             ) →  1286 samples (48.35%)
  ├─ Label 1  (STANDING            ) →  1374 samples (51.65%)

📦 Class Distribution for Period 1 (Test)
  ├─ Label 0  (SITTING             ) →   491 samples (48.00%)
  ├─ Label 1  (STANDING            ) →   532 samples (52.00%)

📦 Class Distribution for Period 2 (Train)
  ├─ Label 0  (SITTING             ) →  1286 samples (21.63%)
  ├─ Label 1  (STANDING            ) →  1374 samples (23.11%)
  ├─ Label 2  (WALKING             ) →  3285 samples (55.26%)

📦 Class Distribution for Period 2 (Test)
  ├─ Label 0  (SITTING             ) →   491 samples (20.37%)
  ├─ Label 1  (STANDING            ) →   532 samples (22.07%)
  ├─ Label 2  (WALKING             ) →  1387 samples (57.55%)

📦 Class Distribution for Period 3 (Train)
  ├─ Label 0  (SITTING             ) →  1286 samples (17.49%)
  ├─ Label 1  (STANDING            ) →  1374 samples (18.69%)
  ├─ Label 2  (WALKING             ) →  328

## __Check GPU, CUDA, Pytorch__

### GPU Details

In [8]:
!nvidia-smi

Wed Apr 23 13:58:21 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.86.15              Driver Version: 570.86.15      CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               Off |   00000000:2A:00.0 Off |                  Off |
| 54%   78C    P2            295W /  300W |   23769MiB /  49140MiB |    100%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA RTX A6000               Off |   00

### CUDA Details

In [9]:
def check_gpu_config():
    """
    Check GPU availability and display detailed configuration information.
    """
    # Check if GPU is available
    gpu_available = torch.cuda.is_available()
    
    # Print header
    print("=" * 50)
    print("GPU Configuration Check".center(50))
    print("=" * 50)
    
    # Basic GPU availability
    print(f"{'PyTorch Version':<25}: {torch.__version__}")
    print(f"{'GPU Available':<25}: {'Yes' if gpu_available else 'No'}")
    
    # If GPU is available, print detailed info
    if gpu_available:
        print("-" * 50)
        print("GPU Details".center(50))
        print("-" * 50)
        
        # Device info
        print(f"{'Device Name':<25}: {torch.cuda.get_device_name(0)}")
        print(f"{'Number of GPUs':<25}: {torch.cuda.device_count()}")
        print(f"{'Current Device Index':<25}: {torch.cuda.current_device()}")
        
        # Compute capability and CUDA cores
        props = torch.cuda.get_device_properties(0)
        print(f"{'Compute Capability':<25}: {props.major}.{props.minor}")
        print(f"{'Total CUDA Cores':<25}: {props.multi_processor_count * 128}")  # Approx. 128 cores per SM
        
        # Memory info
        total_memory = props.total_memory / (1024 ** 3)  # Convert to GB
        memory_allocated = torch.cuda.memory_allocated(0) / (1024 ** 3)
        memory_reserved = torch.cuda.memory_reserved(0) / (1024 ** 3)
        print(f"{'Total Memory (GB)':<25}: {total_memory:.2f}")
        print(f"{'Allocated Memory (GB)':<25}: {memory_allocated:.2f}")
        print(f"{'Reserved Memory (GB)':<25}: {memory_reserved:.2f}")
    else:
        print("-" * 50)
        print("No GPU detected. Running on CPU.".center(50))
        print("-" * 50)
    
    print("=" * 50)

if __name__ == "__main__":
    check_gpu_config()

             GPU Configuration Check              
PyTorch Version          : 2.5.1
GPU Available            : Yes
--------------------------------------------------
                   GPU Details                    
--------------------------------------------------
Device Name              : NVIDIA RTX A6000
Number of GPUs           : 3
Current Device Index     : 0
Compute Capability       : 8.6
Total CUDA Cores         : 10752
Total Memory (GB)        : 47.41
Allocated Memory (GB)    : 0.00
Reserved Memory (GB)     : 0.00


### PyTorch Details

In [10]:
def print_torch_config():
    """Print PyTorch and CUDA configuration in a formatted manner."""
    print("=" * 50)
    print("PyTorch Configuration".center(50))
    print("=" * 50)
    
    # Basic PyTorch and CUDA info
    print(f"{'PyTorch Version':<25}: {torch.__version__}")
    print(f"{'CUDA Compiled Version':<25}: {torch.version.cuda}")
    print(f"{'CUDA Available':<25}: {'Yes' if torch.cuda.is_available() else 'No'}")
    print(f"{'Number of GPUs':<25}: {torch.cuda.device_count()}")

    # GPU details if available
    if torch.cuda.is_available():
        print(f"{'GPU Name':<25}: {torch.cuda.get_device_name(0)}")

    print("-" * 50)
    
    # Seed setting
    torch.manual_seed(42)
    print(f"{'Random Seed':<25}: 42 (Seeding successful!)")
    
    print("=" * 50)

if __name__ == "__main__":
    print_torch_config()

              PyTorch Configuration               
PyTorch Version          : 2.5.1
CUDA Compiled Version    : 12.1
CUDA Available           : Yes
Number of GPUs           : 3
GPU Name                 : NVIDIA RTX A6000
--------------------------------------------------
Random Seed              : 42 (Seeding successful!)


## __⚙️ GPU Selection — Auto-select the least loaded GPU__
This code automatically scans available GPUs and selects the one with the lowest current memory usage.


In [11]:
def auto_select_cuda_device(verbose=True):
    """
    Automatically selects the CUDA GPU with the least memory usage.
    Falls back to CPU if no GPU is available.
    """
    if not torch.cuda.is_available():
        print("🚫 No CUDA GPU available. Using CPU.")
        return torch.device("cpu")

    try:
        # Run nvidia-smi to get memory usage of each GPU
        smi_output = subprocess.check_output(
            ['nvidia-smi', '--query-gpu=memory.used', '--format=csv,nounits,noheader'],
            encoding='utf-8'
        )
        memory_used = [int(x) for x in smi_output.strip().split('\n')]
        best_gpu = int(np.argmin(memory_used))

        if verbose:
            print("🎯 Automatically selected GPU:")
            print(f"    - CUDA Device ID : {best_gpu}")
            print(f"    - Memory Used    : {memory_used[best_gpu]} MiB")
            print(f"    - Device Name    : {torch.cuda.get_device_name(best_gpu)}")
        return torch.device(f"cuda:{best_gpu}")
    except Exception as e:
        print(f"⚠️ Failed to auto-detect GPU. Falling back to cuda:0. ({e})")
        return torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Execute and assign
device = auto_select_cuda_device()

🎯 Automatically selected GPU:
    - CUDA Device ID : 2
    - Memory Used    : 18 MiB
    - Device Name    : NVIDIA RTX A6000


## __MLP Model__

### HAR_MLP_v1

In [12]:
class HAR_MLP_v1(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, output_size: int, dropout: float = 0.2):
        super(HAR_MLP_v1, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

### HAR_MLP_v2

In [13]:
class HAR_MLP_v2(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, output_size: int, dropout: float = 0.2):
        super(HAR_MLP_v2, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.bn2 = nn.BatchNorm1d(hidden_size)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.fc3 = nn.Linear(hidden_size, output_size)
        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.dropout1(x)

        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.dropout2(x)

        x = self.fc3(x)
        return x


## __Training Function with MSE__

In [14]:
def compute_classwise_accuracy(preds, targets, class_correct, class_total):
    """
    Computes per-class accuracy statistics from raw logits and ground truth.
    """
    preds = torch.argmax(preds, dim=-1)
    correct_mask = (preds == targets)
    for label in torch.unique(targets):
        label = label.item()
        label_mask = (targets == label)
        class_total[label] = class_total.get(label, 0) + label_mask.sum().item()
        class_correct[label] = class_correct.get(label, 0) + (correct_mask & label_mask).sum().item()

In [15]:
def train_with_mse_distillation(student_model, output_size, criterion, optimizer,
                                X_train, y_train, X_val, y_val,
                                stable_classes=None, teacher_model=None, alpha=0.5,
                                scheduler=None, num_epochs=10, batch_size=64,
                                model_saving_folder=None, model_name=None,
                                stop_signal_file=None, device=auto_select_cuda_device()):
    print("\n🚀 'train_with_mse_distillation' started.")
    model_name = model_name or 'mse_student'
    model_saving_folder = model_saving_folder or './saved_models'

    if model_saving_folder:
        if os.path.exists(model_saving_folder):
            shutil.rmtree(model_saving_folder)
            print(f"✅ Removed existing folder: {model_saving_folder}")
        os.makedirs(model_saving_folder, exist_ok=True)

    device = device
    student_model.to(device)
    if teacher_model:
        teacher_model.to(device)
        teacher_model.eval()

    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val = torch.tensor(y_val, dtype=torch.long).to(device)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size, shuffle=False)

    print("\n✅ Data Overview:")
    print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"X_val: {X_val.shape}, y_val: {y_val.shape}")

    start_time = time.time()
    best_results = []

    for epoch in range(num_epochs):
        if stop_signal_file and os.path.exists(stop_signal_file):
            print("\n🛑 Stop signal detected. Exiting training loop.")
            break

        student_model.train()
        epoch_loss = 0.0
        class_correct, class_total = {}, {}

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            student_logits = student_model(X_batch)
            student_logits_flat = student_logits.view(-1, output_size)
            y_batch = y_batch.view(-1)
            ce_loss = criterion(student_logits_flat, y_batch)
            compute_classwise_accuracy(student_logits_flat, y_batch, class_correct, class_total)

            if teacher_model and stable_classes:
                with torch.no_grad():
                    teacher_logits = teacher_model(X_batch)
                stable_indices = torch.tensor(stable_classes, device=student_logits.device)
                teacher_stable = teacher_logits.index_select(dim=1, index=stable_indices)
                student_stable = student_logits.index_select(dim=1, index=stable_indices)
                distill_loss = F.mse_loss(student_stable, teacher_stable)
                total_loss = alpha * distill_loss + (1 - alpha) * ce_loss
            else:
                total_loss = ce_loss

            total_loss.backward()
            optimizer.step()
            epoch_loss += total_loss.item() * X_batch.size(0)

        train_loss = epoch_loss / len(train_loader.dataset)
        train_acc = {int(c): f"{(class_correct[c] / class_total[c]) * 100:.2f}%" if class_total[c] > 0 else "0.00%" for c in sorted(class_total.keys())}

        student_model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        val_class_correct, val_class_total = {}, {}

        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                outputs = student_model(X_batch).view(-1, output_size)
                y_batch = y_batch.view(-1)
                val_loss += criterion(outputs, y_batch).item() * X_batch.size(0)
                predictions = torch.argmax(outputs, dim=-1)
                val_correct += (predictions == y_batch).sum().item()
                val_total += y_batch.size(0)
                compute_classwise_accuracy(outputs, y_batch, val_class_correct, val_class_total)

        val_loss /= len(val_loader.dataset)
        val_acc = val_correct / val_total
        val_acc_cls = {int(c): f"{(val_class_correct[c] / val_class_total[c]) * 100:.2f}%" if val_class_total[c] > 0 else "0.00%" for c in sorted(val_class_total.keys())}

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.6f}, Train-Class-Acc: {train_acc}")
        print(f"Val Loss: {val_loss:.6f}, Val Acc: {val_acc * 100:.2f}%, Val-Class-Acc: {val_acc_cls}, LR: {optimizer.param_groups[0]['lr']:.6f}")

        model_path = os.path.join(model_saving_folder, f"{model_name}_epoch_{epoch+1}.pth")
        current = {
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'val_accuracy': val_acc,
            'train_classwise_accuracy': train_acc,
            'val_classwise_accuracy': val_acc_cls,
            'model_state_dict': student_model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'learning_rate': optimizer.param_groups[0]['lr'],
            'model_path': model_path
        }

        if len(best_results) < 5 or val_acc > best_results[-1]['val_accuracy']:
            if len(best_results) == 5:
                to_remove = best_results.pop()
                if os.path.exists(to_remove['model_path']):
                    os.remove(to_remove['model_path'])
                    print(f"🗑 Removed: {to_remove['model_path']}")
            best_results.append(current)
            best_results.sort(key=lambda x: (x['val_accuracy'], x['epoch']), reverse=True)
            torch.save(current, model_path)
            print(f"✅ Saved model: {model_path}")

        if scheduler:
            scheduler.step(val_loss)

    elapsed_time = time.time() - start_time
    print(f"\n⏳ Total training time: {elapsed_time:.2f} seconds")

    if best_results:
        best = best_results[0]
        best_model_path = os.path.join(model_saving_folder, f"{model_name}_best.pth")
        torch.save(best, best_model_path)
        print(f"\n🏆 Best model saved as: {best_model_path} (Val Accuracy: {best['val_accuracy'] * 100:.2f}%)")

    final_model_path = os.path.join(model_saving_folder, f"{model_name}_final.pth")
    torch.save(current, final_model_path)
    print(f"\n📌 Final model saved as: {final_model_path}")

    print("\n🎯 Top 5 Best Models:")
    for res in best_results:
        print(f"Epoch {res['epoch']}, Train Loss: {res['train_loss']:.6f}, Train-Acc: {res['train_classwise_accuracy']},\n"
              f"Val Loss: {res['val_loss']:.6f}, Val Acc: {res['val_accuracy']*100:.2f}%, Val-Acc: {res['val_classwise_accuracy']},"
              f" Model Path: {res['model_path']}")

    match = re.search(r'Period_(\d+)', model_saving_folder)
    period_label = match.group(1) if match else "?"
    model_name_str = student_model.__class__.__name__
    best_model = max(best_results, key=lambda x: x['val_accuracy'])

    best_md_summary = f"""
    ---
### Period {period_label} (alpha = {alpha})
+ ##### Total training time: {elapsed_time:.2f} seconds
+ ##### Model: {model_name_str}
+ ##### Training and saving in *'{model_saving_folder}'*
+ ##### Best Epoch: {best_model['epoch']}
#### __Val Accuracy: {best_model['val_accuracy'] * 100:.2f}%__
#### __Val-Class-Acc: {best_model['val_classwise_accuracy']}__
    """
    print(best_md_summary.strip())

    del X_train, y_train, X_val, y_val, train_loader, val_loader
    torch.cuda.empty_cache()
    gc.collect()


🎯 Automatically selected GPU:
    - CUDA Device ID : 2
    - Memory Used    : 18 MiB
    - Device Name    : NVIDIA RTX A6000


## __📋 Label Mapping (HAR Activities)__

| Label ID | Activity Name       |
|----------|---------------------|
|    0     | SITTING             |
|    1     | STANDING            |
|    2     | WALKING             |
|    3     | LAYING              |
|    4     | WALKING_UPSTAIRS    |
|    5     | WALKING_DOWNSTAIRS  |

## __Common Parameters__

In [17]:
batch_size    = 32
stop_signal_file = os.path.normpath(os.path.join('Class_Incremental_CL', 'HAR_CIL/stop_training.txt'))

## __HAR_MLP_v1 Training (alpha = 0.1)__

---
### Period 1 (alpha = 0.1)
+ ##### Total training time: 206.29 seconds
+ ##### Model: HAR_MLP_v1
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_1'*
+ ##### Best Epoch: 747
#### __Val Accuracy: 94.62%__
#### __Val-Class-Acc: {0: '91.04%', 1: '97.93%'}__

In [None]:
# ================================
# 📌 Period 1 Configuration - HAR_MLP_v1
# ================================
period = 1

# Load dataset for this period
X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

# Auto-select CUDA device
device = auto_select_cuda_device()

# Model and training hyperparameters
input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.1  # Only effective in Period >= 2
teacher_model = None
stable_classes = None  # Not used in Period 1

# Model saving path (change EWC → MSE)
model_name = "HAR_MLP_v1"
model_saving_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v1", f"alpha_{alpha}", f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

# Initialize model, criterion, optimizer
student_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler =None

# Train the model (MSE-aware but no distillation in Period 1)
train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,     # None for Period 1
    teacher_model=teacher_model,       # None for Period 1
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

# ================================
# ✅ Summary & Resource Cleanup
# ================================

unique_classes = np.unique(y_train)
num_classes = len(unique_classes)

print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {num_classes}")

del X_train, y_train, X_val, y_val, teacher_model, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 2
    - Memory Used    : 739 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_1

✅ Data Overview:
X_train: torch.Size([2660, 561]), y_train: torch.Size([2660])
X_val: torch.Size([1023, 561]), y_val: torch.Size([1023])
Epoch 1/1000, Train Loss: 0.626395, Train-Class-Acc: {0: '60.89%', 1: '70.82%'}
Val Loss: 0.557971, Val Acc: 78.98%, Val-Class-Acc: {0: '86.97%', 1: '71.62%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_1/HAR_MLP_v1_epoch_1.pth
Epoch 2/1000, Train Loss: 0.529165, Train-Class-Acc: {0: '72.08%', 1: '80.49%'}
Val Loss: 0.512650, Val Acc: 79.47%, Val-Class-Acc: {0: '96.33%', 1: '63.91%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_1/HAR_MLP_v1_epoch_2.pth
Epoch 3/1000, Train Los

---
### Period 2 (alpha = 0.1)
+ ##### Total training time: 324.47 seconds
+ ##### Model: HAR_MLP_v1
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_2'*
+ ##### Best Epoch: 670
#### __Val Accuracy: 97.55%__
#### __Val-Class-Acc: {0: '91.04%', 1: '97.18%', 2: '100.00%'}__

In [None]:
# ================================
# 📌 Period 2 Configuration - HAR_MLP_v1
# ================================
period = 2
prev_period = 1

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.1
stable_classes = [0, 1]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v1"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v1", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 2
    - Memory Used    : 741 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_2

✅ Data Overview:
X_train: torch.Size([5945, 561]), y_train: torch.Size([5945])
X_val: torch.Size([2410, 561]), y_val: torch.Size([2410])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 9.416496, Train-Class-Acc: {0: '43.39%', 1: '68.41%', 2: '86.91%'}
Val Loss: 0.423963, Val Acc: 85.89%, Val-Class-Acc: {0: '72.30%', 1: '61.65%', 2: '100.00%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_2/HAR_MLP_v1_epoch_1.pth
Epoch 2/1000, Train Loss: 6.906070, Train-Class-Acc: {0: '73.41%', 1: '58.81%', 2: '96.38%'}
Val Loss: 0.288916, Val Acc: 90.75%, Val-Class-Acc: {0: '63.14%', 1: '92.11%', 2: '100.00%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_2/HAR_MLP_v1_epoch_2.pth
Epoch 3/1000, Train Loss: 5.975560, Train-Class-Acc: {0: '81.42%', 1: '67.39%', 2: '99.00%'}
Val Loss: 0.197785, Val Acc: 92.61%, Val-Class-Acc: {0: '82.89%', 1: '82.33%', 2: '100.00%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_2/HAR_MLP_v1_epoch_3.pth
Epoch 4/1000, Train Loss: 5.289957, Train-Class-Acc: {0: '84.29%

---
### Period 3 (alpha = 0.1)
+ ##### Total training time: 381.06 seconds
+ ##### Model: HAR_MLP_v1
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_3'*
+ ##### Best Epoch: 974
#### __Val Accuracy: 97.29%__
#### __Val-Class-Acc: {0: '90.84%', 1: '96.80%', 2: '100.00%', 3: '96.65%'}__

In [None]:
# ================================
# 📌 Period 3 Configuration - HAR_MLP_v1
# ================================
period = 3
prev_period = 2

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.1
stable_classes = [0, 1, 2]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v1"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v1", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 2
    - Memory Used    : 743 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_3

✅ Data Overview:
X_train: torch.Size([7352, 561]), y_train: torch.Size([7352])
X_val: torch.Size([2947, 561]), y_val: torch.Size([2947])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 45.375719, Train-Class-Acc: {0: '88.10%', 1: '1.67%', 2: '93.76%', 3: '12.15%'}
Val Loss: 1.335128, Val Acc: 63.18%, Val-Class-Acc: {0: '92.87%', 1: '0.56%', 2: '100.00%', 3: '2.98%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_3/HAR_MLP_v1_epoch_1.pth
Epoch 2/1000, Train Loss: 22.693371, Train-Class-Acc: {0: '75.12%', 1: '4.59%', 2: '100.00%', 3: '26.58%'}
Val Loss: 1.600789, Val Acc: 64.61%, Val-Class-Acc: {0: '95.72%', 1: '5.45%', 2: '100.00%', 3: '3.35%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_3/HAR_MLP_v1_epoch_2.pth
Epoch 3/1000, Train Loss: 15.253392, Train-Class-Acc: {0: '71.62%', 1: '10.41%', 2: '100.00%', 3: '37.31%'}
Val Loss: 1.062299, Val Acc: 66.34%, Val-Class-Acc: {0: '95.52%', 1: '16.92%', 2: '100.00%', 3: '1.68%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_3/HAR_MLP_v1_

---
### Period 4 (alpha = 0.1)
+ ##### Total training time: 382.95 seconds
+ ##### Model: HAR_MLP_v1
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_4'*
+ ##### Best Epoch: 1000
#### __Val Accuracy: 96.27%__
#### __Val-Class-Acc: {0: '90.63%', 1: '96.43%', 2: '98.99%', 3: '96.28%', 4: '97.24%', 5: '98.33%'}__

In [23]:
# ================================
# 📌 Period 4 Configuration - HAR_MLP_v1
# ================================
period = 4
prev_period = 3

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.1
stable_classes = [0, 1, 3]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v1"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v1", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 2
    - Memory Used    : 741 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_4

✅ Data Overview:
X_train: torch.Size([7352, 561]), y_train: torch.Size([7352])
X_val: torch.Size([2947, 561]), y_val: torch.Size([2947])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 131.837802, Train-Class-Acc: {0: '86.55%', 1: '5.97%', 2: '25.29%', 3: '0.00%', 4: '9.13%', 5: '28.09%'}
Val Loss: 3.408179, Val Acc: 17.85%, Val-Class-Acc: {0: '97.35%', 1: '5.26%', 2: '1.21%', 3: '0.00%', 4: '0.00%', 5: '3.33%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_4/HAR_MLP_v1_epoch_1.pth
Epoch 2/1000, Train Loss: 62.091317, Train-Class-Acc: {0: '85.61%', 1: '5.39%', 2: '30.26%', 3: '20.82%', 4: '17.05%', 5: '27.69%'}
Val Loss: 2.394453, Val Acc: 52.63%, Val-Class-Acc: {0: '94.30%', 1: '4.51%', 2: '67.74%', 3: '51.02%', 4: '50.11%', 5: '51.90%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_4/HAR_MLP_v1_epoch_2.pth
Epoch 3/1000, Train Loss: 29.840315, Train-Class-Acc: {0: '68.82%', 1: '5.68%', 2: '31.97%', 3: '63.18%', 4: '32.43%', 5: '34.08%'}
Val Loss: 2.606251, Val Acc: 60.71%, Val-Class-Acc: {0: '78.41%', 1: '1.69%', 2: '73.19%', 3: '72

## __HAR_MLP_v2 Training (alpha = 0.1)__

---
### Period 1 (alpha = 0.1)
+ ##### Total training time: 153.13 seconds
+ ##### Model: HAR_MLP_v2
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.1/Period_1'*
+ ##### Best Epoch: 432
#### __Val Accuracy: 95.50%__
#### __Val-Class-Acc: {0: '95.32%', 1: '95.68%'}__

In [None]:
# ================================
# 📌 Period 1 Configuration - HAR_MLP_v2
# ================================
period = 1

# Load dataset for this period
X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

# Auto-select CUDA device
device = auto_select_cuda_device()

# Model and training hyperparameters
input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.1  # Only effective in Period >= 2
teacher_model = None
stable_classes = None  # Not used in Period 1

# Model saving path (change EWC → MSE)
model_name = "HAR_MLP_v2"
model_saving_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v2", f"alpha_{alpha}", f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

# Initialize model, criterion, optimizer
student_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler =None

# Train the model (MSE-aware but no distillation in Period 1)
train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,     # None for Period 1
    teacher_model=teacher_model,       # None for Period 1
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

# ================================
# ✅ Summary & Resource Cleanup
# ================================

unique_classes = np.unique(y_train)
num_classes = len(unique_classes)

print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {num_classes}")

del X_train, y_train, X_val, y_val, teacher_model, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 2
    - Memory Used    : 743 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_1

✅ Data Overview:
X_train: torch.Size([2660, 561]), y_train: torch.Size([2660])
X_val: torch.Size([1023, 561]), y_val: torch.Size([1023])
Epoch 1/1000, Train Loss: 0.678256, Train-Class-Acc: {0: '53.34%', 1: '75.25%'}
Val Loss: 0.476874, Val Acc: 78.10%, Val-Class-Acc: {0: '77.60%', 1: '78.57%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_1/HAR_MLP_v2_epoch_1.pth
Epoch 2/1000, Train Loss: 0.453474, Train-Class-Acc: {0: '74.81%', 1: '82.24%'}
Val Loss: 0.401195, Val Acc: 82.01%, Val-Class-Acc: {0: '66.80%', 1: '96.05%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_1/HAR_MLP_v2_epoch_2.pth
Epoch 3/1000, Train Los

---
### Period 2 (alpha = 0.1)
+ ##### Total training time: 982.78 seconds
+ ##### Model: HAR_MLP_v2
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.1/Period_2'*
+ ##### Best Epoch: 958
#### __Val Accuracy: 97.84%__
#### __Val-Class-Acc: {0: '93.08%', 1: '96.62%', 2: '100.00%'}__

In [None]:
# ================================
# 📌 Period 2 Configuration - HAR_MLP_v2
# ================================
period = 2
prev_period = 1

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.1
stable_classes = [0, 1]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v2"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v2", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 733 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_2

✅ Data Overview:
X_train: torch.Size([5945, 561]), y_train: torch.Size([5945])
X_val: torch.Size([2410, 561]), y_val: torch.Size([2410])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 9.169582, Train-Class-Acc: {0: '44.25%', 1: '77.44%', 2: '56.44%'}
Val Loss: 0.590011, Val Acc: 75.23%, Val-Class-Acc: {0: '22.61%', 1: '99.44%', 2: '84.57%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_2/HAR_MLP_v2_epoch_1.pth
Epoch 2/1000, Train Loss: 6.411250, Train-Class-Acc: {0: '55.68%', 1: '93.09%', 2: '79.79%'}
Val Loss: 0.474536, Val Acc: 83.03%, Val-Class-Acc: {0: '44.20%', 1: '98.68%', 2: '90.77%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_2/HAR_MLP_v2_epoch_2.pth
Epoch 3/1000, Train Loss: 4.774281, Train-Class-Acc: {0: '69.67%', 1: '94.54%', 2: '85.24%'}
Val Loss: 0.341625, Val Acc: 90.37%, Val-Class-Acc: {0: '59.67%', 1: '98.31%', 2: '98.20%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_2/HAR_MLP_v2_epoch_3.pth
Epoch 4/1000, Train Loss: 3.670697, Train-Class-Acc: {0: '78.30%', 

---
### Period 3 (alpha = 0.1)
+ ##### Total training time: 1726.14 seconds
+ ##### Model: HAR_MLP_v2
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.1/Period_3'*
+ ##### Best Epoch: 841
#### __Val Accuracy: 98.47%__
#### __Val-Class-Acc: {0: '94.30%', 1: '96.80%', 2: '100.00%', 3: '100.00%'}__

In [None]:
# ================================
# 📌 Period 3 Configuration - HAR_MLP_v2
# ================================
period = 3
prev_period = 2

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.1
stable_classes = [0, 1, 2]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v2"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v2", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 737 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_3

✅ Data Overview:
X_train: torch.Size([7352, 561]), y_train: torch.Size([7352])
X_val: torch.Size([2947, 561]), y_val: torch.Size([2947])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 15.711265, Train-Class-Acc: {0: '18.51%', 1: '86.75%', 2: '89.35%', 3: '57.78%'}
Val Loss: 0.363201, Val Acc: 87.07%, Val-Class-Acc: {0: '26.27%', 1: '99.25%', 2: '99.93%', 3: '97.39%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_3/HAR_MLP_v2_epoch_1.pth
Epoch 2/1000, Train Loss: 11.103612, Train-Class-Acc: {0: '46.35%', 1: '89.74%', 2: '99.54%', 3: '88.84%'}
Val Loss: 0.241851, Val Acc: 92.09%, Val-Class-Acc: {0: '59.27%', 1: '97.18%', 2: '99.93%', 3: '96.83%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_3/HAR_MLP_v2_epoch_2.pth
Epoch 3/1000, Train Loss: 8.655552, Train-Class-Acc: {0: '66.72%', 1: '89.88%', 2: '99.60%', 3: '88.70%'}
Val Loss: 0.189612, Val Acc: 94.23%, Val-Class-Acc: {0: '74.54%', 1: '95.11%', 2: '99.93%', 3: '96.65%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_3/HAR_MLP_v2

---
### Period 4 (alpha = 0.1)
+ ##### Total training time: 1100.96 seconds
+ ##### Model: HAR_MLP_v2
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.1/Period_4'*
+ ##### Best Epoch: 798
#### __Val Accuracy: 97.15%__
#### __Val-Class-Acc: {0: '93.08%', 1: '96.62%', 2: '98.99%', 3: '100.00%', 4: '95.33%', 5: '98.81%'}__

In [None]:
# ================================
# 📌 Period 4 Configuration - HAR_MLP_v2
# ================================
period = 4
prev_period = 3

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.1
stable_classes = [0, 1, 3]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v2"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v2", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 737 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_4

✅ Data Overview:
X_train: torch.Size([7352, 561]), y_train: torch.Size([7352])
X_val: torch.Size([2947, 561]), y_val: torch.Size([2947])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 9.338241, Train-Class-Acc: {0: '6.84%', 1: '95.12%', 2: '33.52%', 3: '70.79%', 4: '44.27%', 5: '58.92%'}
Val Loss: 0.796379, Val Acc: 74.24%, Val-Class-Acc: {0: '16.70%', 1: '99.44%', 2: '74.19%', 3: '92.92%', 4: '85.14%', 5: '73.57%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_4/HAR_MLP_v2_epoch_1.pth
Epoch 2/1000, Train Loss: 6.199159, Train-Class-Acc: {0: '44.01%', 1: '95.92%', 2: '68.52%', 3: '97.58%', 4: '75.68%', 5: '61.56%'}
Val Loss: 0.615550, Val Acc: 82.02%, Val-Class-Acc: {0: '44.60%', 1: '98.50%', 2: '89.92%', 3: '95.34%', 4: '91.30%', 5: '68.10%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.1/Period_4/HAR_MLP_v2_epoch_2.pth
Epoch 3/1000, Train Loss: 4.569109, Train-Class-Acc: {0: '64.54%', 1: '92.14%', 2: '79.77%', 3: '99.22%', 4: '82.39%', 5: '65.92%'}
Val Loss: 0.505846, Val Acc: 86.39%, Val-Class-Acc: {0: '63.54%', 1: '96.99%', 2: '91.94%',

## __HAR_MLP_v1 Training (alpha = 0.2)__

---
### Period 1 (alpha = 0.2)
+ ##### Total training time: 1437.20 seconds
+ ##### Model: HAR_MLP_v1
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_1'*
+ ##### Best Epoch: 987
#### __Val Accuracy: 94.43%__
#### __Val-Class-Acc: {0: '91.85%', 1: '96.80%'}__

In [29]:
# ================================
# 📌 Period 1 Configuration - HAR_MLP_v1
# ================================
period = 1

# Load dataset for this period
X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

# Auto-select CUDA device
device = auto_select_cuda_device()

# Model and training hyperparameters
input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.2  # Only effective in Period >= 2
teacher_model = None
stable_classes = None  # Not used in Period 1

# Model saving path (change EWC → MSE)
model_name = "HAR_MLP_v1"
model_saving_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v1", f"alpha_{alpha}", f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

# Initialize model, criterion, optimizer
student_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler =None

# Train the model (MSE-aware but no distillation in Period 1)
train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,     # None for Period 1
    teacher_model=teacher_model,       # None for Period 1
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

# ================================
# ✅ Summary & Resource Cleanup
# ================================

unique_classes = np.unique(y_train)
num_classes = len(unique_classes)

print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {num_classes}")

del X_train, y_train, X_val, y_val, teacher_model, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 0
    - Memory Used    : 31147 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_1

✅ Data Overview:
X_train: torch.Size([2660, 561]), y_train: torch.Size([2660])
X_val: torch.Size([1023, 561]), y_val: torch.Size([1023])
Epoch 1/1000, Train Loss: 0.694657, Train-Class-Acc: {0: '53.73%', 1: '61.64%'}
Val Loss: 0.598394, Val Acc: 65.20%, Val-Class-Acc: {0: '28.92%', 1: '98.68%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_1/HAR_MLP_v1_epoch_1.pth
Epoch 2/1000, Train Loss: 0.566617, Train-Class-Acc: {0: '64.70%', 1: '76.06%'}
Val Loss: 0.498151, Val Acc: 83.77%, Val-Class-Acc: {0: '80.24%', 1: '87.03%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_1/HAR_MLP_v1_epoch_2.pth
Epoch 3/1000, Train L

---
### Period 2 (alpha = 0.2)
+ ##### Total training time: 637.95 seconds
+ ##### Model: HAR_MLP_v1
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_2'*
+ ##### Best Epoch: 727
#### __Val Accuracy: 97.55%__
#### __Val-Class-Acc: {0: '90.84%', 1: '97.37%', 2: '100.00%'}__

In [30]:
# ================================
# 📌 Period 2 Configuration - HAR_MLP_v1
# ================================
period = 2
prev_period = 1

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.2
stable_classes = [0, 1]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v1"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v1", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 11494 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_2

✅ Data Overview:
X_train: torch.Size([5945, 561]), y_train: torch.Size([5945])
X_val: torch.Size([2410, 561]), y_val: torch.Size([2410])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 21.361273, Train-Class-Acc: {0: '18.04%', 1: '77.80%', 2: '42.13%'}
Val Loss: 0.729926, Val Acc: 78.42%, Val-Class-Acc: {0: '56.21%', 1: '75.94%', 2: '87.24%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_2/HAR_MLP_v1_epoch_1.pth
Epoch 2/1000, Train Loss: 14.092276, Train-Class-Acc: {0: '71.23%', 1: '48.76%', 2: '79.67%'}
Val Loss: 0.374666, Val Acc: 87.34%, Val-Class-Acc: {0: '78.82%', 1: '62.41%', 2: '99.93%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_2/HAR_MLP_v1_epoch_2.pth
Epoch 3/1000, Train Loss: 12.086216, Train-Class-Acc: {0: '81.49%', 1: '57.64%', 2: '93.39%'}
Val Loss: 0.266005, Val Acc: 91.83%, Val-Class-Acc: {0: '75.56%', 1: '85.71%', 2: '99.93%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_2/HAR_MLP_v1_epoch_3.pth
Epoch 4/1000, Train Loss: 10.647806, Train-Class-Acc: {0: '83.67

---
### Period 3 (alpha = 0.2)
+ ##### Total training time: 986.93 seconds
+ ##### Model: HAR_MLP_v1
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_3'*
+ ##### Best Epoch: 987
#### __Val Accuracy: 97.39%__
#### __Val-Class-Acc: {0: '91.04%', 1: '96.62%', 2: '100.00%', 3: '97.21%'}__

In [31]:
# ================================
# 📌 Period 3 Configuration - HAR_MLP_v1
# ================================
period = 3
prev_period = 2

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.2
stable_classes = [0, 1, 2]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v1"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v1", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 737 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_3

✅ Data Overview:
X_train: torch.Size([7352, 561]), y_train: torch.Size([7352])
X_val: torch.Size([2947, 561]), y_val: torch.Size([2947])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 102.330604, Train-Class-Acc: {0: '73.87%', 1: '0.36%', 2: '94.37%', 3: '12.79%'}
Val Loss: 2.026981, Val Acc: 62.16%, Val-Class-Acc: {0: '87.98%', 1: '0.00%', 2: '100.00%', 3: '2.42%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_3/HAR_MLP_v1_epoch_1.pth
Epoch 2/1000, Train Loss: 48.575783, Train-Class-Acc: {0: '70.84%', 1: '0.73%', 2: '100.00%', 3: '12.94%'}
Val Loss: 2.576748, Val Acc: 61.05%, Val-Class-Acc: {0: '80.86%', 1: '0.00%', 2: '100.00%', 3: '2.79%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_3/HAR_MLP_v1_epoch_2.pth
Epoch 3/1000, Train Loss: 32.734498, Train-Class-Acc: {0: '64.62%', 1: '3.13%', 2: '100.00%', 3: '13.93%'}
Val Loss: 2.202642, Val Acc: 63.45%, Val-Class-Acc: {0: '92.46%', 1: '5.45%', 2: '100.00%', 3: '0.00%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_3/HAR_MLP_v1_e

---
### Period 4 (alpha = 0.2)
+ ##### Total training time: 1022.60 seconds
+ ##### Model: HAR_MLP_v1
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_4'*
+ ##### Best Epoch: 716
#### __Val Accuracy: 96.47%__
#### __Val-Class-Acc: {0: '91.24%', 1: '96.43%', 2: '98.59%', 3: '96.09%', 4: '98.73%', 5: '98.10%'}__

In [32]:
# ================================
# 📌 Period 4 Configuration - HAR_MLP_v1
# ================================
period = 4
prev_period = 3

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.2
stable_classes = [0, 1, 3]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v1"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v1", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v1(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 737 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_4

✅ Data Overview:
X_train: torch.Size([7352, 561]), y_train: torch.Size([7352])
X_val: torch.Size([2947, 561]), y_val: torch.Size([2947])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 284.965559, Train-Class-Acc: {0: '83.28%', 1: '2.33%', 2: '20.23%', 3: '0.00%', 4: '31.50%', 5: '9.23%'}
Val Loss: 4.949365, Val Acc: 19.44%, Val-Class-Acc: {0: '98.57%', 1: '0.00%', 2: '11.49%', 3: '0.00%', 4: '6.79%', 5: '0.00%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_4/HAR_MLP_v1_epoch_1.pth
Epoch 2/1000, Train Loss: 145.106301, Train-Class-Acc: {0: '90.28%', 1: '0.66%', 2: '25.20%', 3: '4.62%', 4: '22.37%', 5: '18.26%'}
Val Loss: 3.994354, Val Acc: 30.84%, Val-Class-Acc: {0: '96.95%', 1: '0.00%', 2: '57.46%', 3: '3.72%', 4: '27.18%', 5: '0.00%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v1/alpha_0.2/Period_4/HAR_MLP_v1_epoch_2.pth
Epoch 3/1000, Train Loss: 68.017532, Train-Class-Acc: {0: '81.18%', 1: '1.60%', 2: '30.67%', 3: '42.50%', 4: '29.64%', 5: '28.09%'}
Val Loss: 3.360099, Val Acc: 53.99%, Val-Class-Acc: {0: '93.69%', 1: '0.00%', 2: '63.71%', 3: '56.

## __HAR_MLP_v2 Training (alpha = 0.2)__

---
### Period 1 (alpha = 0.2)
+ ##### Total training time: 138.13 seconds
+ ##### Model: HAR_MLP_v2
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_1'*
+ ##### Best Epoch: 162
#### __Val Accuracy: 94.82%__
#### __Val-Class-Acc: {0: '93.08%', 1: '96.43%'}__

In [33]:
# ================================
# 📌 Period 1 Configuration - HAR_MLP_v2
# ================================
period = 1

# Load dataset for this period
X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

# Auto-select CUDA device
device = auto_select_cuda_device()

# Model and training hyperparameters
input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.2  # Only effective in Period >= 2
teacher_model = None
stable_classes = None  # Not used in Period 1

# Model saving path (change EWC → MSE)
model_name = "HAR_MLP_v2"
model_saving_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v2", f"alpha_{alpha}", f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

# Initialize model, criterion, optimizer
student_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler =None

# Train the model (MSE-aware but no distillation in Period 1)
train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,     # None for Period 1
    teacher_model=teacher_model,       # None for Period 1
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

# ================================
# ✅ Summary & Resource Cleanup
# ================================

unique_classes = np.unique(y_train)
num_classes = len(unique_classes)

print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {num_classes}")

del X_train, y_train, X_val, y_val, teacher_model, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 737 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_1

✅ Data Overview:
X_train: torch.Size([2660, 561]), y_train: torch.Size([2660])
X_val: torch.Size([1023, 561]), y_val: torch.Size([1023])
Epoch 1/1000, Train Loss: 0.627070, Train-Class-Acc: {0: '64.23%', 1: '69.80%'}
Val Loss: 0.480916, Val Acc: 75.07%, Val-Class-Acc: {0: '56.21%', 1: '92.48%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_1/HAR_MLP_v2_epoch_1.pth
Epoch 2/1000, Train Loss: 0.436159, Train-Class-Acc: {0: '75.66%', 1: '82.31%'}
Val Loss: 0.355372, Val Acc: 86.31%, Val-Class-Acc: {0: '82.89%', 1: '89.47%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_1/HAR_MLP_v2_epoch_2.pth
Epoch 3/1000, Train Los

---
### Period 2 (alpha = 0.2)
+ ##### Total training time: 383.32 seconds
+ ##### Model: HAR_MLP_v2
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_2'*
+ ##### Best Epoch: 186
#### __Val Accuracy: 97.76%__
#### __Val-Class-Acc: {0: '93.28%', 1: '96.05%', 2: '100.00%'}__

In [34]:
# ================================
# 📌 Period 2 Configuration - HAR_MLP_v2
# ================================
period = 2
prev_period = 1

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.2
stable_classes = [0, 1]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v2"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v2", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 735 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_2

✅ Data Overview:
X_train: torch.Size([5945, 561]), y_train: torch.Size([5945])
X_val: torch.Size([2410, 561]), y_val: torch.Size([2410])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 11.478703, Train-Class-Acc: {0: '49.92%', 1: '77.73%', 2: '80.09%'}
Val Loss: 0.508737, Val Acc: 81.37%, Val-Class-Acc: {0: '48.68%', 1: '98.12%', 2: '86.52%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_2/HAR_MLP_v2_epoch_1.pth
Epoch 2/1000, Train Loss: 8.446601, Train-Class-Acc: {0: '72.47%', 1: '88.36%', 2: '80.58%'}
Val Loss: 0.373922, Val Acc: 88.09%, Val-Class-Acc: {0: '69.45%', 1: '95.30%', 2: '91.93%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_2/HAR_MLP_v2_epoch_2.pth
Epoch 3/1000, Train Loss: 6.360105, Train-Class-Acc: {0: '83.90%', 1: '87.34%', 2: '83.74%'}
Val Loss: 0.337198, Val Acc: 89.96%, Val-Class-Acc: {0: '79.23%', 1: '93.23%', 2: '92.50%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_2/HAR_MLP_v2_epoch_3.pth
Epoch 4/1000, Train Loss: 4.809904, Train-Class-Acc: {0: '87.17%',

---
### Period 3 (alpha = 0.2)
+ ##### Total training time: 483.45 seconds
+ ##### Model: HAR_MLP_v2
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_3'*
+ ##### Best Epoch: 733
#### __Val Accuracy: 98.17%__
#### __Val-Class-Acc: {0: '92.06%', 1: '97.18%', 2: '100.00%', 3: '100.00%'}__

In [35]:
# ================================
# 📌 Period 3 Configuration - HAR_MLP_v2
# ================================
period = 3
prev_period = 2

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.2
stable_classes = [0, 1, 2]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v2"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v2", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 737 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_3

✅ Data Overview:
X_train: torch.Size([7352, 561]), y_train: torch.Size([7352])
X_val: torch.Size([2947, 561]), y_val: torch.Size([2947])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 22.315128, Train-Class-Acc: {0: '54.82%', 1: '43.96%', 2: '95.07%', 3: '41.72%'}
Val Loss: 0.450881, Val Acc: 80.12%, Val-Class-Acc: {0: '90.22%', 1: '54.70%', 2: '100.00%', 3: '44.69%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_3/HAR_MLP_v2_epoch_1.pth
Epoch 2/1000, Train Loss: 15.951611, Train-Class-Acc: {0: '77.68%', 1: '61.06%', 2: '99.73%', 3: '46.84%'}
Val Loss: 0.338244, Val Acc: 84.02%, Val-Class-Acc: {0: '81.67%', 1: '88.16%', 2: '100.00%', 3: '40.78%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_3/HAR_MLP_v2_epoch_2.pth
Epoch 3/1000, Train Loss: 12.664662, Train-Class-Acc: {0: '79.00%', 1: '80.93%', 2: '99.76%', 3: '43.57%'}
Val Loss: 0.304651, Val Acc: 83.34%, Val-Class-Acc: {0: '79.84%', 1: '91.17%', 2: '100.00%', 3: '35.75%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_3/HAR_ML

---
### Period 4 (alpha = 0.2)
+ ##### Total training time: 506.00 seconds
+ ##### Model: HAR_MLP_v2
+ ##### Training and saving in *'Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_4'*
+ ##### Best Epoch: 433
#### __Val Accuracy: 97.05%__
#### __Val-Class-Acc: {0: '93.28%', 1: '95.30%', 2: '99.60%', 3: '100.00%', 4: '96.82%', 5: '97.14%'}__

In [36]:
# ================================
# 📌 Period 4 Configuration - HAR_MLP_v2
# ================================
period = 4
prev_period = 3

X_train, y_train = period_datasets[period]["train"]
X_val, y_val     = period_datasets[period]["test"]

device = auto_select_cuda_device()

input_size    = X_train.shape[1]
hidden_size   = 128
output_size   = len(set(y_train))
dropout       = 0.2
learning_rate = 0.0001
weight_decay  = 1e-5
num_epochs    = 1000
alpha         = 0.2
stable_classes = [0, 1, 3]

teacher_output_size = len(set(period_datasets[prev_period]["train"][1]))

model_name = "HAR_MLP_v2"
base_folder = os.path.join("Class_Incremental_CL", "HAR_CIL", "Trained_models", "MSE_CIL_v2", f"alpha_{alpha}")
model_saving_folder = os.path.join(base_folder, f"Period_{period}")
os.makedirs(model_saving_folder, exist_ok=True)

teacher_path = os.path.join(base_folder, f"Period_{prev_period}", f"{model_name}_best.pth")
assert os.path.exists(teacher_path), f"❌ Teacher model not found at {teacher_path}"
teacher_checkpoint = torch.load(teacher_path, map_location=device)
teacher_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=teacher_output_size,
    dropout=dropout
).to(device)
teacher_model.load_state_dict(teacher_checkpoint['model_state_dict'])
teacher_model.eval()

student_model = HAR_MLP_v2(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    dropout=dropout
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = None

train_with_mse_distillation(
    student_model=student_model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    stable_classes=stable_classes,
    teacher_model=teacher_model,
    alpha=alpha,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name=model_name,
    stop_signal_file=stop_signal_file,
    device=device
)

unique_classes = np.unique(y_train)
print(f"\n✅ Period {period} Training Complete. Final model architecture:")
print(student_model)
print(f"📊 Class Summary → unique_classes = {unique_classes}, num_classes = {len(unique_classes)}")

del X_train, y_train, X_val, y_val, teacher_model, teacher_checkpoint, student_model
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 737 MiB
    - Device Name    : NVIDIA RTX A6000

🚀 'train_with_mse_distillation' started.
✅ Removed existing folder: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_4

✅ Data Overview:
X_train: torch.Size([7352, 561]), y_train: torch.Size([7352])
X_val: torch.Size([2947, 561]), y_val: torch.Size([2947])


  teacher_checkpoint = torch.load(teacher_path, map_location=device)


Epoch 1/1000, Train Loss: 14.043217, Train-Class-Acc: {0: '62.44%', 1: '36.54%', 2: '44.62%', 3: '77.04%', 4: '34.30%', 5: '20.28%'}
Val Loss: 0.833913, Val Acc: 74.89%, Val-Class-Acc: {0: '51.93%', 1: '94.55%', 2: '79.84%', 3: '98.51%', 4: '67.09%', 5: '49.52%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_4/HAR_MLP_v2_epoch_1.pth
Epoch 2/1000, Train Loss: 9.195168, Train-Class-Acc: {0: '72.55%', 1: '73.36%', 2: '53.02%', 3: '98.58%', 4: '50.14%', 5: '34.58%'}
Val Loss: 0.702685, Val Acc: 80.15%, Val-Class-Acc: {0: '65.17%', 1: '95.11%', 2: '81.65%', 3: '100.00%', 4: '72.61%', 5: '60.00%'}, LR: 0.000100
✅ Saved model: Class_Incremental_CL/HAR_CIL/Trained_models/MSE_CIL_v2/alpha_0.2/Period_4/HAR_MLP_v2_epoch_2.pth
Epoch 3/1000, Train Loss: 7.032066, Train-Class-Acc: {0: '75.51%', 1: '82.02%', 2: '60.11%', 3: '99.50%', 4: '56.10%', 5: '41.48%'}
Val Loss: 0.617199, Val Acc: 82.90%, Val-Class-Acc: {0: '69.86%', 1: '95.68%', 2: '90.52

---

## 📊 Summary: HAR - MSE Distillation (`HAR_MLP_v2`)

### ✔️ Alpha = 0.1

| Period | Training Time (s) | Validation Accuracy | Class-wise Accuracy                                                     |
|--------|-------------------|---------------------|--------------------------------------------------------------------------|
| 1      | 153.13            | **95.50%**          | {0: 95.32%, 1: 95.68%}                                                  |
| 2      | 982.78            | **97.84%**          | {0: 93.08%, 1: 96.62%, 2: 100.00%}                                      |
| 3      | 1726.14           | **98.47%**          | {0: 94.30%, 1: 96.80%, 2: 100.00%, 3: 100.00%}                          |
| 4      | 1100.96           | **97.15%**          | {0: 93.08%, 1: 96.62%, 2: 98.99%, 3: 100.00%, 4: 95.33%, 5: 98.81%}     |

### Alpha = 0.2

| Period | Training Time (s) | Validation Accuracy | Class-wise Accuracy                                                     |
|--------|-------------------|---------------------|--------------------------------------------------------------------------|
| 1      | 138.13            | **94.82%**          | {0: 93.08%, 1: 96.43%}                                                  |
| 2      | 383.32            | **97.76%**          | {0: 93.28%, 1: 96.05%, 2: 100.00%}                                      |
| 3      | 483.45            | **98.17%**          | {0: 92.06%, 1: 97.18%, 2: 100.00%, 3: 100.00%}                          |
| 4      | 506.00            | **97.05%**          | {0: 93.28%, 1: 95.30%, 2: 99.60%, 3: 100.00%, 4: 96.82%, 5: 97.14%}     |

---


### ✔️ HAR - MSE Distillation

| Period | Training Time (s) | Validation Accuracy | Class-wise Accuracy                                                     |
|--------|-------------------|---------------------|--------------------------------------------------------------------------|
| 1      | 153.13            | **95.50%**          | {0: 95.32%, 1: 95.68%}                                                  |
| 2      | 982.78            | **97.84%**          | {0: 93.08%, 1: 96.62%, 2: 100.00%}                                      |
| 3      | 1726.14           | **98.47%**          | {0: 94.30%, 1: 96.80%, 2: 100.00%, 3: 100.00%}                          |
| 4      | 1100.96           | **97.15%**          | {0: 93.08%, 1: 96.62%, 2: 98.99%, 3: 100.00%, 4: 95.33%, 5: 98.81%}     |