In [None]:
!pip install torch torchvision torchaudio
!pip install scipy pandas scikit-learn matplotlib

In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import os
from glob import glob
from sklearn.metrics import f1_score, accuracy_score
import scipy.io
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

In [28]:
class HAR_Dataset(Dataset):
    def __init__(self, X, y=None, window_size=128):
        self.X = torch.FloatTensor(X)
        self.y = None if y is None else torch.LongTensor(y)
        self.window_size = window_size

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Ensure the sequence length is correct
        x = self.X[idx]
        if x.shape[1] > self.window_size:
            # If sequence is too long, take a random window
            start = np.random.randint(0, x.shape[1] - self.window_size)
            x = x[:, start:start + self.window_size]
        elif x.shape[1] < self.window_size:
            # If sequence is too short, pad with zeros
            padding = torch.zeros(x.shape[0], self.window_size - x.shape[1])
            x = torch.cat([x, padding], dim=1)

        if self.y is None:
            return x
        return x, self.y[idx]

In [29]:
def load_mobiact_data(base_path, window_size=128):
    print("Loading MobiAct dataset...")
    all_data = []
    all_labels = []

    for activity_folder in glob(os.path.join(base_path, "*")):
        if os.path.isdir(activity_folder):
            activity_name = os.path.basename(activity_folder)

            csv_files = glob(os.path.join(activity_folder, "*.csv"))
            if not csv_files:
                continue

            for data_file in csv_files:
                try:
                    # Read CSV with header handling
                    data = pd.read_csv(data_file, header=0)  # Changed to header=0
                    sensor_data = data.iloc[:, 1:4].values.astype(np.float32).T  # Skip timestamp column
                    
                    # Process to fixed window_size
                    time_steps = sensor_data.shape[1]
                    if time_steps > window_size:
                        sensor_data = sensor_data[:, :window_size]
                    elif time_steps < window_size:
                        pad_width = ((0, 0), (0, window_size - time_steps))
                        sensor_data = np.pad(sensor_data, pad_width, mode='constant', constant_values=0)
                    
                    all_data.append(sensor_data)
                    all_labels.append(activity_name)
                except Exception as e:
                    print(f"Error loading file {data_file}: {str(e)}")

    # Convert to properly typed numpy array
    X = np.array(all_data, dtype=np.float32)
    unique_labels = sorted(set(all_labels))
    label_to_id = {label: idx for idx, label in enumerate(unique_labels)}
    y = np.array([label_to_id[label] for label in all_labels], dtype=np.int64)

    print(f"Loaded {len(X)} samples with {len(unique_labels)} activities")
    return X, y


In [30]:
def load_uci_har_data(base_path):
    print("Loading UCI-HAR dataset...")

    def load_signals(folder_path, data_type='train'):
        signals = []
        # Load accelerometer data
        for signal_type in ['body_acc_x', 'body_acc_y', 'body_acc_z']:
            signal_path = os.path.join(folder_path, f'Inertial Signals/{signal_type}_{data_type}.txt')
            if os.path.exists(signal_path):
                signal = pd.read_csv(signal_path, header=None, delim_whitespace=True).values
                signals.append(signal)
            else:
                print(f"Warning: {signal_path} does not exist.")
                return None  # Return None if a file is missing
        return np.array(signals)

    # Load training data
    train_path = os.path.join(base_path, 'train')
    X_train = load_signals(train_path, data_type='train')
    y_train = pd.read_csv(os.path.join(train_path, 'y_train.txt'), header=None).values.ravel() if X_train is not None else None

    # Load test data
    test_path = os.path.join(base_path, 'test')
    X_test = load_signals(test_path, data_type='test')
    y_test = pd.read_csv(os.path.join(test_path, 'y_test.txt'), header=None).values.ravel() if X_test is not None else None

    # Check if either X_train or X_test is None
    if X_train is None and X_test is None:
        raise ValueError("Both training and testing data failed to load.")

    # Handle the case where X_train is None
    if X_train is None:
        X = np.transpose(X_test, (1, 0, 2))
        y = y_test - 1
        print("Only test data loaded.")
    elif X_test is None:
        X = np.transpose(X_train, (1, 0, 2))
        y = y_train - 1
        print("Only train data loaded.")
    else:
        # Combine train and test
        X = np.concatenate([X_train, X_test], axis=1)  # Shape: (3, n_samples, time_steps)
        X = np.transpose(X, (1, 0, 2))  # Shape: (n_samples, 3, time_steps)
        y = np.concatenate([y_train, y_test]) - 1  # Make labels 0-based
        print("Both train and test data loaded.")

    print(f"Loaded {len(X)} samples")
    return X, y

In [31]:
def load_usc_had_data(base_path, window_size=128):  # Added window_size parameter
    print("Loading USC-HAD dataset...")
    all_data = []
    all_labels = []

    for subject_folder in sorted(glob(os.path.join(base_path, "Subject*"))):
        subject_id = os.path.basename(subject_folder)
        
        for data_file in glob(os.path.join(subject_folder, "*.mat")):
            try:
                # Load .mat file
                mat_data = scipy.io.loadmat(data_file)
                sensor_readings = mat_data['sensor_readings']
                
                # CORRECTED: Extract first 3 sensors (columns) and transpose to (3, N)
                acc_data = sensor_readings[:, :3].T  # Fixed slicing and transposed
                
                # CORRECTED: Standardize to window_size
                time_steps = acc_data.shape[1]
                if time_steps > window_size:
                    # Random truncation
                    start = np.random.randint(0, time_steps - window_size)
                    acc_data = acc_data[:, start:start + window_size]
                elif time_steps < window_size:
                    # Zero-padding
                    pad_width = ((0, 0), (0, window_size - time_steps))
                    acc_data = np.pad(acc_data, pad_width, mode='constant')
                
                all_data.append(acc_data)
                all_labels.append(int(os.path.basename(data_file)[1:].split('t')[0]) - 1)

            except Exception as e:
                print(f"Error loading {data_file}: {str(e)}")

    X = np.array(all_data, dtype=np.float32)
    y = np.array(all_labels, dtype=np.int64)
    print(f"Loaded {len(X)} samples with window_size={window_size}")
    return X, y


In [32]:
class CNN_GRN_Transformer(nn.Module):
    def __init__(self, input_channels=3, seq_length=128, num_classes=12):
        super(CNN_GRN_Transformer, self).__init__()

        # Parameters
        self.hidden_dim = 256

        # CNN layers
        self.cnn = nn.Sequential(
            nn.Conv1d(input_channels, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2)
        )

        # Calculate sequence length after CNN
        self.seq_length_after_cnn = seq_length // 2

        # GRN (Gated Recurrent Network)
        self.gru = nn.GRU(
            input_size=128,
            hidden_size=self.hidden_dim // 2,
            num_layers=2,
            batch_first=True,
            bidirectional=True
        )

        # Transformer
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.hidden_dim,
            nhead=8,
            dim_feedforward=1024,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(self.hidden_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )
    def forward(self, x):
        # CNN feature extraction
        x = self.cnn(x)

        # Prepare for GRU (batch_size, seq_len, features)
        x = x.transpose(1, 2)

        # GRU processing
        x, _ = self.gru(x)

        # Transformer processing
        x = self.transformer(x)

        # Global average pooling
        x = torch.mean(x, dim=1)

        # Classification
        x = self.classifier(x)

        return x

In [33]:
class CCLoss(nn.Module):
    def __init__(self, temperature=0.07):
        super(CCLoss, self).__init__()
        self.temperature = temperature

    def forward(self, features):
        # Normalize features
        features = nn.functional.normalize(features, dim=1)

        # Compute similarity matrix
        similarity = torch.mm(features, features.t()) / self.temperature

        # Remove diagonal elements
        mask = torch.eye(similarity.shape[0], device=similarity.device)
        similarity = similarity * (1 - mask)

        # Compute contrastive loss
        exp_sim = torch.exp(similarity)
        loss = -torch.log(exp_sim.sum(dim=1) / (exp_sim.sum() - exp_sim.diag()))

        return loss.mean()

In [34]:
def pretrain(model, train_loader, device, epochs=50):
    print("Starting pretraining phase...")
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    cc_loss = CCLoss()

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch_idx, data in enumerate(train_loader):
            data = data.to(device)

            # Forward pass
            features = model(data)

            # Compute loss
            loss = cc_loss(features)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            if batch_idx % 100 == 0:
                print(f'Epoch [{epoch+1}/{epochs}] Batch [{batch_idx}/{len(train_loader)}] Loss: {loss.item():.4f}')

        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{epochs}] Average Loss: {avg_loss:.4f}')

    return model
def finetune(model, train_loader, val_loader, device, epochs=30):
    print("Starting fine-tuning phase...")
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)

    best_val_acc = 0

    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            if batch_idx % 100 == 0:
                print(f'Epoch [{epoch+1}/{epochs}] Batch [{batch_idx}/{len(train_loader)}] Loss: {loss.item():.4f}')

        train_acc = 100 * correct / total
        train_loss = train_loss / len(train_loader)

        # Validation
        model.eval()
        val_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)

                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

        val_acc = 100 * correct / total
        val_loss = val_loss / len(val_loader)

        scheduler.step(val_loss)

        print(f'Epoch [{epoch+1}/{epochs}]')
        print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%')

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pth')
            print(f'Saved new best model with validation accuracy: {val_acc:.2f}%')

    # Add return values for validation predictions
    all_val_preds = []
    all_val_labels = []
    
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            all_val_preds.extend(preds.cpu().numpy())
            all_val_labels.extend(targets.cpu().numpy())
    
    return model, val_loss  
    

    

In [43]:
def evaluate_dataset(model, X, y, name, device):  # Add device parameter
    dataset = HAR_Dataset(X, y, 128)
    loader = DataLoader(dataset, batch_size=32)
    
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)  # Now has device
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())
    
    f1 = f1_score(all_labels, all_preds, average='weighted')
    acc = accuracy_score(all_labels, all_preds)
    
    print(f"\n{name} Dataset Evaluation:")
    print(f"F1 Score: {f1:.4f}")
    print(f"Accuracy: {acc:.4f}")
   # print(classification_report(all_labels, all_preds, target_names=class_names)) 


In [47]:
def main():
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Parameters
    WINDOW_SIZE = 128
    BATCH_SIZE = 32
    NUM_CLASSES = 12  # Adjust based on your combined dataset

    # Dataset paths - you'll need to set these based on your local setup
    MOBIACT_PATH = r"C:\Users\ASUS\Desktop\CSSHAR\MobiAct_Dataset_v2.0\MobiAct_Dataset_v2.0\Annotated Data"
    UCI_HAR_PATH = r"C:\Users\ASUS\Desktop\CSSHAR\human+activity+recognition+using+smartphones\UCI HAR Dataset\UCI HAR Dataset"
    USC_HAD_PATH = r"C:\Users\ASUS\Desktop\CSSHAR\USC-HAD\USC-HAD"

    # Load datasets
    print("\nLoading datasets...")

    # MobiAct for pretraining
    X_mobiact, y_mobiact = load_mobiact_data(MOBIACT_PATH, window_size=WINDOW_SIZE)

    # Check if MobiAct dataset is empty
    if len(X_mobiact) == 0:
        raise ValueError("MobiAct dataset is empty. Please check your data loading process.")
    print("mobiact done")
    # Load other datasets for fine-tuning
    X_uci, y_uci = load_uci_har_data(UCI_HAR_PATH)
    print("UCI done")

    # Load USC-HAD dataset
    X_usc, y_usc = load_usc_had_data(USC_HAD_PATH, window_size=WINDOW_SIZE)
    

    # Check if USC-HAD dataset is empty
    if len(X_usc) == 0:
        raise ValueError("USC-HAD dataset is empty. Please check your data loading process.")
    print("USC done")
    # Create model
    model = CNN_GRN_Transformer(
        input_channels=3,
        seq_length=WINDOW_SIZE,
        num_classes=NUM_CLASSES
    ).to(device)
    print("model created")
    
    
    # Pretraining phase (using only MobiAct)
    pretrain_dataset = HAR_Dataset(X_mobiact, window_size=WINDOW_SIZE)
    pretrain_loader = DataLoader(pretrain_dataset, batch_size=BATCH_SIZE, shuffle=True)
    model = pretrain(model, pretrain_loader, device, epochs=50)

    # Save pretrained model
    torch.save(model.state_dict(), 'pretrained_model.pth')
    print("Saved pretrained model")

    # Fine-tuning phase (using UCI and USC datasets)
    print("\nStarting fine-tuning on combined datasets...")
    
    # Combine UCI and USC datasets
    X_combined = np.concatenate([X_uci, X_usc])
    y_combined = np.concatenate([y_uci, y_usc])
    
    # Split into train/val sets (80/20)
    X_train, X_val, y_train, y_val = train_test_split(
        X_combined, y_combined, test_size=0.2, random_state=42, stratify=y_combined
    )
    
    # Create datasets and loaders
    train_dataset = HAR_Dataset(X_train, y_train, WINDOW_SIZE)
    val_dataset = HAR_Dataset(X_val, y_val, WINDOW_SIZE)
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    # Fine-tune the model
     # During fine-tuning
    model, training_losses = finetune(model, train_loader, val_loader, device, epochs=40)
    
    
    # Final evaluation
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())
    
    # Calculate metrics
    f1 = f1_score(all_labels, all_preds, average='weighted')
    acc = accuracy_score(all_labels, all_preds)
    print(f"\nTOTAL Final Evaluation Results:")
    print(f"Mean F1 Score: {f1:.4f}")
    print(f"Accuracy: {acc:.4f}")
    # Evaluate on both datasets separately
    evaluate_dataset(model, X_uci, y_uci, "UCI-HAR", device)  # Pass device
    evaluate_dataset(model, X_usc, y_usc, "USC-HAD", device)
    
    

In [49]:
if __name__ == "__main__":
    main()

Using device: cuda

Loading datasets...
Loading MobiAct dataset...
Loaded 3294 samples with 20 activities
mobiact done
Loading UCI-HAR dataset...
Both train and test data loaded.
Loaded 10299 samples
UCI done
Loading USC-HAD dataset...
Loaded 840 samples with window_size=128
USC done
model created
Starting pretraining phase...
Epoch [1/50] Batch [0/103] Loss: 4.2304
Epoch [1/50] Batch [100/103] Loss: 3.4669
Epoch [1/50] Average Loss: 3.4822
Epoch [2/50] Batch [0/103] Loss: 3.4669
Epoch [2/50] Batch [100/103] Loss: 3.4661
Epoch [2/50] Average Loss: 3.4662
Epoch [3/50] Batch [0/103] Loss: 3.4664
Epoch [3/50] Batch [100/103] Loss: 3.4659
Epoch [3/50] Average Loss: 3.4656
Epoch [4/50] Batch [0/103] Loss: 3.4661
Epoch [4/50] Batch [100/103] Loss: 3.4658
Epoch [4/50] Average Loss: 3.4653
Epoch [5/50] Batch [0/103] Loss: 3.4658
Epoch [5/50] Batch [100/103] Loss: 3.4658
Epoch [5/50] Average Loss: 3.4652
Epoch [6/50] Batch [0/103] Loss: 3.4658
Epoch [6/50] Batch [100/103] Loss: 3.4658
Epoch [6/