<a href="https://colab.research.google.com/github/dangoln/HAR-using-DAGHAR/blob/main/CNN_LSTM_on_DAGHAR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m46.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [None]:
# Attach Google Drive to the Colab VM; the CSV paths used below live under this mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
#imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle

In [None]:
# File paths: the train / validation / test CSVs of the RealWorld thigh split on the mounted Drive.
train_path, val_path, test_path = (
    f"/content/drive/My Drive/HAR-Datasets/DAGHAR/standardized/RealWorld/RealWorld_thigh/{split}.csv"
    for split in ("train", "validation", "test")
)

In [None]:
# Hyperparameters
# NOTE: the training cell further down re-assigns input_dim, num_classes, learning_rate
# and num_epochs before it builds the model, so the values set here are only defaults.

# -- data / model shape --
input_dim, sequence_length = 3, 128  # sensor channels (adjust per dataset), rows per window
num_classes = 6                      # number of activity classes

# -- optimisation --
batch_size, learning_rate, num_epochs = 64, 0.001, 30

In [None]:
# CNN-BiLSTM Model
class CNN_BiLSTM(nn.Module):
    """Conv1d + max-pool feature extractor followed by a bidirectional LSTM classifier.

    Args:
        input_dim: Size of the last axis of the input, used as ``in_channels`` of the
            convolution.
        num_classes: Number of output logits.
        conv_channels: Output channels of the convolution (default 64).
        hidden_size: Hidden size of each LSTM direction (default 128).
        num_layers: Number of stacked LSTM layers (default 2).

    The defaults reproduce the layer sizes that were previously hard-coded, so
    ``CNN_BiLSTM(input_dim, num_classes)`` builds the same parameter set as before.
    """

    def __init__(self, input_dim, num_classes, conv_channels=64, hidden_size=128, num_layers=2):
        super().__init__()

        # kernel_size=3 with padding=1 keeps the sequence length unchanged.
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=conv_channels,
                               kernel_size=3, stride=1, padding=1)
        # Halves the sequence length before the recurrent layers.
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.lstm = nn.LSTM(input_size=conv_channels, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True, bidirectional=True)
        # Two directions are concatenated, hence hidden_size * 2 input features.
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Return class logits of shape ``[batch, num_classes]``.

        Args:
            x: Tensor of shape ``[batch, length, input_dim]``; the last axis is moved
                into the channel position before the convolution.
        """
        x = x.transpose(1, 2)  # -> [batch, input_dim, length], the layout Conv1d expects
        x = self.conv1(x)
        x = self.pool(x)
        x = x.transpose(1, 2)  # -> [batch, length // 2, conv_channels] for the batch_first LSTM
        _, (h_n, _) = self.lstm(x)
        # h_n is ordered by layer, then direction: h_n[-2] / h_n[-1] are the top layer's final
        # forward / reverse states. Reading the sequence output at the last time step instead
        # would pair the forward summary with a reverse state that has seen a single step.
        x = torch.cat((h_n[-2], h_n[-1]), dim=1)
        return self.fc(x)


In [None]:
#custom dataset
class HAR_Dataset(Dataset):
    """Fixed-length windows cut from the numeric columns of a CSV file.

    The last numeric column is used as the label and every other numeric column as a
    feature. Consecutive rows are grouped into non-overlapping windows of
    ``sequence_length`` rows; trailing rows that do not fill a window are discarded.

    Args:
        file_path: Path of the CSV file to read.
        sequence_length: Number of consecutive rows per window (must be positive).
        scaler: Optional already-fitted scaler exposing ``transform``. When omitted, a new
            ``StandardScaler`` is fitted on this file. Pass the training dataset's
            ``scaler`` attribute when building validation/test datasets so that all splits
            are normalised with the training statistics.

    Attributes:
        data: ``ndarray`` of shape ``(num_windows, sequence_length, num_features)``.
        labels: One label per window, taken from the window's first row.
        scaler: The scaler that was applied to the features.

    Raises:
        ValueError: If ``sequence_length`` is not positive, the file has fewer than two
            numeric columns, or it has fewer rows than ``sequence_length``.
    """

    def __init__(self, file_path, sequence_length, scaler=None):
        if sequence_length <= 0:
            raise ValueError(f"sequence_length must be positive, got {sequence_length}")

        df = pd.read_csv(file_path)

        # Keep numeric columns only. After this step the label column is numeric by
        # construction, so no string-to-integer label mapping is needed.
        df = df.select_dtypes(include=[np.number])
        if df.shape[1] < 2:
            raise ValueError(
                f"{file_path!r} needs at least one numeric feature column and one numeric label column"
            )

        # Separate features and labels (assuming last numeric column is the activity label)
        features = df.iloc[:, :-1].values
        labels = df.iloc[:, -1].values

        num_samples = features.shape[0] // sequence_length
        if num_samples == 0:
            raise ValueError(
                f"{file_path!r} has {features.shape[0]} rows, fewer than sequence_length={sequence_length}"
            )

        # Normalise sensor data: fit only when no fitted scaler was supplied.
        if scaler is None:
            scaler = StandardScaler()
            features = scaler.fit_transform(features)
        else:
            features = scaler.transform(features)
        self.scaler = scaler

        # Reshape to (samples, sequence_length, features), dropping the incomplete tail.
        usable_rows = num_samples * sequence_length
        self.data = features[:usable_rows].reshape(num_samples, sequence_length, features.shape[1])
        self.labels = labels[:usable_rows:sequence_length]  # label of each window's first row

    def __len__(self):
        """Number of windows."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(window, label)`` for window ``idx``.

        ``window`` is a float32 tensor of shape ``(num_features, sequence_length)`` and
        ``label`` is an int64 scalar tensor.
        """
        # NOTE(review): the window is handed out as (features, time). CNN_BiLSTM.forward in
        # this notebook transposes its input once more, so confirm which axis is meant to be
        # the convolution's channel axis.
        window = torch.tensor(self.data[idx], dtype=torch.float32).permute(1, 0)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return window, label


In [None]:
# Load datasets: every split is windowed with the same sequence_length.
train_dataset, val_dataset, test_dataset = (
    HAR_Dataset(csv_path, sequence_length)
    for csv_path in (train_path, val_path, test_path)
)

# Only the training loader reshuffles; validation and test keep file order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (val_dataset, test_dataset)
)


In [None]:
# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the global torch RNG before the model is built so that weight initialisation and
# the training loader's shuffling start from the same state on every run.
torch.manual_seed(42)

# The model uses the last axis of each sample as its Conv1d channel axis, so read the
# channel count from the data instead of hard-coding it.
input_dim = train_dataset[0][0].shape[-1]
num_classes = 6  # Example number of classes
learning_rate = 0.001
num_epochs = 10

# Initialize model, loss, and optimizer
model = CNN_BiLSTM(input_dim, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Assuming train_loader and train_dataset are already defined
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    correct = 0
    seen = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the epoch figure
        # is a per-sample mean that does not depend on how many batches there are.
        batch_count = labels.size(0)
        total_loss += loss.item() * batch_count
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        seen += batch_count

    # Divide by the samples actually processed (guarded against an empty loader).
    avg_loss = total_loss / max(seen, 1)
    accuracy = correct / max(seen, 1)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

Epoch [1/10], Loss: 3.5812, Accuracy: 0.0875
Epoch [2/10], Loss: 3.5169, Accuracy: 0.3375
Epoch [3/10], Loss: 3.4514, Accuracy: 0.3125
Epoch [4/10], Loss: 3.3654, Accuracy: 0.4875
Epoch [5/10], Loss: 3.2092, Accuracy: 0.5250
Epoch [6/10], Loss: 2.8483, Accuracy: 0.5500
Epoch [7/10], Loss: 2.6519, Accuracy: 0.5875
Epoch [8/10], Loss: 2.2528, Accuracy: 0.7625
Epoch [9/10], Loss: 1.7678, Accuracy: 0.8750
Epoch [10/10], Loss: 1.3010, Accuracy: 0.9375


In [None]:
# Evaluation function
def evaluate(model, dataloader, device=None):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    Args:
        model: Module mapping a batch of inputs to per-class scores of shape
            ``[batch, num_classes]``.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to. Defaults to the device of the model's
            first parameter (CPU if the model has no parameters).

    Returns:
        Fraction of correctly classified samples among the samples actually yielded by
        ``dataloader``; ``0.0`` if it yields none.

    Side effects:
        Puts ``model`` into eval mode and leaves it there.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            # Count what was really seen: with drop_last or a sampler this can be
            # smaller than len(dataloader.dataset).
            total += labels.size(0)
    return correct / total if total else 0.0

# Validation & Test: score the trained model on both held-out loaders, validation first.
val_acc, test_acc = (
    evaluate(model, loader) for loader in (val_loader, test_loader)
)

print(f"Validation Accuracy: {val_acc:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")

Validation Accuracy: 0.2143
Test Accuracy: 0.4545


## Extra evaluation: thigh validation split and upper-arm validation split

In [None]:
#test val+test sets
# Re-evaluates the trained model on two more CSV files, windowed and batched with the
# same sequence_length and batch_size as above.
# NOTE(review): test_path1 points at the *validation* CSV of RealWorld_upperarm, yet its
# score is printed as "Test Accuracy" -- confirm which file was intended.
val_path1 = "/content/drive/My Drive/HAR-Datasets/DAGHAR/standardized/RealWorld/RealWorld_thigh/validation.csv"
test_path1 = "/content/drive/My Drive/HAR-Datasets/DAGHAR/standardized/RealWorld/RealWorld_upperarm/validation.csv"

val_dataset1, test_dataset1 = (
    HAR_Dataset(csv_path, sequence_length) for csv_path in (val_path1, test_path1)
)
val_loader1, test_loader1 = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (val_dataset1, test_dataset1)
)

# Validation & Test
val_acc1, test_acc1 = (
    evaluate(model, loader) for loader in (val_loader1, test_loader1)
)

print(f"Validation Accuracy: {val_acc1:.4f}")
print(f"Test Accuracy: {test_acc1:.4f}")


Validation Accuracy: 0.2143
Test Accuracy: 0.2143
