<a href="https://colab.research.google.com/github/eshita1802/Continuous-Authentication/blob/main/NBERT%20final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install torch transformers datasets scikit-learn numpy pandas tqdm matplotlib

Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting 

In [3]:
from google.colab import drive
drive. mount('/content/drive')

Mounted at /content/drive


In [4]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Define a dataset class
class SensorDataset(Dataset):
    def __init__(self, file_paths):
        self.data = []
        self.labels = []

        for file_path in file_paths:
            df = pd.read_excel(file_path)

            # Extract numerical features (Ax, Ay, Az, etc.)
            features = df.iloc[:, 2:-1].values  # Exclude participant_id and last column (Normalized=True)

            # Extract participant labels
            labels = df['participant_id'].values - 1  # Adjust labels to 0-indexed

            # Convert data into frames of 10 timestamps
            for i in range(0, len(features) - 10 + 1, 10):  # Sliding window
                self.data.append(features[i:i+10])
                self.labels.append(labels[i+9])  # Use last timestamp's participant as label

        # Convert lists to NumPy arrays before converting to tensors
        self.data = torch.tensor(np.array(self.data), dtype=torch.float32)
        self.labels = torch.tensor(np.array(self.labels), dtype=torch.long)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# Automatically list all Excel files in the directory
folder_path = "/content/drive/MyDrive/DS"
file_paths = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".xlsx")]

# Create dataset and data loader
dataset = SensorDataset(file_paths)
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)


In [5]:
from torch.utils.data import random_split

# Define split sizes
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Split dataset into training and testing sets
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [6]:
import torch.nn as nn
from transformers import BertConfig

class NumericalBERT(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_heads, num_layers, num_classes, max_seq_length=10):
        super(NumericalBERT, self).__init__()

        self.config = BertConfig(hidden_size=hidden_dim, num_attention_heads=num_heads, num_hidden_layers=num_layers)

        # Numerical embedding layer
        self.embedding = nn.Linear(input_dim, hidden_dim)

        # Positional encoding
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_seq_length, hidden_dim))

        # Transformer Encoder
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=num_heads, dim_feedforward=hidden_dim * 4)
        self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)

        # Classification layer
        self.classifier = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        x = self.embedding(x)
        x = x + self.positional_encoding[:, :x.size(1), :]
        x = self.encoder(x)
        logits = self.classifier(x[:, -1, :])  # Use last timestamp's representation
        return logits


In [7]:
import torch.nn as nn
import torch.optim as optim

# Model parameters
input_dim = 12  # 12 sensor readings per timestamp
hidden_dim = 128  # Transformer hidden size
num_heads = 4  # Multi-head attention heads
num_layers = 3  # Number of Transformer layers
num_classes = 10  # Participants (1, 2, 3)

# Initialize model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = NumericalBERT(input_dim, hidden_dim, num_heads, num_layers, num_classes).to(device)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)





In [8]:
from tqdm import tqdm

def train_model(model, train_loader, criterion, optimizer, num_epochs= 9):
    model.train()  # Set model to training mode

    for epoch in range(num_epochs):
        total_loss = 0

        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=True)

        for batch in progress_bar:
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()  # Reset gradients
            outputs = model(inputs)  # Forward pass

            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            total_loss += loss.item()

            progress_bar.set_postfix(loss=total_loss / len(train_loader))

        print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

# Train the model
train_model(model, data_loader, criterion, optimizer, num_epochs=9)


Epoch 1/9: 100%|██████████| 63000/63000 [58:19<00:00, 18.00it/s, loss=1.12]


Epoch 1, Loss: 1.1180


Epoch 2/9: 100%|██████████| 63000/63000 [57:23<00:00, 18.29it/s, loss=0.732]


Epoch 2, Loss: 0.7320


Epoch 3/9: 100%|██████████| 63000/63000 [57:04<00:00, 18.40it/s, loss=0.624]


Epoch 3, Loss: 0.6243


Epoch 4/9: 100%|██████████| 63000/63000 [55:40<00:00, 18.86it/s, loss=0.566]


Epoch 4, Loss: 0.5664


Epoch 5/9: 100%|██████████| 63000/63000 [55:07<00:00, 19.05it/s, loss=0.53]


Epoch 5, Loss: 0.5296


Epoch 6/9: 100%|██████████| 63000/63000 [57:00<00:00, 18.42it/s, loss=0.503]


Epoch 6, Loss: 0.5027


Epoch 7/9: 100%|██████████| 63000/63000 [55:52<00:00, 18.79it/s, loss=0.483]


Epoch 7, Loss: 0.4828


Epoch 8/9: 100%|██████████| 63000/63000 [57:47<00:00, 18.17it/s, loss=0.466]


Epoch 8, Loss: 0.4662


Epoch 9/9: 100%|██████████| 63000/63000 [58:20<00:00, 18.00it/s, loss=0.453]

Epoch 9, Loss: 0.4526





In [None]:
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, f1_score, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

def evaluate_model(model, data_loader):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in data_loader:
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            predictions = torch.argmax(outputs, dim=1)

            all_preds.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Accuracy
    accuracy = accuracy_score(all_labels, all_preds)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # F1 Score (macro and per class)
    f1_macro = f1_score(all_labels, all_preds, average='macro')
    print(f"Macro F1 Score: {f1_macro:.4f}")
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, target_names=['Participant 1', 'Participant 2', 'Participant 3', 'Participant 4', 'Participant 5', 'Participant 6', 'Participant 7', 'Participant 8', 'Participant 9', 'Participant 10']))

    # Confusion Matrix
    cm = confusion_matrix(all_labels, all_preds)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'P9', 'P10'])

    fig, ax = plt.subplots(figsize=(6, 6))
    disp.plot(ax=ax, cmap="Blues", colorbar=False)
    plt.title("Confusion Matrix")
    plt.grid(False)
    plt.tight_layout()
    plt.show()


# Evaluate the model
evaluate_model(model, test_loader)
