# Imports

In [1]:
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import importlib
import data_loading
import data_processing

importlib.reload(data_loading)
importlib.reload(data_processing)

from data_loading import load_dataset, create_input_space, augment_data
from data_processing import preprocess_signals, normalize_data

In [3]:
data = load_dataset(signal_names=['ecg', 'gsr'])
print(len(data))
print(len(data[0]['signals']['ecg']))

8600
2816


# Preprocessing

In [4]:
data_filtered = preprocess_signals(data, 512, 256)
X, y = create_input_space(data_filtered)

(8600, 1408, 2)
(8600,)


In [5]:
num_signals = X.shape[2]  # 5 signals

for signal_idx in range(num_signals):
    global_max = np.max(X[:, :, signal_idx])  # Global max for this signal across all samples
    global_min = np.min(X[:, :, signal_idx])  # Global min for this signal across all samples

    print(f"Signal {signal_idx}: Global Max = {global_max}, Global Min = {global_min}")

Signal 0: Global Max = 4718.812958166116, Global Min = -2272.420349595169
Signal 1: Global Max = 18.741088911129566, Global Min = 0.5993188470456602


In [6]:
X = normalize_data(X)

In [7]:
num_signals = X.shape[2]

for signal_idx in range(num_signals):
    global_max = np.max(X[:, :, signal_idx])  # Global max for this signal across all samples
    global_min = np.min(X[:, :, signal_idx])  # Global min for this signal across all samples

    print(f"Signal {signal_idx}: Global Max = {global_max}, Global Min = {global_min}")

Signal 0: Global Max = 1.0000000000000002, Global Min = 0.0
Signal 1: Global Max = 1.0000000000000284, Global Min = 0.0


In [8]:
augmented_X, augmented_y = augment_data(X, y)

(77400, 1152, 2)
(77400,)


In [9]:
X_train, X_test, y_train, y_test = train_test_split(augmented_X, augmented_y, test_size=0.05, random_state=42)

In [10]:
print(X_train.shape)
print(X_test.shape)

(73530, 1152, 2)
(3870, 1152, 2)


# Transformer

In [11]:
num_classes = len(np.unique(y_train))
print(num_classes)

5


In [12]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import KBinsDiscretizer

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

In [13]:
# Quantization
num_bins = 10  # Adjust as needed
kbin = KBinsDiscretizer(n_bins=num_bins, encode='ordinal', strategy='uniform')
kbin.fit(X_train.reshape(-1, 1))

X_train_quantized = kbin.transform(X_train.reshape(-1, 1)).reshape(X_train.shape[0], X_train.shape[1], -1)

# Dataset
class TrainingDataset(Dataset):
    def __init__(self, signals, labels):
        self.signals = signals
        self.labels = labels

    def __len__(self):
        return len(self.signals)

    def __getitem__(self, idx):
        return self.signals[idx], self.labels[idx]

# DataLoader
train_dataset = TrainingDataset(X_train_quantized, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [14]:
# Transformer Model (adapted for your task)
class Transformer(nn.Module):
    def __init__(self, num_tokens, d_model, nhead, num_classes, num_layers=2):
        super(Transformer, self).__init__()
        self.embedding = nn.Embedding(num_tokens, d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, src):
        src = self.embedding(src.long()).mean(dim=2)  # Embed and average the two signals
        src = src.permute(1, 0, 2)  # Reshape for transformer: [seq_len, batch_size, d_model]
        out = self.transformer_encoder(src)
        out = out.mean(dim=0)  # Pooling
        return self.classifier(out)

# Initialize Model
model = Transformer(num_tokens=num_bins, d_model=64, nhead=4, num_classes=num_classes)



Using device: cuda:0


In [17]:
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
model.to(device)

def train_model(model, criterion, optimizer, dataloader, num_epochs=25):
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(dataloader, 0):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 100 == 99:  # Print every 100 mini-batches
                print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}')
                running_loss = 0.0

train_model(model, criterion, optimizer, train_dataloader, num_epochs=25)

[1, 100] loss: 0.969
[1, 200] loss: 0.897
[1, 300] loss: 0.919
[1, 400] loss: 0.857
[1, 500] loss: 0.892
[1, 600] loss: 0.876
[1, 700] loss: 0.880
[1, 800] loss: 0.865
[1, 900] loss: 0.882
[1, 1000] loss: 0.864
[1, 1100] loss: 0.870
[1, 1200] loss: 0.880
[1, 1300] loss: 0.877
[1, 1400] loss: 0.900
[1, 1500] loss: 0.899
[1, 1600] loss: 0.882
[1, 1700] loss: 0.860
[1, 1800] loss: 0.874
[1, 1900] loss: 0.861
[1, 2000] loss: 0.871
[1, 2100] loss: 0.895
[1, 2200] loss: 0.872
[2, 100] loss: 0.893
[2, 200] loss: 0.886
[2, 300] loss: 0.857
[2, 400] loss: 0.870
[2, 500] loss: 0.875
[2, 600] loss: 0.873
[2, 700] loss: 0.852
[2, 800] loss: 0.883
[2, 900] loss: 0.877
[2, 1000] loss: 0.895
[2, 1100] loss: 0.882
[2, 1200] loss: 0.875
[2, 1300] loss: 0.889
[2, 1400] loss: 0.880
[2, 1500] loss: 0.862
[2, 1600] loss: 0.882
[2, 1700] loss: 0.858
[2, 1800] loss: 0.871
[2, 1900] loss: 0.866
[2, 2000] loss: 0.858
[2, 2100] loss: 0.874
[2, 2200] loss: 0.864
[3, 100] loss: 0.884
[3, 200] loss: 0.862
[3, 300]