In [4]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

In [6]:
train_data = np.load('ae_encoded_train_with_labels.npy')
test_data = np.load('ae_encoded_test_with_labels.npy')

X_train = torch.tensor(train_data[:, :-1], dtype=torch.float32)
y_train = torch.tensor(train_data[:, -1], dtype=torch.long)

X_test = torch.tensor(test_data[:, :-1], dtype=torch.float32)
y_test = torch.tensor(test_data[:, -1], dtype=torch.long)



In [7]:
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

batch_size = 8192
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

## Various Classifier Architecture Tested

In [12]:
import torch.nn as nn
import torch.nn.functional as F

class CNN1DClassifier(nn.Module):
    def __init__(self, num_features, num_classes):
        super(CNN1DClassifier, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * num_features, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = x.unsqueeze(1)  # Add a channel dimension
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = x.view(x.size(0), -1)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [93]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class attention_mlp(nn.Module):
    def __init__(self, num_features, num_classes, embedding_dim=128, num_heads=4):
        super(attention_mlp, self).__init__()
        self.fc1 = nn.Linear(num_features, embedding_dim)
        self.num_heads = num_heads
        self.embedding_dim = embedding_dim 
        self.attention = nn.MultiheadAttention(embed_dim=self.embedding_dim, num_heads=self.num_heads)
        
        self.fc2 = nn.Linear(embedding_dim, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = x.view(-1, x.size(0), self.embedding_dim)

        attn_output, _ = self.attention(x, x, x)
        x = attn_output.mean(dim=0)

        # Apply second MLP layer
        x = self.dropout(x)
        x = self.fc2(x)

        return x


In [97]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class attention_mlp(nn.Module):
    def __init__(self, num_features, num_classes, embedding_dim=128, num_heads=4):
        super(attention_mlp, self).__init__()
        
        self.fc1 = nn.Linear(num_features, embedding_dim)
        
        self.num_heads = num_heads
        self.embedding_dim = embedding_dim
        self.attention = nn.MultiheadAttention(embed_dim=self.embedding_dim, num_heads=self.num_heads)
        
        self.fc2 = nn.Linear(embedding_dim, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = x.view(-1, x.size(0), self.embedding_dim)
        attn_output, _ = self.attention(x, x, x)
        x = attn_output.mean(dim=0)

        x = self.dropout(x)
        x = self.fc2(x)
        x = torch.sigmoid(x)

        return x


In [17]:
class CNN1DClassifier(nn.Module):
    def __init__(self, num_features, num_classes):
        super(CNN1DClassifier, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, padding=1),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2))
        
        self.layer2 = nn.Sequential(
            nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2))
        
        self.layer3 = nn.Sequential(
            nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2))
        
        self.fc1 = nn.Linear(64 * (num_features // 8), 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [24]:
class MLPMixerLayer(nn.Module):
    def __init__(self, num_patches, hidden_dim, tokens_mlp_dim, channels_mlp_dim):
        super().__init__()
        self.norm1 = nn.LayerNorm(hidden_dim)
        self.token_mixing = nn.Linear(num_patches, tokens_mlp_dim)
        self.token_out = nn.Linear(tokens_mlp_dim, num_patches)
        self.norm2 = nn.LayerNorm(hidden_dim)
        self.channel_mixing = nn.Linear(hidden_dim, channels_mlp_dim)
        self.channel_out = nn.Linear(channels_mlp_dim, hidden_dim)

    def forward(self, x):
        y = self.norm1(x).transpose(1, 2)
        y = F.relu(self.token_mixing(y))
        y = self.token_out(y).transpose(1, 2) 
        x = x + y

        y = self.norm2(x)
        y = F.relu(self.channel_mixing(y))
        y = self.channel_out(y)
        x = x + y

        return x

class VectorMLPMixer(nn.Module):
    def __init__(self, num_patches, num_classes, num_layers, hidden_dim, tokens_mlp_dim, channels_mlp_dim):
        super().__init__()

        self.patch_embedding = nn.Linear(num_patches, hidden_dim)
        self.mixer_layers = nn.ModuleList([])
        for _ in range(num_layers):
            layer = MLPMixerLayer(num_patches, hidden_dim, tokens_mlp_dim, channels_mlp_dim)
            self.mixer_layers.append(layer)

        self.layer_norm = nn.LayerNorm(hidden_dim)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        x = self.patch_embedding(x) 
        x = x.unsqueeze(2)
        for layer in self.mixer_layers:
            x = layer(x)

        x = x.squeeze(2) 
        x = self.layer_norm(x)
        x = x.mean(dim=1)
        x = self.fc(x)
        return x



model = VectorMLPMixer(
    num_patches=128, 
    num_classes=2,    
    num_layers=8,    
    hidden_dim=128,
    tokens_mlp_dim=256, 
    channels_mlp_dim=2048
)



### Final choice: an MLP network with minimal parameter size that still does good on the test set

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class MLPClassifier(nn.Module):
    def __init__(self, num_features, num_classes):
        super(MLPClassifier, self).__init__()
        self.fc1 = nn.Linear(num_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)

        x = torch.sigmoid(x)
        return x

In [8]:
device = 'cpu'
print(device)
num_classes = 2
num_features = X_train.shape[1]

model = MLPClassifier(num_features, num_classes).to(device)



cpu


In [9]:
pytorch_total_params = sum(p.numel() for p in model.parameters())

In [10]:
pytorch_total_params

24898

In [111]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)


In [112]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    train_loader_with_progress = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Training]', unit='batch')
    
    for inputs, labels in train_loader_with_progress:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)


        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_loader_with_progress.set_postfix(loss=loss.item())

    avg_train_loss = train_loss / len(train_loader)

    # Evaluation phase
    model.eval()
    total_correct = 0
    total_samples = 0
    test_loader_with_progress = tqdm(test_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Testing]', unit='batch')
    
    with torch.no_grad():
        for inputs, labels in test_loader_with_progress:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

    test_accuracy = total_correct / total_samples
    test_loader_with_progress.set_postfix(accuracy=f'{test_accuracy:.2f}')

    print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

Epoch 1/10 [Training]: 100%|██████████| 157/157 [00:07<00:00, 21.77batch/s, loss=0.5]  
Epoch 1/10 [Testing]: 100%|██████████| 40/40 [00:01<00:00, 29.55batch/s]


Epoch 1, Train Loss: 0.5433, Test Accuracy: 0.8162


Epoch 2/10 [Training]: 100%|██████████| 157/157 [00:07<00:00, 21.78batch/s, loss=0.487]
Epoch 2/10 [Testing]: 100%|██████████| 40/40 [00:01<00:00, 30.43batch/s]


Epoch 2, Train Loss: 0.4900, Test Accuracy: 0.8167


Epoch 3/10 [Training]: 100%|██████████| 157/157 [00:07<00:00, 21.87batch/s, loss=0.485]
Epoch 3/10 [Testing]: 100%|██████████| 40/40 [00:01<00:00, 31.43batch/s]


Epoch 3, Train Loss: 0.4889, Test Accuracy: 0.8164


Epoch 4/10 [Training]: 100%|██████████| 157/157 [00:07<00:00, 21.66batch/s, loss=0.489]
Epoch 4/10 [Testing]: 100%|██████████| 40/40 [00:01<00:00, 30.41batch/s]


Epoch 4, Train Loss: 0.4880, Test Accuracy: 0.8169


Epoch 5/10 [Training]: 100%|██████████| 157/157 [00:07<00:00, 21.51batch/s, loss=0.482]
Epoch 5/10 [Testing]: 100%|██████████| 40/40 [00:01<00:00, 30.38batch/s]


Epoch 5, Train Loss: 0.4872, Test Accuracy: 0.8147


Epoch 6/10 [Training]: 100%|██████████| 157/157 [00:07<00:00, 21.93batch/s, loss=0.498]
Epoch 6/10 [Testing]: 100%|██████████| 40/40 [00:01<00:00, 29.81batch/s]


Epoch 6, Train Loss: 0.4866, Test Accuracy: 0.8172


Epoch 7/10 [Training]: 100%|██████████| 157/157 [00:07<00:00, 21.85batch/s, loss=0.494]
Epoch 7/10 [Testing]: 100%|██████████| 40/40 [00:01<00:00, 30.25batch/s]


Epoch 7, Train Loss: 0.4863, Test Accuracy: 0.8150


Epoch 8/10 [Training]: 100%|██████████| 157/157 [00:07<00:00, 21.57batch/s, loss=0.498]
Epoch 8/10 [Testing]: 100%|██████████| 40/40 [00:01<00:00, 28.86batch/s]


Epoch 8, Train Loss: 0.4862, Test Accuracy: 0.8173


Epoch 9/10 [Training]: 100%|██████████| 157/157 [00:07<00:00, 21.84batch/s, loss=0.494]
Epoch 9/10 [Testing]: 100%|██████████| 40/40 [00:01<00:00, 30.32batch/s]


Epoch 9, Train Loss: 0.4859, Test Accuracy: 0.8167


Epoch 10/10 [Training]: 100%|██████████| 157/157 [00:07<00:00, 21.77batch/s, loss=0.477]
Epoch 10/10 [Testing]: 100%|██████████| 40/40 [00:01<00:00, 30.41batch/s]

Epoch 10, Train Loss: 0.4858, Test Accuracy: 0.8173





## Model Testing

In [108]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the test dataset: {100 * correct / total}%')


Accuracy of the model on the test dataset: 81.72125%
