In [26]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from glob import glob
import torch
import torch.nn as nn
import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.functional as F


In [31]:
from sklearn.metrics import classification_report

In [2]:
class CustomCSVDataset(Dataset):
    def __init__(self, root_dir):
        """
        Args:
            root_dir (string): Directory with all the data subdirectories.
        """
        self.data = []
        self.labels = []
        for label, folder_name in enumerate(['0', '1']):
            folder_path = os.path.join(root_dir, folder_name)
            for file_name in os.listdir(folder_path):
                if file_name.endswith('.csv'):
                    file_path = os.path.join(folder_path, file_name)
                    csv_data = pd.read_csv(file_path)
                    csv_data = csv_data.drop('label', axis=1)
                    csv_data = csv_data.values
                    self.data.append(torch.tensor(csv_data, dtype=torch.float32))
                    self.labels.append(label)
    def __len__(self):
        return len(self.data)
 
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [3]:
class SimpleClassifier(nn.Module):
    def __init__(self, input_size, hidden_size=64):
        super(SimpleClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 2)  # Assuming binary classification (2 classes)
 
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [21]:

class TransformerClassifier(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_heads=2, num_layers=1):
        super(TransformerClassifier, self).__init__()
        
        self.fc1 = nn.Linear(input_size, hidden_size)
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=num_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.fc2 = nn.Linear(hidden_size, 2)  # Binary classification

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = x.unsqueeze(0)  # Add batch dimension for transformer input
        x = self.transformer_encoder(x)
        x = x.squeeze(0)  # Remove batch dimension
        x = self.fc2(x)
        return x

In [4]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            inputs, labels = inputs.view(inputs.size(0), -1), labels  # Flatten inputs if necessary
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
 

In [5]:
def evaluate_model(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.view(inputs.size(0), -1), labels
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy: {100 * correct / total:.2f}%')

In [32]:
def evaluate_model(model, test_loader):
    model.eval()
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.view(inputs.size(0), -1), labels
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            all_labels.extend(labels.numpy())
            all_predictions.extend(predicted.numpy())
    # Generate and print the classification report
    print(classification_report(all_labels, all_predictions, target_names=['Class 0', 'Class 1']))

In [14]:
root_dir = 'dataset_postprocessing'  # Update with your data path
input_size = 64  # Replace with the actual number of features in your data
dataset = CustomCSVDataset(root_dir)
train_loader = DataLoader(dataset, batch_size=2, shuffle=True)
test_loader = DataLoader(dataset, batch_size=2, shuffle=False)

In [40]:
model = SimpleClassifier(input_size=input_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [41]:
model_transformer = TransformerClassifier(input_size=input_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_transformer.parameters(), lr=0.001)

In [42]:
train_model(model, train_loader, criterion, optimizer, num_epochs=20)

Epoch [1/20], Loss: 0.6578
Epoch [2/20], Loss: 0.2210
Epoch [3/20], Loss: 0.7951
Epoch [4/20], Loss: 0.2753
Epoch [5/20], Loss: 1.0259
Epoch [6/20], Loss: 5.7062
Epoch [7/20], Loss: 0.7935
Epoch [8/20], Loss: 5.9031
Epoch [9/20], Loss: 3.5541
Epoch [10/20], Loss: 0.0002
Epoch [11/20], Loss: 0.7089
Epoch [12/20], Loss: 5.6380
Epoch [13/20], Loss: 0.7832
Epoch [14/20], Loss: 1.7740
Epoch [15/20], Loss: 3.2357
Epoch [16/20], Loss: 0.1044
Epoch [17/20], Loss: 0.0249
Epoch [18/20], Loss: 1.0187
Epoch [19/20], Loss: 5.0686
Epoch [20/20], Loss: 0.0320


In [43]:
train_model(model_transformer, train_loader, criterion, optimizer, num_epochs=20)

Epoch [1/20], Loss: 0.4593
Epoch [2/20], Loss: 0.0966
Epoch [3/20], Loss: 0.0179
Epoch [4/20], Loss: 0.1933
Epoch [5/20], Loss: 0.0157
Epoch [6/20], Loss: 0.0153
Epoch [7/20], Loss: 0.0079
Epoch [8/20], Loss: 0.0069
Epoch [9/20], Loss: 0.2230
Epoch [10/20], Loss: 0.1131
Epoch [11/20], Loss: 0.0109
Epoch [12/20], Loss: 0.0071
Epoch [13/20], Loss: 0.1959
Epoch [14/20], Loss: 0.0101
Epoch [15/20], Loss: 0.0070
Epoch [16/20], Loss: 0.0076
Epoch [17/20], Loss: 0.3940
Epoch [18/20], Loss: 0.1287
Epoch [19/20], Loss: 0.0060
Epoch [20/20], Loss: 0.3424


In [44]:
evaluate_model(model_transformer, train_loader)

              precision    recall  f1-score   support

     Class 0       1.00      0.95      0.97        37
     Class 1       0.88      1.00      0.94        15

    accuracy                           0.96        52
   macro avg       0.94      0.97      0.95        52
weighted avg       0.97      0.96      0.96        52



In [33]:
evaluate_model(model, test_loader)

              precision    recall  f1-score   support

     Class 0       0.93      1.00      0.96        37
     Class 1       1.00      0.80      0.89        15

    accuracy                           0.94        52
   macro avg       0.96      0.90      0.92        52
weighted avg       0.95      0.94      0.94        52

