In [1]:
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from collections import Counter

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Sample data
with open('../geld labeled data/logs2ts_out4_combined.json', 'r') as f:
    logs = json.load(f)
slices = [item[0] for item in logs]
labels = [0 if item[1] == 'transient' else 1 for item in logs] # 0=transient, 1=non-transient
partition_idx = int(len(slices) * 0.80)
x_train = torch.tensor(np.array(slices[0:partition_idx]), dtype=torch.float32)
y_train = torch.tensor(np.array(labels[0:partition_idx]), dtype=torch.int)
x_test = torch.tensor(np.array(slices[partition_idx + 1:]), dtype=torch.float32)
y_test = torch.tensor(np.array(labels[partition_idx + 1:]), dtype=torch.int)

In [2]:
# Data stats

# train label distribution
unique, counts = np.unique(y_train, return_counts=True)
total = counts.sum()
print('train', unique, counts, [v / total for i, v in enumerate(counts)])

# test label distribution
unique, counts = np.unique(y_test, return_counts=True)
total = counts.sum()
print('test', unique, counts, [v / total for i, v in enumerate(counts)])

train [0 1] [5835 6132] [0.48759087490599146, 0.5124091250940085]
test [0 1] [1319 1672] [0.44098963557338683, 0.5590103644266132]


In [3]:
# Define the LSTM network
class LSTMNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMNet, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Initialize hidden state and cell state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])  # Get the last time step output
        out = self.sigmoid(out)
        
        return out

# Hyperparameters
input_size = 46       # Number of features
hidden_size = 75     # Number of hidden units
num_layers = 2       # Number of LSTM layers
num_classes = 1      # Number of output classes (for binary classification)
num_epochs = 25      # Number of epochs
batch_size = 256      # Batch size
learning_rate = 0.001  # Learning rate

# Create DataLoader
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the model, loss function, and optimizer
model = LSTMNet(input_size, hidden_size, num_layers, num_classes).to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    
    for i, (sequences, labels) in enumerate(train_loader):
        sequences = sequences.to(device)
        labels = labels.to(device).float()

        # Forward pass
        outputs = model(sequences)
        loss = criterion(outputs.squeeze(), labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')

print('Training finished.')

Epoch [1/25], Step [47/47], Loss: 0.2608
Epoch [2/25], Step [47/47], Loss: 0.0702
Epoch [3/25], Step [47/47], Loss: 0.0085
Epoch [4/25], Step [47/47], Loss: 0.0540
Epoch [5/25], Step [47/47], Loss: 0.0127
Epoch [6/25], Step [47/47], Loss: 0.0025
Epoch [7/25], Step [47/47], Loss: 0.0028
Epoch [8/25], Step [47/47], Loss: 0.0008
Epoch [9/25], Step [47/47], Loss: 0.0007
Epoch [10/25], Step [47/47], Loss: 0.0004
Epoch [11/25], Step [47/47], Loss: 0.0090
Epoch [12/25], Step [47/47], Loss: 0.0025
Epoch [13/25], Step [47/47], Loss: 0.0002
Epoch [14/25], Step [47/47], Loss: 0.0004
Epoch [15/25], Step [47/47], Loss: 0.0002
Epoch [16/25], Step [47/47], Loss: 0.0002
Epoch [17/25], Step [47/47], Loss: 0.0004
Epoch [18/25], Step [47/47], Loss: 0.0002
Epoch [19/25], Step [47/47], Loss: 0.0001
Epoch [20/25], Step [47/47], Loss: 0.0001
Epoch [21/25], Step [47/47], Loss: 0.0001
Epoch [22/25], Step [47/47], Loss: 0.0001
Epoch [23/25], Step [47/47], Loss: 0.0001
Epoch [24/25], Step [47/47], Loss: 0.0001
E

In [4]:
model.eval()  # Set the model to evaluation mode
correct = 0

# Create DataLoader
test_dataset = TensorDataset(x_test, y_test)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

# Testing phase
with torch.no_grad():
    
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)

        # Convert outputs to predicted class labels
        predicted = (outputs > 0.5).float()  # Assuming sigmoid threshold of 0.5 for binary classification
        predicted_labels = predicted.squeeze().cpu().numpy()
        true_labels = labels.squeeze().cpu().numpy()

        for i in range(len(predicted_labels)):
            if predicted_labels[i] == true_labels[i]:
                correct += 1


print('\nAccuracy: {}/{} ({:.2f}%)\n'.format(
    correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))



Accuracy: 2292/2991 (76.63%)

