In [51]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, recall_score, f1_score



In [52]:
# Read the log-sequence table from the repo's resources folder (path is relative to this notebook).
df = pd.read_csv(filepath_or_buffer='../resources/linux/log-structured/Linux.log_sequences.csv')

In [53]:
# Features: every column except the first and the last, selected by position.
# "Severity" is the prediction target and is looked up by name, so it is also
# dropped from the feature frame in case it falls inside the positional slice;
# leaving it there would leak the label into X. errors="ignore" makes the drop
# a no-op when the slice already excludes that column.
X = df.iloc[:, 1:-1].drop(columns=["Severity"], errors="ignore").values
y = df["Severity"].values
print(X.shape, y.shape)

(225, 113) (225,)


In [54]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [55]:
# Fit the scaler on the training split only, then apply those same statistics
# to both splits so no information from the test rows enters the fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [56]:
# Convert the train/test splits to float32 tensors. Labels are reshaped into a
# single column so their shape lines up with the model's one-output predictions.
# torch.as_tensor (instead of torch.tensor) hands back an existing float32
# tensor unchanged, so re-running this cell -- which overwrites its own inputs --
# does not trigger PyTorch's copy-construct UserWarning.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test = torch.as_tensor(X_test, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.float32).view(-1, 1)


In [57]:
class LogisticRegressionModel(nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        # One output unit: a single score per input row.
        self.linear = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        logits = self.linear(x)
        return logits.sigmoid()

# Seed PyTorch's global RNG before building the model so the randomly
# initialised linear weights -- and with them the printed losses and metrics --
# come out the same on every Restart & Run All.
torch.manual_seed(42)
model = LogisticRegressionModel(input_dim=X_train.shape[1])


In [58]:
# Binary cross-entropy averaged over the batch; it takes probabilities, which
# is what the model's sigmoid output provides.
criterion = nn.BCELoss(reduction="mean")
# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

In [59]:
num_epochs = 1000

for epoch in range(num_epochs):
    # Clear gradients accumulated by the previous iteration.
    optimizer.zero_grad()

    # Forward pass over the whole training set (no mini-batching).
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backpropagate and take one optimizer step.
    loss.backward()
    optimizer.step()

    # Report the loss only on every 100th epoch.
    if (epoch + 1) % 100 != 0:
        continue
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


Epoch [100/1000], Loss: 0.2024
Epoch [200/1000], Loss: 0.1543
Epoch [300/1000], Loss: 0.1322
Epoch [400/1000], Loss: 0.1183
Epoch [500/1000], Loss: 0.1083
Epoch [600/1000], Loss: 0.1005
Epoch [700/1000], Loss: 0.0943
Epoch [800/1000], Loss: 0.0890
Epoch [900/1000], Loss: 0.0845
Epoch [1000/1000], Loss: 0.0807


In [60]:
# Inference only: no autograd bookkeeping is needed while scoring the test split.
with torch.no_grad():
    y_pred_probs = model(X_test)  # sigmoid outputs, shape [N, 1]
    # Strictly above 0.5 becomes label 1.0, everything else 0.0.
    y_pred = y_pred_probs.gt(0.5).to(torch.float32)


In [61]:
# The sklearn metrics are given flat label vectors, so collapse the [N, 1] columns to 1D.
y_test_np = y_test.numpy().ravel()
y_pred_np = y_pred.numpy().ravel()

accuracy = accuracy_score(y_test_np, y_pred_np)
recall = recall_score(y_test_np, y_pred_np)
f1 = f1_score(y_test_np, y_pred_np)

# Accuracy and recall are shown as percentages; F1 stays on its native 0-1 scale.
print(
    f"Accuracy: {accuracy*100:.2f}%",
    f"Recall: {recall*100:.2f}%",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)


Accuracy: 100.00%
Recall: 100.00%
F1 Score: 1.00
