In [1]:
import os, os.path
#os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
import dataloader
import numpy as np
import matplotlib
matplotlib.use('AGG')
import matplotlib.pyplot as plt

In [2]:
log_path = 'logs/HDFS/structured/HDFS.log_structured.csv'
label_path = 'logs/HDFS/anomaly_label.csv'
template_path = 'logs/HDFS/structured/HDFS.log_templates.csv'

x_train, y_train, x_test, y_test = dataloader.load_HDFS(
    log_file = log_path, 
    label_file = label_path, 
    template_file = template_path,
    train_ratio=0.7,
    save_csv=False)


num_val = x_train.shape[0] // 10
num_train = x_train.shape[0] - num_val

x_val = x_train[:num_val]
y_val = y_train[:num_val]
x_train = x_train[num_val:]
y_train = y_train[num_val:]

num_test = x_test.shape[0]
num_total = num_train + num_val + num_test 

num_train_pos = sum(y_train)
num_val_pos = sum(y_val)
num_test_pos = sum(y_val)
num_pos = num_train_pos + num_val_pos + num_test_pos

print('Total: {} instances, {} anomaly, {} normal' \
      .format(num_total, num_pos, num_total - num_pos))
print('Train: {} instances, {} anomaly, {} normal' \
      .format(num_train, num_train_pos, num_train - num_train_pos))
print('Validation: {} instances, {} anomaly, {} normal' \
      .format(num_val, num_val_pos, num_val - num_val_pos))
print('Test: {} instances, {} anomaly, {} normal\n' \
      .format(num_test, num_test_pos, num_test - num_test_pos))

Total: 575061 instances, 14468 anomaly, 560593 normal
Train: 362288 instances, 11762 anomaly, 350526 normal
Validation: 40254 instances, 1353 anomaly, 38901 normal
Test: 172519 instances, 1353 anomaly, 171166 normal



In [3]:
batch_size= 128
lr= 0.001
num_epochs= 20
max_length = x_train.shape[1]

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split
import torch.nn.functional as F

from models import Transformer

In [5]:
x_train_tensor = torch.Tensor(x_train[:2000])
y_train_tensor = torch.Tensor(y_train[:2000]).to(torch.int64)
y_train_tensor = F.one_hot(y_train_tensor, num_classes = 2)

train_dataset = TensorDataset(x_train_tensor,y_train_tensor.to(torch.float))
train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True) 

x_val_tensor = torch.Tensor(x_val[:200])
y_val_tensor = torch.Tensor(y_val[:200]).to(torch.int64)
y_val_tensor = F.one_hot(y_val_tensor, num_classes = 2)

val_dataset = TensorDataset(x_val_tensor,y_val_tensor.to(torch.float))
val_dataloader = DataLoader(val_dataset, batch_size = batch_size, shuffle = True)

In [6]:
model = nn.Sequential(
    Transformer(
        in_dim= 1,
        embed_dim= 64, 
        depth= 6,
        heads= 8,
        dim_head= 64,
        dim_ratio= 2,
        dropout= 0.1
    ),
    nn.Linear(max_length*64, 100),
    nn.ReLU(),
    nn.Linear(100, 2),
    nn.Softmax()
)

model = nn.DataParallel(model) # multi-GPU

# if torch.cuda.is_available():
#     model.cuda()

device = torch.device("mps")
# Loss and optimizer
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)

# Train the model
loss_min = 99999
model_name = 'best_model.pth'
model_path = "saved_models"

save_path = os.path.join(model_path,model_name)
best_model = model
train_loss_list = []
val_loss_list = []

In [7]:
print("Begin training ......")
for epoch in range(1, num_epochs+1):  # Loop over the dataset multiple times
    train_loss = 0
    val_loss = 0

    # Training
    for step, (seq, label) in enumerate(train_dataloader):
        seq = seq.clone().detach().view(-1, max_length, 1).to(device)
        output = model(seq)
        loss = criterion(output, label.to(device))
        optimizer.zero_grad()
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    
    ave_trainloss = train_loss / len(train_dataloader)
    train_loss_list.append(ave_trainloss)

    # Vaildating
    with torch.no_grad():    
        for step, (seq, label) in enumerate(val_dataloader):
            seq = seq.clone().detach().view(-1, max_length, 1).to(device)
            output = model(seq)
            loss = criterion(output, label.to(device))
            val_loss += loss.item()
    
    ave_valoss = val_loss / len(val_dataloader)
    val_loss_list.append(ave_valoss)

    if ave_valoss < loss_min:
        loss_min = ave_valoss
        torch.save(model.state_dict(), save_path)
        best_model = model
        print("Model saved")

    print('Epoch [{}/{}], train_loss: {:.14f} val loss: {:.14f}'.format(epoch, num_epochs, ave_trainloss, ave_valoss))

print(f"Finished training, model saved in: {save_path} ")


Begin training ......


  input = module(input)


Model saved
Epoch [1/20], train_loss: 0.65725016593933 val loss: 0.70012092590332
Model saved
Epoch [2/20], train_loss: 0.41565077565610 val loss: 0.65808597207069
Model saved
Epoch [3/20], train_loss: 0.37034244649112 val loss: 0.63276532292366
Model saved
Epoch [4/20], train_loss: 0.36295234039426 val loss: 0.62393811345100
Model saved
Epoch [5/20], train_loss: 0.36322812922299 val loss: 0.61715680360794
Epoch [6/20], train_loss: 0.68629427440464 val loss: 0.70092982053757
Epoch [7/20], train_loss: 0.69581278041005 val loss: 0.69004508852959
Epoch [8/20], train_loss: 0.68510353937745 val loss: 0.67947977781296
Epoch [9/20], train_loss: 0.67494423314929 val loss: 0.66956299543381
Epoch [10/20], train_loss: 0.66556967422366 val loss: 0.66093289852142
Epoch [11/20], train_loss: 0.65664117038250 val loss: 0.65176650881767
Epoch [12/20], train_loss: 0.64816820621490 val loss: 0.64310741424561
Epoch [13/20], train_loss: 0.64012734964490 val loss: 0.63619416952133
Epoch [14/20], train_loss:

In [8]:
xx = range(num_epochs)
plt.plot(xx, train_loss_list, label = "Train")
plt.plot(xx, val_loss_list, label = "Val")
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
plt.savefig("loss.png")

  plt.show()
