In [1]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import TensorDataset, DataLoader
import argparse
import os
from tqdm import tqdm

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
def generate(name):
    num_sessions = 0
    inputs = []
    outputs = []
    with open(name, 'r') as f:
        for line in tqdm(f,"loading data"):
            num_sessions += 1
            line = tuple(map(lambda n: n - 1, map(int, line.strip().split())))
            for i in range(len(line) - window_size):
                inputs.append(line[i:i + window_size])
                outputs.append(line[i + window_size])
    print('Number of sessions({}): {}'.format(name, num_sessions))
    print('Number of seqs({}): {}'.format(name, len(inputs)))
    dataset = TensorDataset(torch.tensor(inputs, dtype=torch.float), torch.tensor(outputs))
    return dataset


class Model(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_keys):
        super(Model, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_keys)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [7]:
# Hyperparameters
num_classes = 28
num_epochs = 300
batch_size = 2048
input_size = 1
model_dir = 'model'
log = 'Adam_batch_size={}_epoch={}'.format(str(batch_size), str(num_epochs))
num_layers = 2
hidden_size = 64
window_size = 10
file_dir = 'data/'

In [4]:
model = Model(input_size, hidden_size, num_layers, num_classes).to(device)
seq_dataset = generate(file_dir+'hdfs_train')
dataloader = DataLoader(seq_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)

loading data: 4855it [00:00, 8472.72it/s] 


Number of sessions(data/hdfs_train): 4855
Number of seqs(data/hdfs_train): 46575


In [5]:
writer = SummaryWriter(log_dir='log/' + log)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
# Train the model
total_step = len(dataloader)
current_epoch = 0


In [7]:
num_epochs = 100

In [12]:
device

device(type='cuda')

In [13]:
start_time = time.time()

for epoch in range(current_epoch,current_epoch+num_epochs):  # Loop over the dataset multiple times
    train_loss = 0
    for step, (seq, label) in enumerate(dataloader):
        # Forward pass
        seq = seq.clone().detach().view(-1, window_size, input_size).to(device)
        output = model(seq)
        loss = criterion(output, label.to(device))

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        writer.add_graph(model, seq)
    print('Epoch [{}/{}], train_loss: {:.4f}'.format(epoch + 1, num_epochs, train_loss / total_step))
    writer.add_scalar('train_loss', train_loss / total_step, epoch + 1)
elapsed_time = time.time() - start_time
print('elapsed_time: {:.3f}s'.format(elapsed_time))

Epoch [1/100], train_loss: 0.2306
Epoch [2/100], train_loss: 0.2307
Epoch [3/100], train_loss: 0.2303


RuntimeError: [enforce fail at ..\c10\core\CPUAllocator.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 524288 bytes. Buy new RAM!


In [17]:
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)
torch.save(model.state_dict(), model_dir + '/' + log + '.pt')
writer.close()
print('Finished Training')

Finished Training


In [4]:
def generate_test_data(name):
    hdfs = set()
    # hdfs = []
    with open('data/small_dataset/' + name, 'r') as f:
        for ln in f.readlines():
            ln = list(map(lambda n: n - 1, map(int, ln.strip().split())))
            ln = ln + [-1] * (window_size + 1 - len(ln))
            hdfs.add(tuple(ln))
            # hdfs.append(tuple(ln))
    print('Number of sessions({}): {}'.format(name, len(hdfs)))
    return hdfs

In [8]:
model = Model(input_size, hidden_size, num_layers, num_classes)
model.load_state_dict(torch.load(model_dir + '/' + log + '.pt'))
model.to(device)

Model(
  (lstm): LSTM(1, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=28, bias=True)
)

In [9]:
model.eval()
# print('model_path: {}'.format(model_path))
test_normal_loader = generate_test_data('hdfs_test_normal')
# test_abnormal_loader = generate_test_data('hdfs_test_abnormal')

Number of sessions(hdfs_test_normal): 296


In [11]:
pattern = torch.FloatTensor(list(test_normal_loader)[0])
pattern = pattern.view(1,-1)

In [26]:
for window_size in range(1,11):
    test_data = pattern[:,:window_size]
    start = test_data.clone().detach().view(-1, window_size, input_size).to(device)
    output = model(start).cpu()
    predicted = torch.argsort(output, 1)[:,-5:]
    print('window size:'+ str(window_size))
    print('seq: '+str(test_data.numpy()),end=' ')
    print('expected num: '+str(pattern.numpy()[0][window_size]))
    print('prediction:' + str(predicted.numpy()))
    if pattern[0][window_size] in predicted[0]:
        print('correct:'+str(5-list(predicted[0].numpy()).index(pattern[0][window_size])))
    else:
        print('wrong')
    print()

window size:1
seq: [[21.]] expected num: 4.0
prediction:[[17  4 10 24 25]]
correct:4

window size:2
seq: [[21.  4.]] expected num: 4.0
prediction:[[10 25  5  8  4]]
correct:1

window size:3
seq: [[21.  4.  4.]] expected num: 4.0
prediction:[[20 10  5  3 25]]
wrong

window size:4
seq: [[21.  4.  4.  4.]] expected num: 25.0
prediction:[[10 20  2  3 25]]
correct:1

window size:5
seq: [[21.  4.  4.  4. 25.]] expected num: 10.0
prediction:[[24  1 20 25 22]]
wrong

window size:6
seq: [[21.  4.  4.  4. 25. 10.]] expected num: 8.0
prediction:[[ 3 20 24  4  8]]
correct:1

window size:7
seq: [[21.  4.  4.  4. 25. 10.  8.]] expected num: 10.0
prediction:[[ 3 20 25 10  8]]
correct:2

window size:8
seq: [[21.  4.  4.  4. 25. 10.  8. 10.]] expected num: 8.0
prediction:[[ 3 20 10 25  8]]
correct:1

window size:9
seq: [[21.  4.  4.  4. 25. 10.  8. 10.  8.]] expected num: 25.0
prediction:[[ 1  3  8 10 25]]
correct:1

window size:10
seq: [[21.  4.  4.  4. 25. 10.  8. 10.  8. 25.]] expected num: 25.0
pre

In [22]:
TP = 0
FP = 0
num_candidates = 5
# Test the model
start_time = time.time()
with torch.no_grad():
    for line in test_normal_loader:
        for i in range(len(line) - window_size):
            seq = line[i:i + window_size]
            label = line[i + window_size]
            seq = torch.tensor(seq, dtype=torch.float).view(-1, window_size, input_size).to(device)
            label = torch.tensor(label).view(-1).to(device)
            output = model(seq)
            predicted = torch.argsort(output, 1)[0][-num_candidates:]
            if label not in predicted:
                FP += 1
                break
with torch.no_grad():
    for line in test_abnormal_loader:
        for i in range(len(line) - window_size):
            seq = line[i:i + window_size]
            label = line[i + window_size]
            seq = torch.tensor(seq, dtype=torch.float).view(-1, window_size, input_size).to(device)
            label = torch.tensor(label).view(-1).to(device)
            output = model(seq)
            predicted = torch.argsort(output, 1)[0][-num_candidates:]
            if label not in predicted:
                TP += 1
                break
elapsed_time = time.time() - start_time
print('elapsed_time: {:.3f}s'.format(elapsed_time))
# Compute precision, recall and F1-measure
FN = len(test_abnormal_loader) - TP
P = 100 * TP / (TP + FP)
R = 100 * TP / (TP + FN)
F1 = 2 * P * R / (P + R)
print('false positive (FP): {}, false negative (FN): {}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(FP, FN, P, R, F1))
print('Finished Predicting')

elapsed_time: 4.225s
false positive (FP): 4, false negative (FN): 58, Precision: 88.571%, Recall: 34.831%, F1-measure: 50.000%
Finished Predicting
