In [6]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import TensorDataset, DataLoader
import argparse
import os
from tqdm import tqdm

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 训练阶段

In [2]:
from utils.data_utils import *
import unsupervised.model as unsupervised_model

In [7]:
class Model(nn.Module):
    """Stacked LSTM followed by a linear layer applied at every time step.

    Args:
        input_size: number of features per time step.
        hidden_size: width of each LSTM layer.
        num_layers: number of stacked LSTM layers.
        num_keys: size of the output layer (one logit per key/class).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_keys):
        super(Model, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_keys)

    def forward(self, x):
        """Return logits of shape (batch, seq_len, num_keys).

        Args:
            x: tensor of shape (batch, seq_len, input_size) (batch_first=True).
        """
        # nn.LSTM starts from all-zero hidden/cell states when none are given,
        # allocated on the input's device and dtype. Relying on that removes
        # the dependency on a notebook-level `device` global.
        out, _ = self.lstm(x)
        # The linear layer is applied to every time step, not only the last.
        return self.fc(out)
def train(model,dataloader,current_epoch=0,num_epochs=10):
    """Train `model` for `num_epochs` epochs on `dataloader`.

    Every position of a window is trained to predict the key that follows it:
    the target is the input window shifted left by one step with the batch's
    `label` appended as the target of the last position.

    Reads these notebook globals: `window_size`, `input_size`, `device`,
    `criterion`, `optimizer` and `writer`.

    Args:
        model: network mapping (batch, window_size, input_size) to
            (batch, window_size, num_classes) logits.
        dataloader: yields `(seq, label)` batches.
        current_epoch: epoch number to resume counting from (only affects the
            printed / logged epoch index).
        num_epochs: number of epochs to run.

    Returns:
        None. Prints the mean loss per epoch and logs it to `writer` under
        the tag 'train_loss'.
    """
    start_time = time.time()
    last_epoch = current_epoch + num_epochs
    # Average over the loader actually passed in (not a global step count);
    # max(..., 1) keeps an empty loader from dividing by zero.
    steps_per_epoch = max(len(dataloader), 1)
    graph_logged = False
    for epoch in range(current_epoch, last_epoch):  # Loop over the dataset multiple times
        train_loss = 0
        for step, (seq, label) in enumerate(dataloader):
            # Forward pass
            seq = seq.clone().detach().view(-1, window_size, input_size).to(device)
            # Build the targets on the same device as the logits instead of
            # bouncing them through the CPU.
            shifted = seq[:, 1:, :].long()
            final = label.view(-1, 1, 1).long().to(device)
            target = torch.cat([shifted, final], 1).view(-1, window_size).reshape(-1)
            output = model(seq)
            output = output.reshape(output.size(0) * output.size(1), -1)
            loss = criterion(output, target)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

            # Tracing the model is expensive and the graph never changes, so
            # it is logged once rather than on every batch.
            if not graph_logged:
                writer.add_graph(model, seq)
                graph_logged = True
        print('Epoch [{}/{}], train_loss: {:.4f}'.format(epoch + 1, last_epoch, train_loss / steps_per_epoch))
        writer.add_scalar('train_loss', train_loss / steps_per_epoch, epoch + 1)
    elapsed_time = time.time() - start_time
    print('elapsed_time: {:.3f}s'.format(elapsed_time))

In [8]:
# Hyperparameters
num_classes = 31   # width of the model's output layer (passed as num_keys)
num_epochs = 300   # only used in the run name below; train() is called with its own num_epochs
batch_size = 2048
input_size = 1
num_layers = 2
hidden_size = 64
window_size = 10

# Run name: used for both the checkpoint file name and the TensorBoard log dir.
model_dir = 'model'
log = 'dev_v1.2_batch_size={}_epoch={}'.format(batch_size, num_epochs)

# Dataset directory, built with the platform's separator. The trailing ''
# keeps a trailing separator because callers do `file_dir + 'train.csv'`.
file_dir = os.path.join('.', 'data', 'lstm', 'dataset_official', '')

In [9]:
# Build the model and resume from the checkpoint of this run if one exists.
model = Model(input_size, hidden_size, num_layers, num_classes).to(device)
checkpoint_path = model_dir + '/' + log + '.pt'
if os.path.exists(checkpoint_path):
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    print("成功加载模型"+checkpoint_path)
else:
    print("重新训练")

成功加载模型model/dev_v1.2_batch_size=2048_epoch=300.pt


In [14]:
# Training data: shuffled mini-batches built from train.csv.
train_dataset = generate_train_data('{}train.csv'.format(file_dir))
dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    pin_memory=True,
)
# Number of batches per epoch.
total_step = len(dataloader)

# TensorBoard writer, one log directory per run name.
writer = SummaryWriter(log_dir='log/{}'.format(log))

# Loss and optimizer (Adam with its default settings).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

FileNotFoundError: [Errno 2] File datatrain.csv does not exist: 'datatrain.csv'

In [7]:
# Train for 10 more epochs, then checkpoint the weights under the run name.
model.train()
train(model, dataloader, current_epoch=0, num_epochs=10)

os.makedirs(model_dir, exist_ok=True)
torch.save(model.state_dict(), '{}/{}.pt'.format(model_dir, log))
writer.close()
print('Finished Training')

Epoch [1/10], train_loss: 0.4202
Epoch [2/10], train_loss: 0.4119
Epoch [3/10], train_loss: 0.4112
Epoch [4/10], train_loss: 0.4108
Epoch [5/10], train_loss: 0.4105
Epoch [6/10], train_loss: 0.4103
Epoch [7/10], train_loss: 0.4105
Epoch [8/10], train_loss: 0.4099
Epoch [9/10], train_loss: 0.4099
Epoch [10/10], train_loss: 0.4101
elapsed_time: 84.350s
Finished Training


In [11]:
log

'dev_v1.2_batch_size=2048_epoch=300'

In [8]:
# Persist the current weights and flush the TensorBoard writer.
os.makedirs(model_dir, exist_ok=True)
checkpoint_file = '{}/{}.pt'.format(model_dir, log)
torch.save(model.state_dict(), checkpoint_file)
writer.close()
print('Finished Training')

Finished Training


### 简单检测一下训练的结果

In [5]:
# Sanity check on the training loader: a position counts as correct when its
# true next key is among the model's 3 highest-scoring keys.
correct = 0
num_of_seq = 0
# Inference only: no_grad avoids building autograd graphs for every batch.
with torch.no_grad():
    for step, (seq, label) in tqdm(enumerate(dataloader),desc="测试对下一标签预测准确率"):
        # Forward pass
        seq = seq.clone().detach().view(-1, window_size, input_size).to(device)
        # Targets: the window shifted left by one, plus the batch label for
        # the final position (same construction as in train()).
        label1 = seq[:,1:,:].cpu().long()
        label2 = label.view(-1,1,1)
        label = torch.cat([label1,label2],1).view(-1,window_size)
        label = label.reshape(label.size(0)*label.size(1))
        output = model(seq)
        output = output.reshape(output.size(0)*output.size(1),-1)
        predicted = torch.argsort(output, 1)[:, -3:].cpu()
        num_of_seq += len(label)
        # Vectorized membership test: one comparison per batch instead of a
        # Python loop over every position.
        correct += (predicted == label.view(-1, 1)).any(dim=1).sum().item()
print('对下一标签预测准确率为: '+str(correct/num_of_seq))

NameError: name 'dataloader' is not defined

In [67]:
print(seq.reshape(1,-1)[0][70:140])
print(label.reshape(1,-1)[0][70:140])

tensor([ 0.,  4., 21.,  4.,  4., 10.,  8., 10.,  8., 25.,  8., 25., 25., 25.,
        22., 22., 22., 20., 20., 20.,  0., 21.,  4.,  4.,  4., 25., 25., 10.,
         8., 10., 21.,  4.,  4.,  4., 25., 25., 25., 10.,  8., 10.,  3.,  3.,
         3.,  2., 22., 22., 22., 20., 20., 20.,  8., 10.,  8., 25., 25., 25.,
        22., 22., 22., 20.,  4., 10.,  8., 10.,  8., 25., 25., 10.,  8., 25.],
       device='cuda:0')
tensor([ 4, 21,  4,  4, 10,  8, 10,  8, 25, 25, 25, 25, 25, 22, 22, 22, 20, 20,
        20, 30, 21,  4,  4,  4, 25, 25, 10,  8, 10,  8,  4,  4,  4, 25, 25, 25,
        10,  8, 10,  8,  3,  3,  2, 22, 22, 22, 20, 20, 20, 30, 10,  8, 25, 25,
        25, 22, 22, 22, 20, 20, 10,  8, 10,  8, 25, 25, 10,  8, 25,  1])


### 清理缓存释放空间 

In [7]:
import gc
gc.collect()
torch.cuda.empty_cache()

# 测试阶段

In [7]:
# Rebuild the network and load the trained weights for evaluation.
model = Model(input_size, hidden_size, num_layers, num_classes)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_dir + '/' + log + '.pt', map_location=device))
model.to(device)
model.eval()

Model(
  (lstm): LSTM(1, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=31, bias=True)
)

In [12]:
from unsupervised.predictor import *

In [10]:
# Build the evaluation loaders for the normal and abnormal sets.
# NOTE: this rebinds the globals `batch_size` and `window_size`; `log` (the
# run / checkpoint name) was formatted earlier and is not affected.
model.eval()
batch_size = 20000
window_size = 10
# generate_predicted_data comes from a star import; it returns three values
# here (sessions, dataset, block) — their exact structure is not visible in
# this notebook.
test_normal_session, test_normal_dataset,normal_block = generate_predicted_data(file_dir+'normal.csv',window_size)
# shuffle=False keeps the batch order stable for the evaluation loaders.
normal_dataloader = DataLoader(test_normal_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
test_abnormal_session, test_abnormal_dataset,abnormal_block = generate_predicted_data(file_dir+'abnormal.csv',window_size)
abnormal_dataloader = DataLoader(test_abnormal_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)


.\data\lstm\dataset_official\normal.csv: 100%|████████████████████████████████| 14200/14200 [00:01<00:00, 12600.42it/s]


Number of sessions(.\data\lstm\dataset_official\normal.csv): 14200
Number of seqs(.\data\lstm\dataset_official\normal.csv): 269989


.\data\lstm\dataset_official\abnormal.csv: 100%|████████████████████████████████| 4123/4123 [00:00<00:00, 10008.28it/s]

Number of sessions(.\data\lstm\dataset_official\abnormal.csv): 4123
Number of seqs(.\data\lstm\dataset_official\abnormal.csv): 88410





In [12]:
model

Model(
  (lstm): LSTM(1, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=31, bias=True)
)

In [13]:
test_normal_result,test_abnormal_result = count_metries(model, normal_dataloader, abnormal_dataloader,
                                                        test_normal_session, test_abnormal_session,
                                                        10, window_size,ts=0)

loading data: 14it [01:04,  4.58s/it]
loading data: 5it [00:20,  4.11s/it]

elapsed_time: 84.812s
false positive (FP): 415, false negative (FN): 1561, Precision: 86.060%, Recall: 62.139%, F1-measure: 72.169%
Finished Predicting





In [10]:
# Evaluate on the HDFS test sets with a shorter window.
# NOTE: this rebinds the globals `batch_size` and `window_size`, and it
# overwrites the session lists / loaders created by the earlier test cell.
# NOTE(review): fast_predict is defined in a cell further down, so this cell
# fails on a fresh kernel run top to bottom.
batch_size = 10000
window_size = 5
# generate_test_data comes from a star import; it returns four values here
# (sessions, dataset, sequences, labels) — structure not visible in this notebook.
test_normal_session, test_normal_dataset, test_normal_seq,test_normal_label = generate_test_data('hdfs_test_normal',window_size)
normal_dataloader = DataLoader(test_normal_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
test_abnormal_session, test_abnormal_dataset,test_abnormal_seq,test_abnormal_label = generate_test_data('hdfs_test_abnormal',window_size)
abnormal_dataloader = DataLoader(test_abnormal_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

# Positional arguments: num_candidates=10, then the window size set above.
test_normal_result,test_abnormal_result = fast_predict(model,normal_dataloader,abnormal_dataloader,10,window_size)

normal:: 100%|████████████████████████████████████████████████████████████████| 14177/14177 [00:00<00:00, 25271.87it/s]


Number of sessions(hdfs_test_normal): 14177
Number of seqs(hdfs_test_normal): 340455


normal:: 100%|██████████████████████████████████████████████████████████████████| 4123/4123 [00:00<00:00, 27493.86it/s]
normal: 0it [00:00, ?it/s]

Number of sessions(hdfs_test_abnormal): 4123
Number of seqs(hdfs_test_abnormal): 108981


normal: 35it [00:39,  1.12s/it]
abnormal: 11it [00:12,  1.11s/it]

elapsed_time: 51.352s
false positive (FP): 1541, false negative (FN): 359, Precision: 70.952%, Recall: 91.293%, F1-measure: 79.847%
Finished Predicting





### 快速预测

In [9]:
# fast predict
def fast_predict(model,normal_dataloader,abnormal_dataloader,num_candidates=5,window_size=10,
                 normal_sessions=None,abnormal_sessions=None):
    """Session-level anomaly detection from top-k next-key predictions.

    A window is flagged when its label is not among the `num_candidates`
    highest-scoring keys predicted at the last time step. A session is
    reported as anomalous when at least one of its windows is flagged.

    Reads the notebook globals `input_size` and `device`.

    Args:
        model: network returning (batch, window_size, num_classes) logits.
        normal_dataloader: yields `(seq, labels)` batches of normal windows.
        abnormal_dataloader: yields `(seq, labels)` batches of abnormal windows.
        num_candidates: size k of the top-k candidate set.
        window_size: number of time steps in each window.
        normal_sessions: iterable of sessions, each an iterable of indices
            into the flattened normal window list. Defaults to the global
            `test_normal_session` (the previous behaviour).
        abnormal_sessions: same for the abnormal set; defaults to the global
            `test_abnormal_session`.

    Returns:
        (test_normal_result, test_abnormal_result): two lists of bools, one
        entry per window in loader order, True meaning "flagged".
    """
    if normal_sessions is None:
        normal_sessions = test_normal_session
    if abnormal_sessions is None:
        abnormal_sessions = test_abnormal_session

    def _flag_windows(dataloader, desc):
        # One bool per window: True when the label is outside the top-k set.
        flags = []
        with torch.no_grad():
            for step, (seq, labels) in tqdm(enumerate(dataloader), desc=desc):
                seq = seq.clone().detach().view(-1, window_size, input_size).to(device)
                output = model(seq).cpu()
                # argsort is ascending, so the last k columns are the top-k keys.
                predicted = torch.argsort(output[:, -1, :], 1)[:, -num_candidates:]
                hit = (predicted == labels.reshape(-1, 1).cpu()).any(dim=1)
                flags.extend((~hit).tolist())
        return flags

    def _count_flagged_sessions(sessions, flags):
        # Number of sessions containing at least one flagged window.
        return sum(1 for session in sessions if any(flags[seq_id] for seq_id in session))

    # Test the model
    start_time = time.time()
    test_normal_result = _flag_windows(normal_dataloader, 'normal')
    FP = _count_flagged_sessions(normal_sessions, test_normal_result)
    test_abnormal_result = _flag_windows(abnormal_dataloader, 'abnormal')
    TP = _count_flagged_sessions(abnormal_sessions, test_abnormal_result)
    elapsed_time = time.time() - start_time
    print('elapsed_time: {:.3f}s'.format(elapsed_time))

    # Compute precision, recall and F1-measure. Each ratio is reported as 0
    # when its denominator is 0 instead of raising ZeroDivisionError.
    FN = len(abnormal_sessions) - TP
    P = 100 * TP / (TP + FP) if (TP + FP) else 0.0
    R = 100 * TP / (TP + FN) if (TP + FN) else 0.0
    F1 = 2 * P * R / (P + R) if (P + R) else 0.0
    print('false positive (FP): {}, false negative (FN): {}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(FP, FN, P, R, F1))
    print('Finished Predicting')
    return test_normal_result,test_abnormal_result

In [69]:
# Scratch run of the abnormal half of fast_predict with a hard-coded window
# of 5 and the top-5 candidates. FP is never updated here, so the "precision"
# printed below is not meaningful.
# NOTE(review): the recorded run failed because the loader's rows did not
# match a window of 5 — rebuild abnormal_dataloader with the matching
# window size before running this cell.
TP = 0
FP = 0
# Test the model
start_time = time.time()
test_normal_result = []
test_abnormal_result = []
with torch.no_grad():
    for step, (seq, labels) in tqdm(enumerate(abnormal_dataloader), desc='abnormal'):
        seq = seq.clone().detach().view(-1, 5, input_size).to(device)
        output = model(seq).cpu()

        predicted = torch.argsort(output[:,-1,:], 1)[:,-5:]
        for i, label in enumerate(labels):
            # True when the label is outside the top-5 candidates.
            test_abnormal_result.append(label not in predicted[i])
# A session counts once as soon as any of its windows is flagged.
for session in test_abnormal_session:
    if any(test_abnormal_result[seq_id] for seq_id in session):
        TP += 1
elapsed_time = time.time() - start_time
print('elapsed_time: {:.3f}s'.format(elapsed_time))
# Compute precision, recall and F1-measure; ratios with a zero denominator
# are reported as 0 instead of raising ZeroDivisionError.
FN = len(test_abnormal_session) - TP
P = 100 * TP / (TP + FP) if (TP + FP) else 0.0
R = 100 * TP / (TP + FN) if (TP + FN) else 0.0
F1 = 2 * P * R / (P + R) if (P + R) else 0.0
print('false positive (FP): {}, false negative (FN): {}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(FP, FN, P, R, F1))
print('Finished Predicting')



normal: 0it [00:00, ?it/s][A[A

normal: 1it [00:01,  1.50s/it][A[A

normal: 2it [00:03,  1.59s/it][A[A

normal: 3it [00:05,  1.64s/it][A[A

normal: 4it [00:06,  1.60s/it][A[A

normal: 5it [00:08,  1.72s/it][A[A

normal: 6it [00:10,  1.66s/it][A[A

normal: 7it [00:11,  1.67s/it][A[A

normal: 8it [00:13,  1.63s/it][A[A

normal: 9it [00:14,  1.61s/it][A[A

normal: 10it [00:16,  1.69s/it][A[A

RuntimeError: shape '[-1, 5, 1]' is invalid for input of size 53886

In [72]:
seq.shape

torch.Size([8981, 6])

In [70]:
TP

0

In [77]:
fast_predict(model,normal_dataloader,abnormal_dataloader,10,5)




normal: 0it [00:00, ?it/s][A[A[A


normal: 1it [00:01,  1.78s/it][A[A[A


normal: 2it [00:03,  1.65s/it][A[A[A


normal: 3it [00:04,  1.66s/it][A[A[A


normal: 4it [00:06,  1.58s/it][A[A[A


normal: 5it [00:07,  1.60s/it][A[A[A


normal: 6it [00:09,  1.52s/it][A[A[A


normal: 7it [00:10,  1.47s/it][A[A[A


normal: 8it [00:12,  1.55s/it][A[A[A


normal: 9it [00:13,  1.51s/it][A[A[A


normal: 10it [00:15,  1.50s/it][A[A[A


normal: 11it [00:16,  1.54s/it][A[A[A


normal: 12it [00:18,  1.48s/it][A[A[A


normal: 13it [00:19,  1.53s/it][A[A[A


normal: 14it [00:21,  1.48s/it][A[A[A


normal: 15it [00:22,  1.46s/it][A[A[A


normal: 16it [00:24,  1.56s/it][A[A[A


normal: 17it [00:25,  1.53s/it][A[A[A


normal: 18it [00:27,  1.64s/it][A[A[A


normal: 19it [00:29,  1.63s/it][A[A[A


normal: 20it [00:30,  1.61s/it][A[A[A


normal: 21it [00:32,  1.73s/it][A[A[A


normal: 22it [00:34,  1.73s/it][A[A[A


normal: 23it [00:36,  

elapsed_time: 69.522s
false positive (FP): 1541, false negative (FN): 359, Precision: 70.952%, Recall: 91.293%, F1-measure: 79.847%
Finished Predicting





([False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  True,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  

In [78]:
test_normal_result,test_abnormal_result = fast_predict(model,normal_dataloader,abnormal_dataloader,10,5)




normal: 0it [00:00, ?it/s][A[A[A


normal: 1it [00:02,  2.16s/it][A[A[A


normal: 2it [00:04,  2.22s/it][A[A[A


normal: 3it [00:06,  2.06s/it][A[A[A


normal: 4it [00:07,  1.88s/it][A[A[A


normal: 5it [00:09,  1.89s/it][A[A[A


normal: 6it [00:10,  1.74s/it][A[A[A


normal: 7it [00:12,  1.64s/it][A[A[A


normal: 8it [00:13,  1.63s/it][A[A[A


normal: 9it [00:15,  1.56s/it][A[A[A


normal: 10it [00:16,  1.57s/it][A[A[A


normal: 11it [00:18,  1.58s/it][A[A[A


normal: 12it [00:19,  1.52s/it][A[A[A


normal: 13it [00:21,  1.55s/it][A[A[A


normal: 14it [00:22,  1.50s/it][A[A[A


normal: 15it [00:24,  1.46s/it][A[A[A


normal: 16it [00:26,  1.52s/it][A[A[A


normal: 17it [00:27,  1.48s/it][A[A[A


normal: 18it [00:29,  1.53s/it][A[A[A


normal: 19it [00:30,  1.48s/it][A[A[A


normal: 20it [00:31,  1.46s/it][A[A[A


normal: 21it [00:33,  1.51s/it][A[A[A


normal: 22it [00:34,  1.48s/it][A[A[A


normal: 23it [00:36,  

elapsed_time: 68.889s
false positive (FP): 1541, false negative (FN): 359, Precision: 70.952%, Recall: 91.293%, F1-measure: 79.847%
Finished Predicting





In [46]:
test_normal_seq[FP_result[0]]

(3, 4, 3, 4, 3, 23, 23, 23, 21, 21, 20)

In [51]:
# For a slice of the false-positive windows, show the model's 10 best
# candidates for the final key next to the true sequence.
# NOTE(review): FP_result is not defined in any visible cell — presumably a
# list of indices into test_normal_seq; confirm where it is built.
pattern = set()  # created once so every inspected sequence is kept
for i in FP_result[10:25]:
    seq = test_normal_seq[i]
    # Feed everything but the last key and predict one step ahead.
    t = torch.FloatTensor(seq[:-1]).reshape(1,-1)
    t,predicted,output = generate_seq(t,1,10,1)
    prob = softmax(output)
    print(seq)
#     print(t.int().cpu().numpy()[0])
    print("预测的序号排序:",end=' ')
    print(predicted)
    print("对应的可能性:",end=' ')
    print(prob[predicted])
    print()
    pattern.add(tuple(t.int().cpu().numpy()[0]))

(26, 26, 11, 9, 26, 23, 23, 23, 21, 21, 2)
预测的序号排序: tensor([11, 18,  9, 23, 25,  4,  6,  5, 30, 21])
对应的可能性: tensor([1.2008e-05, 6.2610e-05, 6.4171e-05, 1.5609e-04, 1.9687e-04, 2.1495e-04,
        2.5744e-04, 9.2612e-04, 3.7864e-03, 9.9431e-01],
       grad_fn=<IndexBackward>)

(9, 11, 9, 11, 9, 26, 26, 25, 18, 5, 26)
预测的序号排序: tensor([18, 30,  5, 23,  2, 21,  3,  6,  4, 16])
对应的可能性: tensor([0.0016, 0.0018, 0.0022, 0.0074, 0.0120, 0.0610, 0.0631, 0.0851, 0.1440,
        0.6199], grad_fn=<IndexBackward>)

(4, 3, 4, 3, 3, 23, 23, 23, 21, 21, 20)
预测的序号排序: tensor([ 2,  9, 18, 23, 25,  6,  4,  5, 30, 21])
对应的可能性: tensor([4.0952e-06, 2.1788e-05, 3.2698e-05, 1.0069e-04, 1.0320e-04, 1.3307e-04,
        1.8714e-04, 2.4859e-04, 1.1386e-03, 9.9802e-01],
       grad_fn=<IndexBackward>)

(5, 5, 11, 9, 26, 11, 9, 11, 9, 26, 18)
预测的序号排序: tensor([30, 25,  9, 21,  2,  3, 23,  4, 11, 26])
对应的可能性: tensor([5.6737e-06, 6.3830e-06, 2.5268e-05, 3.5254e-05, 6.4369e-05, 6.6575e-05,
        1.1359e-04, 1.2045e-0

# 生成序列

In [10]:
import random

In [11]:
def generate_seq(start,window_size=10,num_candidates=5,scope=None):
    """Extend `start` by `window_size` keys sampled from the model's predictions.

    At each step the whole sequence so far is fed to the global `model`; the
    next key is drawn uniformly from the `scope` highest-scoring keys.

    Reads the notebook globals `model`, `input_size` and `device`.

    Args:
        start: float tensor of shape (1, length) holding the seed keys. The
            logits are flattened per step, so a single sequence is assumed.
        window_size: number of keys to append (must be >= 1).
        num_candidates: how many top-scoring keys are returned in `predicted`.
        scope: sample among the top `scope` keys; defaults to
            `num_candidates`. `scope=1` always picks the best key. Values
            larger than the number of candidates are clamped.

    Returns:
        (start, predicted, output): the extended (1, length + window_size)
        tensor, the candidate keys of the LAST step in ascending score order,
        and the last step's logits as a 1-D tensor.

    Raises:
        ValueError: if `window_size` is smaller than 1.
    """
    if window_size < 1:
        # Without a step there is no prediction to return.
        raise ValueError('window_size must be >= 1, got {}'.format(window_size))
    if scope is None:
        scope = num_candidates
    first_len = start.size(1)
    # Pure inference: no_grad avoids building an autograd graph per step.
    with torch.no_grad():
        for length in range(first_len, first_len + window_size):
            seq = start.clone().detach().view(-1, length, input_size).to(device)
            output = model(seq).cpu()[:, -1, :]
            output = output.reshape(-1)
            # argsort is ascending: predicted[-1] is the most likely key.
            predicted = torch.argsort(output)[-num_candidates:]
            # Clamp so a scope wider than the candidate list cannot index
            # past its start.
            nxt = random.randint(1, min(scope, predicted.numel()))
            start = torch.cat([start, predicted[-nxt].reshape(1, -1).float()], 1)
    return start,predicted,output

In [12]:
softmax = nn.Softmax(dim=0)

In [15]:
t = torch.FloatTensor([ 20,20,20]).reshape(1,-1)
t,predicted,output = generate_seq(t,1,10)
prob = softmax(output)
print(t)
#     print(t.int().cpu().numpy()[0])
print("预测的序号排序:",end=' ')
print(predicted)
print("对应的可能性:",end=' ')
print(prob[predicted])

tensor([[20., 20., 20., 26.]])
预测的序号排序: tensor([26,  4, 23, 18,  6, 25, 11, 21,  5,  9])
对应的可能性: tensor([1.7118e-04, 2.0131e-04, 5.6693e-04, 7.6491e-04, 1.1165e-02, 3.5378e-02,
        1.4293e-01, 1.6214e-01, 2.4460e-01, 4.0196e-01],
       grad_fn=<IndexBackward>)


In [18]:
t

tensor([[ 0.,  5., 18.]])

In [20]:
prob = softmax(output)
print(prob[predicted])

tensor([1.2192e-04, 2.4864e-01, 7.5096e-01], grad_fn=<IndexBackward>)


In [22]:
# Generate one sequence step by step, starting from key 0 and printing the
# top-3 candidates with their softmax probabilities after each step.
t = torch.FloatTensor([0]).reshape(1,-1)
max_len = 60  # stop once the sequence holds this many keys
pattern = set()
while t.size(1)<max_len:
    # window_size=1, num_candidates=3, scope=1: append exactly one key per
    # call, always the highest-scoring one.
    t,predicted,output = generate_seq(t,1,3,1)
    prob = softmax(output)
    print(t.int().cpu().numpy()[0])
    print("预测的序号排序:",end=' ')
    print(predicted)
    print("对应的可能性:",end=' ')
    print(prob[predicted])
    print()
    # Stop as soon as key 30 has been generated.
    # NOTE(review): 30 looks like an end-of-session key — confirm.
    if 30 in t[0]:
        break
print(t.int().cpu().numpy()[0])
pattern.add(tuple(t.int().cpu().numpy()[0]))

[0 5]
预测的序号排序: tensor([18, 22,  5])
对应的可能性: tensor([0.0009, 0.2551, 0.7420], grad_fn=<IndexBackward>)

[0 5 5]
预测的序号排序: tensor([18, 22,  5])
对应的可能性: tensor([1.2192e-04, 2.4864e-01, 7.5096e-01], grad_fn=<IndexBackward>)

[0 5 5 5]
预测的序号排序: tensor([11, 22,  5])
对应的可能性: tensor([3.8608e-05, 3.4809e-01, 6.5181e-01], grad_fn=<IndexBackward>)

[ 0  5  5  5 22]
预测的序号排序: tensor([26,  5, 22])
对应的可能性: tensor([0.0010, 0.0013, 0.9970], grad_fn=<IndexBackward>)

[ 0  5  5  5 22 11]
预测的序号排序: tensor([26,  9, 11])
对应的可能性: tensor([0.0159, 0.0212, 0.9590], grad_fn=<IndexBackward>)

[ 0  5  5  5 22 11  9]
预测的序号排序: tensor([26, 11,  9])
对应的可能性: tensor([4.3829e-04, 2.5918e-03, 9.9695e-01], grad_fn=<IndexBackward>)

[ 0  5  5  5 22 11  9 11]
预测的序号排序: tensor([ 9, 26, 11])
对应的可能性: tensor([0.0077, 0.0478, 0.9439], grad_fn=<IndexBackward>)

[ 0  5  5  5 22 11  9 11  9]
预测的序号排序: tensor([26, 11,  9])
对应的可能性: tensor([5.0201e-04, 3.3620e-03, 9.9612e-01], grad_fn=<IndexBackward>)

[ 0  5  5  5 22 11  9 11  9 11]
预测的序号

In [78]:
# Sample 10 sequences starting from key 0 and collect the distinct ones.
pattern = set()
for i in range(10):
    t = torch.FloatTensor([0]).reshape(1,-1)
    max_len = 60  # stop once the sequence holds this many keys
    while t.size(1)<max_len:
        # window_size=1, num_candidates=2, scope defaults to num_candidates:
        # each step picks at random between the two highest-scoring keys.
        t,predicted,output = generate_seq(t,1,2)
        # Stop as soon as key 30 has been generated.
        # NOTE(review): 30 looks like an end-of-session key — confirm.
        if 30 in t[0]:
            break
    print(t.int().cpu().numpy()[0])
    pattern.add(tuple(t.int().cpu().numpy()[0]))

[ 0 22  5 11  9  5 26 26 26 11 11 11  9 11  9  3  4  3  4  4  4  4  3  2
  2  2 23 23 23 21 21 21 21 21 30]
[ 0 22 11  9  5  5 26 11  9 11 11 11 26 11  4  3  3 23  2  3  4  2 23  2
  4  3  3  2 23  2 23 23 23  5  6 16  6  3  2 23 23 23 21  5  6 26 21 21
 25 30]
[ 0 22 11  9 11  9 26 26 11  9 11 26  4  4  3  2 23  2  3  2  4  3  3  3
  4 23 23 23 21  4  6 16 26 21 25 30]
[ 0 22  5  5 11  9 26 26 26 11  9  9  9 11 23 30]
[ 0  5 22 11  5 11  9 11  9 11  9 26  4  4  4  4  3  4  3  4  3  4  4  4
 23 23  5  4  3  2 23  2  4  3  4 23 23 23 21 21 30]
[ 0 22 11  5 11 11 11  9 26 26 11  9  4  4  3  2 23 23 23 23 21 21 21 30]
[ 0  5  5  5 22 11  9 11 11 11  9 26 26 11 26  3  4  4  4  4  4  4  3  4
  2  2 23  2  4  4  4 23 23 23 21  5  6 16  6 26 26 30]
[ 0 22  5  5  5 26 11 26 26 11  9  9  9 11 11  3  4  3 23 23 23 21  5 21
 21 21 21 30]
[ 0  5  5  5 22 11  9 26 11 11 11 11 26 11  3  4  3  2  2  2  2 23  5 16
  6 16 26 30]
[ 0  5 22  5  5 26 11  9 26 11 11 11  9 26 26 23 23 23  5 21 30]


In [190]:
# Take the first batch of normal windows and, for ten of them, compare the
# real continuation with a sequence generated from the window's first key.
seq, labels = next(iter(normal_dataloader))
for s in seq[100:110]:
    t = s[:1].reshape(1,-1)
    # generate_seq returns (sequence, predicted, output); only the sequence
    # is needed here.
    res, _, _ = generate_seq(t)
    print(s)
    print(res)

    print(s[t.size(1):])
    print(res[0,t.size(1):])
    print()

normal: 0it [00:00, ?it/s]


tensor([ 8., 10.,  8., 10.,  8.,  1.,  2.,  2.,  3.,  2.])
tensor([[ 8.,  8., 25.,  2.,  2., 22.,  1., 22.,  1.,  1.,  3.]])
tensor([10.,  8., 10.,  8.,  1.,  2.,  2.,  3.,  2.])
tensor([ 8., 25.,  2.,  2., 22.,  1., 22.,  1.,  1.,  3.])

tensor([10.,  8., 10.,  8.,  1.,  2.,  2.,  3.,  2.,  3.])
tensor([[10.,  3.,  1.,  3.,  3., 22., 22., 24., 22.,  5.,  5.]])
tensor([ 8., 10.,  8.,  1.,  2.,  2.,  3.,  2.,  3.])
tensor([ 3.,  1.,  3.,  3., 22., 22., 24., 22.,  5.,  5.])

tensor([ 8., 10.,  8.,  1.,  2.,  2.,  3.,  2.,  3.,  2.])
tensor([[ 8.,  2.,  2.,  3.,  2.,  3.,  1., 21.,  1., 22.,  2.]])
tensor([10.,  8.,  1.,  2.,  2.,  3.,  2.,  3.,  2.])
tensor([ 2.,  2.,  3.,  2.,  3.,  1., 21.,  1., 22.,  2.])

tensor([10.,  8.,  1.,  2.,  2.,  3.,  2.,  3.,  2.,  2.])
tensor([[10.,  8.,  2., 22., 25., 20., 22., 30., 30., 20., 30.]])
tensor([8., 1., 2., 2., 3., 2., 3., 2., 2.])
tensor([ 8.,  2., 22., 25., 20., 22., 30., 30., 20., 30.])

tensor([8., 1., 2., 2., 3., 2., 3., 2., 2., 2.])
tens

## 提取路径

In [25]:
def extract_seq(candidates):
    """Pull one model-consistent path out of an interleaved key list.

    Starting from key 0, repeatedly ask the model (via the global
    `generate_seq` and `softmax`) for its 3 best next keys. The best key
    that has probability above 0.2 and still occurs in `candidates` at or
    after the current scan position is appended to the path, and the scan
    position moves just past its first such occurrence. Extraction stops when
    the top prediction is key 30 or no candidate qualifies.

    Args:
        candidates: list of keys to pick from; it is not modified.

    Returns:
        (path, positions): the extracted path (always starting with 0) and
        the indices in `candidates` of the keys that were used, in order.
    """
    path = [0]
    positions = []
    scan_from = 0
    while True:
        context = torch.FloatTensor(path).reshape(1, -1)
        _, predicted, output = generate_seq(context, 1, 3)
        if predicted[-1] == 30:
            break
        prob = softmax(output)
        # Walk the candidates from most to least likely.
        chosen = None
        for key in reversed(predicted.tolist()):
            if prob[key] > 0.2 and key in candidates[scan_from:]:
                chosen = key
                break
        if chosen is None:
            break
        found_at = candidates.index(chosen, scan_from)
        path.append(chosen)
        positions.append(found_at)
        scan_from = found_at + 1
    return path, positions

In [30]:
def del_eles_from_list(eles_list,idx_list):
    """Remove the elements at positions `idx_list` from `eles_list` in place.

    Args:
        eles_list: list to shrink; it is mutated.
        idx_list: non-negative positions to delete, in any order. Repeated
            positions are deleted once.

    Returns:
        The same (mutated) `eles_list` object.
    """
    # Delete from the highest position down so earlier pops never shift the
    # positions still to be removed, whatever order the caller supplied.
    for i in sorted(set(idx_list), reverse=True):
        eles_list.pop(i)
    return eles_list

In [52]:
def merge_seq(seq1,seq2):
    """Randomly interleave two sequences while keeping each one's own order.

    While both inputs still have elements, a draw from `random` picks which
    one supplies the next element (each with probability 1/2). Once one input
    is exhausted, the rest of the other is appended unchanged.

    Args:
        seq1: first sequence.
        seq2: second sequence.

    Returns:
        A new list containing every element of both inputs.
    """
    merged = []
    pos1, pos2 = 0, 0
    len1, len2 = len(seq1), len(seq2)
    while pos1 < len1 and pos2 < len2:
        take_first = random.randint(0, 9) < 5
        if take_first:
            merged.append(seq1[pos1])
            pos1 += 1
        else:
            merged.append(seq2[pos2])
            pos2 += 1
    # At most one of these tails is non-empty.
    merged.extend(seq1[pos1:])
    merged.extend(seq2[pos2:])
    return merged

In [80]:
seq1 = [5,5,5,22,11,9,11,9,11,9,26,26,26,23,23,23,21,21,21]
seq2 = [22,5,5,5,26,26,11,9,11,9,11,9,26,23,23,23,21,21,21]
seq_mess = merge_seq(seq1,seq2)
print(seq_mess)

[5, 22, 5, 5, 5, 22, 5, 11, 5, 9, 26, 11, 26, 11, 9, 11, 9, 9, 11, 11, 9, 26, 9, 23, 23, 26, 23, 26, 21, 21, 26, 23, 23, 21, 23, 21, 21, 21]


In [81]:
candidates = [i for i in seq_mess]
res,idx_list = extract_seq(candidates)
print(res)
print(del_eles_from_list(candidates,idx_list))

[0, 5, 5, 5, 22, 11, 9, 11, 9, 11, 9, 26, 26, 26, 23, 23, 23, 21, 21, 21]
[22, 5, 5, 5, 26, 26, 11, 9, 11, 11, 9, 9, 23, 23, 23, 21, 21, 26, 21]


In [82]:
res,idx_list = extract_seq(candidates)
print(res)
print(del_eles_from_list(candidates,idx_list))

[0, 5, 5, 5]
[22, 26, 26, 11, 9, 11, 11, 9, 9, 23, 23, 23, 21, 21, 26, 21]


In [66]:
def extract_seq_v2(candidates, context_len=4):
    """Pull one model-consistent path out of an interleaved key list.

    Variant of `extract_seq` that seeds the path with the first candidate
    and predicts from only the most recent `context_len` keys of the path
    rather than from the whole path.

    At each step the global `generate_seq` / `softmax` give the model's 3
    best next keys. The best key with probability above 0.2 that still occurs
    in `candidates` at or after the scan position is appended, and the scan
    position moves just past its first such occurrence. Extraction stops when
    the top prediction is key 30 or no candidate qualifies.

    Args:
        candidates: list of keys to pick from; it is not modified.
        context_len: number of trailing path keys fed to the model (>= 1).

    Returns:
        (path, positions): the extracted path (starting with 0) and the
        indices in `candidates` of the keys used. For empty `candidates`
        this is `([0], [])`, matching what `extract_seq` returns.

    Raises:
        ValueError: if `context_len` is smaller than 1.
    """
    if context_len < 1:
        raise ValueError('context_len must be >= 1, got {}'.format(context_len))
    if not candidates:
        # Nothing to seed the path with (previously an IndexError).
        return [0], []

    current_log = [0, candidates[0]]
    idx_list = [0]
    cur = 1
    while True:
        seq = torch.FloatTensor(current_log[-context_len:]).reshape(1, -1)
        _, predicted, output = generate_seq(seq, 1, 3)
        if predicted[-1] == 30:
            break
        prob = softmax(output)
        # Walk the candidates from most to least likely.
        chosen = None
        for key in reversed(predicted.tolist()):
            if prob[key] > 0.2 and key in candidates[cur:]:
                chosen = key
                break
        if chosen is None:
            break
        cur = candidates.index(chosen, cur)
        current_log.append(chosen)
        idx_list.append(cur)
        cur = cur + 1
    return current_log,idx_list

In [67]:
candidates = [i for i in seq_mess]
res,idx_list = extract_seq_v2(candidates)
print(res)
print(del_eles_from_list(candidates,idx_list))

[0, 22, 5, 5, 5, 26, 26, 26]
[5, 5, 22, 11, 5, 9, 11, 9, 11, 11, 9, 9, 11, 11, 9, 9, 26, 26, 26, 23, 23, 23, 23, 23, 21, 21, 21, 23, 21, 21, 21]


In [65]:
res,idx_list = extract_seq_v2(candidates)
print(res)
print(del_eles_from_list(candidates,idx_list))

[0, 5, 5, 5, 26, 26, 26]
[11, 9, 11, 9, 23, 23, 23, 23, 23, 21, 21, 21, 23, 21, 21, 21]


In [55]:
res,idx_list = extract_seq_v2(candidates)
print(res)
print(del_eles_from_list(candidates,idx_list))

[0, 22]
[11, 9, 11, 9, 11, 11, 9, 9, 11, 11, 9, 9, 26, 26, 26, 23, 23, 23, 23, 23, 21, 21, 21, 23, 21, 21, 21]


In [83]:
seq = torch.FloatTensor([9,11]).reshape(1,-1)
_,predicted,output=generate_seq(seq,1,3)
print(predicted)
print(softmax(output)[predicted])

tensor([26, 11,  9])
tensor([0.0030, 0.0069, 0.9894], grad_fn=<IndexBackward>)


# 发现并发结构

In [1]:
def find_concorrent(seq):
    """Placeholder for discovering concurrent structure in a key sequence.

    Not implemented yet: the body is empty, `seq` is ignored and the
    function returns None.
    """
    pass

In [None]:
# Top-3 predictions (with probabilities) for the key following key 0.
# generate_seq calls start.size(1), so the seed must be a (1, length) tensor,
# not a Python list.
seq = torch.FloatTensor([0]).reshape(1,-1)
_,predicted,output=generate_seq(seq,1,3)
print(predicted)
print(softmax(output)[predicted])
print(softmax(output)[predicted])