In [1]:
import sys
import pickle
import keras
import numpy as np
from keras.utils import np_utils

num_encoding = 10
magic_length = 300

In [2]:
def to_categorical_vec(x_array, magic_length, num_encoding):
    matrix = np.zeros(shape=(magic_length, num_encoding))
    
    assert x_array.shape[0] == magic_length
    
    for i in range(magic_length):
        
        bin_str = format(x_array[i], "b")
        int_lst = [int(i) for i in bin_str[::-1]]
        #print(int_lst)
        for j in range(len(int_lst)):
            matrix[i][-j-1] = int_lst[j]
            
    return matrix

In [3]:
def read_pickle(filename):
    objects = []
    with open(filename, 'rb') as f:
        while True:
            try:
                objects.append(pickle.load(f))
            except EOFError:
                break

    assert len(objects) == 1

    events = objects[0]
    """
    events = {}
    events['filename'] = filename
    processes = {}
    events['processes'] = processes
    """
    #print('trace name: ', events['filename'])
    procs = events['processes']

    x_train = np.zeros(shape=(1, magic_length, num_encoding))
    
    for procname in procs:
        #print('process: ', procname)
        data = procs[procname]
        for id in data:
            #print('    - id: ', id)
            #print('    - event length: ', len(data[id]))
            #print('    - events: ', data[id])
            #print()
            x_tmp = np.zeros(magic_length, dtype = int)
            x_tmp[:len(data[id])]=  np.array(data[id][:magic_length])+1

            x_tmp = to_categorical_vec(x_tmp, magic_length, num_encoding)
            x_train = np.concatenate((x_train,x_tmp.reshape(1, magic_length, num_encoding)), axis=0)

    x_train = np.delete(x_train, 0, axis=0)
    #print(x_train.shape)
    return(x_train)

In [4]:
filenames_ben = ["./c01_nginx_short/nginx_normal.scap.tscap.pickle", 
                 "./c02_httpd_41773_short_2/normal.scap.tscap.pickle",
                 "./c03_ghostscript_short_2/normal.scap.tscap.pickle",
                 "./c04_php_short_3_cve_19518/normal.scap.tscap.pickle",
                 "./c05_log4j_short/normal_log4j.scap.tscap.pickle",
                 "./c06_tomcat_short_2/normal.scap.tscap.pickle",
                 "./c07_redis_short_2/normal.scap.tscap.pickle",
                 "./c08_consul_short/new_normal.scap.tscap.pickle",
                 "./c09_httpd_42013_short_2/normal.scap.tscap.pickle",
                 "./c10_django_2_short/normal.scap.tscap.pickle"]

x_train_ben = np.zeros(shape=(1, magic_length, num_encoding))

for filename in filenames_ben:
    if not filename.endswith('.pickle'):
        print('Error: please use a .pickle file')
        sys.exit(1)
    result = read_pickle(filename)
    print(result.shape)
    x_train_ben = np.concatenate((x_train_ben, result), axis=0)
    

x_train_ben = np.delete(x_train_ben, 0, axis=0)
print(x_train_ben.shape)

(38, 300, 10)
(81, 300, 10)
(74, 300, 10)
(60, 300, 10)
(102, 300, 10)
(67, 300, 10)
(43, 300, 10)
(195, 300, 10)
(118, 300, 10)
(100, 300, 10)
(878, 300, 10)


In [5]:
filenames_adv = ["./c01_nginx_short/nginx_abnormal.scap.tscap.pickle", 
                 "./c02_httpd_41773_short_2/abnormal.scap.tscap.pickle",
                 "./c03_ghostscript_short_2/abnormal.scap.tscap.pickle",
                 "./c04_php_short_3_cve_19518/abnormal.scap.tscap.pickle",
                 "./c05_log4j_short/abnormal_log4j.scap.tscap.pickle",
                 "./c06_tomcat_short_2/abnormal.scap.tscap.pickle",
                 "./c07_redis_short_2/abnormal.scap.tscap.pickle",
                 "./c08_consul_short/new_abnormal.scap.tscap.pickle",
                 "./c09_httpd_42013_short_2/abnormal.scap.tscap.pickle",
                 "./c10_django_2_short/abnormal.scap.tscap.pickle"]

x_train_adv = np.zeros(shape=(1, magic_length, num_encoding))

for filename in filenames_adv:
    if not filename.endswith('.pickle'):
        print('Error: please use a .pickle file')
        sys.exit(1)
    result = read_pickle(filename)
    print(result.shape)
    x_train_adv = np.concatenate((x_train_adv, result), axis=0)
    

x_train_adv = np.delete(x_train_adv, 0, axis=0)
print(x_train_adv.shape)

(38, 300, 10)
(129, 300, 10)
(33, 300, 10)
(61, 300, 10)
(157, 300, 10)
(72, 300, 10)
(62, 300, 10)
(356, 300, 10)
(118, 300, 10)
(107, 300, 10)
(1133, 300, 10)


In [39]:
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense, Activation
import tensorflow as tf 

from sklearn.model_selection import train_test_split

# input_shape(magic_length, num_encoding)
lstm_model = Sequential()
lstm_model.add(LSTM(units = 64, activation = 'relu')) #
#lstm_model.add(Dropout(0.1))
lstm_model.add(Dense(1, activation='sigmoid'))
#lstm_model.add(Activation(''))
#lstm_model.add(Dense(1))
#lstm_model.add(Activation('softmax'))

#clipnorm=1.25
optimizer = tf.keras.optimizers.Adadelta(clipnorm=1.25)

lstm_model.compile(
    loss='binary_crossentropy',#'mean_squared_error',
    optimizer=optimizer,#'adam',
    metrics=['accuracy']
)

TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'

In [37]:
X_train, X_test, Y_train, Y_test = train_test_split(
                    np.concatenate((x_train_ben, x_train_adv), axis=0), 
                    np.concatenate((np.zeros(x_train_ben.shape[0], dtype=int), np.ones(x_train_adv.shape[0], dtype=int)), axis=0),
                    test_size = 300)

In [38]:
# X_train = np.concatenate((x_train_ben[:800], x_train_adv[:1000]), axis=0)
# y_train = np.concatenate((np.zeros(800, dtype=int), np.ones(1000, dtype=int)), axis=0)

# X_test = np.concatenate((x_train_ben[800:], x_train_adv[1000:]), axis=0)
# y_test = np.concatenate((np.zeros(x_train_ben[800:].shape[0], dtype=int), np.ones(x_train_adv[1000:].shape[0], dtype=int)), axis=0)

assert X_test.shape[0] == Y_test.shape[0]
assert X_train.shape[0] == Y_train.shape[0]

print(X_train.shape)

history = lstm_model.fit(
    X_train,
    Y_train,
    #validation_split = 0.1,
    epochs = 100,
    batch_size = 64
)

(1711, 300, 10)
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100

KeyboardInterrupt: 

In [10]:
a = np.concatenate((np.zeros(8), np.ones(10)), axis=0)
print(x_train_ben[800:][0][280])
print(a)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [8]:
xx = np.array([233, 1, 1, 78, 78, 187, 187, 287])
yy = 8
zz = 10


re_matx = to_categorical_vec(xx, yy, zz)

print(bin(274))
print(bin(78))

print(xx)

print(xx[-1])
print(xx[-2])



0b100010010
0b1001110
[233   1   1  78  78 187 187 287]
287
187


In [7]:
for id in data:
    print('    - id: ', id)
    print('    - event length: ', len(data[id]))
    print('    - events: ', data[id])
    #print( data[id][:300])
    
    x_tmp = np.zeros(magic_length, dtype = int)
    x_tmp[:len(data[id])]=  np.array(data[id])+1

    x_tmp = to_categorical_vec(x_tmp, magic_length, num_encoding)
    #print(x_tmp)
    x_train = np.concatenate((x_train,x_tmp.reshape(1, magic_length, num_encoding)), axis=0)
    #print(x_train)


a = np.array([ 232, 0, 0, 77, 77])
a = a+1
print(a)

[233   1   1  78  78]
