In [38]:
import numpy as np
from tqdm import tqdm

In [30]:
def embedBytes(byte):
    binaryString = "{0:08b}".format(byte)
    vec = np.zeros(8)
    for i in range(8):
        if (binaryString[i]=="1"):
            vec[i]=float(1)/16
        else:
            vec[i]=-float(1)/16
    return vec

In [34]:
byte1 = 255
byte2 = 1
print(embedBytes(byte1))
print(embedBytes(byte2))

[0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625]
[-0.0625 -0.0625 -0.0625 -0.0625 -0.0625 -0.0625 -0.0625  0.0625]


In [35]:
import os
directoriesWithLabels = [("Benign PE Samples",0), ("Malicious PE Samples",1)]
listOfSamples = []
labels = []
for datasetPath, label in directoriesWithLabels:
    samples = [f for f in listdir(datasetPath)]
    for file in samples:
        filePath = os.path.join(datasetPath, file)
        listOfSamples.append(filePath)
        labels.append(label)

In [36]:
def readFile(filePath):
    with open(filePath, "rb") as binaryFile:
        return binaryFile.read()

In [41]:
maxSize = 15000
numSamples = len(listOfSamples)
X = np.zeros((numSamples, 8, maxSize))
Y = np.asarray(labels)
fileNum = 0
for file in tqdm(listOfSamples):
    sampleByteSequence = readFile(file)
    for i in range(min(maxSize,len(sampleByteSequence))):
        X[fileNum,:,i]=embedBytes(sampleByteSequence[i])
    fileNum+=1

100%|████████████████████████████████████████████████████████████████████████████████| 424/424 [00:31<00:00, 13.52it/s]


In [None]:
print(X.shape)

In [51]:
from keras import optimizers
opt = optimizers.SGD(lr=0.01, decay=1e-6, nesterov=True)

In [52]:
from keras import Input
inputs = Input(shape=(8, maxSize))

In [53]:
from keras.layers import Conv1D
conv1 = Conv1D(kernel_size=(128), filters=32, strides=(128), padding='same')(inputs)
conv2 = Conv1D(kernel_size=(128), filters=32, strides=(128), padding='same')(inputs)

In [54]:
from keras.layers import Activation
a = Activation('sigmoid', name='sigmoid')(conv2)

In [60]:
from keras.layers import multiply
mul = multiply([conv1, a])

In [61]:
b = Activation('relu', name='relu')(mul)

In [66]:
from keras.layers import GlobalMaxPool1D
p = GlobalMaxPool1D()(b)

In [67]:
from keras.layers import Dense
d = Dense(16)(p)
predictions = Dense(1, activation='sigmoid')(d)

In [68]:
from keras import Model
model = Model(inputs=inputs, outputs=predictions)

In [70]:
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['acc'])

In [71]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 8, 15000)     0                                            
__________________________________________________________________________________________________
conv1d_4 (Conv1D)               (None, 1, 32)        61440032    input_2[0][0]                    
__________________________________________________________________________________________________
conv1d_3 (Conv1D)               (None, 1, 32)        61440032    input_2[0][0]                    
__________________________________________________________________________________________________
sigmoid (Activation)            (None, 1, 32)        0           conv1d_4[0][0]                   
__________________________________________________________________________________________________
multiply_1

In [72]:
batchSize = 16
numBatches = int(numSamples/batchSize)

In [74]:
for batchNum in tqdm(range(numBatches)):
    batch = X[batchNum*batchSize:(batchNum+1)*batchSize]
    model.train_on_batch(batch, Y[batchNum*batchSize:(batchNum+1)*batchSize])

100%|██████████████████████████████████████████████████████████████████████████████████| 26/26 [01:24<00:00,  3.31s/it]


In [None]:
Y_pred = model.predict(X)
print(Y_pred)