In [1]:
from os import listdir
from os.path import join
from numpy import asarray
from numpy import savez_compressed
from keras_preprocessing.image import img_to_array, load_img
from tqdm import tqdm
from numpy import load
from numpy import arange
import numpy
from keras.models import load_model

In [20]:
def load_images(path, SIZE=(256, 256, 3)):
    malware, cl = [], []
    for TYPE in tqdm(listdir(path)):
        if TYPE == '.DS_Store':
            continue
        CLASS_PATH = join(path, TYPE)
        for IMG in listdir(CLASS_PATH):
            if IMG == '.DS_Store':
                continue
            IMG_PATH = join(CLASS_PATH, IMG)
            malware.append(img_to_array(load_img(IMG_PATH, target_size=SIZE)))
            cl.append(TYPE)
    return [asarray(malware), asarray(cl)]

In [22]:
byteplots_train, classes_train = load_images('/Applications/ML projects/Blended Malware/Dataset/train')
byteplots_val, classes_val = load_images('/Applications/ML projects/Blended Malware/Dataset/val')

train_file = 'train_file.npz'
test_file = 'test_file.npz'

savez_compressed(train_file, byteplots_train, classes_train)
savez_compressed(test_file, byteplots_val, classes_val)

100%|██████████| 32/32 [01:02<00:00,  1.96s/it]
100%|██████████| 32/32 [00:24<00:00,  1.33it/s]


In [21]:
from keras.models import Model
from keras.layers import Input
from keras.layers import Conv2D
from keras.layers import LeakyReLU
from keras.layers import BatchNormalization
from keras.layers import Flatten
from keras.layers import Dense
from keras.losses import SparseCategoricalCrossentropy
from keras import Sequential

In [33]:
def convModel(SIZE=(256, 256, 3)):
    model = Sequential()
    model.add(Conv2D(filters=32, kernel_size=(4, 4), strides=(2, 2), padding='same', input_shape=SIZE))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Conv2D(filters=64, kernel_size=(4, 4), strides=(2, 2), padding='same'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.2))
    model.add(Conv2D(filters=128, kernel_size=(4, 4), strides=(2, 2), padding='same'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.2))
    model.add(Conv2D(filters=256, kernel_size=(4, 4), strides=(2, 2), padding='same'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.2))
    model.add(Conv2D(filters=512, kernel_size=(4, 4), strides=(2, 2), padding='same'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.2))
    model.add(Conv2D(filters=512, kernel_size=(4, 4), strides=(2, 2), padding='same'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.2))
    model.add(Flatten())
    model.add(Dense(units=8192, activation='relu'))
    model.add(Dense(units=2048, activation='relu'))
    model.add(Dense(units=31, activation='softmax'))

    model.compile(loss=SparseCategoricalCrossentropy(from_logits=True), optimizer='adam', metrics=['accuracy'])
    return model

In [34]:
model = convModel()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_36 (Conv2D)          (None, 128, 128, 32)      1568      
                                                                 
 leaky_re_lu_36 (LeakyReLU)  (None, 128, 128, 32)      0         
                                                                 
 conv2d_37 (Conv2D)          (None, 64, 64, 64)        32832     
                                                                 
 batch_normalization_30 (Bat  (None, 64, 64, 64)       256       
 chNormalization)                                                
                                                                 
 leaky_re_lu_37 (LeakyReLU)  (None, 64, 64, 64)        0         
                                                                 
 conv2d_38 (Conv2D)          (None, 32, 32, 128)       131200    
                                                        

In [2]:
def load_real_samples(filename):
  data = load(filename)
  byteplots, classes = data['arr_0'], data['arr_1']
  byteplots = byteplots / 255.0
  return [byteplots, classes]

In [3]:
train_byteplots, train_classes = load_real_samples('/Applications/ML projects/Blended Malware/Dataset/train_file.npz')

In [4]:
shuffle = arange(train_byteplots.shape[0])
numpy.random.shuffle(shuffle)
train_byteplots = train_byteplots[shuffle]
train_classes = train_classes[shuffle]

In [5]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
train_classes = le.fit_transform(train_classes)

In [None]:
model.fit(train_byteplots, train_classes, epochs=25, verbose=1)

In [6]:
model = load_model('model.h5')

2023-03-03 13:43:24.046363: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
test_byteplots, test_classes = load_real_samples('/Applications/ML projects/Blended Malware/Dataset/test_file.npz')
shuffle = arange(test_byteplots.shape[0])
numpy.random.shuffle(shuffle)
test_byteplots = test_byteplots[shuffle]
test_classes = test_classes[shuffle]

In [9]:
le = LabelEncoder()
test_classes = le.fit_transform(test_classes)

In [17]:
from sklearn.metrics import accuracy_score

pred_temp = model2.predict(test_byteplots)
pred_classes = []
for instance in pred_temp:
    pred_classes.append(numpy.argmax(instance))



In [19]:
accuracy = accuracy_score(test_classes, pred_classes)
print('Test Accuracy: %.3f' % accuracy)

Stacked Test Accuracy: 0.911
