Original Malicious Dataset: Malevis (https://web.cs.hacettepe.edu.tr/~selman/malevis/)

Original Benign Dataset: DikeDataset (https://github.com/iosifache/DikeDataset), with the binaries converted to images using bin2png (https://github.com/ESultanik/bin2png)

Compiled Dataset:
https://drive.google.com/file/d/13TtqaYhAZmyTFawkTu9oGIU8pLDxFchN/view?usp=sharing

Trained model (malware_detection_cnn.keras) : https://drive.google.com/file/d/1k-KFuETNG7JT3sXaeSJoM1ouZWJut9Lu/view?usp=sharing

In [None]:
!pip install tensorflow keras

In [None]:
# One-time dataset extraction, left disabled; uncomment to run it.
# NOTE(review): presumably the archive unpacks to the ./Dataset folder used as dataset_dir — confirm.
# !unzip /content/drive/MyDrive/SIH/SIH-CNN-Images.zip

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras import layers, models
import numpy as np

In [None]:
# Ask TensorFlow which GPU devices are visible and report how many there are.
gpus = tf.config.list_physical_devices(device_type='GPU')
print(f"Number of GPUs available: {len(gpus)}")

Number of GPUs available: 1


In [None]:
# --- Input pipeline settings ---
batch_size = 32        # number of images per batch
img_size = (128, 128)  # size every image is resized to (passed as target_size)

# Root folder of the image dataset; update this path if the data lives elsewhere
dataset_dir = './Dataset'

In [None]:
# Training-only input pipeline (no validation split is configured).
# Pixel values are multiplied by 1/255 and every image is randomly perturbed on the fly.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    # random geometric augmentations
    rotation_range=20,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    # how pixels exposed by a transform are filled in
    fill_mode='nearest',
)

# Read batches from dataset_dir; class_mode='binary' produces a single 0/1 label per image.
train_generator = train_datagen.flow_from_directory(
    directory=dataset_dir,
    batch_size=batch_size,
    target_size=img_size,
    class_mode='binary',
)

Found 6082 images belonging to 2 classes.


In [None]:
# Building the CNN model: three Conv2D + MaxPooling2D stages followed by a small dense head.
# The input shape is derived from img_size so the network always matches the
# target_size the data generator resizes images to; the trailing 3 is the channel count.
model = models.Sequential([
    # Explicit Input layer instead of input_shape= on the first Conv2D,
    # which is the form Keras recommends for Sequential models.
    layers.Input(shape=(*img_size, 3)),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Binary classification: one probability-like output
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Number of passes over the training data; adjust as needed.
epochs = 10

# Fit on the training generator only (no validation data is supplied).
history = model.fit(x=train_generator, epochs=epochs)

Epoch 1/10


  self._warn_if_super_not_called()


[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 194ms/step - accuracy: 0.8019 - loss: 0.4231
Epoch 2/10
[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 184ms/step - accuracy: 0.9251 - loss: 0.1784
Epoch 3/10
[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 201ms/step - accuracy: 0.9403 - loss: 0.1396
Epoch 4/10
[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 189ms/step - accuracy: 0.9606 - loss: 0.1224
Epoch 5/10
[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 183ms/step - accuracy: 0.9640 - loss: 0.0969
Epoch 6/10
[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 184ms/step - accuracy: 0.9717 - loss: 0.0661
Epoch 7/10
[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 193ms/step - accuracy: 0.9806 - loss: 0.0490
Epoch 8/10
[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 195ms/step - accuracy: 0.9748 - loss: 0.0634
Epoch 9/10
[1m191/191[0m 

In [None]:
# Persist the trained model to a .keras file.
# NOTE(review): the path is under /content/drive — presumably a mounted Google Drive in Colab; confirm it is mounted first.
model.save(filepath='/content/drive/MyDrive/SIH/malware_detection_cnn.keras')

In [None]:
# Path of the image to classify; update this to point at your own file.
# It must be assigned here, otherwise load_img below fails with a NameError on a fresh kernel.
img_path = './m3.png'

# Load and preprocess the image at the same size the training generator used
img = image.load_img(img_path, target_size=img_size)
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
img_array = img_array / 255.0  # Same 1/255 rescaling applied by the training generator

In [None]:
# Predict the class of the image
predictions = model.predict(img_array)

# Print the raw prediction output
print(f"Predictions: {predictions}")

# The model ends in a single sigmoid unit and one image was passed in, so the
# score lives at predictions[0][0]. Extract it as a Python float explicitly:
# calling int() on a one-element array (the result of predictions[0] > 0.5)
# is deprecated in recent NumPy releases.
predicted_probability = float(predictions[0][0])

# Convert prediction to class label (index 0 below the 0.5 threshold, index 1 above it).
# NOTE(review): the order must match train_generator.class_indices — confirm the folder names.
class_names = ['benign', 'malicious']  # Update if you have different class names
predicted_class = class_names[int(predicted_probability > 0.5)]
print(f"Predicted Class: {predicted_class}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Predictions: [[6.0150484e-07]]
Predicted Class: benign


  predicted_class = class_names[int(predictions[0] > 0.5)]
