In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

# Dataset locations -- placeholders, point them at the real folders.
# Each folder is read with flow_from_directory below.
train_dir = 'path/to/your/dataset/train'
validation_dir = 'path/to/your/dataset/validation'

# Number of images per batch yielded by each generator
batch_size = 32

# Both splits get the same treatment: pixel values are multiplied by 1/255.
# No augmentation is configured on either generator.
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by the training and validation directory iterators
flow_options = dict(
    target_size=(224, 224),    # images are resized to 224x224
    batch_size=batch_size,
    class_mode='categorical',  # labels for multi-class classification
)

# Training batches
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# Validation batches
validation_generator = validation_datagen.flow_from_directory(validation_dir, **flow_options)

In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# Build the classifier: a frozen ImageNet-pretrained MobileNetV2 backbone
# followed by a small trainable classification head.

# Number of output classes (59 malware families). This must equal the number
# of class sub-folders found in the training directory.
num_classes = 59

# Load MobileNetV2 without the top layers
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the base model so only the new head is trained
base_model.trainable = False

# The data generators and the prediction cell feed pixels scaled to [0, 1],
# but the ImageNet weights of MobileNetV2 were trained on inputs in [-1, 1].
# Re-map inside the model so every caller keeps passing [0, 1] images.
inputs = tf.keras.Input(shape=(224, 224, 3))
x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(inputs)

# training=False keeps the backbone's BatchNormalization layers in inference
# mode while the head is being fitted.
x = base_model(x, training=False)

# Add new layers
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=inputs, outputs=predictions)

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
# len(generator) is the number of batches needed to cover the whole split,
# including the final partial batch. Using `samples // batch_size` instead
# would skip that last batch every epoch and would evaluate to 0 steps when a
# split holds fewer than `batch_size` images.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,  # Start with fewer epochs, can increase if necessary
    validation_data=validation_generator,
    validation_steps=len(validation_generator)
)

In [None]:
# Evaluate the model on the validation split.
# These are validation metrics, not test metrics: the data comes from
# validation_generator, which was also used as validation_data during fit.
results = model.evaluate(validation_generator)
print("validation loss, validation acc:", results)

In [None]:
# Predict on new images
import cv2

# Load and preprocess image
image_path = 'path/to/some/image.png'
img = cv2.imread(image_path)
if img is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would otherwise surface as an obscure error inside cv2.resize.
    raise FileNotFoundError(f"Could not read image: {image_path}")

# OpenCV loads images in BGR channel order, while the Keras generators used
# for training produce RGB. Convert so inference matches training.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (224, 224))
img = np.expand_dims(img, axis=0)           # add the batch dimension
img = img.astype("float32") / 255.0         # Normalize, same 1/255 scaling as training

# Make prediction
predictions = model.predict(img)
# Index of the highest-probability class; train_generator.class_indices maps
# class folder names to these indices.
predicted_class = np.argmax(predictions[0])
print(f"Predicted class: {predicted_class}")