In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [None]:
# Load every image from the two class folders, scale pixel values by 1/255,
# and split the result into training and held-out test subsets.

# Set dataset path based on your structure
dataset_dir = "dataset/test"
image_size = (128, 128)  # every image is resized to this before stacking
images = []
labels = []


# Load images from 'cleanTest' and 'stegoTest'
for folder in ['cleanTest', 'stegoTest']:
    folder_path = os.path.join(dataset_dir, folder)
    label = 0 if 'clean' in folder.lower() else 1  # 0 for clean, 1 for stego/trojaned
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded split below) reproducible across runs
    # and machines.
    for filename in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, filename)
        try:
            img = load_img(img_path, target_size=image_size, color_mode='rgb')
            img_array = img_to_array(img) / 255.0
            images.append(img_array)
            labels.append(label)
        except Exception as e:
            # Best effort: report unreadable entries and keep loading the rest.
            print(f"Error loading {img_path}: {e}")

# Convert to NumPy arrays
X = np.array(images)
y = np.array(labels)

# Train-test split (80/20). Stratifying on y keeps the clean/stego ratio the
# same in both subsets, so the test metrics are not skewed by an unlucky draw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Loaded {len(X)} images — Training: {len(X_train)}, Testing: {len(X_test)}")

In [None]:

# Define CNN architecture: three Conv2D + MaxPooling2D stages followed by a
# dense head that ends in a single sigmoid unit (binary clean-vs-stego output).
# The input shape is derived from `image_size` (set in the data-loading cell)
# instead of being hard-coded, so the network always matches the size the
# images were actually resized to.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(*image_size, 3)),  # 3 channels: images are loaded as RGB
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Show model summary
model.summary()

In [None]:
# Train the model.
# Validation metrics come from a slice of the training data (validation_split)
# rather than from X_test, so the later model.evaluate(X_test, y_test) reports
# performance on data that played no part in monitoring training.
# Keras takes the validation slice from the end of the arrays before
# shuffling; X_train was already shuffled by train_test_split, so that slice
# is a random subset.
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2
)

In [None]:
# Draw the per-epoch accuracy curves recorded during training, one line for
# the training metric and one for the validation metric.
fig, ax = plt.subplots()
for metric_key, legend_text in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    ax.plot(history.history[metric_key], label=legend_text)
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
ax.set_title("Training and Validation Accuracy")
plt.show()

In [None]:
# Score the model on the test split; evaluate() yields the loss followed by
# the accuracy metric requested at compile time.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}, Test Loss: {test_loss:.4f}")

# Classify the first 5 test samples and report each verdict.
predictions = model.predict(X_test[:5])
for position, output in enumerate(predictions, start=1):
    score = output[0]  # sigmoid output of the single-unit head
    if score > 0.5:
        verdict, certainty = 'Trojaned', score
    else:
        # Below the threshold the confidence is measured toward the Clean class.
        verdict, certainty = 'Clean', 1 - score
    print(f"Image {position}: {verdict} (Confidence: {certainty:.4f})")

In [None]:
# Persist the trained network to disk and report the file it was written to.
model_path = 'trojan_detector.h5'
model.save(model_path)
print(f"Model saved as '{model_path}'")