In [None]:
# Install necessary libraries (if not pre-installed)
!pip install tensorflow matplotlib numpy

# Import required libraries
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset (images plus integer class labels)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Prepare the dataset for the model.
# Cast to float32 *before* scaling so NumPy never materializes a float64 copy
# of the whole dataset, then normalize by 255 and append the trailing channel
# axis that the Conv2D input (28, 28, 1) expects.
x_train = (x_train.astype("float32") / 255.0)[..., tf.newaxis]
x_test = (x_test.astype("float32") / 255.0)[..., tf.newaxis]

# Build a simple convolutional neural network model.
# The input shape is declared with an explicit Input object instead of the
# legacy `input_shape=` layer argument, which newer Keras releases warn about
# when used inside a Sequential model.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    # Softmax output: one probability per class, matching the
    # (non-logit) sparse categorical cross-entropy loss below.
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for three epochs, reporting metrics on the test split after each epoch
model.fit(
    x=x_train,
    y=y_train,
    epochs=3,
    validation_data=(x_test, y_test),
)

# Score the trained model on the test split and report its accuracy
evaluation = model.evaluate(x=x_test, y=y_test)
loss, accuracy = evaluation
print(f"Model Accuracy: {accuracy:.2f}")

# Take the first test sample; slicing (rather than indexing) keeps the
# leading batch axis so the image can be fed straight to the model
test_label = y_test[0]
test_img = x_test[:1]

# Show the unmodified image together with its ground-truth label
original_pixels = test_img[0, ..., 0]  # drop batch and channel axes for plotting
plt.imshow(original_pixels, cmap='gray')
plt.title(f"Original Image - Label: {test_label}")
plt.show()

# Define the FGSM method to generate adversarial examples
def create_adversarial_example(model, input_image, true_label, epsilon=0.1,
                               clip_min=0.0, clip_max=1.0):
    """Generate adversarial examples with the Fast Gradient Sign Method (FGSM).

    Every input value is shifted by ``epsilon`` in the direction (sign) of the
    gradient of the classification loss with respect to the input, which is
    the single step that increases the loss the most under a max-norm budget.

    Args:
        model: Callable mapping a batch of images to per-class probabilities
            (the loss is computed with ``from_logits`` left at its default).
        input_image: Batch of images with a floating-point dtype; the first
            axis is the batch axis.
        true_label: Integer class label. Either a scalar (for a batch of one)
            or a 1-D sequence with one label per image in the batch.
        epsilon: Size of the perturbation applied to each input value.
        clip_min: Lower bound of the valid input range after perturbation.
        clip_max: Upper bound of the valid input range after perturbation.

    Returns:
        A tensor shaped like ``input_image`` holding the perturbed images,
        clipped to ``[clip_min, clip_max]``.

    Raises:
        ValueError: If ``input_image`` is not floating point, or if no
            gradient of the loss with respect to the input can be computed.
    """
    input_image = tf.convert_to_tensor(input_image)
    if not input_image.dtype.is_floating:
        # Gradients are not defined for integer tensors; fail early with a
        # clear message instead of a None gradient further down.
        raise ValueError(
            "input_image must have a floating-point dtype, "
            f"got {input_image.dtype.name}"
        )

    # Flatten to 1-D so both a scalar label and a batch of labels work.
    true_label = tf.reshape(
        tf.cast(tf.convert_to_tensor(true_label), tf.int64), [-1]
    )

    with tf.GradientTape() as tape:
        # The input is a plain tensor, not a Variable, so it must be watched
        # explicitly for the tape to record operations on it.
        tape.watch(input_image)
        prediction = model(input_image)
        loss = tf.keras.losses.sparse_categorical_crossentropy(true_label, prediction)

    # Compute the gradients of the loss w.r.t. the input image
    gradient = tape.gradient(loss, input_image)
    if gradient is None:
        raise ValueError(
            "Could not compute the gradient of the loss with respect to "
            "input_image; the model output does not depend on it."
        )
    signed_grad = tf.sign(gradient)

    # Create the adversarial example and keep it inside the valid input range
    adversarial_image = input_image + epsilon * signed_grad
    return tf.clip_by_value(adversarial_image, clip_min, clip_max)

# Craft an adversarial version of the selected test image
epsilon = 0.2  # Perturbation factor
adversarial_img = create_adversarial_example(
    model, test_img, test_label, epsilon=epsilon
)

# Show the perturbed image
adversarial_pixels = adversarial_img[0, ..., 0]  # drop batch and channel axes
plt.imshow(adversarial_pixels, cmap='gray')
plt.title("Adversarial Image")
plt.show()

# Classify both the clean and the adversarial image so the attack is judged
# against what the model actually predicted, not only against the ground truth
original_prediction = model.predict(test_img)
original_pred_label = np.argmax(original_prediction, axis=-1)[0]
adversarial_prediction = model.predict(adversarial_img)
adversarial_label = np.argmax(adversarial_prediction, axis=-1)[0]

print(f"Original Label: {test_label}")
print(f"Original Prediction: {original_pred_label}")
print(f"Adversarial Prediction: {adversarial_label}")

# Evaluate the effect of the adversarial attack
if original_pred_label != test_label:
    # The model was already wrong on the clean image, so a wrong answer on
    # the perturbed image cannot be attributed to the attack.
    print("The model misclassified the original image, so the attack result is inconclusive.")
elif test_label != adversarial_label:
    print("The adversarial example successfully fooled the model!")
else:
    print("The model resisted the adversarial attack.")
