# **Task 1.1 Build and train the neural networks (10%)**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Fetch the MNIST train/test splits
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Append a trailing channel axis -> (samples, height, width, channels),
# then rescale the pixel intensities to lie between 0 and 1
x_train = x_train[..., np.newaxis] / 255.0
x_test = x_test[..., np.newaxis] / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Feed-forward neural network
def create_ffnn():
    """Build and compile the single-hidden-layer feed-forward classifier.

    The network flattens a 28x28x1 image, applies one 128-unit ReLU layer
    followed by dropout (rate 0.2), and ends in a 10-way softmax.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer instead of an
        # `input_shape` kwarg on the first layer, which Keras warns against.
        layers.Input(shape=(28, 28, 1)),
        layers.Flatten(),                        # Flatten the image
        layers.Dense(128, activation='relu'),    # Hidden layer
        layers.Dropout(0.2),                     # Dropout layer to prevent overfitting
        layers.Dense(10, activation='softmax')   # Output layer (10 classes)
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# Convolutional neural network
def create_cnn():
    """Build and compile the convolutional classifier.

    Three 3x3 ReLU convolutions (32, 64 and 64 filters), with 2x2 max-pooling
    after the first two, feed a 64-unit ReLU dense layer and a 10-way softmax.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer instead of an
        # `input_shape` kwarg on the first layer, which Keras warns against.
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# Deep neural network
def create_dnn():
    """Build and compile the deeper fully connected classifier.

    The network flattens a 28x28x1 image, applies two 512-unit ReLU dense
    layers, and ends in a 10-way softmax.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer instead of an
        # `input_shape` kwarg on the first layer, which Keras warns against.
        layers.Input(shape=(28, 28, 1)),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dense(512, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# Seed Python, NumPy and TensorFlow before the models are built so weight
# initialisation and batch shuffling are repeatable on a fresh
# Restart & Run All (some GPU ops may still be non-deterministic)
tf.keras.utils.set_random_seed(42)

# Create the models
model_ffnn = create_ffnn()
model_cnn = create_cnn()
model_dnn = create_dnn()

# Train the models, keeping each History so the learning curves can be
# inspected later; assigning the result also avoids a stray History repr
# as the cell output.
# NOTE: the test split is passed as validation data, so the val_* metrics
# logged during training are test-set metrics.
history_ffnn = model_ffnn.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
history_cnn = model_cnn.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))
history_dnn = model_dnn.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

  super().__init__(**kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8581 - loss: 0.4805 - val_accuracy: 0.9598 - val_loss: 0.1374
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9555 - loss: 0.1515 - val_accuracy: 0.9711 - val_loss: 0.0934
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step - accuracy: 0.9672 - loss: 0.1070 - val_accuracy: 0.9732 - val_loss: 0.0857
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9731 - loss: 0.0875 - val_accuracy: 0.9753 - val_loss: 0.0758
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9768 - loss: 0.0718 - val_accuracy: 0.9772 - val_loss: 0.0710
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9796 - loss: 0.0617 - val_accuracy: 0.9749 - val_loss: 0.0781
Epoch 7/10
[1

<keras.src.callbacks.history.History at 0x7c63cff39ad0>

In [None]:
# Score every trained network on the held-out test split, in FFNN/CNN/DNN
# order; each result is indexed below with [1] to read the accuracy
ffnn_acc, cnn_acc, dnn_acc = (
    network.evaluate(x_test, y_test, verbose=0)
    for network in (model_ffnn, model_cnn, model_dnn)
)

# Report the accuracy entry of each result as a percentage
print(f"FFNN Test Accuracy: {ffnn_acc[1]*100:.2f}%")
print(f"CNN Test Accuracy: {cnn_acc[1]*100:.2f}%")
print(f"DNN Test Accuracy: {dnn_acc[1]*100:.2f}%")

FFNN Test Accuracy: 97.98%
CNN Test Accuracy: 99.14%
DNN Test Accuracy: 97.82%


# **Task 1.2 Evaluate the robustness of neural networks (40%)**

In [None]:
import tensorflow as tf

# FGSM Attack
def fgsm_attack(model, image, label, epsilon=0.1):
    """Craft an adversarial input with a single signed-gradient step (FGSM).

    Args:
        model: Callable network whose output is fed to sparse categorical
            cross-entropy together with `label`.
        image: Input(s) to perturb; converted to a tensor.
        label: Integer class label(s) for `image`; converted to a tensor.
        epsilon: Step size applied to the sign of the loss gradient.

    Returns:
        NumPy array holding the perturbed input, clipped to [0, 1].
    """
    x = tf.convert_to_tensor(image)
    y_true = tf.convert_to_tensor(label)

    # The input is a plain tensor, so it must be watched explicitly for the
    # tape to differentiate the loss with respect to it
    with tf.GradientTape() as tape:
        tape.watch(x)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, model(x))

    # Step by epsilon along the sign of the gradient of the loss
    step = epsilon * tf.sign(tape.gradient(loss, x))

    # Keep the result inside the valid pixel range
    return tf.clip_by_value(x + step, 0, 1).numpy()

In [None]:
# PGD Attack
def pgd_attack(model, image, label, epsilon=0.1, alpha=0.01, iterations=40):
    """Craft an adversarial input with iterated signed-gradient steps (PGD).

    Starting from the unmodified input, each iteration steps by `alpha` along
    the sign of the loss gradient, then clips the result back into the
    epsilon-box around the original input and into [0, 1].

    Args:
        model: Callable network whose output is fed to sparse categorical
            cross-entropy together with `label`.
        image: Input(s) to perturb; converted to a tensor.
        label: Integer class label(s) for `image`; converted to a tensor.
        epsilon: Maximum per-element deviation from the original input.
        alpha: Step size of each iteration.
        iterations: Number of gradient steps.

    Returns:
        NumPy array holding the perturbed input.
    """
    clean = tf.convert_to_tensor(image)
    y_true = tf.convert_to_tensor(label)

    # Bounds of the epsilon-box around the clean input; constant across steps
    lower, upper = clean - epsilon, clean + epsilon

    # Start from the original input (no random initialisation)
    adversarial = clean
    for _ in range(iterations):
        with tf.GradientTape() as tape:
            tape.watch(adversarial)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, model(adversarial))

        ascent = alpha * tf.sign(tape.gradient(loss, adversarial))

        # Take the step, project back into the epsilon-box, then into [0, 1]
        adversarial = tf.clip_by_value(adversarial + ascent, lower, upper)
        adversarial = tf.clip_by_value(adversarial, 0, 1)

    return adversarial.numpy()

In [None]:
# Select 100 random images from the test set. A dedicated, seeded generator
# keeps the evaluated subset (and therefore the reported robustness numbers)
# identical on every run.
subset_rng = np.random.default_rng(42)
indices = subset_rng.choice(len(x_test), 100, replace=False)
x_test_subset = x_test[indices]
y_test_subset = y_test[indices]

# Function to evaluate robustness (accuracy and adversarial distance)
def evaluate_robustness(model, x_test_subset, y_test_subset, attack_method, epsilon=0.1):
    """Measure a model's accuracy on adversarial versions of the given samples.

    Each sample is attacked individually (as a batch of one), the model is
    queried on the adversarial result, and the L2 norm of the perturbation
    is accumulated.

    Args:
        model: Object with a Keras-style ``predict(x, verbose=...)`` method;
            it is also passed through to `attack_method`.
        x_test_subset: Array of inputs, indexed along the first axis.
        y_test_subset: Array of true integer labels aligned with the inputs.
        attack_method: Callable ``(model, image, label, epsilon)`` returning
            the adversarial version of `image`.
        epsilon: Perturbation budget forwarded to `attack_method`.

    Returns:
        Tuple ``(robust_accuracy, avg_adversarial_distance)``: the fraction of
        adversarial samples still classified as their true label, and the mean
        L2 norm of (adversarial - original) over all samples, whether or not
        the attack changed the prediction.

    Raises:
        ZeroDivisionError: If `x_test_subset` is empty.
    """
    correct_predictions = 0
    total_distance = 0
    for i in range(len(x_test_subset)):
        # Slicing with i:i+1 keeps the leading batch axis
        original = x_test_subset[i:i+1]

        # Apply the chosen attack method
        adversarial_image = attack_method(model, original, y_test_subset[i:i+1], epsilon)

        # Predict with the adversarial example; verbose=0 stops predict()
        # from printing a progress bar for every single sample
        prediction = model.predict(adversarial_image, verbose=0)

        # Compare with the true label
        if np.argmax(prediction) == y_test_subset[i]:
            correct_predictions += 1

        # Compute the adversarial distance
        total_distance += np.linalg.norm(adversarial_image - original)

    robust_accuracy = correct_predictions / len(x_test_subset)
    avg_adversarial_distance = total_distance / len(x_test_subset)

    return robust_accuracy, avg_adversarial_distance

In [None]:
# Attack callables keyed by display name, plus the shared perturbation budget
attacks = {'FGSM': fgsm_attack, 'PGD': pgd_attack}
epsilon = 0.1  # Adjust as needed

# One record per (model, attack) pair, collected in model-major order
results = []

for model_name, model in (('FFNN', model_ffnn), ('CNN', model_cnn), ('DNN', model_dnn)):
    for attack_name, attack_method in attacks.items():
        # Run the attack over the sampled subset and collect both metrics
        robust_accuracy, avg_adversarial_distance = evaluate_robustness(
            model, x_test_subset, y_test_subset, attack_method, epsilon
        )

        # Accuracy is stored as a percentage; the distance is stored unchanged
        results.append({
            'Model': model_name,
            'Attack Method': attack_name,
            'Robust Accuracy (%)': robust_accuracy * 100,
            'Average Adversarial Distance': avg_adversarial_distance
        })

# Build the summary table from the collected records
results_df = pd.DataFrame(results)

# A bare last expression renders the table in Jupyter/Colab
results_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

Unnamed: 0,Model,Attack Method,Robust Accuracy (%),Average Adversarial Distance
0,FFNN,FGSM,23.0,2.135225
1,FFNN,PGD,6.0,2.071012
2,CNN,FGSM,88.0,1.917974
3,CNN,PGD,72.0,1.838714
4,DNN,FGSM,38.0,2.152695
5,DNN,PGD,23.0,2.038638


## **Analysis of Factors Impacting Model Robustness**

---

| Model | Attack Method | Robust Accuracy (%) | Average Adversarial Distance |
|-------|---------------|---------------------|-----------------------------|
| FFNN  | FGSM          | 23.0                | 2.135225                    |
| FFNN  | PGD           | 6.0                 | 2.071012                    |
| CNN   | FGSM          | 88.0                | 1.917974                    |
| CNN   | PGD           | 72.0                | 1.838714                    |
| DNN   | FGSM          | 38.0                | 2.152695                    |
| DNN   | PGD           | 23.0                | 2.038638                    |

---

### 1. Model Architecture and Layer Structure

**CNN vs. FFNN and DNN**  
- **CNNs** rely on convolutional layers that extract local, spatially coherent features from the input images. This means that even if a perturbation slightly alters pixel intensities, the convolutional filters can still capture meaningful patterns. In these results, the CNN shows much higher robust accuracy (88% under FGSM and 72% under PGD) than the FFNN and DNN.  
- **FFNNs** and **DNNs**, on the other hand, flatten the image into a one-dimensional vector. This destroys the spatial relationship between pixels, making the model more sensitive to even small perturbations. The table reflects this vulnerability with FFNN robust accuracy dropping to 23% (FGSM) and 6% (PGD), and DNN robust accuracy at 38% (FGSM) and 23% (PGD).

**Insight**  
The inherent **spatial awareness** in CNNs serves as a built-in defense against adversarial noise. In contrast, models that lack convolutional layers (e.g. the FFNN) or that stack several wide, densely connected layers (e.g. the DNN) have no such spatial prior, which leaves them with decision boundaries that adversarial perturbations can cross more easily.

---

### 2. Attack Method Impact

**FGSM vs. PGD**  
- **FGSM (Fast Gradient Sign Method)** is a one-step attack that perturbs every input pixel by ε in the direction of the sign of the loss gradient. Although it's relatively simple, it still reduces robust accuracy considerably, especially in vulnerable architectures.  
- **PGD (Projected Gradient Descent)**, an iterative version of FGSM, applies multiple small perturbations and projects the result back onto the allowed perturbation space. This iterative process is much more effective at breaching defenses.  
- In the results, every model shows a marked drop in robust accuracy when switching from FGSM to PGD. For instance, the CNN drops from 88% to 72% robust accuracy, while the FFNN falls from 23% to just 6%.

**Insight**  
The **iterative nature** of PGD reveals the full extent of a model’s vulnerability. Models that may appear relatively robust under a single-step attack like FGSM might not have sufficiently smooth or well-formed decision boundaries to withstand the cumulative effect of PGD.

---

### 3. Average Adversarial Distance

**Perturbation magnitude**  
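- The **average adversarial distance** provides insight into the size of the perturbation applied to the inputs. As computed in this notebook, it is the mean L2 norm of the difference between each adversarial image and its original, taken over all 100 sampled images at a fixed ε = 0.1, whether or not the attack succeeded. It is therefore the perturbation actually applied under that budget, not the minimum perturbation needed to fool the model.  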
- Interestingly, the FFNN and DNN require slightly higher distances (≈2.13 and ≈2.15 under FGSM, respectively) compared to the CNN (≈1.92 under FGSM). This might suggest that once a perturbation exceeds a certain threshold, the FFNN and DNN quickly misclassify the input.  
- In contrast, the CNN maintains high robust accuracy even with a smaller average adversarial distance, indicating that its decision boundaries are more robust and less easily shifted by small perturbations.

**Insight**  
A **smaller adversarial distance** for a robust model like the CNN implies that even minor perturbations are enough to reach the decision boundary, but the boundary itself is well positioned so that most adversarial examples still fall on the correct side. Meanwhile, FFNNs and DNNs have less “buffer” in their decision boundaries, so once the perturbation exceeds a threshold—even if larger—the model is quickly fooled.

---

### 4. Consistency Across Attacks and Model Robustness

**Robustness consistency**  
- The CNN not only exhibits higher robust accuracy overall, but its performance is relatively consistent between the FGSM and PGD attacks. This suggests that the CNN’s robustness is not overly dependent on the type of attack method, and its architecture provides a more general defense against adversarial perturbations.
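- In contrast, the FFNN and DNN show more dramatic drops in relative terms when moving from FGSM to PGD. The absolute drops are similar (17 and 15 percentage points, versus 16 for the CNN), but because they start from a much lower baseline, the FFNN loses roughly three quarters of its remaining robust accuracy and the DNN about 40%, compared with under a fifth for the CNN. This inconsistency highlights their vulnerability: they may handle one type of perturbation moderately, but are easily exploited by more refined, iterative attacks.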

**Insight**  
A model that is **robust across different attack methods** (like the CNN in this case) is highly desirable. It indicates that the features the model has learned are fundamentally more stable and that the decision boundaries are less sensitive to different kinds of perturbations.

---

### 5. Label-specific Robustness (Potential Further Analysis)

Although this table does not break down results by label, a useful extension would be:
- To analyse if certain classes (or labels) are more robust than others. For example, digits with distinctive shapes (like 1 or 7) might be less susceptible to adversarial attacks than more ambiguous ones (like 3 or 8).
- This kind of analysis could reveal if the model's learned features vary in their effectiveness depending on the complexity or similarity of classes.

**Insight:**  
If certain labels are consistently more vulnerable, it might indicate that those classes have less distinctive features or that the model has not learned sufficiently robust representations for them. This could guide targeted improvements, such as focused data augmentation or specialised regularisation for those classes.

---

### Conclusion

- **Architecture and layer structure**  
  CNNs are inherently more robust than FFNNs and DNNs due to their ability to capture local spatial features and their translation invariance. Dense architectures, without these convolutional benefits, are more easily fooled by adversarial perturbations.

- **Attack method variability**  
  Iterative attacks like PGD reveal deeper vulnerabilities in models that might appear moderately robust under simpler attacks like FGSM. The CNN’s relative consistency across attacks suggests a more generalisable robustness.

- **Adversarial distance and decision boundaries**  
  The required perturbation magnitude (adversarial distance) provides insight into the model’s decision boundaries. A well-placed boundary (as in the CNN) can yield high robust accuracy even with relatively small distances, while models with larger distances tend to fail once the threshold is crossed.

- **Overall trade-offs**  
  High clean-data accuracy does not by itself guarantee robustness: the FFNN and DNN both reach about 98% accuracy on clean images, yet are fragile under adversarial conditions. Conversely, the CNN demonstrates that robust feature extraction can lead to both high accuracy and higher resilience against attacks, so these results do not show a strict trade-off between the two.

These insights can inform future work on improving robustness—such as employing adversarial training, incorporating regularisation techniques, or refining model architectures to better balance accuracy and robustness.


