
# AI Security Hands-On (Company-Style) — **Attack → Defense → Reporting**
**Minimal coding. Everything you need is here.**  
You'll run cells top-to-bottom and only change clearly marked parameters.

---

## Legend
- **🔧 EDIT THIS:** You can/should change these values (model path, dataset, attack strength).
- **✅ DO NOT EDIT:** Leave this alone unless you know what you're doing.
- **📝 REPORT:** Cells that print/save results for your report.


## 0) Quick Checklist — What to Edit vs Not

In [None]:

# 🔧 EDIT THIS: core knobs you'll change in most projects
# MODEL_SOURCE picks between training a small demo CNN ("train_scratch")
# and loading an existing model from MODEL_PATH ("load_file").
MODEL_SOURCE = "train_scratch"
MODEL_PATH = "/content/pretrained_model.h5"  # only read when MODEL_SOURCE == "load_file"
DATASET = "MNIST"  # options: "MNIST" or "CIFAR10"

# Attack settings (typical edits)
# Pixels are scaled to [0, 1] when the dataset is loaded, so EPSILON is on that scale.
EPSILON = 0.2  # attack strength; passed as `eps` to both FGSM and PGD
PGD_STEPS = 20  # PGD iterations
PGD_STEP_SIZE = 0.01  # PGD step size

# Training/defense knobs
EPOCHS_BASE = 2  # epochs for baseline training (keep small)
ADV_SUBSET = 20000  # upper bound on training samples used to craft adversarial data
EPOCHS_ADV = 2  # fine-tune epochs during adversarial training

# Output & Logging
SAVE_MODEL = False  # set True if you want to save the defended model
MODEL_SAVE_PATH = "/content/defended_model.h5"

# ✅ DO NOT EDIT: toggles for demo speed (for larger datasets, adjust carefully)
BATCH_SIZE = 128  # batch size for both baseline training and adversarial fine-tuning
SEED = 7  # seed handed to the NumPy and TensorFlow RNGs


## 1) Install & Imports

In [None]:

# ✅ DO NOT EDIT: installs needed libs (safe to re-run)
!pip -q install tensorflow==2.16.1 keras==3.3.3 adversarial-robustness-toolbox==1.18.0 numpy matplotlib scipy

# ✅ DO NOT EDIT: imports
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models

from art.attacks.evasion import FastGradientMethod, ProjectedGradientDescent
from art.estimators.classification import KerasClassifier

import scipy.ndimage as ndi

# Reproducibility: give NumPy's and TensorFlow's global RNGs the same seed
for _seed_rng in (np.random.seed, tf.random.set_seed):
    _seed_rng(SEED)


## 2) Load Dataset

In [None]:

# ✅ DO NOT EDIT: helper to load datasets
def load_dataset(name):
    """Load a supported Keras image dataset with pixels scaled by 1/255.

    Args:
        name: Dataset name, matched case-insensitively against "MNIST"
            and "CIFAR10".

    Returns:
        A 6-tuple ``(x_train, y_train, x_test, y_test, input_shape,
        num_classes)``. Images are cast to float32 and divided by 255.
        MNIST images gain a trailing channel axis; CIFAR-10 labels are
        squeezed to drop their size-1 axes.

    Raises:
        ValueError: If ``name`` is neither MNIST nor CIFAR10.
    """
    key = name.upper()
    if key not in ("MNIST", "CIFAR10"):
        raise ValueError("Unsupported dataset. Use MNIST or CIFAR10.")

    def _scaled(images):
        # Cast first so the division happens in float32.
        return images.astype("float32") / 255.0

    if key == "MNIST":
        (train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()
        # Append a channel axis so images are (N,28,28,1).
        train_x = _scaled(train_x)[..., None]
        test_x = _scaled(test_x)[..., None]
        shape, n_classes = (28, 28, 1), 10
    else:
        (train_x, train_y), (test_x, test_y) = tf.keras.datasets.cifar10.load_data()
        train_y, test_y = train_y.squeeze(), test_y.squeeze()
        train_x = _scaled(train_x)
        test_x = _scaled(test_x)
        shape, n_classes = (32, 32, 3), 10

    return (train_x, train_y, test_x, test_y, shape, n_classes)

# 🔧 EDIT THIS: choose which dataset to use (set in the checklist above)
# Every later cell reads these six module-level names.
(
    x_train,
    y_train,
    x_test,
    y_test,
    INPUT_SHAPE,
    N_CLASSES,
) = load_dataset(DATASET)
print("Dataset:", DATASET, "-> Train:", x_train.shape, "Test:", x_test.shape)


## 3) Model — Train or Load

In [None]:

# ✅ DO NOT EDIT: example small CNNs
def build_mnist_model(input_shape=(28, 28, 1), n_classes=10):
    """Build and compile the small demo CNN used for MNIST.

    The network is two Conv2D(3x3, ReLU) + MaxPooling2D stages with 32 and
    64 filters, followed by Flatten, Dense(128, ReLU) and a softmax output.

    Args:
        input_shape: Shape handed to the Input layer.
        n_classes: Number of units in the softmax output layer.

    Returns:
        A Sequential model compiled with Adam, sparse categorical
        cross-entropy and an accuracy metric.
    """
    stack = [layers.Input(shape=input_shape)]
    for n_filters in (32, 64):
        stack.append(layers.Conv2D(n_filters, 3, activation="relu"))
        stack.append(layers.MaxPooling2D())
    stack.extend([
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dense(n_classes, activation="softmax"),
    ])

    net = models.Sequential(stack)
    net.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net

def build_cifar_model(input_shape=(32, 32, 3), n_classes=10):
    """Build and compile the small demo CNN used for CIFAR-10.

    The network is three Conv2D(3x3, ReLU) + MaxPooling2D stages with 32,
    64 and 128 filters, followed by Flatten, Dense(256, ReLU) and a softmax
    output.

    Args:
        input_shape: Shape handed to the Input layer.
        n_classes: Number of units in the softmax output layer.

    Returns:
        A Sequential model compiled with Adam, sparse categorical
        cross-entropy and an accuracy metric.
    """
    stack = [layers.Input(shape=input_shape)]
    for n_filters in (32, 64, 128):
        stack.append(layers.Conv2D(n_filters, 3, activation="relu"))
        stack.append(layers.MaxPooling2D())
    stack.extend([
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        layers.Dense(n_classes, activation="softmax"),
    ])

    net = models.Sequential(stack)
    net.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net

# 🔧 EDIT THIS: decide to train small demo model or load your company's model
if MODEL_SOURCE not in ("train_scratch", "load_file"):
    raise ValueError("MODEL_SOURCE must be 'train_scratch' or 'load_file'")

if MODEL_SOURCE == "load_file":
    model = tf.keras.models.load_model(MODEL_PATH)
else:
    # Anything that is not MNIST falls through to the CIFAR-style builder.
    build_model = build_mnist_model if DATASET.upper() == "MNIST" else build_cifar_model
    model = build_model(INPUT_SHAPE, N_CLASSES)
    history = model.fit(
        x_train,
        y_train,
        epochs=EPOCHS_BASE,
        batch_size=BATCH_SIZE,
        validation_split=0.1,  # hold out 10% of the training data for validation
        verbose=1,
    )

# Index 1 is read as accuracy: the demo builders compile with
# metrics=["accuracy"]. NOTE(review): a loaded model is assumed to be
# compiled the same way — confirm for your own model file.
baseline_clean_acc = model.evaluate(x_test, y_test, verbose=0)[1]
print(f"📝 Baseline accuracy (clean test): {baseline_clean_acc:.4f}")


## 4) Wrap Model for ART

In [None]:

# ✅ DO NOT EDIT: ART wrapper
# ART's KerasClassifier raises ValueError while TensorFlow is executing
# eagerly, which is the TF 2.x default, so the model is wrapped with ART's
# TF2-native estimator instead. Imported here so this cell is self-contained.
from art.estimators.classification import TensorFlowV2Classifier

classifier = TensorFlowV2Classifier(
    model=model,
    nb_classes=N_CLASSES,
    input_shape=INPUT_SHAPE,
    # The attacks need a loss to differentiate. This matches how the demo
    # models are compiled: integer labels and softmax (probability) outputs.
    # NOTE(review): if a loaded model outputs logits, pass from_logits=True.
    loss_object=tf.keras.losses.SparseCategoricalCrossentropy(),
    # Keep adversarial pixels inside the [0, 1] range the images are scaled to.
    clip_values=(0.0, 1.0),
)


## 5) Red Team — Attacks (FGSM & PGD)

In [None]:

# 🔧 EDIT THIS: attack knobs (or just use from checklist)
# Both attacks share the same perturbation budget (EPSILON).
fgsm = FastGradientMethod(estimator=classifier, eps=EPSILON)
pgd = ProjectedGradientDescent(
    estimator=classifier,
    eps=EPSILON,
    eps_step=PGD_STEP_SIZE,
    max_iter=PGD_STEPS,
    targeted=False,
)

# ✅ DO NOT EDIT: generate adversarial samples
x_test_fgsm, x_test_pgd = fgsm.generate(x=x_test), pgd.generate(x=x_test)

# 📝 REPORT: accuracy under attack
# Adversarial inputs are scored against the true test labels.
fgsm_acc, pgd_acc = (
    model.evaluate(x_adv, y_test, verbose=0)[1]
    for x_adv in (x_test_fgsm, x_test_pgd)
)
print(f"📝 FGSM (eps={EPSILON}) accuracy: {fgsm_acc:.4f}")
print(f"📝 PGD  (eps={EPSILON}, steps={PGD_STEPS}) accuracy: {pgd_acc:.4f}")

# ✅ DO NOT EDIT: quick visualization
def show_pairs(x_clean, x_adv, y, n=5, title="Adversarial Samples"):
    """Plot the first ``n`` clean images above their adversarial versions.

    Args:
        x_clean: Indexable batch of clean images; the first ``n`` are shown.
        x_adv: Indexable batch of adversarial images aligned with ``x_clean``.
        y: Labels aligned with ``x_clean``; shown in the top-row titles.
        n: Number of image pairs (columns) to draw.
        title: Figure-level title.
    """
    plt.figure(figsize=(10, 3))
    for col in range(n):
        # Top row: the clean image, titled with its label.
        plt.subplot(2, n, col + 1)
        plt.imshow(x_clean[col].squeeze(), cmap="gray")
        plt.title(f"Clean: {y[col]}")
        plt.axis("off")

        # Bottom row: the adversarial image, clipped to [0, 1] for display.
        plt.subplot(2, n, n + col + 1)
        plt.imshow(np.clip(x_adv[col].squeeze(), 0, 1), cmap="gray")
        plt.title("Adv")
        plt.axis("off")
    plt.suptitle(title)
    plt.show()

# Visualize FGSM results (works best for grayscale MNIST)
# Only single-channel datasets are plotted.
is_grayscale = x_test.shape[-1] == 1
if is_grayscale:
    show_pairs(x_test, x_test_fgsm, y_test, n=5, title="FGSM Examples")


## 6) Blue Team — Adversarial Training (Defense)

In [None]:

# 🔧 EDIT THIS: subset size and epochs for fast fine-tuning
subset = min(ADV_SUBSET, len(x_train))
x_sub, y_sub = x_train[:subset], y_train[:subset]

# ✅ DO NOT EDIT: craft adversarial training data with FGSM
# Train on clean and adversarial copies of the same samples; the adversarial
# copies reuse the labels of the clean samples they were crafted from.
x_sub_adv = fgsm.generate(x=x_sub)
x_mixed   = np.concatenate([x_sub, x_sub_adv], axis=0)
y_mixed   = np.concatenate([y_sub, y_sub], axis=0)

# ✅ DO NOT EDIT: fine-tune
model.fit(x_mixed, y_mixed, epochs=EPOCHS_ADV, batch_size=BATCH_SIZE, verbose=1)

# ✅ DO NOT EDIT: re-craft the adversarial test sets against the fine-tuned model.
# The earlier x_test_fgsm / x_test_pgd were generated from the pre-defense
# weights. Scoring the defended model on those stale examples measures transfer
# from the old model and overstates robustness. `fgsm` and `pgd` attack through
# `classifier`, which wraps this same `model`, so generating again targets the
# updated weights. The names are rebound so later cells also see fresh examples.
x_test_fgsm = fgsm.generate(x=x_test)
x_test_pgd  = pgd.generate(x=x_test)

# 📝 REPORT: post-defense metrics
post_clean_acc = model.evaluate(x_test, y_test, verbose=0)[1]
post_fgsm_acc  = model.evaluate(x_test_fgsm, y_test, verbose=0)[1]
post_pgd_acc   = model.evaluate(x_test_pgd,  y_test, verbose=0)[1]

print(f"📝 After adversarial training → Clean: {post_clean_acc:.4f}, FGSM: {post_fgsm_acc:.4f}, PGD: {post_pgd_acc:.4f}")


## 7) Blue Team — Simple Preprocessing (Denoising)

In [None]:

# ✅ DO NOT EDIT: simple median filter defense (toy demo)
def denoise_batch(x):
    """Median-filter every image channel in a batch with a width-3 window.

    Each image and each channel is filtered independently over its spatial
    axes, so values never mix across samples or across channels.

    Args:
        x: Array laid out as (N, ..., C) — batch axis first, channel axis
            last, spatial axes in between, e.g. (N, H, W, 1) or (N, H, W, 3).

    Returns:
        A new array with the same shape and dtype as ``x``; the input is
        not modified.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to filter; keep the shape and dtype contract.
        return np.empty_like(x)
    # Size 1 on the batch and channel axes restricts the window to the
    # spatial axes. One call handles grayscale and multi-channel input alike
    # and replaces the per-image, per-channel Python loop.
    window = (1,) + (3,) * (x.ndim - 2) + (1,)
    return ndi.median_filter(x, size=window)

# Filter the PGD test examples, then score the model on the filtered images
x_test_pgd_denoised = denoise_batch(x_test_pgd)
denoised_metrics = model.evaluate(x_test_pgd_denoised, y_test, verbose=0)
denoised_acc = denoised_metrics[1]  # index 1 is read as accuracy, as in the other report cells
print(f"📝 PGD accuracy after denoising: {denoised_acc:.4f}")


## 8) Save (Optional)

In [None]:

# 🔧 EDIT THIS: save the defended model if needed
# Saving is opt-in through SAVE_MODEL in the checklist cell.
if not SAVE_MODEL:
    print("Skipping save (set SAVE_MODEL=True to enable).")
else:
    model.save(MODEL_SAVE_PATH)
    print("Saved defended model to:", MODEL_SAVE_PATH)


## 9) Final Report Summary (Copy-Paste into your doc)

In [None]:

# 📝 REPORT: gather every headline number and setting into one mapping
report = {
    # Results before any defense
    "Dataset": DATASET,
    "Baseline clean accuracy": float(baseline_clean_acc),
    "FGSM acc (pre-defense)": float(fgsm_acc),
    "PGD acc (pre-defense)": float(pgd_acc),
    # Results after adversarial fine-tuning and after denoising
    "Clean acc (post adv training)": float(post_clean_acc),
    "FGSM acc (post adv training)": float(post_fgsm_acc),
    "PGD acc (post adv training)": float(post_pgd_acc),
    "PGD acc (after denoising)": float(denoised_acc),
    # Settings that produced the numbers above
    "FGSM epsilon": float(EPSILON),
    "PGD steps": int(PGD_STEPS),
    "PGD step size": float(PGD_STEP_SIZE),
    "Adv subset used": int(subset),
    "Epochs (baseline/adv)": f"{EPOCHS_BASE}/{EPOCHS_ADV}",
}
for label, value in report.items():
    print(f"{label}: {value}")

# Conceptual mapping of this exercise onto MITRE ATLAS, one line per print
mitre_lines = (
    "\nMITRE ATLAS Mapping (Conceptual)",
    "- Evasion → Adversarial Examples (FGSM/PGD)",
    "- Mitigations: Adversarial training, input transforms (denoising), monitoring for distribution shift",
    "- Recommendation: Gate releases on minimum robustness threshold; automate in CI/CD",
)
for line in mitre_lines:
    print(line)
