# Backdoor Attack Evaluation for LDP-MIC

**Key Point**: DP noise is applied to DATA before training (per paper Section 3.1.3).
- No-DP: No noise on data → model sees clean trigger
- PrivateFL: Uniform noise on data → trigger partially corrupted
- LDP-MIC: Correlation-aware noise → more noise on trigger region (low MIC)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
import numpy as np
import pandas as pd
import copy
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

def set_seed(seed=42):
    """Seed the NumPy and PyTorch (CPU and all CUDA devices) generators with `seed`."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# Seed the generators once when this cell runs; run_experiment re-seeds per method.
set_seed(42)
# Use CUDA when available, otherwise CPU. Models and batches are moved to this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

Device: cpu


In [None]:
# Configuration
# Single dictionary of experiment hyper-parameters read by the cells below.
CONFIG = {
    'num_clients': 100,  # number of federated clients the training set is split across
    'num_rounds': 100,  # number of federated aggregation rounds
    'sample_rate': 0.3,  # fraction of clients sampled in each round
    'local_epochs': 1,  # NOTE(review): not read by any code visible in this notebook section
    'batch_size': 64,  # batch size of each client's DataLoader
    'learning_rate': 0.01,  # SGD learning rate for local training
    'epsilon': 8.0,  # privacy parameter passed to PrivateFL_Data / LDPMIC_Data
    'delta': 1e-5,  # privacy parameter passed to PrivateFL_Data / LDPMIC_Data
    'clip_bound': 1.0,  # NOTE(review): not read by any code visible in this notebook section
    'malicious_fraction': 0.30,  # fraction of clients marked malicious
    'poison_rate': 0.80,  # fraction of a malicious client's samples that get the trigger
    'target_class': 7,  # label assigned to poisoned samples
    'trigger_size': 4,  # side length (pixels) of the square trigger patch
    'dirichlet_alpha': 0.5,  # Dirichlet concentration used for the non-IID partition
    'epsilon_mic_ratio': 0.2,  # LDPMIC_Data uses epsilon * (1 - ratio) for the noise mechanism
    'amp_factor': 2.0,  # upper bound of LDPMIC_Data's per-pixel noise multiplier
}
print(f"Attack: {CONFIG['malicious_fraction']*100:.0f}% malicious, {CONFIG['poison_rate']*100:.0f}% poison rate")

Attack: 30% malicious, 80% poison rate


In [None]:
# Model
class ThreeLayerDNN(nn.Module):
    """Fully connected classifier 784 -> 256 -> 128 -> 10 with ReLU and dropout (p=0.2)."""

    def __init__(self):
        super().__init__()
        # Creation order (fc1, fc2, fc3) fixes the order in which weight
        # initialisation draws from the RNG, so it is kept as-is.
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Flatten each sample and return the class logits, shape (batch, 10)."""
        flat = x.view(x.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        hidden = self.dropout(hidden)
        hidden = F.relu(self.fc2(hidden))
        hidden = self.dropout(hidden)
        return self.fc3(hidden)

In [None]:
# Backdoor Attack
class BackdoorAttack:
    """Patch-based backdoor: stamps a square trigger and carries the target label."""

    def __init__(self, trigger_size=4, target_class=7):
        self.target_class = target_class
        self.trigger_size = trigger_size

    def add_trigger(self, image):
        """Return a copy of `image` with the trigger patch set to 1.0.

        A 2-D image is treated as a single channel and returned 2-D again;
        any other input is indexed as (C, H, W). The input is not modified.
        """
        was_2d = len(image.shape) == 2
        stamped = image.clone()
        if was_2d:
            stamped = stamped.unsqueeze(0)

        height, width = stamped.shape[1], stamped.shape[2]
        size = self.trigger_size
        # The patch sits in the bottom-right corner, inset by one pixel from
        # both edges (the last row and last column are left untouched).
        rows = slice(height - size - 1, height - 1)
        cols = slice(width - size - 1, width - 1)
        stamped[:, rows, cols] = 1.0

        if was_2d:
            return stamped.squeeze(0)
        return stamped

In [None]:
# MIC computation
def compute_mic_scores(images, labels):
    """Approximate MIC using correlation.

    For every pixel position, returns the absolute Pearson correlation between
    that pixel's value and the (numeric) class label, estimated on a random
    subsample of at most 500 samples drawn with ``np.random``. This is a
    stand-in for MIC, not the MIC statistic itself.

    Args:
        images: array or tensor whose first axis indexes samples; all remaining
            axes are flattened into one feature axis.
        labels: array or tensor with one label per sample.

    Returns:
        1-D float array with one score in [0, 1] per flattened pixel. Pixels
        whose standard deviation on the subsample is <= 1e-6, and any
        non-finite correlation, get a score of 0.
    """
    # detach()/cpu() make this work for tensors that require grad or live on
    # an accelerator; a bare .numpy() raises for both.
    if isinstance(images, torch.Tensor):
        images = images.detach().cpu().numpy()
    if isinstance(labels, torch.Tensor):
        labels = labels.detach().cpu().numpy()
    images = np.asarray(images)
    images = images.reshape(len(images), -1)
    labels = np.asarray(labels).reshape(-1)

    mic_scores = np.zeros(images.shape[1])
    idx = np.random.choice(len(images), min(500, len(images)), replace=False)

    x = images[idx].astype(np.float64)
    y = labels[idx].astype(np.float64)
    y_centered = y - y.mean()
    y_norm = np.sqrt(np.dot(y_centered, y_centered))

    # Near-constant pixels carry no signal and would divide by ~0.
    informative = x.std(axis=0) > 1e-6
    if y_norm == 0 or not informative.any():
        return mic_scores

    # One vectorised Pearson computation instead of a corrcoef call per pixel.
    x_centered = x[:, informative] - x[:, informative].mean(axis=0)
    x_norm = np.sqrt(np.einsum('ij,ij->j', x_centered, x_centered))
    with np.errstate(divide='ignore', invalid='ignore'):
        corr = np.abs(x_centered.T @ y_centered) / (x_norm * y_norm)
    corr = np.where(np.isfinite(corr), np.minimum(corr, 1.0), 0.0)

    mic_scores[informative] = corr
    return mic_scores

In [None]:
# Data-level Privacy Mechanisms
class NoDP_Data:
    """Baseline privatizer that adds no noise at all."""

    def privatize(self, images, labels):
        """Return the very same `images` and `labels` objects, untouched."""
        untouched = (images, labels)
        return untouched

class PrivateFL_Data:
    """Adds Gaussian noise with one shared standard deviation to every pixel."""

    def __init__(self, epsilon, delta):
        # sigma = sqrt(2 * ln(1.25 / delta)) / epsilon
        log_term = np.log(1.25 / delta)
        self.sigma = np.sqrt(2 * log_term) / epsilon

    def privatize(self, images, labels):
        """Return (noisy images clamped to [-3, 3], unchanged labels)."""
        perturbation = torch.randn_like(images) * self.sigma
        noisy = images + perturbation
        return torch.clamp(noisy, -3, 3), labels

class LDPMIC_Data:
    """Correlation-aware noise: more noise on low-MIC pixels (trigger region).

    Each flattened pixel gets Gaussian noise whose standard deviation is a base
    sigma multiplied by ``clip((1 - mic) * amp_factor, 0.1, amp_factor)``, so
    pixels with a low score receive more noise than pixels with a high score.

    Args:
        epsilon: total privacy parameter; only ``epsilon * (1 - mic_ratio)`` is
            used to calibrate the noise.
        delta: privacy parameter used in the base sigma.
        mic_ratio: fraction of `epsilon` withheld from the noise mechanism.
        amp_factor: upper bound of the per-pixel noise multiplier.

    NOTE(review): the scores come from compute_mic_scores, which adds no noise
    of its own - confirm how the withheld ``mic_ratio`` budget is accounted for.
    """

    def __init__(self, epsilon, delta, mic_ratio=0.2, amp_factor=2.0):
        self.epsilon_mech = epsilon * (1 - mic_ratio)
        self.delta = delta
        self.amp_factor = amp_factor
        self.mic_scores = None  # per-pixel scores; filled by set_mic or lazily in privatize

    def set_mic(self, images, labels):
        """Compute and cache the per-pixel scores from `images` and `labels`."""
        self.mic_scores = compute_mic_scores(images, labels)

    def privatize(self, images, labels):
        """Return (noisy images clamped to [-3, 3], unchanged labels).

        If no scores have been set yet they are computed from this very batch.
        The output has the same shape, dtype and device as `images`.
        """
        if self.mic_scores is None:
            self.set_mic(images, labels)

        # More noise on low-MIC pixels
        scores = np.asarray(self.mic_scores)
        noise_scale = np.clip((1 - scores) * self.amp_factor, 0.1, self.amp_factor)
        base_sigma = np.sqrt(2 * np.log(1.25 / self.delta)) / self.epsilon_mech

        # reshape (not view) so non-contiguous batches are accepted as well.
        flat = images.reshape(images.size(0), -1)
        # Build sigma on the images' device to avoid a CPU/accelerator mismatch.
        sigma_tensor = torch.tensor(
            noise_scale * base_sigma, dtype=images.dtype, device=images.device
        )
        noise = torch.randn_like(flat) * sigma_tensor.unsqueeze(0)

        noisy = (flat + noise).reshape(images.shape)
        return torch.clamp(noisy, -3, 3), labels

In [None]:
# Data loading and partitioning
# Convert images to tensors, then normalise with the mean/std constants given below.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
# download=True fetches MNIST into ./data when it is not already there.
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, download=True, transform=transform)

def _collect_labels(dataset):
    """Return one label per sample as a 1-D array, avoiding item loading when possible."""
    targets = getattr(dataset, 'targets', None)
    # Only trust `targets` when no target_transform could make it differ from
    # the label that __getitem__ returns.
    if targets is not None and getattr(dataset, 'target_transform', None) is None:
        if isinstance(targets, torch.Tensor):
            targets = targets.detach().cpu().numpy()
        labels = np.asarray(targets).reshape(-1)
        if len(labels) == len(dataset):
            return labels
    # Fallback: read the label of every sample (runs the image transform too).
    return np.array([dataset[i][1] for i in range(len(dataset))])


def dirichlet_partition(dataset, num_clients, alpha=0.5, num_classes=10):
    """Split sample indices across clients with a per-class Dirichlet prior.

    For each class 0..num_classes-1 the class's indices are shuffled and
    divided according to proportions drawn from Dirichlet(alpha, ..., alpha).
    Proportions are truncated to integer counts; the last client receives
    whatever remains so that every index is assigned exactly once.

    Args:
        dataset: indexable dataset whose items are (sample, label) pairs;
            a `targets` attribute is used for the labels when available.
        num_clients: number of partitions to create.
        alpha: Dirichlet concentration; smaller values give more skewed splits.
        num_classes: number of label values to partition (labels 0..num_classes-1).

    Returns:
        List of `num_clients` lists of dataset indices. Uses the global
        ``np.random`` state, so seed it beforehand for reproducible splits.
    """
    labels = _collect_labels(dataset)
    class_indices = [np.where(labels == c)[0] for c in range(num_classes)]
    client_indices = [[] for _ in range(num_clients)]

    for members in class_indices:
        np.random.shuffle(members)
        props = np.random.dirichlet(np.repeat(alpha, num_clients))
        counts = (props * len(members)).astype(int)
        # Truncation loses a few samples; hand the remainder to the last client.
        counts[-1] = len(members) - counts[:-1].sum()
        start = 0
        for cid, count in enumerate(counts):
            client_indices[cid].extend(members[start:start + count])
            start += count
    return client_indices

# Partition first, then pick the malicious clients: both draw from the global
# NumPy RNG, so this order determines which clients end up malicious.
client_indices = dirichlet_partition(train_dataset, CONFIG['num_clients'], CONFIG['dirichlet_alpha'])
num_mal = int(CONFIG['num_clients'] * CONFIG['malicious_fraction'])
# Client ids are drawn without replacement and kept in a set for membership tests.
malicious_clients = set(np.random.choice(CONFIG['num_clients'], num_mal, replace=False))
print(f"Malicious clients: {num_mal}")

100%|██████████| 9.91M/9.91M [00:00<00:00, 18.0MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 483kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.43MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 10.2MB/s]


Malicious clients: 30


In [None]:
# Client Dataset: poison first, then apply DP noise
class ClientDataset(Dataset):
    """One client's local data: poisoned first (if malicious), then privatized.

    Args:
        base: indexable dataset returning (image, label) pairs.
        indices: positions in `base` that belong to this client.
        is_mal: whether this client poisons part of its data.
        attack: object with `add_trigger(image)` and `target_class`; only used
            for poisoned samples.
        poison_rate: fraction of this client's samples to poison when `is_mal`.
        privatizer: object with `privatize(images, labels)`; a falsy value
            skips privatization.

    A client with no indices yields an empty dataset (length 0) instead of
    failing, so callers can skip it.
    """

    def __init__(self, base, indices, is_mal, attack, poison_rate, privatizer):
        num_samples = len(indices)
        if num_samples == 0:
            # torch.stack cannot handle an empty list; return an empty dataset.
            self.images = torch.empty(0)
            self.labels = torch.empty(0, dtype=torch.long)
            return

        if is_mal:
            num_poison = int(num_samples * poison_rate)
            poison_idx = set(np.random.choice(num_samples, num_poison, replace=False))
        else:
            poison_idx = set()

        images, labels = [], []
        for i, gidx in enumerate(indices):
            img, lbl = base[gidx]
            if i in poison_idx:
                img = attack.add_trigger(img)
                lbl = attack.target_class
            images.append(img)
            labels.append(lbl)

        self.images = torch.stack(images)
        self.labels = torch.tensor(labels)

        # Apply DP noise to data BEFORE training, i.e. after the trigger is stamped.
        if privatizer:
            self.images, self.labels = privatizer.privatize(self.images, self.labels)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.images[idx], self.labels[idx]

In [None]:
# Training utilities
def local_train(model, loader, lr, device, epochs=1):
    """Train `model` in place with plain SGD and return its weight delta.

    Args:
        model: module to train; it is modified in place.
        loader: iterable of (data, target) batches.
        lr: SGD learning rate.
        device: device each batch is moved to before the forward pass.
        epochs: number of passes over `loader` (default 1).

    Returns:
        Dict mapping each state_dict key to (value after training - value
        before training).
    """
    model.train()
    init_w = {k: v.clone() for k, v in model.state_dict().items()}
    opt = optim.SGD(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()  # built once rather than once per batch

    for _ in range(epochs):
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            opt.zero_grad()
            criterion(model(data), target).backward()
            opt.step()

    return {k: v - init_w[k] for k, v in model.state_dict().items()}

def evaluate(model, loader, device):
    """Return the top-1 accuracy of `model` over all batches of `loader`.

    Puts the model in eval mode and runs without gradient tracking.
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for batch, truth in loader:
            batch = batch.to(device)
            truth = truth.to(device)
            predicted = model(batch).argmax(1)
            hits += (predicted == truth).sum().item()
            seen += truth.size(0)
    return hits / seen

def _count_target_predictions(model, images, target_class, device):
    """Return how many of the stacked `images` the model assigns to `target_class`."""
    batch = torch.stack(images).to(device)
    return (model(batch).argmax(1) == target_class).sum().item()


def evaluate_asr(model, test_data, attack, device, batch_size=256):
    """Attack Success Rate: triggered non-target samples classified as target.

    Samples whose true label already equals `attack.target_class` are skipped.
    Every other sample gets the trigger and is classified; inference runs in
    batches of `batch_size` instead of one forward pass per image.

    Args:
        model: classifier to probe; put in eval mode.
        test_data: indexable dataset of (image, label) pairs.
        attack: object providing `add_trigger(image)` and `target_class`.
        device: device the batches are moved to.
        batch_size: number of triggered images per forward pass.

    Returns:
        Fraction of triggered non-target samples predicted as the target
        class, or 0.0 when there is no non-target sample to evaluate.
    """
    model.eval()
    batch_size = max(1, int(batch_size))
    success, total = 0, 0
    pending = []

    with torch.no_grad():
        for i in range(len(test_data)):
            img, lbl = test_data[i]
            if lbl == attack.target_class:
                continue
            pending.append(attack.add_trigger(img))
            total += 1
            if len(pending) >= batch_size:
                success += _count_target_predictions(model, pending, attack.target_class, device)
                pending = []
        if pending:
            success += _count_target_predictions(model, pending, attack.target_class, device)

    return success / total if total else 0.0

In [None]:
# Main experiment
def run_experiment(name, privatizer):
    """Run one federated backdoor experiment for a given data privatizer.

    Re-seeds the RNGs, builds every client's (possibly poisoned, then
    privatized) dataset, trains a fresh ThreeLayerDNN with weighted averaging
    of client updates for CONFIG['num_rounds'] rounds, and evaluates clean
    accuracy and attack success rate every 10 rounds.

    Args:
        name: label used for the progress bar.
        privatizer: object with `privatize(images, labels)`; an LDPMIC_Data
            instance additionally gets its scores set from clean training data.

    Returns:
        (final_accuracy, final_asr, history) where history is a list of
        {'round', 'accuracy', 'asr'} dicts, one per evaluated round.
    """
    set_seed(42)
    attack = BackdoorAttack(CONFIG['trigger_size'], CONFIG['target_class'])

    # For LDP-MIC, compute MIC first (on the leading clean training samples).
    if isinstance(privatizer, LDPMIC_Data):
        num_ref = min(2000, len(train_dataset))
        sample_img = torch.stack([train_dataset[i][0] for i in range(num_ref)])
        sample_lbl = torch.tensor([train_dataset[i][1] for i in range(num_ref)])
        privatizer.set_mic(sample_img, sample_lbl)

    # Prepare client datasets
    client_data = {}
    for cid in range(CONFIG['num_clients']):
        is_mal = cid in malicious_clients
        client_data[cid] = ClientDataset(
            train_dataset, client_indices[cid], is_mal, attack,
            CONFIG['poison_rate'] if is_mal else 0, privatizer
        )

    global_model = ThreeLayerDNN().to(device)
    test_loader = DataLoader(test_dataset, batch_size=256)
    history = []
    clients_per_round = int(CONFIG['num_clients'] * CONFIG['sample_rate'])

    for rnd in tqdm(range(CONFIG['num_rounds']), desc=name):
        selected = np.random.choice(CONFIG['num_clients'], clients_per_round, replace=False)
        updates, weights = [], []

        for cid in selected:
            if len(client_data[cid]) == 0:
                continue
            loader = DataLoader(client_data[cid], batch_size=CONFIG['batch_size'], shuffle=True)
            local_model = copy.deepcopy(global_model)
            updates.append(local_train(local_model, loader, CONFIG['learning_rate'], device))
            weights.append(len(client_data[cid]))

        # Aggregate: average of client deltas weighted by local dataset size.
        # Skipped when every sampled client was empty (nothing to average).
        if updates:
            total_w = sum(weights)
            agg = {k: sum(w/total_w * u[k] for w, u in zip(weights, updates)) for k in updates[0]}
            state = global_model.state_dict()
            for k in state:
                state[k] += agg[k]
            global_model.load_state_dict(state)

        # Evaluate every 10 rounds
        if (rnd + 1) % 10 == 0:
            acc = evaluate(global_model, test_loader, device)
            asr = evaluate_asr(global_model, test_dataset, attack, device)
            history.append({'round': rnd+1, 'accuracy': acc, 'asr': asr})
            tqdm.write(f"  R{rnd+1}: Acc={acc:.4f}, ASR={asr:.4f}")

    # The model has not changed since the last round; reuse that round's
    # metrics when it was evaluated instead of running both evaluations again.
    if history and history[-1]['round'] == CONFIG['num_rounds']:
        final_acc, final_asr = history[-1]['accuracy'], history[-1]['asr']
    else:
        final_acc = evaluate(global_model, test_loader, device)
        final_asr = evaluate_asr(global_model, test_dataset, attack, device)
    return final_acc, final_asr, history

In [None]:
# Run experiments
# Privatizers under comparison, keyed by the name used in logs and result tables.
methods = {}
methods['No-DP'] = NoDP_Data()
methods['PrivateFL'] = PrivateFL_Data(CONFIG['epsilon'], CONFIG['delta'])
methods['LDP-MIC'] = LDPMIC_Data(
    CONFIG['epsilon'], CONFIG['delta'],
    CONFIG['epsilon_mic_ratio'], CONFIG['amp_factor'],
)

results = []      # one summary row per method
all_history = {}  # method name -> list of per-round metrics

for name in methods:
    priv = methods[name]
    print(f"\n{'='*50}\n{name}\n{'='*50}")
    acc, asr, hist = run_experiment(name, priv)
    all_history[name] = hist
    results.append({'Method': name, 'Accuracy': acc, 'ASR': asr})
    print(f"FINAL: Acc={acc:.4f}, ASR={asr:.4f}")


No-DP


No-DP:  10%|█         | 10/100 [00:17<04:34,  3.05s/it]

  R10: Acc=0.2724, ASR=0.9135


No-DP:  20%|██        | 20/100 [00:33<04:09,  3.12s/it]

  R20: Acc=0.7245, ASR=0.4280


No-DP:  30%|███       | 30/100 [00:51<03:28,  2.98s/it]

  R30: Acc=0.8196, ASR=0.4661


No-DP:  40%|████      | 40/100 [01:09<03:08,  3.15s/it]

  R40: Acc=0.8440, ASR=0.5349


No-DP:  50%|█████     | 50/100 [01:26<02:26,  2.92s/it]

  R50: Acc=0.8645, ASR=0.6932


No-DP:  60%|██████    | 60/100 [01:44<02:04,  3.10s/it]

  R60: Acc=0.8779, ASR=0.8288


No-DP:  70%|███████   | 70/100 [02:01<01:33,  3.12s/it]

  R70: Acc=0.8840, ASR=0.8903


No-DP:  80%|████████  | 80/100 [02:18<00:59,  2.99s/it]

  R80: Acc=0.8897, ASR=0.9320


No-DP:  90%|█████████ | 90/100 [02:36<00:31,  3.13s/it]

  R90: Acc=0.8928, ASR=0.9339


No-DP: 100%|██████████| 100/100 [02:53<00:00,  1.74s/it]


  R100: Acc=0.8967, ASR=0.9711
FINAL: Acc=0.8967, ASR=0.9711

PrivateFL


PrivateFL:  10%|█         | 10/100 [00:16<04:29,  2.99s/it]

  R10: Acc=0.2490, ASR=0.9262


PrivateFL:  20%|██        | 20/100 [00:34<04:12,  3.16s/it]

  R20: Acc=0.6599, ASR=0.4046


PrivateFL:  30%|███       | 30/100 [00:52<03:40,  3.15s/it]

  R30: Acc=0.7922, ASR=0.3877


PrivateFL:  40%|████      | 40/100 [01:10<03:10,  3.17s/it]

  R40: Acc=0.8245, ASR=0.4027


PrivateFL:  50%|█████     | 50/100 [01:28<02:37,  3.16s/it]

  R50: Acc=0.8536, ASR=0.5438


PrivateFL:  60%|██████    | 60/100 [01:45<02:01,  3.04s/it]

  R60: Acc=0.8658, ASR=0.7051


PrivateFL:  70%|███████   | 70/100 [02:03<01:34,  3.14s/it]

  R70: Acc=0.8774, ASR=0.7803


PrivateFL:  80%|████████  | 80/100 [02:20<01:01,  3.05s/it]

  R80: Acc=0.8840, ASR=0.8558


PrivateFL:  90%|█████████ | 90/100 [02:38<00:31,  3.12s/it]

  R90: Acc=0.8868, ASR=0.8510


PrivateFL: 100%|██████████| 100/100 [02:55<00:00,  1.75s/it]


  R100: Acc=0.8933, ASR=0.9123
FINAL: Acc=0.8933, ASR=0.9123

LDP-MIC


LDP-MIC:  10%|█         | 10/100 [00:16<04:37,  3.08s/it]

  R10: Acc=0.2704, ASR=0.8249


LDP-MIC:  20%|██        | 20/100 [00:35<04:12,  3.16s/it]

  R20: Acc=0.5223, ASR=0.5571


LDP-MIC:  30%|███       | 30/100 [00:52<03:40,  3.15s/it]

  R30: Acc=0.7564, ASR=0.2775


LDP-MIC:  40%|████      | 40/100 [01:10<03:04,  3.08s/it]

  R40: Acc=0.7828, ASR=0.2859


LDP-MIC:  50%|█████     | 50/100 [01:28<02:36,  3.13s/it]

  R50: Acc=0.8126, ASR=0.1741


LDP-MIC:  60%|██████    | 60/100 [01:45<02:03,  3.10s/it]

  R60: Acc=0.8397, ASR=0.3751


LDP-MIC:  70%|███████   | 70/100 [02:03<01:36,  3.20s/it]

  R70: Acc=0.8464, ASR=0.2908


LDP-MIC:  80%|████████  | 80/100 [02:21<01:03,  3.18s/it]

  R80: Acc=0.8581, ASR=0.2745


LDP-MIC:  90%|█████████ | 90/100 [02:38<00:30,  3.03s/it]

  R90: Acc=0.8669, ASR=0.4025


LDP-MIC: 100%|██████████| 100/100 [02:56<00:00,  1.76s/it]


  R100: Acc=0.8671, ASR=0.6733
FINAL: Acc=0.8671, ASR=0.6733


In [None]:
# Create results table
df_results = pd.DataFrame(results)

# Relative ASR change (in percent) against the first row, rendered as a signed string.
baseline_asr = df_results.loc[0, 'ASR']
relative_change = (df_results['ASR'] - baseline_asr) / baseline_asr * 100
df_results['ASR_vs_NoDP'] = relative_change.apply(lambda x: f"{x:+.1f}%")

print("\n" + "="*60)
print("BACKDOOR ATTACK RESULTS")
print(f"Config: {CONFIG['malicious_fraction']*100:.0f}% malicious, {CONFIG['poison_rate']*100:.0f}% poison, ε={CONFIG['epsilon']}")
print("="*60)
print(df_results.to_string(index=False))
print("="*60)

# Save to CSV
df_results.to_csv('backdoor_results.csv', index=False)
print("\nSaved: backdoor_results.csv")


BACKDOOR ATTACK RESULTS
Config: 30% malicious, 80% poison, ε=8.0
   Method  Accuracy      ASR ASR_vs_NoDP
    No-DP    0.8967 0.971132       +0.0%
PrivateFL    0.8933 0.912283       -6.1%
  LDP-MIC    0.8671 0.673317      -30.7%

Saved: backdoor_results.csv


In [None]:
# Save history
# Flatten {method: [round metrics, ...]} into one row per (method, round).
history_rows = [
    {'Method': method, **h}
    for method, hist in all_history.items()
    for h in hist
]

df_history = pd.DataFrame(history_rows)
df_history.to_csv('backdoor_history.csv', index=False)
print("Saved: backdoor_history.csv")
print(df_history)

Saved: backdoor_history.csv
       Method  round  accuracy       asr
0       No-DP     10    0.2724  0.913509
1       No-DP     20    0.7245  0.427998
2       No-DP     30    0.8196  0.466117
3       No-DP     40    0.8440  0.534886
4       No-DP     50    0.8645  0.693156
5       No-DP     60    0.8779  0.828801
6       No-DP     70    0.8840  0.890325
7       No-DP     80    0.8897  0.932011
8       No-DP     90    0.8928  0.933905
9       No-DP    100    0.8967  0.971132
10  PrivateFL     10    0.2490  0.926215
11  PrivateFL     20    0.6599  0.404592
12  PrivateFL     30    0.7922  0.387650
13  PrivateFL     40    0.8245  0.402697
14  PrivateFL     50    0.8536  0.543803
15  PrivateFL     60    0.8658  0.705082
16  PrivateFL     70    0.8774  0.780317
17  PrivateFL     80    0.8840  0.855774
18  PrivateFL     90    0.8868  0.850981
19  PrivateFL    100    0.8933  0.912283
20    LDP-MIC     10    0.2704  0.824900
21    LDP-MIC     20    0.5223  0.557066
22    LDP-MIC     30    0.756