Skip to content

Commit

Permalink
Add implementation of GenAttack (#507)
Browse files Browse the repository at this point in the history
* Add first version of GenAttack

* Add type annotations

* Restore unit tests

* Add docstring

* Fix type annotations

* Add support for JAX models

* Apply attack only to real models

* Refactor code, add unit tests

* Improve stability of unit tests

* Improve test coverage

* Fix type annotation

* Improve coverage

* Refactor utility functions of gen attack
  • Loading branch information
zimmerrol committed Mar 2, 2020
1 parent 3999d43 commit ad7ba73
Show file tree
Hide file tree
Showing 6 changed files with 573 additions and 0 deletions.
1 change: 1 addition & 0 deletions foolbox/attacks/__init__.py
Expand Up @@ -47,6 +47,7 @@
L2BrendelBethgeAttack,
LinfinityBrendelBethgeAttack,
)
from .gen_attack import GenAttack # noqa: F401

# from .blended_noise import LinearSearchBlendedUniformNoiseAttack # noqa: F401
# from .brendel_bethge import ( # noqa: F401
Expand Down
255 changes: 255 additions & 0 deletions foolbox/attacks/gen_attack.py
@@ -0,0 +1,255 @@
from typing import Optional, Any, Tuple, Union
import numpy as np
import eagerpy as ep

from ..devutils import atleast_kd

from ..models import Model

from ..criteria import TargetedMisclassification

from ..distances import linf

from .base import FixedEpsilonAttack
from .base import T
from .base import get_channel_axis
from .base import raise_if_kwargs
import math

from .gen_attack_utils import rescale_images


class GenAttack(FixedEpsilonAttack):
    """A black-box algorithm for L-infinity adversarials. [#Alz18]_

    This attack performs a genetic search in order to find an adversarial
    perturbation in a black-box scenario in as few queries as possible.

    Args:
        steps: Number of generations that are evaluated.
        population: Number of candidate perturbations kept per input.
        mutation_probability: Lower bound of the adaptive probability with
            which an individual noise element is mutated.
        mutation_range: Lower bound of the adaptive mutation range, given as
            a fraction of epsilon.
        sampling_temperature: Temperature of the softmax over the fitness
            values that is used to sample the parents.
        channel_axis: Index of the channel axis of the inputs. Only used
            together with ``reduced_dims``; inferred from the model if None.
        reduced_dims: If given, the spatial size of the noise that is
            searched; the noise is rescaled to the input size before it is
            applied.

    References:
        .. [#Alz18] Moustafa Alzantot, Yash Sharma, Supriyo Chakraborty,
            Huan Zhang, Cho-Jui Hsieh, Mani Srivastava,
            "GenAttack: Practical Black-box Attacks with Gradient-Free
            Optimization",
            https://arxiv.org/abs/1805.11090
    """

    def __init__(
        self,
        *,
        steps: int = 1000,
        population: int = 10,
        mutation_probability: float = 0.10,
        mutation_range: float = 0.15,
        sampling_temperature: float = 0.3,
        channel_axis: Optional[int] = None,
        reduced_dims: Optional[Tuple[int, int]] = None,
    ):
        self.steps = steps
        self.population = population
        # The two mutation parameters act as lower bounds: the effective
        # values are adapted in `run` depending on the number of plateaus.
        self.min_mutation_probability = mutation_probability
        self.min_mutation_range = mutation_range
        self.sampling_temperature = sampling_temperature
        self.channel_axis = channel_axis
        self.reduced_dims = reduced_dims

    distance = linf

    def apply_noise(
        self,
        x: ep.TensorType,
        noise: ep.TensorType,
        epsilon: float,
        channel_axis: Optional[int],
        bounds: Optional[Tuple[float, float]] = None,
    ) -> ep.TensorType:
        """Add ``noise`` to ``x`` while respecting the L-infinity budget.

        Args:
            x: The unperturbed inputs.
            noise: The perturbation. If its shape differs from the shape of
                ``x`` and ``channel_axis`` is given, it is rescaled to the
                shape of ``x`` first.
            epsilon: Maximum absolute value of each element of the noise.
            channel_axis: Channel axis handed to ``rescale_images``.
            bounds: Optional ``(lower, upper)`` range of valid inputs. If
                given, the perturbed inputs are clipped to this range.

        Returns:
            The perturbed inputs.
        """
        if noise.shape != x.shape and channel_axis is not None:
            # upscale noise
            noise = rescale_images(noise, x.shape, channel_axis)

        # Restrict the *perturbation* (not the perturbed input) to the
        # epsilon ball; the rescaling above is not guaranteed to keep it.
        noise = ep.clip(noise, -epsilon, +epsilon)
        perturbed = x + noise

        if bounds is not None:
            lower, upper = bounds
            perturbed = ep.clip(perturbed, lower, upper)

        return perturbed

    def choice(
        self, a: int, size: Union[int, ep.TensorType], replace: bool, p: ep.TensorType
    ) -> Any:
        """Sample ``size`` indices from ``range(a)`` with probabilities ``p``.

        The probabilities are converted to NumPy and the sampling is done
        with ``np.random.choice``, so the result is a NumPy array.
        """
        p = p.numpy()
        x = np.random.choice(a, size, replace, p)
        return x

    def run(
        self,
        model: Model,
        inputs: T,
        criterion: TargetedMisclassification,
        *,
        epsilon: float,
        **kwargs: Any,
    ) -> T:
        """Run the genetic search and return the perturbed inputs.

        Args:
            model: The model to attack; it is only queried for its outputs.
            inputs: The inputs to perturb.
            criterion: The targeted criterion providing one target class per
                input.
            epsilon: The L-infinity size of the allowed perturbation.

        Returns:
            The inputs perturbed with the fittest noise of the last
            evaluated generation, in the same tensor type as ``inputs``.

        Raises:
            ValueError: If the criterion is not a TargetedMisclassification,
                if the target classes do not have shape ``(N,)``, or if the
                channel axis cannot be inferred or is neither 1 nor 3.
            NotImplementedError: If ``reduced_dims`` is set and the inputs
                are not 4-dimensional.
        """
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        del inputs, kwargs

        N = len(x)

        if isinstance(criterion, TargetedMisclassification):
            classes = criterion.target_classes
        else:
            raise ValueError("unsupported criterion")

        if classes.shape != (N,):
            raise ValueError(
                f"expected target_classes to have shape ({N},), got {classes.shape}"
            )

        noise_shape: Union[Tuple[int, int, int, int], Tuple[int, ...]]
        channel_axis: Optional[int] = None
        if self.reduced_dims is not None:
            if x.ndim != 4:
                raise NotImplementedError(
                    "only implemented for inputs with two spatial dimensions"
                    " (and one channel and one batch dimension)"
                )

            if self.channel_axis is None:
                maybe_axis = get_channel_axis(model, x.ndim)
                if maybe_axis is None:
                    raise ValueError(
                        "cannot infer the data_format from the model, please"
                        " specify channel_axis when initializing the attack"
                    )
                else:
                    channel_axis = maybe_axis
            else:
                channel_axis = self.channel_axis % x.ndim

            if channel_axis == 1:
                noise_shape = (x.shape[1], *self.reduced_dims)
            elif channel_axis == 3:
                noise_shape = (*self.reduced_dims, x.shape[3])
            else:
                raise ValueError(
                    f"expected 'channel_axis' to be 1 or 3, got {channel_axis}"
                )
        else:
            noise_shape = x.shape[1:]  # pragma: no cover

        # Valid input range of the model, used to keep the perturbed inputs
        # inside the domain the model expects.
        lower, upper = model.bounds

        def perturb(noise: ep.TensorType) -> ep.TensorType:
            return self.apply_noise(x, noise, epsilon, channel_axis, (lower, upper))

        def is_adversarial(logits: ep.TensorType) -> ep.TensorType:
            return ep.argmax(logits, 1) == classes

        def calculate_fitness(logits: ep.TensorType) -> ep.TensorType:
            first = logits[range(N), classes]
            # NOTE(review): the GenAttack paper defines the second term over
            # the non-target classes only, which would correspond to
            # log(sum(exp(logits)) - exp(first)); here the raw target logit
            # is subtracted instead. Confirm whether this is intended.
            second = ep.log(ep.exp(logits).sum(1) - first)

            return first - second

        num_plateaus = ep.zeros(x, len(x))
        mutation_probability = (
            ep.ones_like(num_plateaus) * self.min_mutation_probability
        )
        mutation_range = ep.ones_like(num_plateaus) * self.min_mutation_range

        # one population of candidate perturbations per input:
        # (N, population, *noise_shape)
        noise_pops = ep.uniform(
            x, (N, self.population, *noise_shape), -epsilon, epsilon
        )

        # Fallback so that a result exists even if no generation is evaluated
        # (steps == 0); overwritten by the fittest member in every step.
        elite_noise = noise_pops[:, 0]

        n_its_wo_change = ep.zeros(x, (N,))
        for _ in range(self.steps):
            fitness_l, is_adv_l = [], []

            # query the model once per population member
            for i in range(self.population):
                it = perturb(noise_pops[:, i])
                logits = model(it)
                fitness_l.append(calculate_fitness(logits))
                is_adv_l.append(is_adversarial(logits))

            fitness = ep.stack(fitness_l)  # (population, N)
            is_adv = ep.stack(is_adv_l, 1)  # (N, population)
            elite_idxs = ep.argmax(fitness, 0)

            elite_noise = noise_pops[range(N), elite_idxs]
            is_adv = is_adv[range(N), elite_idxs]

            # early stopping
            if is_adv.all():
                return restore_type(perturb(elite_noise))  # pragma: no cover

            # sample two parents for each of the (population - 1) children
            probs = ep.softmax(fitness / self.sampling_temperature, 0)
            parents_idxs = np.stack(
                [
                    self.choice(
                        self.population,
                        2 * self.population - 2,
                        replace=True,
                        p=probs[:, n],
                    )
                    for n in range(N)
                ],
                1,
            )

            # the elite is always carried over unchanged as member 0
            new_noise_pops = [elite_noise]
            for i in range(0, self.population - 1):
                parents_1 = noise_pops[range(N), parents_idxs[2 * i]]
                parents_2 = noise_pops[range(N), parents_idxs[2 * i + 1]]

                # calculate crossover: each element is taken from the first
                # parent with a probability proportional to its fitness
                p = probs[parents_idxs[2 * i], range(N)] / (
                    probs[parents_idxs[2 * i], range(N)]
                    + probs[parents_idxs[2 * i + 1], range(N)]
                )
                p = atleast_kd(p, x.ndim)
                p = ep.tile(p, (1, *noise_shape))

                crossover_mask = ep.uniform(p, p.shape, 0, 1) < p
                children = ep.where(crossover_mask, parents_1, parents_2)

                # calculate mutation: draw fresh noise for every child and
                # scale it with the mutation range of the respective sample
                mutations = (
                    ep.uniform(children, children.shape, -1.0, 1.0)
                    * atleast_kd(mutation_range, children.ndim)
                    * epsilon
                )
                mutation_mask = ep.uniform(children, children.shape)
                mutation_mask = mutation_mask <= atleast_kd(
                    mutation_probability, children.ndim
                )
                children = ep.where(mutation_mask, children + mutations, children)

                # project back to epsilon range
                children = ep.clip(children, -epsilon, epsilon)

                new_noise_pops.append(children)

            noise_pops = ep.stack(new_noise_pops, 1)

            # increase num_plateaus if fitness does not improve
            # for 100 consecutive steps; the elite sits at index 0, so an
            # elite index of 0 means that no child was fitter than it
            n_its_wo_change = ep.where(
                elite_idxs == 0, n_its_wo_change + 1, ep.zeros_like(n_its_wo_change)
            )
            num_plateaus = ep.where(
                n_its_wo_change >= 100, num_plateaus + 1, num_plateaus
            )
            n_its_wo_change = ep.where(
                n_its_wo_change >= 100, ep.zeros_like(n_its_wo_change), n_its_wo_change
            )

            # 0.5 * 0.9 ** num_plateaus, bounded from below by the
            # user-supplied minimum values
            decay = 0.5 * ep.exp(math.log(0.9) * num_plateaus)
            mutation_probability = ep.maximum(self.min_mutation_probability, decay)
            mutation_range = ep.maximum(self.min_mutation_range, decay)

        return restore_type(perturb(elite_noise))

0 comments on commit ad7ba73

Please sign in to comment.