# Visualisation for Random Forest Attack

In [None]:
import multiprocessing
import time

from graphviz import Source
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import make_blobs
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.tree import export_graphviz
from tqdm import trange

from rfa import RandomForestAttack

In [None]:
# Random seed passed to the data generator and the classifier below.
SEED = 0

In [None]:
# Synthetic 2-D data set: 300 points drawn around 4 blob centres.
X, y = make_blobs(
    n_samples=300,
    n_features=2,
    centers=4,
    cluster_std=1.0,
    random_state=SEED,
)

# Min-max scale every feature into [-1, 1]. The orientation is flipped:
# the per-feature minimum maps to +1 and the per-feature maximum to -1.
X_min = X.min(axis=0)
X_max = X.max(axis=0)
X = 1 - 2 * (X - X_min) / (X_max - X_min)

In [None]:
# Scatter plot of the rescaled samples, coloured by their blob label.
fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
plt.show()

In [None]:
def visualize_classifier(model, X, y, cmap='rainbow'):
    """Plot 2-D samples on top of the decision regions of ``model``.

    The regions are obtained by calling ``model.predict`` on a 200 x 200
    grid covering the square [-1, 1] x [-1, 1], which is also the fixed
    extent of the axes. The figure is shown immediately.

    Args:
        model: Fitted classifier exposing ``predict`` for two-feature input.
        X: Array whose first two columns are the point coordinates.
        y: Integer class label of each row of ``X``; colours the points.
        cmap: Colormap shared by the points and the regions.
    """
    xlim = (-1.0, 1.0)
    ylim = (-1.0, 1.0)

    # Evaluate the classifier on a regular grid over the plotting area.
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Take the label range from both the samples and the grid predictions so
    # that a class missing from ``y`` (e.g. when plotting a subset) still gets
    # its own colour band, and points and regions share one colour scale.
    lowest = min(np.min(y), np.min(Z))
    highest = max(np.max(y), np.max(Z))

    plt.figure(figsize=(7, 7))
    ax = plt.gca()

    # Points are drawn above the regions (higher zorder).
    ax.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cmap,
               clim=(lowest, highest), zorder=3)

    # One filled band per integer label: boundaries sit half-way between
    # consecutive labels.
    ax.contourf(xx, yy, Z, alpha=0.3,
                levels=np.arange(lowest, highest + 2) - 0.5,
                cmap=cmap, zorder=1)

    ax.set(xlim=xlim, ylim=ylim)
    plt.show()

In [None]:
# Fit a random forest of N_TREES trees on the full rescaled data set.
N_TREES = 10
model = RandomForestClassifier(n_estimators=N_TREES, random_state=SEED)
model.fit(X, y)

In [None]:
# Decision regions of the fitted forest with all training points on top.
visualize_classifier(model, X, y)

In [None]:
# Export every tree of the forest to DOT text and wrap it in a graphviz
# Source object. The export options are identical for all trees, so they
# are collected once up front.
export_options = dict(
    feature_names=['x0', 'x1'],
    class_names=['0', '1', '2', '3'],
    rounded=True,
    proportion=False,
    precision=2,
    filled=True,
)

digraphs = []
for estimator in model.estimators_:
    dot = export_graphviz(estimator, **export_options)
    digraphs.append(Source(dot))

In [None]:
# Display the first tree of the forest.
digraphs[0]

In [None]:
# Display the second tree of the forest.
digraphs[1]

In [None]:
# model.score is evaluated on the same X, y the model was fitted on.
print('Accuracy on train set:', model.score(X, y))

In [None]:
# Number of samples drawn from X for the evaluation / attack subset.
SIZE = 100

In [None]:
# Draw SIZE distinct rows to evaluate and attack. A generator seeded with SEED
# makes the selection repeatable; the global NumPy RNG used before was never
# seeded, so the subset (and every result derived from it) changed per run.
# NOTE: the rows come from X, which the model was fitted on, so this "test"
# accuracy is measured on a subset of the training data.
rng = np.random.RandomState(SEED)
shuffled_indices = rng.choice(X.shape[0], size=SIZE, replace=False)
X_test = X[shuffled_indices]
y_test = y[shuffled_indices]
print('Accuracy on test set: {:.2f}%'.format(model.score(X_test, y_test) * 100))

In [None]:
# Hyperparameters of the attack.
# Single-threaded by default; multiprocessing.cpu_count() could be used
# instead to match the number of available cores.
N_THREADS = 1
EPSILON = 1e-4  # The minimum change applied when updating a feature.
MAX_BUDGET = 0.01 * X.shape[1]   # The maximum perturbation allowed (0.01 per feature).

# Available pick rules: {'least_leaf', 'least_root', 'least_global', 'random'}
PICK_RULE = 'random'

In [None]:
# Build the attack against the fitted forest using the hyperparameters above.
attack = RandomForestAttack(model, max_budget=MAX_BUDGET, epsilon=EPSILON,
                            rule=PICK_RULE, n_threads=N_THREADS)

In [None]:
# Generate the adversarial examples and time the call. perf_counter() is
# monotonic and high-resolution, whereas time.time() follows the wall clock
# and can jump if the system time is adjusted mid-run.
start = time.perf_counter()
X_adv = attack.generate(X_test, y_test)
time_elapsed = time.perf_counter() - start
minutes, seconds = divmod(time_elapsed, 60)
print('Time to complete: {:d}m {:.3f}s'.format(int(minutes), seconds))

In [None]:
# Accuracy before and after the attack, as a percentage of the evaluated
# samples. The fraction is divided by the actual number of labels and scaled
# by 100; scaling by SIZE only gave a percentage while SIZE happened to be 100.
n_samples = len(y_test)

y_pred = model.predict(X_test)
acc = np.count_nonzero(y_pred == y_test) / n_samples
print('Accuracy on test set = {:.2f}%'.format(acc * 100))

adv_pred = model.predict(X_adv)
acc = np.count_nonzero(adv_pred == y_test) / n_samples
print('Accuracy on adversarial example set = {:.2f}%'.format(acc * 100))

In [None]:
# Euclidean distance between each sample and its adversarial counterpart,
# averaged over all samples.
perturbation_sizes = np.linalg.norm(X_test - X_adv, axis=1)
l2_norm = perturbation_sizes.mean()
print('Average l2 norm = {:.3f}'.format(l2_norm))

In [None]:
# Decision regions with the selected original samples and their true labels.
visualize_classifier(model, X[shuffled_indices], y[shuffled_indices])

In [None]:
# Same regions with the adversarial samples, still coloured by the true labels
# of the samples they were generated from.
visualize_classifier(model, X_adv, y[shuffled_indices])