# Robustness of Machine Learning Models using Adversarial Robustness Toolbox

In [1]:
!pip install adversarial-robustness-toolbox



In [107]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data Preprocessing

In [149]:
# Location of the recruitment dataset CSV, given relative to the current working directory
data_path = './recruitmentdataset-2022-1.3.csv'

In [153]:
# Load the raw dataset from data_path and preview its first ten rows
df = pd.read_csv(data_path)
df.head(10)

Unnamed: 0,Id,gender,age,nationality,sport,ind-university_grade,ind-debateclub,ind-programming_exp,ind-international_exp,ind-entrepeneur_exp,ind-languages,ind-exact_study,ind-degree,company,decision
0,x8011e,female,24,German,Swimming,70,False,False,False,False,1,True,phd,A,True
1,x6077a,male,26,German,Golf,67,False,True,False,False,2,True,bachelor,A,False
2,x6006e,female,23,Dutch,Running,67,False,True,True,False,0,True,master,A,False
3,x2173b,male,24,Dutch,Cricket,70,False,True,False,False,1,True,master,A,True
4,x6241a,female,26,German,Golf,59,False,False,False,False,1,False,master,A,True
5,x9063d,female,26,Dutch,Chess,63,False,False,False,False,1,True,bachelor,A,True
6,x5785d,female,27,Dutch,Tennis,63,True,True,False,False,2,True,bachelor,A,False
7,x8767c,female,22,Dutch,Swimming,71,False,True,False,False,1,True,master,A,True
8,x6541b,female,28,Dutch,Football,65,True,False,False,True,3,False,bachelor,A,False
9,x3890b,male,24,Dutch,Football,55,True,False,False,True,3,False,master,A,True


In [154]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Discard the 'Id' identifier column and one-hot encode the categorical columns,
# dropping the first level of each. errors='ignore' keeps the cell re-runnable
# once 'Id' has already been removed from df.
df = pd.get_dummies(df.drop(columns='Id', errors='ignore'), drop_first=True)

# Cast boolean columns to 0/1 integers. An explicit astype avoids relying on
# DataFrame.replace to change the column dtype, which depends on downcasting
# behaviour that recent pandas releases deprecate.
bool_columns = df.select_dtypes(include='bool').columns
df = df.astype({column: 'int64' for column in bool_columns})

# Min-max scale the numeric columns to [0, 1].
# NOTE(review): the min/max are computed on the full dataset, i.e. before the
# train/test split performed later, so test-set statistics leak into the scaling.
for column in ('age', 'ind-university_grade', 'ind-languages'):
    column_min, column_max = df[column].min(), df[column].max()
    df[column] = (df[column] - column_min) / (column_max - column_min)

In [155]:
# Display the first ten rows of the dataframe
df.head(10)

Unnamed: 0,age,ind-university_grade,ind-debateclub,ind-programming_exp,ind-international_exp,ind-entrepeneur_exp,ind-languages,ind-exact_study,decision,gender_male,...,sport_Golf,sport_Rugby,sport_Running,sport_Swimming,sport_Tennis,ind-degree_master,ind-degree_phd,company_B,company_C,company_D
0,0.272727,0.757576,0,0,0,0,0.333333,1,1,0,...,0,0,0,1,0,0,1,0,0,0
1,0.454545,0.666667,0,1,0,0,0.666667,1,0,1,...,1,0,0,0,0,0,0,0,0,0
2,0.181818,0.666667,0,1,1,0,0.0,1,0,0,...,0,0,1,0,0,1,0,0,0,0
3,0.272727,0.757576,0,1,0,0,0.333333,1,1,1,...,0,0,0,0,0,1,0,0,0,0
4,0.454545,0.424242,0,0,0,0,0.333333,0,1,0,...,1,0,0,0,0,1,0,0,0,0
5,0.454545,0.545455,0,0,0,0,0.333333,1,1,0,...,0,0,0,0,0,0,0,0,0,0
6,0.545455,0.545455,1,1,0,0,0.666667,1,0,0,...,0,0,0,0,1,0,0,0,0,0
7,0.090909,0.787879,0,1,0,0,0.333333,1,1,0,...,0,0,0,1,0,1,0,0,0,0
8,0.636364,0.606061,1,0,0,1,1.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0.272727,0.30303,1,0,0,1,1.0,0,1,1,...,0,0,0,0,0,1,0,0,0,0


In [156]:
# Separate the feature matrix from the target column
X = df.drop(columns=['decision'])
y = df['decision']

# One-hot encode the target variable (one 0/1 integer column for each class).
# Requesting the integer dtype directly removes the follow-up
# replace({False: 0, True: 1}), which depends on dtype downcasting that recent
# pandas releases deprecate.
y = pd.get_dummies(y, dtype='int64')

0       1
1       0
2       0
3       1
4       1
       ..
3995    0
3996    0
3997    0
3998    0
3999    0
Name: decision, Length: 4000, dtype: int64
      0  1
0     0  1
1     1  0
2     1  0
3     0  1
4     0  1
...  .. ..
3995  1  0
3996  1  0
3997  1  0
3998  1  0
3999  1  0

[4000 rows x 2 columns]


In [168]:
# Hold out 20% of the samples for testing (fixed seed), then convert every
# split from pandas to a NumPy array in one pass
X_train, X_test, y_train, y_test = (
    split.to_numpy()
    for split in train_test_split(X, y, test_size=0.2, random_state=42)
)

## Support Vector Classifier and Fast Gradient Method Attack

In [169]:
from art.estimators.classification import SklearnClassifier
# Untrained RBF-kernel support vector classifier
model = SVC(C=1.0, kernel='rbf', gamma='auto')

# Wrap the (not yet fitted) model in an ART estimator; clip_values gives the
# minimum and maximum values allowed for the features
classifier = SklearnClassifier(model=model, clip_values=(0, 1))

# Train through the ART wrapper. y_train is already a one-hot array with two
# columns, so the former np.reshape(y_train, (-1, 2)) was a no-op and is dropped.
classifier.fit(X_train, y_train)

# Score the fitted classifier on the clean test set
y_pred = classifier.predict(X_test)
print('Accuracy on test data:', accuracy_score(y_test, y_pred))

(3200, 24) (3200, 2)
[[1 0]
 [0 1]
 [1 0]
 ...
 [1 0]
 [0 1]
 [1 0]]
Accuracy on test data: 0.7925


In [171]:
from art.attacks.evasion import FastGradientMethod

# Fast Gradient Method attack against the wrapped classifier with eps = 0.2
attack = FastGradientMethod(estimator=classifier, eps=0.2)

# Craft adversarial counterparts of the test samples
X_test_adv = attack.generate(x=X_test)

# Score the classifier on the adversarial samples against the true test targets
y_pred_adv = classifier.predict(X_test_adv)
adv_accuracy = accuracy_score(y_test, y_pred_adv)
print('Accuracy on adversarial test data:', adv_accuracy)

Accuracy on adversarial test data: 0.555


We can see that the accuracy on the adversarial test data goes down to 0.55 from 0.79. This means that the SVC model is not robust to the Fast Gradient Method attack with an epsilon of 0.2. Increasing epsilon will lower the accuracy even more, but then the attacks will be more noticeable.

In [160]:
# Metrics to study the robustness of the model (loss sensitivity and empirical robustness)
from art.metrics import loss_sensitivity, empirical_robustness

# Keep the result under its own name: assigning it to `loss_sensitivity` would
# rebind the imported function and shadow it for the rest of the kernel session.
loss_sens = loss_sensitivity(classifier, X_test, y_test)
print('Loss sensitivity:', loss_sens)

# 'fgsm' names the attack that empirical_robustness uses to perturb X_test
emp_robustness = empirical_robustness(classifier, X_test, 'fgsm')
print('Empirical robustness:', emp_robustness)

Loss sensitivity: 2.604759
Empirical robustness: 0.15218254195023623


Here, we computed two metrics to study the robustness of the model. The first one is the loss sensitivity, which measures the sensitivity of the loss function to small perturbations in the input data. It is equal to the largest variation of the output of a model under a small perturbation of the input. We obtained a value of 2.6, but it is hard to interpret without a reference value.

The second metric is the empirical robustness, which measures the minimum perturbation that needs to be applied to the input data to change the output of the model. The value obtained is 0.15, which is quite a good result. Concretely, it means that if an attacker wants to change the output of the model, they need to apply a sufficiently large perturbation to the input data, but not too large to be noticeable.

## Decision Tree Classifier and Decision Tree Attack

In [161]:
# Collapse the one-hot test targets to integer class labels for the cells below.
# The ndim guard makes the cell safe to re-run: calling argmax(axis=1) again on
# the already 1-D label vector would raise an AxisError.
if y_test.ndim > 1:
    y_test = y_test.argmax(axis=1)

In [162]:
# Scikit-learn tree classifier
from sklearn.tree import DecisionTreeClassifier
from art.estimators.classification.scikitlearn import ScikitlearnDecisionTreeClassifier

# Untrained decision tree. A fixed random_state makes the fitted tree, and
# therefore the reported accuracy, reproducible across runs.
model = DecisionTreeClassifier(random_state=42)

# Wrap the (not yet fitted) model in ART's decision-tree estimator
classifier = ScikitlearnDecisionTreeClassifier(model=model)

# Train through the wrapper, then reduce the per-class predictions to class
# labels so they can be compared with the label vector y_test
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
print('Accuracy on test data:', accuracy_score(y_test, y_pred))

Accuracy on test data: 0.80875


In [163]:
from art.attacks.evasion import DecisionTreeAttack

# Build the decision-tree evasion attack for the wrapped classifier
attack = DecisionTreeAttack(classifier)

# Craft adversarial counterparts of the test samples
X_test_adv = attack.generate(x=X_test)

# Predict on the adversarial samples and reduce the per-class outputs to
# class labels (index of the largest entry in each row)
y_pred_adv = np.argmax(classifier.predict(X_test_adv), axis=1)
print('Accuracy on adversarial test data:', accuracy_score(y_test, y_pred_adv))

Decision tree attack:   0%|          | 0/800 [00:00<?, ?it/s]

Accuracy on adversarial test data: 0.19125


Here, we observe an accuracy on the adversarial test data of 0.19, which is a very poor result compared to the initial accuracy of 0.81. This can be explained by the fact that our dataset is not very large, and that the attack is specifically designed to be effective against decision trees.

## XGBoost Classifier and Robustness Verification

In [164]:
from xgboost import XGBClassifier

# Small XGBoost classifier (n_estimators=4, max_depth=6)
model = XGBClassifier(n_estimators=4, max_depth=6)

# Fit on integer class labels obtained by collapsing the one-hot training targets
model.fit(X_train, y_train.argmax(axis=1))

# Score the fitted model on the clean test set
y_pred = model.predict(X_test)
print('Accuracy on test data:', accuracy_score(y_test, y_pred))

Accuracy on test data: 0.84


As a final study of robustness, we study the robustness verification of the XGBoost model using the RobustnessVerificationTreeModelsCliqueMethod. This method is based on the paper "Robustness Verification of Tree Models" (https://arxiv.org/abs/1906.03849) and is designed to verify the robustness of tree models to adversarial attacks. It is based on the computation of the average bound and the verified error at a given epsilon. The average bound is the average, over the test samples, of a verified lower bound on the smallest perturbation that can change the output of the model: any perturbation smaller than this bound is guaranteed not to change the prediction. The verified error is an upper bound on the error rate that can be obtained by applying a perturbation of size at most epsilon to the input data.

In [166]:
from art.estimators.classification import XGBoostClassifier
from art.metrics import RobustnessVerificationTreeModelsCliqueMethod

# Fit the small XGBoost classifier on integer class labels
model = XGBClassifier(n_estimators=4, max_depth=6)
model.fit(X_train, np.argmax(y_train, axis=1))

# Wrap the fitted model for ART. nb_features must equal the number of columns of
# the feature matrix, so it is derived from the data; the previously hard-coded
# value of 13 did not match the 24 feature columns of X_train.
classifier = XGBoostClassifier(model=model, nb_features=X_train.shape[1], nb_classes=2)

# Verify robustness on the test set; the call returns the average robustness
# bound and the verified error, with eps_init=0.3 as the starting epsilon
rt = RobustnessVerificationTreeModelsCliqueMethod(classifier=classifier)
average_bound, verified_error = rt.verify(x=X_test, y=y_test, eps_init=0.3, nb_search_steps=10, max_clique=2,
                                          max_level=2)

print('Average bound:', average_bound)
print('Verified error at eps:', verified_error)

Decision tree verification:   0%|          | 0/800 [00:00<?, ?it/s]

Average bound: 0.11629101562500047
Verified error at eps: 0.735


The average bound is 0.116, which is of the same order of magnitude as the empirical robustness of the SVC model. 

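The verified error at $\epsilon=0.3$ is 0.735. Since the verified error is an upper bound on the error rate under any perturbation of size at most $\epsilon$, this means that the prediction is guaranteed to remain correct for only 26.5% of the test samples, compared with an error of 0.16 on the unperturbed test data. The value of $\epsilon$ is quite large for features scaled to $[0, 1]$, making the attack noticeable, which partly explains this high value; nevertheless, the model cannot be considered verifiably robust at this perturbation size.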