In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, f1_score

In [3]:
df = pd.read_csv("../data/dataset_cleaned.csv", index_col=0)

In [4]:
X, y = df.drop(columns="attack"), df["attack"]

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [7]:
y_pred = rf.predict(X_test)

In [8]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

In [9]:
print(f"Accuracy sur le test: {accuracy:4f}")
print(f"Precision sur le test: {precision:4f}")
print(f"F1 Score sur le test: {f1:4f}")

Accuracy sur le test: 0.999788
Precision sur le test: 0.999732
F1 Score sur le test: 0.999866


In [10]:
X.columns.shape

(1481,)

In [11]:
X_train.columns.shape

(1481,)

In [12]:
importances = pd.Series(rf.feature_importances_, index=X_train.columns).sort_values(ascending=False)
print(importances.head(10))

system.cpu.dcache.demandMissRate::total           0.059630
system.cpu.dcache.overallMissRate::total          0.042275
system.cpu.dcache.overallMissRate::cpu.data       0.041848
system.cpu.dcache.demandMissRate::cpu.data        0.034587
system.cpu.dcache.demandMshrMissRate::cpu.data    0.029930
system.mem_ctrl.avgPriority_cpu.data::samples     0.028125
system.membus.snoop_filter.hitSingleRequests      0.028028
system.mem_ctrl.avgWrQLen                         0.028012
system.mem_ctrl.dram.totBusLat                    0.027967
system.cpu.dcache.ReadReq.mshrMissRate::total     0.021136
dtype: float64


Le modèle dépend fortement de l’activité de cache L1D + trafic mémoire + comportement du bus.

## Boundary Attack (white-box)

In [19]:
import numpy as np

In [20]:
def attack_rf_boundary(model, x, max_changes=10, eps=1e-3):
    """
    x : échantillon 1D numpy array
    model : RandomForestClassifier sklearn
    Retourne un x_adv qui trompe le modèle si possible
    """

    x_adv = x.copy()
    orig_pred = model.predict([x])[0]

    # On trie les features par importance décroissante
    important_features = np.argsort(model.feature_importances_)[::-1]

    changes = 0

    for f in important_features:
        if changes >= max_changes:
            break

        # Parcourt les arbres pour trouver des thresholds pour cette feature
        thresholds = []
        for tree in model.estimators_:
            tree_ = tree.tree_
            feature = tree_.feature
            thr = tree_.threshold
            thresholds.extend(thr[feature == f])

        if len(thresholds) == 0:
            continue

        # On prend le seuil le plus proche
        nearest_thr = min(thresholds, key=lambda t: abs(x_adv[f] - t))

        # On franchit le seuil
        if x_adv[f] < nearest_thr:
            x_adv[f] = nearest_thr + eps
        else:
            x_adv[f] = nearest_thr - eps

        changes += 1

        # Vérifie si le modèle est trompé
        new_pred = model.predict([x_adv])[0]
        if new_pred != orig_pred:
            print(f"Succès ! Feature {f} modifiée, seuil {nearest_thr}")
            return x_adv

    print("Échec de l'attaque : augmenter max_changes ?")
    return x_adv


In [28]:
i = 0
x = X_test.iloc[i]
y = y_test.iloc[i]

x_adv = attack_rf_boundary(rf, x, max_changes=100)

print("Avant :", rf.predict([x])[0])
print("Après :", rf.predict([x_adv])[0])

  nearest_thr = min(thresholds, key=lambda t: abs(x_adv[f] - t))
  if x_adv[f] < nearest_thr:
  x_adv[f] = nearest_thr - eps
  nearest_thr = min(thresholds, key=lambda t: abs(x_adv[f] - t))
  if x_adv[f] < nearest_thr:
  x_adv[f] = nearest_thr - eps
  nearest_thr = min(thresholds, key=lambda t: abs(x_adv[f] - t))
  if x_adv[f] < nearest_thr:
  x_adv[f] = nearest_thr + eps
  nearest_thr = min(thresholds, key=lambda t: abs(x_adv[f] - t))
  if x_adv[f] < nearest_thr:
  x_adv[f] = nearest_thr + eps
  nearest_thr = min(thresholds, key=lambda t: abs(x_adv[f] - t))
  if x_adv[f] < nearest_thr:
  x_adv[f] = nearest_thr - eps
  nearest_thr = min(thresholds, key=lambda t: abs(x_adv[f] - t))
  if x_adv[f] < nearest_thr:
  x_adv[f] = nearest_thr - eps
  nearest_thr = min(thresholds, key=lambda t: abs(x_adv[f] - t))
  if x_adv[f] < nearest_thr:
  x_adv[f] = nearest_thr - eps
  nearest_thr = min(thresholds, key=lambda t: abs(x_adv[f] - t))
  if x_adv[f] < nearest_thr:
  x_adv[f] = nearest_thr - eps


Succès ! Feature 378 modifiée, seuil 732125024.0
Avant : 1
Après : 0
