# Model Extraction Attacks

# Libs and Confs

In [None]:
!pip install tqdm
!pip install shap
!pip install ipywidgets
!pip install -U matplotlib
!pip install sklearn

In [None]:
import numpy as np
import pandas as pd
import os
from joblib import load, dump   # load models
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score, precision_score, confusion_matrix
import tqdm
import time
import shap

In [None]:
# Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.mixture import BayesianGaussianMixture
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Paths: the notebook's working directory and the CIC-IDS-2017 labelled-flow CSV folder beneath it
actual_directory = os.getcwd()
dataset_path = os.path.join(
    actual_directory,
    "CIC-IDS-2017",
    "GeneratedLabelledFlows",
    "TrafficLabelling",
)

# Data manipulation - CIC-IDS2017

Create the data samples used throughout the experiments.

## CIC-IDS2017

https://www.unb.ca/cic/datasets/ids-2017.html

Iman Sharafaldin, Arash Habibi Lashkari, and Ali A. Ghorbani, “Toward Generating a New Intrusion Detection Dataset and Intrusion Traffic Characterization”, 4th International Conference on Information Systems Security and Privacy (ICISSP), Portugal, January 2018

## Combined Data - CIC-IDS2017

One-vs-rest setting over the merged dataset: every attack flow is labelled 1 and every benign flow is labelled 0.

In [None]:
def combine_dataset(path):
    '''
    Combine the CSV files found in `path` into a single `combined_dataset.csv` written to the same folder.

    Only files ending in `.csv` are read, and a `combined_dataset.csv` left by a previous run is skipped,
    so calling the function again does not duplicate rows. Files are read in sorted name order so the
    row order of the output is reproducible.

    :param path: directory that contains the CSV files to merge
    :raises ValueError: if `path` contains no CSV file to combine
    '''
    output_name = 'combined_dataset.csv'
    filenames = sorted(
        file for file in os.listdir(path)
        if file.lower().endswith('.csv') and file != output_name
    )
    if not filenames:
        raise ValueError(f'No CSV files to combine in {path}')

    dataframes = []
    print('Starting to combine datasets. Wait a moment...')
    for file in filenames:
        file_path = os.path.join(path, file)
        try:
            df = pd.read_csv(file_path, encoding='utf-8')
        except UnicodeDecodeError:
            # Some files are not valid UTF-8; retry with a single-byte encoding
            df = pd.read_csv(file_path, encoding='ISO-8859-1')

        dataframes.append(df)

    combined_df = pd.concat(dataframes, ignore_index=True)

    combined_df.to_csv(os.path.join(path, output_name), index=False)
    print('Datasets combined')

In [None]:
'''
This block of code receives the merged CIC-IDS-2017 dataset and splits it into groups of data:
X_data_model / y_data_model : data used to train the target models
X_data_extraction / y_data_extraction: data sample to be used during model extraction attacks

X_data_model / y_data_model is then split into X_train, X_test, y_train, y_test to train and validate the target models.
'''

data = pd.read_csv(os.path.join(dataset_path, "combined_dataset.csv"), low_memory=False)

# Pre-processing: drop the flow identifier, endpoint and timestamp columns before building the feature matrix
# (several CSV headers carry a leading space, which must be kept in the column names)
data = data.drop(columns=[
    ' Destination IP',
    'Flow ID',
    ' Source IP',
    ' Timestamp',
    ' Source Port',
    ' Destination Port',
    ' Protocol',
])

# Replace infinite values with np.nan so they are imputed together with the other missing values
data = data.replace([np.inf, -np.inf], np.nan)

# dropping unlabeled rows
data = data.dropna(subset=[' Label'])

# changing label values. Malicious: 1, Benign: 0
data[' Label'] = data[' Label'].apply(lambda x: 0 if x=='BENIGN' else 1)

# Missing values: median imputation of the two rate columns.
# The result is assigned back to the column; calling fillna(inplace=True) on a column selection is a
# chained in-place operation that is deprecated and does not modify `data` under pandas copy-on-write.
for column in ('Flow Bytes/s', ' Flow Packets/s'):
    data[column] = data[column].fillna(data[column].median())

# Per-column count of missing values after imputation; report any column that still has some
missing_values = data.isnull().sum()
if (missing_values > 0).any():
    print(missing_values[missing_values > 0])

# split and normalize data
X = data.drop(' Label', axis=1)
y = data[' Label']

# NOTE(review): the scaler is fitted on the whole dataset before the splits below, so the test and
# extraction partitions influence the scaling — confirm this is intended.
scaler = MinMaxScaler()

# NOTE(review): X_scaled gets a fresh 0..n-1 index while y keeps the index of `data`; the splits below
# pair rows by position, so the two stay aligned row-wise but their index labels differ.
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# split data into data_model and data_extraction, then split data_model into train and test
X_data_model , X_data_extraction, y_data_model, y_data_extraction = train_test_split(X_scaled, y, test_size=0.1, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X_data_model, y_data_model, test_size=0.3, random_state=42, stratify=y_data_model)
print(f'Data created\n X_data_mode shape: {X_data_model.shape}\n X_data_extraction shape: {X_data_extraction.shape}') 


# Training Target Models - CIC-IDS2017

## Target Models

### Linear Model

#### Logistic Regression

In [None]:
# Target model: logistic regression. Train and persist it on first run, otherwise reload the saved file.
model_file = os.path.join(os.getcwd(), "LogisticRegression.joblib")
if not os.path.exists(model_file):
    lg_model = LogisticRegression(solver='newton-cholesky', random_state=42, n_jobs=-1)
    try:
        lg_model.fit(X_train, y_train)
    except Exception as e:
        # NOTE(review): the error is only printed; the evaluation below then runs on an unfitted estimator
        print(f"We had the {e} error. The model wasn't trained")
    else:
        print('Model Trained')
        # Persist the fitted model so later runs can skip training
        dump(lg_model, os.path.join(actual_directory, "LogisticRegression.joblib"))
else:
    print('Model already trained')
    lg_model = load(os.path.join(actual_directory, "LogisticRegression.joblib"))

# Evaluate on the held-out test split
predict = lg_model.predict(X_test)
predict_proba = lg_model.predict_proba(X_test)[:, 1]

# Saved per target model as (classification report text, ROC-AUC, accuracy)
lg_cr = (
    classification_report(y_test, predict, target_names=['Benign flow', 'Malicious flow']),
    roc_auc_score(y_test, predict_proba),
    lg_model.score(X_test, y_test),
)
print(lg_cr[0], end='\n\n')
print(f'AUC-ROC: {lg_cr[1]}')
print(f'Accuracy: {lg_cr[2]}')

### Non-Linear Models

#### Random Forest Classifier

In [None]:
# Target model: random forest. Train and persist it on first run, otherwise reload the saved file.
model_file = os.path.join(os.getcwd(), "RandomForestClassifier.joblib")
if not os.path.exists(model_file):
    rfc_model = RandomForestClassifier(random_state=42)
    try:
        rfc_model.fit(X_train, y_train)
    except Exception as e:
        # NOTE(review): the error is only printed; the evaluation below then runs on an unfitted estimator
        print(f"We had the {e} error. The model wasn't trained")
    else:
        print('Model Trained')
        # Persist the fitted model so later runs can skip training
        dump(rfc_model, os.path.join(actual_directory, "RandomForestClassifier.joblib"))
else:
    print('Model already trained')
    rfc_model = load(os.path.join(actual_directory, "RandomForestClassifier.joblib"))

# Evaluate on the held-out test split
predict = rfc_model.predict(X_test)
predict_proba = rfc_model.predict_proba(X_test)[:, 1]

# Saved per target model as (classification report text, ROC-AUC, accuracy)
rfc_cr = (
    classification_report(y_test, predict, target_names=['Benign flow', 'Malicious flow']),
    roc_auc_score(y_test, predict_proba),
    rfc_model.score(X_test, y_test),
)
print(rfc_cr[0], end='\n\n')
print(f'AUC-ROC: {rfc_cr[1]}')
print(f'Accuracy: {rfc_cr[2]}')

#### KNN

In [None]:
# Target model: k-nearest neighbours. Train and persist it on first run, otherwise reload the saved file.
model_file = os.path.join(os.getcwd(), "KNNClassifier.joblib")
if not os.path.exists(model_file):
    knn_model = KNeighborsClassifier(n_neighbors=5)
    try:
        knn_model.fit(X_train, y_train)
    except Exception as e:
        # NOTE(review): the error is only printed; the evaluation below then runs on an unfitted estimator
        print(f"We had the {e} error. The model wasn't trained")
    else:
        print('Model Trained')
        # Persist the fitted model so later runs can skip training
        dump(knn_model, os.path.join(actual_directory, "KNNClassifier.joblib"))
else:
    print('Model already trained')
    knn_model = load(os.path.join(actual_directory, "KNNClassifier.joblib"))

# Evaluate on the held-out test split
predict = knn_model.predict(X_test)
predict_proba = knn_model.predict_proba(X_test)[:, 1]

# Saved per target model as (classification report text, ROC-AUC, accuracy)
knn_cr = (
    classification_report(y_test, predict, target_names=['Benign flow', 'Malicious flow']),
    roc_auc_score(y_test, predict_proba),
    knn_model.score(X_test, y_test),
)
print(knn_cr[0], end='\n\n')
print(f'AUC-ROC: {knn_cr[1]}')
print(f'Accuracy: {knn_cr[2]}')

#### Quadratic Discriminant Analysis (QDA)

In [None]:
# Target model: quadratic discriminant analysis. Train and persist it on first run, otherwise reload the saved file.
model_file = os.path.join(os.getcwd(), "QDAClassifier.joblib")
if not os.path.exists(model_file):
    qda_model = QuadraticDiscriminantAnalysis()
    try:
        qda_model.fit(X_train, y_train)
    except Exception as e:
        # NOTE(review): the error is only printed; the evaluation below then runs on an unfitted estimator
        print(f"We had the {e} error. The model wasn't trained")
    else:
        print('Model Trained')
        # Persist the fitted model so later runs can skip training
        dump(qda_model, os.path.join(actual_directory, "QDAClassifier.joblib"))
else:
    print('Model already trained')
    qda_model = load(os.path.join(actual_directory, "QDAClassifier.joblib"))

# Evaluate on the held-out test split
predict = qda_model.predict(X_test)
predict_proba = qda_model.predict_proba(X_test)[:, 1]

# Saved per target model as (classification report text, ROC-AUC, accuracy)
qda_cr = (
    classification_report(y_test, predict, target_names=['Benign flow', 'Malicious flow']),
    roc_auc_score(y_test, predict_proba),
    qda_model.score(X_test, y_test),
)
print(qda_cr[0], end='\n\n')
print(f'AUC-ROC: {qda_cr[1]}')
print(f'Accuracy: {qda_cr[2]}')

#### ADABoost

In [None]:
# Target model: AdaBoost. Train and persist it on first run, otherwise reload the saved file.
model_file = os.path.join(os.getcwd(), "ADABoostClassifier.joblib")
if not os.path.exists(model_file):
    ada_model = AdaBoostClassifier()
    try:
        ada_model.fit(X_train, y_train)
    except Exception as e:
        # NOTE(review): the error is only printed; the evaluation below then runs on an unfitted estimator
        print(f"We had the {e} error. The model wasn't trained")
    else:
        print('Model Trained')
        # Persist the fitted model so later runs can skip training
        dump(ada_model, os.path.join(actual_directory, "ADABoostClassifier.joblib"))
else:
    print('Model already trained')
    ada_model = load(os.path.join(actual_directory, "ADABoostClassifier.joblib"))

# Evaluate on the held-out test split
predict = ada_model.predict(X_test)
predict_proba = ada_model.predict_proba(X_test)[:, 1]

# Saved per target model as (classification report text, ROC-AUC, accuracy)
ada_cr = (
    classification_report(y_test, predict, target_names=['Benign flow', 'Malicious flow']),
    roc_auc_score(y_test, predict_proba),
    ada_model.score(X_test, y_test),
)
print(ada_cr[0], end='\n\n')
print(f'AUC-ROC: {ada_cr[1]}')
print(f'Accuracy: {ada_cr[2]}')

#### MLP

In [None]:
# Target model: multi-layer perceptron. Train and persist it on first run, otherwise reload the saved file.
model_file = os.path.join(os.getcwd(), "MLPClassifier.joblib")
if not os.path.exists(model_file):
    mlp_model = MLPClassifier(
        hidden_layer_sizes=(100, 50),
        activation='relu',
        solver='sgd',
        learning_rate_init=0.0001,
        max_iter=10,
        random_state=42,
        verbose=True,
    )
    try:
        mlp_model.fit(X_train, y_train)
    except Exception as e:
        # NOTE(review): the error is only printed; the evaluation below then runs on an unfitted estimator
        print(f"We had the {e} error. The model wasn't trained")
    else:
        print('Model Trained')
        # Persist the fitted model so later runs can skip training
        dump(mlp_model, os.path.join(actual_directory, "MLPClassifier.joblib"))
else:
    print('Model already trained')
    mlp_model = load(os.path.join(actual_directory, "MLPClassifier.joblib"))

# Evaluate on the held-out test split
predict = mlp_model.predict(X_test)
predict_proba = mlp_model.predict_proba(X_test)[:, 1]

# Saved per target model as (classification report text, ROC-AUC, accuracy)
mlp_cr = (
    classification_report(y_test, predict, target_names=['Benign flow', 'Malicious flow']),
    roc_auc_score(y_test, predict_proba),
    mlp_model.score(X_test, y_test),
)
print(mlp_cr[0], end='\n\n')
print(f'AUC-ROC: {mlp_cr[1]}')
print(f'Accuracy: {mlp_cr[2]}')

#### Logistic Regression

In [None]:
# Logistic regression fitted on the X_dos_* / y_dos_* split. Train and persist on first run, otherwise reload.
# NOTE(review): X_dos_train, y_dos_train, X_dos_test and y_dos_test are not created in the cells above —
# confirm where they are defined before running this cell.
model_file = os.path.join(os.getcwd(), "LogisticRegression_dos.joblib")
if not os.path.exists(model_file):
    lg_dos_model = LogisticRegression(solver='newton-cholesky', random_state=42, n_jobs=-1)
    try:
        lg_dos_model.fit(X_dos_train, y_dos_train)
    except Exception as e:
        # NOTE(review): the error is only printed; the evaluation below then runs on an unfitted estimator
        print(f"We had the {e} error. The model wasn't trained")
    else:
        print('Model Trained')
        # Persist the fitted model so later runs can skip training
        dump(lg_dos_model, os.path.join(actual_directory, "LogisticRegression_dos.joblib"))
else:
    print('Model already trained')
    lg_dos_model = load(os.path.join(actual_directory, "LogisticRegression_dos.joblib"))

# Evaluate on the held-out test split
predict = lg_dos_model.predict(X_dos_test)
predict_proba = lg_dos_model.predict_proba(X_dos_test)[:, 1]

# Saved per target model as (classification report text, ROC-AUC, accuracy)
lg_dos_cr = (
    classification_report(y_dos_test, predict, target_names=['Benign flow', 'Malicious flow']),
    roc_auc_score(y_dos_test, predict_proba),
    lg_dos_model.score(X_dos_test, y_dos_test),
)
print(lg_dos_cr[0], end='\n\n')
print(f'AUC-ROC: {lg_dos_cr[1]}')
print(f'Accuracy: {lg_dos_cr[2]}')

# Complementary Functions

## Classification API

In [None]:
def features_importance(model_file, n):
    '''
    Rank the features of a saved model by its `feature_importances_` attribute.

    Feature names are taken from the columns of the notebook-level `X_test` frame.

    :param model_file: path of the saved model (loaded with joblib)
    :param n: number of top-ranked features to return
    :return: DataFrame with columns 'Feature' and 'Importance', sorted by descending importance, first n rows
    '''
    fitted_model = load(model_file)
    importances = fitted_model.feature_importances_

    ranking = pd.DataFrame({
        'Feature': (X_test.columns),
        'Importance': importances,
    }).sort_values(by='Importance', ascending=False)

    return ranking.head(n)

In [None]:
def classify_api(target, features, threshold=0.5, type='hard'):
    '''
    Load a previously saved model from the working directory and classify samples with it.
    A single feature row or a matrix of features (one sample per row) can be passed.

    : target: file name of the saved model; its scikit-learn style `predict` is used
    : features: feature vector(s) to classify, in pandas format
    : threshold: predictions <= threshold are mapped to 0 (benign), all others to 1 (malicious)
    : type: soft -> Series of (raw prediction, class) tuples; hard -> Series with only the class
    : return: pd.Series as described above, or -1 if the model file is missing or `type` is not recognised
    '''
    target_file = os.path.join(os.getcwd(), target)
    if not os.path.exists(target_file):
        print('Model file not found!')
        return -1

    # loading specific model (it needs the model file name to be opened)
    target_model = load(target_file)
    prediction = target_model.predict(features)

    # Binarise every raw prediction against the threshold
    classes = [0 if result <= threshold else 1 for result in prediction]

    if type == 'soft':
        return pd.Series(list(zip(prediction, classes)), dtype=object)
    if type == 'hard':
        return pd.Series(classes, dtype=np.int64)
    # unknown output type
    return -1

# Evaluation

## Explainability-Based Feature Agreement (EBFA)

We use the EBFA metric to evaluate whether the target model and the surrogate model base their decisions on the same features.

### Using shap

In [None]:
# This version is not a black-box version, unlike the version presented in the article:
# it needs the data used to train the target model.
def _top_k_features(shap_values, k):
    '''Return the set of indices of the k features with the largest mean absolute SHAP value.'''
    values = np.asarray(shap_values)
    # assumes a 2-D (n_samples, n_features) array, i.e. the explained function returns one value
    # per sample — TODO confirm if a multi-output function is ever explained here.
    # Averaging over axis 0 (samples) yields one importance per feature.
    importance = np.mean(np.abs(values), axis=0)
    return set(np.argsort(-importance)[:k].tolist())


def ebfa(target, surrogate, X_train, y_train, X_surrogate_train, y_surrogate_train, X_test, y_test, k=10, p=0.0001):
    '''
    EBFA (Explainability-Based Feature Agreement) calculation between two models.
    This function computes the top k important features of the target model and of the surrogate model
    (ranked by mean absolute SHAP value over a sample of X_test) and returns the fraction of features
    the two rankings share. The result is between 0 and 1; values close to 1 indicate that the surrogate
    model and the target model rely on the same features.

    As SHAP is really expensive, only part of each training dataset is used as SHAP background data.

    :param target: File name/path of the target model, resolved against the working directory.
    :param surrogate: File name/path of the surrogate model, resolved against the working directory.
    :param X_train: Data used to train the target model; a subsample is the SHAP background for the target.
    :param y_train: Labels of X_train, used to stratify the subsample.
    :param X_surrogate_train: Data used to train the surrogate; a subsample is its SHAP background.
    :param y_surrogate_train: Labels of X_surrogate_train, used to stratify the subsample.
    :param X_test: Data from which 1% is sampled; SHAP values of both models are computed on that sample.
    :param y_test: Labels of X_test, used to stratify the sample.
    :param k: Number of top features to consider for EBFA.
    :param p: Fraction of X_train kept as background subsample.
    :return: EBFA score, or -1 if one of the model files is not found.
    '''

    actual_directory = os.getcwd()
    target_file = os.path.join(actual_directory, target)
    surrogate_file = os.path.join(actual_directory, surrogate)
    if os.path.exists(target_file):
        # loading specific model (it needs to get the model file name to be opened)
        target_model = load(target_file)
    else:
        print('Target Model not found!')
        return -1

    if os.path.exists(surrogate_file):
        # loading specific model (it needs to get the model file name to be opened)
        surrogate_model = load(surrogate_file)
    else:
        print('Surrogate Model not found!')
        return -1

    # subsample used as SHAP background of the TARGET model
    X_train_sample, _, _, _ = train_test_split(X_train, y_train, train_size=p, stratify=y_train)

    # subsample used as SHAP background of the SURROGATE model (smaller share for larger training sets)
    if len(X_surrogate_train) > 800:
        surrogate_proportion = 0.1
    else:
        surrogate_proportion = 0.5
    X_surrogate_train_sample, _, _, _ = train_test_split(X_surrogate_train, y_surrogate_train, train_size=surrogate_proportion, stratify=y_surrogate_train)

    # 1% of the test data is explained by both models
    _, X_test_sample, _, _ = train_test_split(X_test, y_test, test_size=0.01, stratify=y_test)

    # Create SHAP explainers, each with a 10-centroid k-means summary of its background data
    explainer_1 = shap.KernelExplainer(target_model.predict, shap.kmeans(X_train_sample, 10))
    explainer_2 = shap.KernelExplainer(surrogate_model.predict, shap.kmeans(X_surrogate_train_sample, 10))

    # Calculate SHAP values on the same test sample for both models
    shap_values_1 = explainer_1.shap_values(X_test_sample)
    shap_values_2 = explainer_2.shap_values(X_test_sample)

    # Determine the top k features of each model
    top_feats_1 = _top_k_features(shap_values_1, k)
    top_feats_2 = _top_k_features(shap_values_2, k)

    # Calculate intersection and EBFA score
    intersection = top_feats_1.intersection(top_feats_2)
    ebfa_score = len(intersection) / k
    print('Average EBFA:', ebfa_score)
    return ebfa_score
    # we can return the intersection feature list, return (ebfa_score, intersection).




# Model Stealing attacks

## Attack - Query only - Baseline 1

In [None]:
def substitute_model_a1(target, X, y, X_train, y_train, detection_variation=0.05, threshold=0.5, surrogate_algo='RandomForestClassifier'):
    '''
    Query-only model extraction attack (baseline 1).
    Real samples are sent to the target classifier and the returned labels are used to train a substitute
    model. The balanced query set grows by 2 samples per class on every iteration until the detection rate
    of the substitute on a held-out test set is within `detection_variation` of the target's detection rate.

    :Param target: File name of the saved target model (resolved against the working directory)
    :Param X: Attack data
    :Param y: Attack label data (rows labelled -1 are treated as unlabeled and dropped)
    :Param X_train: The data used to train the target model. Only used for EBFA; this is not a black-box phase
    :Param y_train: The labels of data used to train the target model.
    :Param detection_variation: Maximum absolute difference between detection rates to declare convergence
    :Param threshold: Labels <= threshold form class 0 (benign), labels > threshold form class 1 (malicious)
    :Param surrogate_algo: Name of the algorithm used for the substitute model
    :Return: (num_samples * 2, detection rate of the last substitute model, EBFA score; -1 when no
             substitute model was saved), or -1 if `surrogate_algo` is unknown or the target model
             could not be loaded
    '''

    # config
    max_iter = 600 # max iterations to avoid an eternal loop
    num_samples = 10 # starting number of samples of each class. eg: 10 benign and 10 malicious samples
    surrogate_file_name = 'surrogate_model_a1_'+target
    substitute_model_path = os.path.join(os.getcwd(), surrogate_file_name)

    # Substitute model factories; the option is validated before any query is spent
    surrogate_factories = {
        'RandomForestClassifier': lambda: RandomForestClassifier(random_state=38),
        'KNNClassifier': lambda: KNeighborsClassifier(n_neighbors = 5),
        'QDAClassifier': lambda: QuadraticDiscriminantAnalysis(),
        'ADABoostClassifier': lambda: AdaBoostClassifier(random_state=38),
        'LogisticRegression': lambda: LogisticRegression(random_state=38),
        'MLPClassifier': lambda: MLPClassifier(hidden_layer_sizes=(154, 77), activation='relu', solver='sgd', learning_rate_init=0.0001, max_iter=20, random_state=38, verbose=True),
    }
    if surrogate_algo not in surrogate_factories:
        print(f"Option {surrogate_algo} isn't within algorithms options.")
        return -1

    # reset unpaired index and drop unlabeled rows.
    # The attack pool has its own names so the target training data (X_train / y_train) stays available
    # for EBFA, and it is re-indexed after filtering so positions and index labels always coincide.
    X = X.reset_index(drop=True)
    y = y.reset_index(drop=True)

    label_index = (y != -1)
    X_pool = X[label_index].reset_index(drop=True)
    y_pool = y[label_index].reset_index(drop=True)

    # creating subsamples (boolean masks over the labelled pool)
    index_0 = (y_pool <= threshold)
    index_1 = (y_pool > threshold)

    # test samples used in every iteration, including to calculate the detection rate of the target model
    test_index_0 = np.random.choice(np.where(index_0)[0], 500, replace=False)
    test_index_1 = np.random.choice(np.where(index_1)[0], 500, replace=False)
    test_sample = np.concatenate((test_index_0, test_index_1))
    np.random.shuffle(test_sample)
    X_test = X_pool.iloc[test_sample]
    y_test = y_pool.iloc[test_sample]

    # Removing test data from the pool, avoiding data leak
    X_pool = X_pool.drop(test_sample, axis=0).reset_index(drop=True)
    y_pool = y_pool.drop(test_sample, axis=0).reset_index(drop=True)

    # re-calculating the masks without the test samples
    index_0 = (y_pool <= threshold)
    index_1 = (y_pool > threshold)
    available_0 = int(index_0.sum())
    available_1 = int(index_1.sum())

    # target prediction value -> This value could be added to the query number, or we could consider that attacks were done before, out of attack context.
    target_classify = classify_api(target, X_test, threshold=0.5, type='hard')
    if not isinstance(target_classify, pd.Series):
        # classify_api returns -1 (after printing a message) when the target model file is missing
        return -1
    detection_rate = np.mean(target_classify.to_numpy() == y_test.to_numpy())

    converged = False
    for iter_count in tqdm.tqdm(range(max_iter), desc='Substitute model training'):
        print(f'Iteration {iter_count + 1}: Number of samples = {num_samples * 2}')
        balanced_sample_0 = np.random.choice(np.where(index_0)[0], num_samples, replace=False)
        balanced_sample_1 = np.random.choice(np.where(index_1)[0], num_samples, replace=False)
        balanced_sample = np.concatenate((balanced_sample_0, balanced_sample_1))
        np.random.shuffle(balanced_sample)

        # Query the target: the substitute is trained on the target's answers, not on the true labels
        X_subsample = X_pool.iloc[balanced_sample]
        y_subsample = classify_api(target, X_subsample, threshold=0.5, type='hard')

        # Training the substitute model
        surrogate_model = surrogate_factories[surrogate_algo]()
        surrogate_model.fit(X_subsample, y_subsample)
        dump(surrogate_model, "surrogate_model.joblib")

        surrogate_model_detection_rate = np.mean(classify_api("surrogate_model.joblib", X_test, threshold=0.5, type='hard').to_numpy() == y_test.to_numpy())
        print("=======================================================================================================")
        print(f'Detection rate of target model: {detection_rate}')
        print(f'Detection rate of surrogate model: {surrogate_model_detection_rate:.4f}')
        print("=======================================================================================================")

        # the intermediate file is only needed for the classify_api call above
        temp_file = os.path.join(os.getcwd(), "surrogate_model.joblib")
        if os.path.exists(temp_file):
            os.remove(temp_file)

        if abs(surrogate_model_detection_rate - detection_rate) <= detection_variation:
            print("Saving substitute model...")

            dump(surrogate_model, substitute_model_path)
            print(f'Convergence achieved after {iter_count +1}. Saving model at: {substitute_model_path}')
            converged = True
            break

        # if continuing, the number of samples is enlarged
        num_samples += 2
        # compare against the number of rows of each class, not against the length of the boolean masks
        if num_samples > available_0 or num_samples > available_1:
            print(f"Process finished after {iter_count +1}. The convergence wasn't achieved.")
            break

    if converged:
        print('Calculating EBFA...')
        # this phase is not black-box because it needs the train dataset of target model
        ave_ebfa = ebfa(target, substitute_model_path, X_train, y_train, X_subsample, y_subsample, X_test, y_test, k=15, p=0.01)
    else:
        # No substitute model was saved by this run; do not score a file left over from a previous run
        print('EBFA skipped: no substitute model was saved.')
        ave_ebfa = -1
    return (num_samples * 2, surrogate_model_detection_rate, ave_ebfa)

    

## Attack - Data Augmentation

In this type of attack, an augmentation technique is used to enlarge the amount of data available to train the surrogate model.

### Augmentation Techniques

In [None]:
def linear_interpolation(data, num_samples, lambda_):
    '''
    Create synthetic points by linear interpolation between randomly chosen pairs of rows of `data`.

    Each synthetic point is `factor * row_a + (1 - factor) * row_b` for two distinct rows.
    Duplicated points are removed, so fewer than `num_samples` rows may be returned; the remaining
    rows keep the order in which they were generated.

    :param data: 2-D data (DataFrame or array) with at least two rows, one sample per row
    :param num_samples: number of interpolations to draw
    :param lambda_: interpolation factor; a falsy value (None or 0) draws a random factor in [0, 1) per point
    :return: 2-D numpy array of synthetic points, with shape (0, n_features) when num_samples is 0
    '''
    points = np.asarray(data)
    # dict keys drop repeated points while preserving first-seen order (a set has arbitrary order)
    synthetic_data = {}

    for _ in range(num_samples):
        # choose two distinct random rows
        idx1, idx2 = np.random.choice(len(points), 2, replace=False)

        if (lambda_):
            factor = lambda_
        else:
            factor = np.random.uniform(0,1)

        new_point = factor * points[idx1] + (1 - factor) * points[idx2]
        synthetic_data[tuple(new_point)] = None

    if not synthetic_data:
        # keep the result 2-D so it can still be concatenated with other samples
        return np.empty((0,) + points.shape[1:])

    return np.array(list(synthetic_data))

In [None]:
def gaussian_noise(data, noise_level=0.01):
    '''
    Return a copy of `data` perturbed with zero-mean Gaussian noise.

    The standard deviation of the noise added to each column is `noise_level` times the
    standard deviation of that column.

    :param data: 2-D data, one sample per row
    :param noise_level: multiplier applied to each column's standard deviation
    :return: `data` plus the sampled noise, same shape as `data`
    '''
    per_column_scale = noise_level * np.std(data, axis=0)
    return data + np.random.normal(loc=0, scale=per_column_scale, size=data.shape)

In [None]:
def fake_gradient(model, data, noise_level):
    '''
    Create synthetic points by moving each sample in the direction in which noise changed the model output.

    The samples are perturbed with `gaussian_noise`; the sign of the change in `model.predict` between
    the noisy and the original sample (-1, 0 or 1) is applied to every feature of that sample, scaled by
    `noise_level`. Samples whose prediction does not change are returned unchanged.

    :param model: fitted model exposing `predict`
    :param data: 2-D data, one sample per row
    :param noise_level: noise level passed to `gaussian_noise` and step size of the move
    :return: numpy array of synthetic points with the same shape as `data`
    '''
    # inject noise to data (any method works; here the gaussian_noise function is used)
    perturbed = gaussian_noise(data, noise_level)

    baseline_pred = model.predict(data)
    perturbed_pred = model.predict(perturbed)

    # per-sample direction of the prediction change
    direction = np.sign(perturbed_pred - baseline_pred)
    # replicate the per-sample direction across all feature columns
    direction_matrix = np.repeat(direction[:, np.newaxis], data.shape[1], axis=1)

    return np.array(data + direction_matrix * noise_level)

### Data augmented attack - Baseline 2

In [None]:
def substitute_model_a2(target, X, y, X_train_target, y_train_target, lambda_, augment_count=2, detection_variation=0.03, threshold=0.5, mode='li', surrogate_algo='RandomForestClassifier'):
    '''
    Data-augmented model extraction attack (baseline 2).
    Real samples are labelled by querying the target classifier and are complemented with synthetic
    samples generated per class. The balanced query set grows by 2 samples per class on every iteration
    until the detection rate of the substitute on a held-out test set is within `detection_variation`
    of the target's detection rate.

    :Param target: File name of the saved target model (resolved against the working directory)
    :Param X: Attack data
    :Param y: Attack label data (rows labelled -1 are treated as unlabeled and dropped)
    :Param X_train_target: The data used to train the target model. Only used for EBFA; this is not a black-box phase
    :Param y_train_target: The labels of data used to train the target model.
    :Param lambda_: Interpolation factor ('li') or noise level ('gn', 'ag')
    :Param augment_count: For 'li', number of synthetic samples requested per real sample of each class
    :Param detection_variation: Maximum absolute difference between detection rates to declare convergence
    :Param threshold: Labels <= threshold form class 0 (benign), labels > threshold form class 1 (malicious)
    :Param mode: 'li' linear interpolation, 'gn' gaussian noise, 'ag' approximated (fake) gradient
    :Param surrogate_algo: Name of the algorithm used for the substitute model
    :Return: (num_samples * 2, detection rate of the last substitute model, EBFA score; -1 when no
             substitute model was saved), or -1 if `surrogate_algo` / `mode` is unknown or the target
             model could not be loaded
    '''

    # config
    max_iter = 600 # max iterations to avoid an eternal loop
    num_samples = 10 # starting number of samples of each class. eg: 10 benign and 10 malicious samples
    file_name = 'surrogate_model_a2_'+target
    substitute_model_path = os.path.join(os.getcwd(), file_name)

    # Substitute model factories. Other algorithms can be added here to be used as surrogate model.
    surrogate_factories = {
        'RandomForestClassifier': lambda: RandomForestClassifier(random_state=38),
        'KNNClassifier': lambda: KNeighborsClassifier(n_neighbors = 5),
        'QDAClassifier': lambda: QuadraticDiscriminantAnalysis(),
        'ADABoostClassifier': lambda: AdaBoostClassifier(random_state=38),
        'LogisticRegression': lambda: LogisticRegression(random_state=38),
        'MLPClassifier': lambda: MLPClassifier(hidden_layer_sizes=(154, 77), activation='relu', solver='sgd', learning_rate_init=0.001, max_iter=20, random_state=38, verbose=True),
    }
    # Both options are validated before any query is spent
    if surrogate_algo not in surrogate_factories:
        print(f"Option {surrogate_algo} isn't within algorithms options.")
        return -1
    if mode not in ('li', 'gn', 'ag'):
        print("Incorrect option.")
        return -1

    # reset unpaired index and drop unlabeled rows.
    # The pool is re-indexed after filtering so positions and index labels always coincide.
    X = X.reset_index(drop=True)
    y = y.reset_index(drop=True)

    label_index = (y != -1)
    X_pool = X[label_index].reset_index(drop=True)
    y_pool = y[label_index].reset_index(drop=True)

    # creating subsamples (boolean masks over the labelled pool)
    index_0 = (y_pool <= threshold)
    index_1 = (y_pool > threshold)

    # test samples used in every iteration, including to calculate the detection rate of the target model
    test_index_0 = np.random.choice(np.where(index_0)[0], 500, replace=False)
    test_index_1 = np.random.choice(np.where(index_1)[0], 500, replace=False)
    test_sample = np.concatenate((test_index_0, test_index_1))
    np.random.shuffle(test_sample)
    X_test = X_pool.iloc[test_sample]
    y_test = y_pool.iloc[test_sample]

    # Removing test data from the pool, avoiding data leak
    X_pool = X_pool.drop(test_sample, axis=0).reset_index(drop=True)
    y_pool = y_pool.drop(test_sample, axis=0).reset_index(drop=True)

    # re-calculating the masks without the test samples
    index_0 = (y_pool <= threshold)
    index_1 = (y_pool > threshold)
    available_0 = int(index_0.sum())
    available_1 = int(index_1.sum())

    # target prediction value -> This value could be added to the query number, or we could consider that attacks were done before, out of attack context.
    target_classify = classify_api(target, X_test, threshold=0.5, type='hard')
    if not isinstance(target_classify, pd.Series):
        # classify_api returns -1 (after printing a message) when the target model file is missing
        return -1
    detection_rate = np.mean(target_classify.to_numpy() == y_test.to_numpy())

    converged = False
    for iter_count in tqdm.tqdm(range(max_iter), desc='Substitute model training'):
        print(f'Iteration {iter_count + 1}: Number of samples = {num_samples * 2}')
        balanced_sample_0 = np.random.choice(np.where(index_0)[0], num_samples, replace=False)
        balanced_sample_1 = np.random.choice(np.where(index_1)[0], num_samples, replace=False)
        balanced_sample = np.concatenate((balanced_sample_0, balanced_sample_1))
        np.random.shuffle(balanced_sample)

        # creating REAL-DATA subsampling, labelled by querying the target
        X_subsample_real = X_pool.iloc[balanced_sample]
        y_subsample_real = classify_api(target, X_subsample_real, threshold=0.5, type='hard')

        # creating synthetic-data subsampling, separately from the real rows of each class
        real_class_0 = X_pool.iloc[balanced_sample_0]
        real_class_1 = X_pool.iloc[balanced_sample_1]
        if mode == 'li': # linear interpolation
            n_synthetic = int(round(num_samples * augment_count))
            synthetic_data_0 = linear_interpolation(real_class_0, n_synthetic, lambda_)
            synthetic_data_1 = linear_interpolation(real_class_1, n_synthetic, lambda_)
            print(f'Creating {int(len(synthetic_data_0)) + int(len(synthetic_data_1))} synthetic samples using Linear Interpolation')

        elif mode == 'gn': # gaussian noise
            synthetic_data_0 = gaussian_noise(real_class_0, noise_level=lambda_)
            synthetic_data_1 = gaussian_noise(real_class_1, noise_level=lambda_)
            print(f'Creating {int(len(synthetic_data_0)) + int(len(synthetic_data_1))} syntetic samples using Gaussian noise')

        else: # 'ag': Fake gradient or Approximated Gradient
            sub_substitute_model = RandomForestClassifier(random_state=20)
            sub_substitute_model.fit(X_subsample_real, y_subsample_real)
            synthetic_data_0 = fake_gradient(sub_substitute_model, real_class_0, lambda_)
            synthetic_data_1 = fake_gradient(sub_substitute_model, real_class_1, lambda_)
            print(f'Creating {int(len(synthetic_data_0)) + int(len(synthetic_data_1))} synthetic samples using fake gradient') 

        # Concatenating real samples with synthetic samples
        X_subsample = np.concatenate((X_subsample_real, synthetic_data_0, synthetic_data_1))
        X_subsample_df = pd.DataFrame(X_subsample, columns=X.columns) # changing to pd dataframe

        # Synthetic samples inherit the class of the real samples they were generated from:
        # 0 for the benign group and 1 for the malicious group
        synthetic_y_0 = np.array([0] * len(synthetic_data_0))
        synthetic_y_1 = np.array([1] * len(synthetic_data_1))
        y_subsample = np.concatenate((y_subsample_real, synthetic_y_0, synthetic_y_1))
        y_subsample_df = pd.Series(y_subsample)

        print(f'Total of {len(X_subsample_df)} samples')

        # Training the substitute model
        surrogate_model = surrogate_factories[surrogate_algo]()
        surrogate_model.fit(X_subsample_df, y_subsample_df)
        dump(surrogate_model, "surrogate_model_intermediate_a2.joblib")

        surrogate_classify = classify_api("surrogate_model_intermediate_a2.joblib", X_test, threshold=0.5, type='hard')
        surrogate_model_detection_rate = np.mean(surrogate_classify.to_numpy() == y_test.to_numpy())
        print("=======================================================================================================")
        print(f'Detection rate of target model: {detection_rate:.4f}')
        print(f'Detection rate of surrogate model: {surrogate_model_detection_rate:.4f}')
        print("=======================================================================================================")

        # the intermediate file is only needed for the classify_api call above
        temp_file = os.path.join(os.getcwd(), "surrogate_model_intermediate_a2.joblib")
        if os.path.exists(temp_file):
            os.remove(temp_file)

        if abs(surrogate_model_detection_rate - detection_rate) <= detection_variation:
            print("Saving substitute model")
            dump(surrogate_model, substitute_model_path)

            print(f'Convergence achieved after {iter_count +1}. Saving model at: {substitute_model_path}')
            converged = True
            break

        # if continuing, the number of samples is enlarged
        num_samples += 2
        # compare against the number of rows of each class, not against the length of the boolean masks
        if num_samples > available_0 or num_samples > available_1:
            print(f"Process finished after {iter_count +1}. The convergence wasn't achieved.")
            break

    if converged:
        print('Calculating EBFA...')
        # this phase is not black-box because it needs the train dataset of target model
        ave_ebfa = ebfa(target, substitute_model_path, X_train_target, y_train_target, X_subsample_df, y_subsample_df, X_test, y_test, k=25, p=0.0001)
    else:
        # No substitute model was saved by this run; do not score a file left over from a previous run
        print('EBFA skipped: no substitute model was saved.')
        ave_ebfa = -1
    return (num_samples * 2, surrogate_model_detection_rate, ave_ebfa)



In [None]:
def substitute_model_our_attack_v2(target, X, y, augment_count=2, string='aa', lambda_=0.5, ebfa_limit=0.6, mode='gn', surrogate_algo='RandomForestClassifier', threshold=0.5):
    '''
    Train a surrogate of a target model, enlarging the training set round by
    round until the SHAP-based agreement with the target reaches ``ebfa_limit``.

    Every round draws a class-balanced batch of real rows, labels it with the
    answers returned by ``classify_api`` for the target, appends synthetic rows
    derived from that batch, fits the surrogate and scores the agreement between
    the SHAP values of both models with ``shap_values_importance``.

    :Param target: Path of the serialized target model. It is read with
        ``load``, handed to ``classify_api`` and embedded in the output file name.
    :Param X: Attack data (pandas DataFrame).
    :Param y: Attack label-data (pandas Series). Rows labelled -1 are dropped;
        labels <= ``threshold`` form class 0, labels > ``threshold`` class 1.
    :Param augment_count: Multiplier applied to the per-class sample count to
        obtain the amount passed to ``linear_interpolation`` (``mode='li'`` only).
    :Param string: Prefix of the file name used to save the surrogate.
    :Param lambda_: Third argument of ``linear_interpolation`` (``mode='li'``)
        or ``noise_level`` of ``gaussian_noise`` (``mode='gn'``).
    :Param ebfa_limit: Agreement score at which training stops.
    :Param mode: ``'li'`` (linear interpolation) or ``'gn'`` (Gaussian noise).
    :Param surrogate_algo: Name of the surrogate algorithm to instantiate.
    :Param threshold: Cut-off that separates class 0 from class 1 in ``y``.
    :Return: ``(num_samples * 2, ebfa)`` when the limit is reached, otherwise
        ``-1`` (unknown algorithm or mode, data exhausted, or iteration cap hit).
    '''

    # config
    max_iter = 100      # max iterations to avoid an eternal loop
    num_samples = 200   # rows drawn from EACH class in the first round
    sample_step = 200   # per-class growth applied after an unsuccessful round
    file_name = string+'ebfa_limit_'+str(ebfa_limit)+'_surrogate_model_our_attack_'+surrogate_algo+'_'+target+'_'+mode
    substitute_model_path = os.path.join(os.getcwd(), file_name)

    # Factories keep construction lazy: only the requested estimator is built.
    surrogate_factories = {
        'RandomForestClassifier': lambda: RandomForestClassifier(random_state=38),
        'DecisionTreeClassifier': lambda: DecisionTreeClassifier(criterion='log_loss'),
        'KNNClassifier': lambda: KNeighborsClassifier(n_neighbors=5),
        'QDAClassifier': lambda: QuadraticDiscriminantAnalysis(),
        'ADABoostClassifier': lambda: AdaBoostClassifier(random_state=38),
        'LogisticRegression': lambda: LogisticRegression(random_state=38),
        'MLPClassifier': lambda: MLPClassifier(hidden_layer_sizes=(154, 154), activation='relu', solver='sgd', alpha=1e-4, learning_rate_init=0.001, max_iter=100, random_state=38, verbose=True),
    }
    if surrogate_algo not in surrogate_factories:
        print(f"Option {surrogate_algo} isn't within algorithms options.")
        return -1
    # Reject an unknown augmentation mode before any expensive SHAP computation.
    if mode not in ('li', 'gn'):
        print("Incorrect option.")
        return -1

    target_model = load(target)
    surrogate_model = surrogate_factories[surrogate_algo]()

    # Drop unlabeled rows and re-index, so that positions (iloc / np.where) and
    # index labels (drop) refer to the same rows from here on.
    X = X.reset_index(drop=True)
    y = y.reset_index(drop=True)
    label_index = (y != -1)
    X_train = X[label_index].reset_index(drop=True)
    y_train = y[label_index].reset_index(drop=True)

    # Class masks are built on the filtered labels, never on the raw ``y``.
    index_0 = (y_train <= threshold)
    index_1 = (y_train > threshold)

    # test samples to be used in all of the validations
    test_index_0 = np.random.choice(np.where(index_0)[0], 500, replace=False)
    test_index_1 = np.random.choice(np.where(index_1)[0], 500, replace=False)
    test_sample = np.concatenate((test_index_0, test_index_1))
    np.random.shuffle(test_sample)
    X_test = X_train.iloc[test_sample]
    y_test = y_train.iloc[test_sample]

    # Removing test data from original dataset, avoiding data leak
    X_train = X_train.drop(test_sample, axis=0).reset_index(drop=True)
    y_train = y_train.drop(test_sample, axis=0).reset_index(drop=True)

    # Row positions still available per class once the test rows are gone.
    pool_0 = np.where(y_train <= threshold)[0]
    pool_1 = np.where(y_train > threshold)[0]

    # Shap config: small stratified subsets keep the sampling explainer tractable.
    X_train_sample, _, _, _ = train_test_split(X_train, y_train, train_size=0.01, stratify=y_train)
    _, X_test_sample, _, _ = train_test_split(X_test, y_test, test_size=0.1, stratify=y_test)
    X_test_sample = pd.DataFrame(X_test_sample, columns=X.columns)

    # calculating shap-sampling to the target model
    print('Calculating Shap values of the target model')
    explainer_1 = shap.SamplingExplainer(target_model.predict, X_train_sample)
    shap_values_1 = explainer_1.shap_values(X_test_sample)

    for iter_count in tqdm.tqdm(range(max_iter), desc='Substitute model training'):
        print(f'Iteration {iter_count + 1}: Number of samples = {num_samples * 2}')
        balanced_sample_0 = np.random.choice(pool_0, num_samples, replace=False)
        balanced_sample_1 = np.random.choice(pool_1, num_samples, replace=False)
        balanced_sample = np.concatenate((balanced_sample_0, balanced_sample_1))
        np.random.shuffle(balanced_sample)

        # Real rows are labelled with the target's own answers.
        X_subsample_real = X_train.iloc[balanced_sample]
        y_subsample_real = classify_api(target, X_subsample_real, threshold=0.5, type='hard')

        # Synthetic rows are generated separately per class.
        if mode == 'li':  # linear interpolation
            synthetic_data_0 = linear_interpolation(X_train.iloc[balanced_sample_0], (int(round(num_samples * augment_count))), lambda_)
            synthetic_data_1 = linear_interpolation(X_train.iloc[balanced_sample_1], (int(round(num_samples * augment_count))), lambda_)
            print(f'Creating {int(len(synthetic_data_0)) + int(len(synthetic_data_1))} synthetic samples using Linear Interpolation')
        else:  # 'gn': gaussian noise (mode was validated above)
            synthetic_data_0 = gaussian_noise(X_train.iloc[balanced_sample_0], noise_level=lambda_)
            synthetic_data_1 = gaussian_noise(X_train.iloc[balanced_sample_1], noise_level=lambda_)
            print(f'Creating {int(len(synthetic_data_0)) + int(len(synthetic_data_1))} syntetic samples using Gaussian noise')

        # Concatenating real samples with synthetic samples
        X_subsample = np.concatenate((X_subsample_real, synthetic_data_0, synthetic_data_1))
        X_subsample_df = pd.DataFrame(X_subsample, columns=X.columns)

        # Synthetic rows inherit the class of the rows they were derived from.
        synthetic_y_0 = np.zeros(len(synthetic_data_0), dtype=int)
        synthetic_y_1 = np.ones(len(synthetic_data_1), dtype=int)
        y_subsample = np.concatenate((y_subsample_real, synthetic_y_0, synthetic_y_1))
        y_subsample_df = pd.Series(y_subsample)

        print(f'Total of {len(X_subsample_df)} samples')
        surrogate_model.fit(X_subsample_df, y_subsample_df)

        print('Calculating EBFA...')
        explainer_2 = shap.SamplingExplainer(surrogate_model.predict, X_train_sample)
        shap_values_2 = explainer_2.shap_values(X_test_sample)
        ebfa_models = shap_values_importance(shap_values_1, shap_values_2, k=15)

        print("=======================================================================================================")
        print(f'Queries: {(num_samples*2):.4f}')
        print(f'Explainability-Based Feature Agreement: {ebfa_models:.4f}')
        print("=======================================================================================================")

        if abs(ebfa_models) >= ebfa_limit:
            print("Saving substitute model")
            dump(surrogate_model, substitute_model_path)

            print(f'Convergence achieved after {iter_count +1}. Saving model at: {substitute_model_path}')
            return (num_samples * 2, ebfa_models)

        # if continuing, the number of samples is enlarged
        num_samples += sample_step
        # Compare against the rows actually available in each class, not the
        # total length of the mask, so exhaustion is reported instead of raised.
        if num_samples > len(pool_0) or num_samples > len(pool_1):
            print(f"Process finished after {iter_count +1}. The convergence wasn't achieved.")
            return -1

    # Iteration cap reached without meeting the agreement limit.
    print(f"Process finished after {max_iter}. The convergence wasn't achieved.")
    return -1

# Threat Model

## Query-only attacks - Baseline 1

Here we execute the attacks comparing the results between the methods.

### Detection Variation: 0.0

In [None]:
# Baseline 1 (query-only), detection variation 0.00: every target is attacked
# with a surrogate built from the same algorithm as the target itself.
Detection_variation = 0.00
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades = []
for target in target_list:
    # The surrogate algorithm name is the file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a1(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        detection_variation=Detection_variation,
        threshold=0.5,
        surrogate_algo=surrogate_algo,
    )
    grades += [grade]
    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_0_a1 = {name: result for name, result in zip(target_list, grades)}

print(f'Array of grades: {grades_detection_variation_0_a1}')

In [None]:
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_0_a1.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df = pd.DataFrame(data=dados, columns=columns)
df



In [None]:
# Baseline 1 (query-only), detection variation 0.00, RandomForestClassifier as
# surrogate: `surrogate_algo` is not passed, so substitute_model_a1 falls back to
# its default surrogate.
grades_surrogate_randomforest = []
target_list = ['ADABoostClassifier.joblib', 'KNNClassifier.joblib', 'LogisticRegression.joblib', 'MLPClassifier.joblib', 'QDAClassifier.joblib', 'RandomForestClassifier.joblib']
Detection_variation = 0.00
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a1(target,X_data_extraction, y_data_extraction, X_train, y_train, detection_variation=Detection_variation, threshold=0.5)
    grades_surrogate_randomforest.append(grade)
    # Report the surrogate actually used here; this cell never sets
    # `surrogate_algo`, so reading it would echo a value left by another cell.
    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_0_a1_surrogate_randomforest = dict(zip(target_list, grades_surrogate_randomforest))

print(f'Array of grades: {grades_detection_variation_0_a1_surrogate_randomforest}')

In [None]:
# Show the target -> grade mapping collected for the RandomForest-surrogate run.
grades_detection_variation_0_a1_surrogate_randomforest

In [None]:
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_0_a1_surrogate_randomforest.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a1 = pd.DataFrame(data=dados, columns=columns)
df_randomforest_a1

### Detection Variation: 0.01

In [None]:
# Baseline 1 (query-only), detection variation 0.01: every target is attacked
# with a surrogate built from the same algorithm as the target itself.
Detection_variation = 0.01
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_01_a1 = []
for target in target_list:
    # The surrogate algorithm name is the file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a1(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        detection_variation=Detection_variation,
        threshold=0.5,
        surrogate_algo=surrogate_algo,
    )
    grades_01_a1 += [grade]
    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_01_a1 = {name: result for name, result in zip(target_list, grades_01_a1)}

print(f'Array of grades: {grades_detection_variation_01_a1}')

In [None]:
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_01_a1.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a1_01 = pd.DataFrame(data=dados, columns=columns)
df_randomforest_a1_01

In [None]:
# Baseline 1 (query-only), detection variation 0.01, RandomForestClassifier as
# surrogate: `surrogate_algo` is left at substitute_model_a1's default.
Detection_variation = 0.01
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_01_a1_surrogate_randomforest = []
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a1(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        detection_variation=Detection_variation,
        threshold=0.5,
    )
    grades_01_a1_surrogate_randomforest += [grade]
    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_01_a1_surrogate_randomforest = {name: result for name, result in zip(target_list, grades_01_a1_surrogate_randomforest)}

print(f'Array of grades: {grades_detection_variation_01_a1_surrogate_randomforest}')

In [None]:
# Tabulate the RandomForest-surrogate results for detection variation 0.01:
# one row per target, the model name followed by the values of its grade.
dados = [[key] + list(value) for key, value in grades_detection_variation_01_a1_surrogate_randomforest.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
# Store the table under a name that matches its content (RandomForest surrogate).
df_randomforest_a1_01_surrogate_randomforest = pd.DataFrame(dados, columns=columns)
# Legacy binding kept for backward compatibility. NOTE(review): it replaces the
# same-algorithm table that was stored under this name by an earlier cell.
df_randomforest_a1_01 = df_randomforest_a1_01_surrogate_randomforest
df_randomforest_a1_01_surrogate_randomforest

### Detection Variation: 0.02

In [None]:
# Baseline 1 (query-only), detection variation 0.02: every target is attacked
# with a surrogate built from the same algorithm as the target itself.
Detection_variation = 0.02
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_02_a1 = []
for target in target_list:
    # The surrogate algorithm name is the file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a1(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        detection_variation=Detection_variation,
        threshold=0.5,
        surrogate_algo=surrogate_algo,
    )
    grades_02_a1 += [grade]
    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_02_a1 = {name: result for name, result in zip(target_list, grades_02_a1)}

print(f'Array of grades: {grades_detection_variation_02_a1}')

In [None]:
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_02_a1.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a1_02 = pd.DataFrame(data=dados, columns=columns)
df_randomforest_a1_02

In [None]:
# Baseline 1 (query-only), detection variation 0.02, RandomForestClassifier as
# surrogate: `surrogate_algo` is left at substitute_model_a1's default.
Detection_variation = 0.02
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_02_a1_surrogate_randomforest = []
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a1(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        detection_variation=Detection_variation,
        threshold=0.5,
    )
    grades_02_a1_surrogate_randomforest += [grade]
    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_02_a1_surrogate_randomforest = {name: result for name, result in zip(target_list, grades_02_a1_surrogate_randomforest)}

print(f'Array of grades: {grades_detection_variation_02_a1_surrogate_randomforest}')

In [None]:
# Tabulate the RandomForest-surrogate results for detection variation 0.02:
# one row per target, the model name followed by the values of its grade.
dados = [[key] + list(value) for key, value in grades_detection_variation_02_a1_surrogate_randomforest.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
# Store the table under a name that matches the 0.02 results it holds.
df_randomforest_a1_02_surrogate_randomforest = pd.DataFrame(dados, columns=columns)
# Legacy binding (named "_03_" although it holds the 0.02 results) kept for
# backward compatibility with any cell that still reads it.
df_randomforest_a1_03_surrogate_randomforest = df_randomforest_a1_02_surrogate_randomforest
df_randomforest_a1_02_surrogate_randomforest

### Detection Variation: 0.03

In [None]:
# Baseline 1 (query-only), detection variation 0.03: every target is attacked
# with a surrogate built from the same algorithm as the target itself.
Detection_variation = 0.03
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_03_a1 = []
for target in target_list:
    # The surrogate algorithm name is the file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a1(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        detection_variation=Detection_variation,
        threshold=0.5,
        surrogate_algo=surrogate_algo,
    )
    grades_03_a1 += [grade]
    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_03_a1 = {name: result for name, result in zip(target_list, grades_03_a1)}

print(f'Array of grades: {grades_detection_variation_03_a1}')

In [None]:
# Tabulate the same-algorithm-surrogate results for detection variation 0.03:
# one row per target, the model name followed by the values of its grade.
dados = [[key] + list(value) for key, value in grades_detection_variation_03_a1.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
# Store the table under a name that matches its content (same-algorithm run).
df_randomforest_a1_03 = pd.DataFrame(dados, columns=columns)
# Legacy binding (named "_surrogate_randomforest" although these are the
# same-algorithm results) kept for backward compatibility.
df_randomforest_a1_03_surrogate_randomforest = df_randomforest_a1_03
df_randomforest_a1_03

In [None]:
# Baseline 1 (query-only), detection variation 0.03, RandomForestClassifier as
# surrogate: `surrogate_algo` is left at substitute_model_a1's default.
Detection_variation = 0.03
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_03_a1_surrogate_randomforest = []
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a1(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        detection_variation=Detection_variation,
        threshold=0.5,
    )
    grades_03_a1_surrogate_randomforest += [grade]
    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_03_a1_surrogate_randomforest = {name: result for name, result in zip(target_list, grades_03_a1_surrogate_randomforest)}

print(f'Array of grades: {grades_detection_variation_03_a1_surrogate_randomforest}')

In [None]:
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_03_a1_surrogate_randomforest.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a1_03_surrogate_randomforest = pd.DataFrame(data=dados, columns=columns)
df_randomforest_a1_03_surrogate_randomforest

## Augmented-data attacks - Baseline 2

First type of attack, injecting Gaussian Noise into the data.

### Detection Variation: 0.0 and Gaussian Noise

In [None]:
# Baseline 2 (augmented data, Gaussian noise), detection variation 0.00: every
# target is attacked with a surrogate of its own algorithm, then tabulated.
Detection_variation = 0.00
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_gn_00 = []
for target in target_list:
    # The surrogate algorithm name is the file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        lambda_=0.2,
        augment_count=2,
        detection_variation=Detection_variation,
        threshold=0.5,
        mode='gn',
        surrogate_algo=surrogate_algo,
    )
    grades_a2_gn_00 += [grade]
    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_0_a2_gn = {name: result for name, result in zip(target_list, grades_a2_gn_00)}

print(f'Array of grades: {grades_detection_variation_0_a2_gn}')
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_0_a2_gn.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a2_00_gn = pd.DataFrame(data=dados, columns=columns)

In [None]:
# Show the Gaussian-noise, same-algorithm results table (detection variation 0.00).
df_randomforest_a2_00_gn

In [None]:
# Baseline 2 (augmented data, Gaussian noise), detection variation 0.00,
# RandomForestClassifier as surrogate: `surrogate_algo` is left at
# substitute_model_a2's default. Results are tabulated at the end.
Detection_variation = 0.00
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_gn_00_surrogate_randomforestclassifier = []
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        lambda_=0.2,
        augment_count=2,
        detection_variation=Detection_variation,
        threshold=0.5,
        mode='gn',
    )
    grades_a2_gn_00_surrogate_randomforestclassifier += [grade]
    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_0_a2_gn_surrogate_randomforestclassifier = {name: result for name, result in zip(target_list, grades_a2_gn_00_surrogate_randomforestclassifier)}

print(f'Array of grades: {grades_detection_variation_0_a2_gn_surrogate_randomforestclassifier}')
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_0_a2_gn_surrogate_randomforestclassifier.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a2_00_gn_surrogate_randomforest = pd.DataFrame(data=dados, columns=columns)

In [None]:
# Show the Gaussian-noise, RandomForest-surrogate results table (detection variation 0.00).
df_randomforest_a2_00_gn_surrogate_randomforest


### Detection Variation: 0.01 and Gaussian Noise

In [None]:
# Baseline 2 (augmented data, Gaussian noise), detection variation 0.01: every
# target is attacked with a surrogate of its own algorithm, then tabulated.
Detection_variation = 0.01
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_gn_01 = []
for target in target_list:
    # The surrogate algorithm name is the file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        lambda_=0.2,
        augment_count=2,
        detection_variation=Detection_variation,
        threshold=0.5,
        mode='gn',
        surrogate_algo=surrogate_algo,
    )
    grades_a2_gn_01 += [grade]
    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_01_a2_gn = {name: result for name, result in zip(target_list, grades_a2_gn_01)}

print(f'Array of grades: {grades_detection_variation_01_a2_gn}')
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_01_a2_gn.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a2_01_gn = pd.DataFrame(data=dados, columns=columns)

In [None]:
# Show the Gaussian-noise, same-algorithm results table (detection variation 0.01).
df_randomforest_a2_01_gn

In [None]:
# Baseline 2 (augmented data, Gaussian noise), detection variation 0.01,
# RandomForestClassifier as surrogate: `surrogate_algo` is left at
# substitute_model_a2's default. Results are tabulated at the end.
Detection_variation = 0.01
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_gn_01_surrogate_randomforestclassifier = []
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        lambda_=0.2,
        augment_count=2,
        detection_variation=Detection_variation,
        threshold=0.5,
        mode='gn',
    )
    grades_a2_gn_01_surrogate_randomforestclassifier += [grade]
    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_01_a2_gn_surrogate_randomforestclassifier = {name: result for name, result in zip(target_list, grades_a2_gn_01_surrogate_randomforestclassifier)}

print(f'Array of grades: {grades_detection_variation_01_a2_gn_surrogate_randomforestclassifier}')
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_01_a2_gn_surrogate_randomforestclassifier.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a2_01_gn_surrogate_randomforest = pd.DataFrame(data=dados, columns=columns)

In [None]:
# Show the Gaussian-noise, RandomForest-surrogate results table (detection variation 0.01).
df_randomforest_a2_01_gn_surrogate_randomforest

### Detection Variation: 0.02 and Gaussian Noise

In [None]:
# Baseline 2 (augmented data, Gaussian noise), detection variation 0.02: every
# target is attacked with a surrogate of its own algorithm, then tabulated.
Detection_variation = 0.02
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_gn_02 = []
for target in target_list:
    # The surrogate algorithm name is the file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        lambda_=0.2,
        augment_count=2,
        detection_variation=Detection_variation,
        threshold=0.5,
        mode='gn',
        surrogate_algo=surrogate_algo,
    )
    grades_a2_gn_02 += [grade]
    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_02_a2_gn = {name: result for name, result in zip(target_list, grades_a2_gn_02)}

print(f'Array of grades: {grades_detection_variation_02_a2_gn}')
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_02_a2_gn.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a2_02_gn = pd.DataFrame(data=dados, columns=columns)

In [None]:
# Show the Gaussian-noise, same-algorithm results table (detection variation 0.02).
df_randomforest_a2_02_gn

In [None]:
# Baseline 2 (augmented data, Gaussian noise), detection variation 0.02,
# RandomForestClassifier as surrogate: `surrogate_algo` is left at
# substitute_model_a2's default. Results are tabulated at the end.
Detection_variation = 0.02
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_gn_02_surrogate_randomforestclassifier = []
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        lambda_=0.2,
        augment_count=2,
        detection_variation=Detection_variation,
        threshold=0.5,
        mode='gn',
    )
    grades_a2_gn_02_surrogate_randomforestclassifier += [grade]
    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_02_a2_gn_surrogate_randomforestclassifier = {name: result for name, result in zip(target_list, grades_a2_gn_02_surrogate_randomforestclassifier)}

print(f'Array of grades: {grades_detection_variation_02_a2_gn_surrogate_randomforestclassifier}')
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_02_a2_gn_surrogate_randomforestclassifier.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a2_02_gn_surrogate_randomforest = pd.DataFrame(data=dados, columns=columns)

In [None]:
# Show the Gaussian-noise, RandomForest-surrogate results table (detection variation 0.02).
df_randomforest_a2_02_gn_surrogate_randomforest

### Detection Variation: 0.03 and Gaussian Noise

In [None]:
# Baseline 2 (augmented data, Gaussian noise), detection variation 0.03: every
# target is attacked with a surrogate of its own algorithm, then tabulated.
Detection_variation = 0.03
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_gn_03 = []
for target in target_list:
    # The surrogate algorithm name is the file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target,
        X_data_extraction,
        y_data_extraction,
        X_train,
        y_train,
        lambda_=0.2,
        augment_count=2,
        detection_variation=Detection_variation,
        threshold=0.5,
        mode='gn',
        surrogate_algo=surrogate_algo,
    )
    grades_a2_gn_03 += [grade]
    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt on every pass so partial results stay available per target.
    grades_detection_variation_03_a2_gn = {name: result for name, result in zip(target_list, grades_a2_gn_03)}

print(f'Array of grades: {grades_detection_variation_03_a2_gn}')
# One table row per target: the model name followed by the values of its grade.
dados = [[key, *value] for key, value in grades_detection_variation_03_a2_gn.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a2_03_gn = pd.DataFrame(data=dados, columns=columns)

In [None]:
# Show the Gaussian-noise, same-algorithm results table (detection variation 0.03).
df_randomforest_a2_03_gn

In [None]:
# Attack each saved target model with Gaussian-noise augmentation (mode='gn') and a
# detection variation of 0.03. surrogate_algo is not passed, so substitute_model_a2
# uses its default, which the messages below describe as RandomForestClassifier.
Detection_variation = 0.03
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_gn_03_surrogate_randomforestclassifier = []
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target, X_data_extraction, y_data_extraction, X_train, y_train,
        lambda_=0.2, augment_count=2, detection_variation=Detection_variation,
        threshold=0.5, mode='gn',
    )
    grades_a2_gn_03_surrogate_randomforestclassifier.append(grade)

    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt after every attack so partial results survive an interrupted run.
    grades_detection_variation_03_a2_gn_surrogate_randomforestclassifier = dict(zip(target_list, grades_a2_gn_03_surrogate_randomforestclassifier))

print(f'Array of grades: {grades_detection_variation_03_a2_gn_surrogate_randomforestclassifier}')
# One table row per target model: its file name followed by the values of its grade.
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
dados = [[model_name, *scores] for model_name, scores in grades_detection_variation_03_a2_gn_surrogate_randomforestclassifier.items()]
df_randomforest_a2_03_gn_surrogate_randomforest = pd.DataFrame(dados, columns=columns)

In [None]:
# Render the results table built in the previous cell (columns: Model, Query, Detection Rate, EBFA).
df_randomforest_a2_03_gn_surrogate_randomforest

### Detection Variation: 0.0 and Approximated Gradient

In [None]:
# Attack each saved target model with a surrogate of the same algorithm family,
# using approximated-gradient augmentation (mode='ag') and a detection variation of 0.0.
Detection_variation = 0.00
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_ag_00 = []
for target in target_list:
    # The surrogate algorithm name is the target file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target, X_data_extraction, y_data_extraction, X_train, y_train,
        lambda_=0.2, augment_count=2, detection_variation=Detection_variation,
        threshold=0.5, mode='ag', surrogate_algo=surrogate_algo,
    )
    grades_a2_ag_00.append(grade)

    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt after every attack so partial results survive an interrupted run.
    grades_detection_variation_00_a2_ag = dict(zip(target_list, grades_a2_ag_00))

print(f'Array of grades: {grades_detection_variation_00_a2_ag}')
# One table row per target model: its file name followed by the values of its grade.
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
dados = [[model_name, *scores] for model_name, scores in grades_detection_variation_00_a2_ag.items()]
df_randomforest_a2_00_ag = pd.DataFrame(dados, columns=columns)

In [None]:
# Render the results table built in the previous cell (columns: Model, Query, Detection Rate, EBFA).
df_randomforest_a2_00_ag

In [None]:
# Attack each saved target model with approximated-gradient augmentation (mode='ag')
# and a detection variation of 0.0. surrogate_algo is not passed, so substitute_model_a2
# uses its default, which the messages below describe as RandomForestClassifier.
grades_a2_ag_00_surrogate_randomforestclassifier = []
target_list = ['ADABoostClassifier.joblib', 'KNNClassifier.joblib', 'LogisticRegression.joblib', 'MLPClassifier.joblib', 'QDAClassifier.joblib', 'RandomForestClassifier.joblib']
# This is the 0.0 experiment (see the section heading and the *_00_* result names);
# the value was previously 0.03, so the "00" table silently reported the 0.03 run.
Detection_variation = 0.00
for target in target_list:

    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(target,X_data_extraction, y_data_extraction, X_train, y_train, lambda_=0.2, augment_count=2, detection_variation=Detection_variation, threshold=0.5, mode='ag')

    grades_a2_ag_00_surrogate_randomforestclassifier.append(grade)
    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt after every attack so partial results survive an interrupted run.
    grades_detection_variation_00_a2_ag_surrogate_randomforestclassifier = dict(zip(target_list, grades_a2_ag_00_surrogate_randomforestclassifier))

print(f'Array of grades: {grades_detection_variation_00_a2_ag_surrogate_randomforestclassifier}')
# One table row per target model: its file name followed by the values of its grade.
dados = [[key] + list(value) for key, value in grades_detection_variation_00_a2_ag_surrogate_randomforestclassifier.items()]
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
df_randomforest_a2_00_ag_surrogate_randomforest = pd.DataFrame(dados, columns=columns)

In [None]:
# Render the results table built in the previous cell (columns: Model, Query, Detection Rate, EBFA).
df_randomforest_a2_00_ag_surrogate_randomforest

### Detection Variation: 0.01 and Approximated Gradient

In [None]:
# Attack each saved target model with a surrogate of the same algorithm family,
# using approximated-gradient augmentation (mode='ag') and a detection variation of 0.01.
Detection_variation = 0.01
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_ag_01 = []
for target in target_list:
    # The surrogate algorithm name is the target file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target, X_data_extraction, y_data_extraction, X_train, y_train,
        lambda_=0.2, augment_count=2, detection_variation=Detection_variation,
        threshold=0.5, mode='ag', surrogate_algo=surrogate_algo,
    )
    grades_a2_ag_01.append(grade)

    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt after every attack so partial results survive an interrupted run.
    grades_detection_variation_01_a2_ag = dict(zip(target_list, grades_a2_ag_01))

print(f'Array of grades: {grades_detection_variation_01_a2_ag}')
# One table row per target model: its file name followed by the values of its grade.
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
dados = [[model_name, *scores] for model_name, scores in grades_detection_variation_01_a2_ag.items()]
df_randomforest_a2_01_ag = pd.DataFrame(dados, columns=columns)

In [None]:
# Render the results table built in the previous cell (columns: Model, Query, Detection Rate, EBFA).
df_randomforest_a2_01_ag

In [None]:
# Attack each saved target model with approximated-gradient augmentation (mode='ag')
# and a detection variation of 0.01. surrogate_algo is not passed, so substitute_model_a2
# uses its default, which the messages below describe as RandomForestClassifier.
Detection_variation = 0.01
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_ag_01_surrogate_randomforestclassifier = []
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target, X_data_extraction, y_data_extraction, X_train, y_train,
        lambda_=0.2, augment_count=2, detection_variation=Detection_variation,
        threshold=0.5, mode='ag',
    )
    grades_a2_ag_01_surrogate_randomforestclassifier.append(grade)

    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt after every attack so partial results survive an interrupted run.
    grades_detection_variation_01_a2_ag_surrogate_randomforestclassifier = dict(zip(target_list, grades_a2_ag_01_surrogate_randomforestclassifier))

print(f'Array of grades: {grades_detection_variation_01_a2_ag_surrogate_randomforestclassifier}')
# One table row per target model: its file name followed by the values of its grade.
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
dados = [[model_name, *scores] for model_name, scores in grades_detection_variation_01_a2_ag_surrogate_randomforestclassifier.items()]
df_randomforest_a2_01_ag_surrogate_randomforest = pd.DataFrame(dados, columns=columns)

In [None]:
# Render the results table built in the previous cell (columns: Model, Query, Detection Rate, EBFA).
df_randomforest_a2_01_ag_surrogate_randomforest

### Detection Variation: 0.02 and Approximated Gradient

In [None]:
# Attack each saved target model with a surrogate of the same algorithm family,
# using approximated-gradient augmentation (mode='ag') and a detection variation of 0.02.
Detection_variation = 0.02
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_ag_02 = []
for target in target_list:
    # The surrogate algorithm name is the target file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target, X_data_extraction, y_data_extraction, X_train, y_train,
        lambda_=0.2, augment_count=2, detection_variation=Detection_variation,
        threshold=0.5, mode='ag', surrogate_algo=surrogate_algo,
    )
    grades_a2_ag_02.append(grade)

    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt after every attack so partial results survive an interrupted run.
    grades_detection_variation_02_a2_ag = dict(zip(target_list, grades_a2_ag_02))

print(f'Array of grades: {grades_detection_variation_02_a2_ag}')
# One table row per target model: its file name followed by the values of its grade.
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
dados = [[model_name, *scores] for model_name, scores in grades_detection_variation_02_a2_ag.items()]
df_randomforest_a2_02_ag = pd.DataFrame(dados, columns=columns)

In [None]:
# Render the results table built in the previous cell (columns: Model, Query, Detection Rate, EBFA).
df_randomforest_a2_02_ag

In [None]:
# Attack each saved target model with approximated-gradient augmentation (mode='ag')
# and a detection variation of 0.02. surrogate_algo is not passed, so substitute_model_a2
# uses its default, which the messages below describe as RandomForestClassifier.
Detection_variation = 0.02
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_ag_02_surrogate_randomforestclassifier = []
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target, X_data_extraction, y_data_extraction, X_train, y_train,
        lambda_=0.2, augment_count=2, detection_variation=Detection_variation,
        threshold=0.5, mode='ag',
    )
    grades_a2_ag_02_surrogate_randomforestclassifier.append(grade)

    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt after every attack so partial results survive an interrupted run.
    grades_detection_variation_02_a2_ag_surrogate_randomforestclassifier = dict(zip(target_list, grades_a2_ag_02_surrogate_randomforestclassifier))

print(f'Array of grades: {grades_detection_variation_02_a2_ag_surrogate_randomforestclassifier}')
# One table row per target model: its file name followed by the values of its grade.
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
dados = [[model_name, *scores] for model_name, scores in grades_detection_variation_02_a2_ag_surrogate_randomforestclassifier.items()]
df_randomforest_a2_02_ag_surrogate_randomforest = pd.DataFrame(dados, columns=columns)

In [None]:
# Render the results table built in the previous cell (columns: Model, Query, Detection Rate, EBFA).
df_randomforest_a2_02_ag_surrogate_randomforest

### Detection Variation: 0.03 and Approximated Gradient

In [None]:
# Attack each saved target model with a surrogate of the same algorithm family,
# using approximated-gradient augmentation (mode='ag') and a detection variation of 0.03.
Detection_variation = 0.03
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_ag_03 = []
for target in target_list:
    # The surrogate algorithm name is the target file name up to its first dot.
    surrogate_algo = target.partition('.')[0]
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target, X_data_extraction, y_data_extraction, X_train, y_train,
        lambda_=0.2, augment_count=2, detection_variation=Detection_variation,
        threshold=0.5, mode='ag', surrogate_algo=surrogate_algo,
    )
    grades_a2_ag_03.append(grade)

    print(f'Targe: {target} and surrogate model: {surrogate_algo} and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt after every attack so partial results survive an interrupted run.
    grades_detection_variation_03_a2_ag = dict(zip(target_list, grades_a2_ag_03))

print(f'Array of grades: {grades_detection_variation_03_a2_ag}')
# One table row per target model: its file name followed by the values of its grade.
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
dados = [[model_name, *scores] for model_name, scores in grades_detection_variation_03_a2_ag.items()]
df_randomforest_a2_03_ag = pd.DataFrame(dados, columns=columns)

In [None]:
# Render the results table built in the previous cell (columns: Model, Query, Detection Rate, EBFA).
df_randomforest_a2_03_ag

In [None]:
# Attack each saved target model with approximated-gradient augmentation (mode='ag')
# and a detection variation of 0.03. surrogate_algo is not passed, so substitute_model_a2
# uses its default, which the messages below describe as RandomForestClassifier.
Detection_variation = 0.03
target_list = [
    'ADABoostClassifier.joblib',
    'KNNClassifier.joblib',
    'LogisticRegression.joblib',
    'MLPClassifier.joblib',
    'QDAClassifier.joblib',
    'RandomForestClassifier.joblib',
]
grades_a2_ag_03_surrogate_randomforestclassifier = []
for target in target_list:
    print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
    print("######################################################################")
    print(f'Starting attack at the target: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    grade = substitute_model_a2(
        target, X_data_extraction, y_data_extraction, X_train, y_train,
        lambda_=0.2, augment_count=2, detection_variation=Detection_variation,
        threshold=0.5, mode='ag',
    )
    grades_a2_ag_03_surrogate_randomforestclassifier.append(grade)

    print(f'Targe: {target} and surrogate model: RandomForestClassifier and Detection Variation: {Detection_variation}\n')
    print("######################################################################")
    print(f'grade {grade}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # Rebuilt after every attack so partial results survive an interrupted run.
    grades_detection_variation_03_a2_ag_surrogate_randomforestclassifier = dict(zip(target_list, grades_a2_ag_03_surrogate_randomforestclassifier))

print(f'Array of grades: {grades_detection_variation_03_a2_ag_surrogate_randomforestclassifier}')
# One table row per target model: its file name followed by the values of its grade.
columns = ['Model', 'Query', 'Detection Rate', 'EBFA']
dados = [[model_name, *scores] for model_name, scores in grades_detection_variation_03_a2_ag_surrogate_randomforestclassifier.items()]
df_randomforest_a2_03_ag_surrogate_randomforest = pd.DataFrame(dados, columns=columns)

In [None]:
# Render the results table built in the previous cell (columns: Model, Query, Detection Rate, EBFA).
df_randomforest_a2_03_ag_surrogate_randomforest

In [None]:
# Completion marker: prints 'Feito!!' (Portuguese for 'Done!!') once execution reaches this cell.
print('Feito!!')

In [None]:
def features_importance(model_file, n):
    '''
    Return the top n features of a saved model, ranked by feature importance.

    :param model_file: path of a joblib file holding a fitted estimator that
        exposes ``feature_importances_`` (e.g. a forest or a decision tree)
    :param n: number of top features to return
    :return: DataFrame with columns 'Feature' and 'Importance', sorted by
        descending importance and truncated to at most n rows
    '''

    model = load(model_file)
    importances = model.feature_importances_

    # Prefer the feature names the estimator recorded at fit time: they are
    # guaranteed to line up with feature_importances_. Only when the estimator
    # carries no names (e.g. it was fitted on a plain array) fall back to the
    # columns of the notebook-level X_test, as the original code always did.
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is None:
        feature_names = X_test.columns

    ranking = pd.DataFrame({
        'Feature': list(feature_names),
        'Importance': importances
    })
    ranking = ranking.sort_values(by='Importance', ascending=False)

    return ranking.head(n)

In [None]:
# Top 15 features, by importance, of the saved RandomForestClassifier target model.
features_importance('RandomForestClassifier.joblib',15)

In [None]:
# Top 15 features of a saved surrogate model, for comparison with the target's ranking above.
# NOTE(review): this file name is not produced by any cell visible here — confirm it exists before running.
features_importance('surrogate_model_our_attack_RandomForestClassifier.joblib', 15)

## Final Augmented Attack - Our Attack

In [None]:
# Creating clusters to use as background data in our attack
from sklearn.cluster import KMeans

def process_and_scale_data(X, y, n_clusters, n_init=10):
    '''
    Generate normalized centroids, one set per class.

    The rows of X are split by label (0 and 1), each subset is clustered with
    KMeans (fixed random_state=25), and the resulting centroids are min-max
    scaled per class.

    :param X: feature DataFrame
    :param y: labels assigned to X as a 'label' column (expected values 0 and 1)
    :param n_clusters: number of KMeans clusters fitted for each class
    :param n_init: number of KMeans initialisations
    :return: tuple (scaled centroids of class 0, scaled centroids of class 1)
    '''

    labelled = X.copy()
    labelled['label'] = y

    def _scaled_centroids(label_value):
        # Rows of one class, without the helper label column
        members = labelled.loc[labelled['label'] == label_value].drop(columns='label')

        # Cluster the class and wrap its centroids in a DataFrame
        clustering = KMeans(n_clusters=n_clusters, n_init=n_init, random_state=25).fit(members)
        centroids = pd.DataFrame(clustering.cluster_centers_, columns=members.columns)

        # Normalise the centroids among themselves
        return pd.DataFrame(MinMaxScaler().fit_transform(centroids), columns=centroids.columns)

    return _scaled_centroids(0), _scaled_centroids(1)


In [None]:
# This function is used with our attack
def shap_values_importance(shap_values_1, shap_values_2, k=15):
    '''
    Explainability-Based Feature Agreement (EBFA) between two SHAP explanations.

    Each input is a 2-D array of SHAP values with one row per explained sample
    and one column per feature. A feature's importance is its mean absolute
    SHAP value over all samples; the score is the fraction of the top-k
    features shared by both explanations.

    :param shap_values_1: SHAP values of the first model, shape (n_samples, n_features)
    :param shap_values_2: SHAP values of the second model, same feature order
    :param k: number of top features compared
    :return: len(top_k_1 & top_k_2) / k
    '''
    shap_values_1 = np.asarray(shap_values_1)
    shap_values_2 = np.asarray(shap_values_2)

    # Aggregate over the sample axis (axis=0) so that we get one importance per
    # FEATURE. Summing over axis=1, as the code did before, yields one value per
    # sample and therefore ranked samples instead of features.
    importance_1 = np.sum(np.abs(shap_values_1), axis=0) / shap_values_1.shape[0]
    importance_2 = np.sum(np.abs(shap_values_2), axis=0) / shap_values_2.shape[0]

    # Indices of the k most important features of each explanation
    top_feats_1 = set(np.argsort(-importance_1)[:k].tolist())
    top_feats_2 = set(np.argsort(-importance_2)[:k].tolist())

    # Calculate intersection and EBFA score
    intersection = top_feats_1.intersection(top_feats_2)
    ebfa_score = len(intersection) / k

    return ebfa_score
    # we can return the intersection feature list, return (ebfa_score, intersection).

In [None]:
# Min-max scale the extraction features; the fitted scaler stays available as `Ascaler`
# and the scaled values are wrapped back into a DataFrame with the original column names.
Ascaler = MinMaxScaler()
X_scaled = pd.DataFrame(
    Ascaler.fit_transform(X_data_extraction),
    columns=X_data_extraction.columns.tolist(),
)

In [None]:
from sklearn.cluster import KMeans

def process_and_scale_data(X, y, n_clusters, n_init=10):
    '''
    Generate normalized centroids.

    Splits X by label (0 / 1), fits one KMeans model per class and returns the
    min-max scaled centroids of each class as a pair of DataFrames
    (class 0 first, class 1 second).
    '''

    frame = X.copy()
    frame['label'] = y

    # Split the DataFrame by label and discard the helper column
    per_class = [frame[frame['label'] == cls].drop('label', axis=1) for cls in (0, 1)]

    # Fit one KMeans model per class and keep its centroids as a DataFrame
    centroid_frames = [
        pd.DataFrame(
            KMeans(n_clusters=n_clusters, n_init=n_init, random_state=25).fit(rows).cluster_centers_,
            columns=rows.columns,
        )
        for rows in per_class
    ]

    # Normalise each centroid set (the scaler is re-fitted for every class)
    scaler = MinMaxScaler()
    scaled_df_0, scaled_df_1 = (
        pd.DataFrame(scaler.fit_transform(centroids), columns=centroids.columns)
        for centroids in centroid_frames
    )

    return scaled_df_0, scaled_df_1


In [None]:
def substitute_model_our_attack(target, X, y, augment_count=2, string='aa', lambda_=0.7, ebfa_limit=0.6, mode='li', surrogate_algo='RandomForestClassifier', threshold=0.5):
    '''
    Train a surrogate of a saved target model, enlarging the query budget until
    the surrogate's top SHAP features agree with the target's (EBFA >= ebfa_limit).

    :Param target: file name of the saved (joblib) target model
    :Param X: Attack data (feature DataFrame)
    :Param y: Attack label-data (Series); rows labelled -1 are treated as unlabeled and dropped
    :Param augment_count: multiplier of the per-class sample count handed to linear_interpolation (mode 'li' only)
    :Param string: prefix of the file name under which the surrogate is saved
    :Param lambda_: third argument of linear_interpolation ('li') / noise_level of gaussian_noise ('gn')
    :Param ebfa_limit: EBFA score at which the surrogate is accepted and saved
    :Param mode: augmentation strategy, 'li' (linear interpolation) or 'gn' (gaussian noise)
    :Param surrogate_algo: name of the surrogate algorithm to train
    :Param threshold: labels <= threshold form class 0, labels > threshold form class 1
    :Return: (number of queried samples, EBFA score) on convergence, -1 otherwise
    '''

    # config
    max_iter = 20 # max iterations to avoid an eternal loop
    num_samples = 200 # starting number of real samples drawn per class
    test_per_class = 500 # held-out rows per class used for every SHAP evaluation
    file_name = string+'ebfa_limit_'+str(ebfa_limit)+'_surrogate_model_our_attack_'+surrogate_algo+'_'+target+'_'+mode
    substitute_model_path = os.path.join(os.getcwd(), file_name)
    target_model = load(target)

    # Factories instead of instances so only the requested estimator is built
    surrogate_factories = {
        'RandomForestClassifier': lambda: RandomForestClassifier(random_state=38),
        'DecisionTreeClassifier': lambda: DecisionTreeClassifier(criterion='log_loss'),
        'KNNClassifier': lambda: KNeighborsClassifier(n_neighbors = 5),
        'QDAClassifier': lambda: QuadraticDiscriminantAnalysis(),
        'ADABoostClassifier': lambda: AdaBoostClassifier(random_state=38),
        'LogisticRegression': lambda: LogisticRegression(random_state=38),
        'MLPClassifier': lambda: MLPClassifier(hidden_layer_sizes=(154, 154), activation='relu', solver='sgd', alpha=1e-4, learning_rate_init=0.001, max_iter=100, random_state=38, verbose=True),
    }
    if surrogate_algo not in surrogate_factories:
        print(f"Option {surrogate_algo} isn't within algorithms options.")
        return -1
    surrogate_model = surrogate_factories[surrogate_algo]()

    # Reject an unknown mode before paying for the target's SHAP values
    if mode not in ('li', 'gn'):
        print("Incorrect option.")
        return -1

    # reset unpaired index and drop unlabeled rows
    X = X.reset_index(drop=True)
    y = y.reset_index(drop=True)

    label_index = (y != -1)
    # Re-index after filtering so that positions (np.where / .iloc) and index
    # labels (.drop) refer to the same rows even when unlabeled rows were removed.
    X_train = X[label_index].reset_index(drop=True)
    y_train = y[label_index].reset_index(drop=True)

    # class membership, computed on the labeled rows only
    positions_0 = np.where(y_train <= threshold)[0]
    positions_1 = np.where(y_train > threshold)[0]

    # test samples to be used in all of the validations, including to calculate the prediction value of the target model
    test_index_0 = np.random.choice(positions_0, test_per_class, replace=False)
    test_index_1 = np.random.choice(positions_1, test_per_class, replace=False)
    test_sample = np.concatenate((test_index_0, test_index_1))
    np.random.shuffle(test_sample)
    X_test = X_train.iloc[test_sample]
    y_test = y_train.iloc[test_sample]

    # Removing test data from original dataset, avoiding data leak
    X_train = X_train.drop(index=X_train.index[test_sample]).reset_index(drop=True)
    y_train = y_train.drop(index=y_train.index[test_sample]).reset_index(drop=True)

    # re-calculating the class positions without the test samples
    positions_0 = np.where(y_train <= threshold)[0]
    positions_1 = np.where(y_train > threshold)[0]

    # Shap config
    X_train_sample, _, y_train_sample, _ = train_test_split(X_train, y_train, train_size=0.01, stratify=y_train)
    _, X_test_sample, _, _ = train_test_split(X_test, y_test, test_size=0.1, stratify=y_test)
    X_test_sample = pd.DataFrame(X_test_sample, columns=X.columns)

    # Background data: scaled per-class centroids stacked into one frame.
    # process_and_scale_data returns a (class 0, class 1) pair of DataFrames.
    target_background = pd.concat(list(process_and_scale_data(X_train, y_train, n_clusters=10, n_init=10)), ignore_index=True)
    # Independent of the loop below, so it is computed once
    surrogate_background = pd.concat(list(process_and_scale_data(X_train_sample, y_train_sample, n_clusters=10, n_init=10)), ignore_index=True)

    # calculating shap-sampling to the target model
    print('Calculating Shap values of the target model')
    explainer_1 = shap.SamplingExplainer(target_model.predict, target_background)
    shap_values_1 = explainer_1.shap_values(X_test_sample)

    for iter_count in tqdm.tqdm(range(max_iter), desc='Substitute model training'):
        print(f'Iteration {iter_count + 1}: Number of samples = {num_samples * 2}')
        balanced_sample_0 = np.random.choice(positions_0, num_samples, replace=False)
        balanced_sample_1 = np.random.choice(positions_1, num_samples, replace=False)
        balanced_sample = np.concatenate((balanced_sample_0, balanced_sample_1))
        np.random.shuffle(balanced_sample)

        #creating REAL-DATA subsampling (labels come from querying the target)
        X_subsample_real = X_train.iloc[balanced_sample]
        y_subsample_real = classify_api(target, X_subsample_real, threshold=0.5, type='hard')

        #creating synthetic-data subsampling
        if mode == 'li': #linear interpolation
            synthetic_data_0 = linear_interpolation(X_train.iloc[balanced_sample_0], (int(round(num_samples * augment_count))), lambda_)
            synthetic_data_1 = linear_interpolation(X_train.iloc[balanced_sample_1], (int(round(num_samples * augment_count))), lambda_)
            print(f'Creating {int(len(synthetic_data_0)) + int(len(synthetic_data_1))} synthetic samples using Linear Interpolation')

        else: # 'gn', gaussian noise (mode was validated above)
            synthetic_data_0 = gaussian_noise(X_train.iloc[balanced_sample_0], noise_level=lambda_)
            synthetic_data_1 = gaussian_noise(X_train.iloc[balanced_sample_1], noise_level=lambda_)
            print(f'Creating {int(len(synthetic_data_0)) + int(len(synthetic_data_1))} syntetic samples using Gaussian noise')

        # Concatenating real samples with synthetic samples
        X_subsample = np.concatenate((X_subsample_real, synthetic_data_0, synthetic_data_1))
        X_subsample_df = pd.DataFrame(X_subsample, columns=X.columns) # changing to pd dataframe

        # Synthetic rows inherit the class of the rows they were derived from
        synthetic_y_0 = np.array([0] * len(synthetic_data_0))
        synthetic_y_1 = np.array([1] * len(synthetic_data_1))
        y_subsample = np.concatenate((y_subsample_real, synthetic_y_0, synthetic_y_1))
        y_subsample_df = pd.Series(y_subsample)

        print(f'Total of {len(X_subsample_df)} samples')
        surrogate_model.fit(X_subsample_df, y_subsample_df)
        dump(surrogate_model, "surrogate_model_intermediate_our_attack.joblib")
        print('Calculating EBFA...')
        # Explain the SURROGATE here; explaining the target again would only
        # compare the target with itself.
        explainer_2 = shap.SamplingExplainer(surrogate_model.predict, surrogate_background)
        shap_values_2 = explainer_2.shap_values(X_test_sample)
        ebfa_models = shap_values_importance(shap_values_1, shap_values_2, k=15)

        print("=======================================================================================================")
        print(f'Queries: {(num_samples*2):.4f}')
        print(f'Explainability-Based Feature Agreement: {ebfa_models:.4f}')
        print("=======================================================================================================")

        temp_file = os.path.join(os.getcwd(), "surrogate_model_intermediate_our_attack.joblib")
        if os.path.exists(temp_file):
            os.remove(temp_file)

        if abs(ebfa_models) >= ebfa_limit:
            print("Saving substitute model")
            dump(surrogate_model, substitute_model_path)

            print(f'Convergence achieved after {iter_count +1}. Saving model at: {substitute_model_path}')
            return (num_samples * 2, ebfa_models)
        # if continuing, the number of samples is enlarged
        num_samples += 200
        # Compare against the number of rows actually available in each class,
        # otherwise the next np.random.choice(replace=False) raises.
        if num_samples > len(positions_0) or num_samples > len(positions_1):
            print(f"Process finished after {iter_count +1}. The convergence wasn't achieved.")
            return -1

    # max_iter reached without meeting ebfa_limit
    print(f"Process finished after {max_iter}. The convergence wasn't achieved.")
    return -1

In [None]:
# Our attack on the RandomForest target: DecisionTree surrogate, linear interpolation, stop at EBFA >= 0.6.
substitute_model_our_attack("RandomForestClassifier.joblib", X_scaled, y_data_extraction, lambda_=0.6, ebfa_limit=0.6, mode='li', surrogate_algo='DecisionTreeClassifier')


In [None]:
# Top 15 features of the RandomForest target, as the reference ranking for the surrogate below.
features_importance("RandomForestClassifier.joblib", 15)

In [None]:
# Top 15 features of the DecisionTree surrogate; the file exists only if the ebfa_limit=0.6 attack above converged and saved it.
features_importance("aaebfa_limit_0.6_surrogate_model_our_attack_DecisionTreeClassifier_RandomForestClassifier.joblib_li", 15)

In [None]:
# Same attack on the RandomForest target with a stricter acceptance limit (EBFA >= 0.8).
substitute_model_our_attack("RandomForestClassifier.joblib", X_scaled, y_data_extraction, lambda_=0.6, ebfa_limit=0.8, mode='li', surrogate_algo='DecisionTreeClassifier')

## Our Attack against all target models

In [None]:
# Our attack on the AdaBoost target: DecisionTree surrogate, linear interpolation, stop at EBFA >= 0.8.
substitute_model_our_attack("ADABoostClassifier.joblib", X_scaled, y_data_extraction, lambda_=0.6, ebfa_limit=0.8, mode='li', surrogate_algo='DecisionTreeClassifier')


In [None]:
# Our attack on the LogisticRegression target: DecisionTree surrogate, linear interpolation, stop at EBFA >= 0.8.
substitute_model_our_attack('LogisticRegression.joblib', X_scaled, y_data_extraction, lambda_=0.6, ebfa_limit=0.8, mode='li', surrogate_algo='DecisionTreeClassifier')

In [None]:
# Our attack on the MLP target: DecisionTree surrogate, linear interpolation, stop at EBFA >= 0.8.
substitute_model_our_attack('MLPClassifier.joblib', X_scaled, y_data_extraction, lambda_=0.6, ebfa_limit=0.8, mode='li', surrogate_algo='DecisionTreeClassifier')


In [None]:
# Our attack on the QDA target: DecisionTree surrogate, linear interpolation, stop at EBFA >= 0.8.
substitute_model_our_attack('QDAClassifier.joblib', X_scaled, y_data_extraction, lambda_=0.6, ebfa_limit=0.8, mode='li', surrogate_algo='DecisionTreeClassifier')


In [None]:
# Our attack on the RandomForest target: DecisionTree surrogate, linear interpolation, stop at EBFA >= 0.6.
substitute_model_our_attack('RandomForestClassifier.joblib', X_scaled, y_data_extraction, lambda_=0.6, ebfa_limit=0.6, mode='li', surrogate_algo='DecisionTreeClassifier')

In [None]:
# Repeat of the previous cell with identical arguments; the sampling inside the attack is unseeded, so the outcome can differ between runs.
substitute_model_our_attack('RandomForestClassifier.joblib', X_scaled, y_data_extraction, lambda_=0.6, ebfa_limit=0.6, mode='li', surrogate_algo='DecisionTreeClassifier')

In [None]:
# Our attack on the RandomForest target with a RandomForest surrogate, linear interpolation, stop at EBFA >= 0.6.
substitute_model_our_attack('RandomForestClassifier.joblib', X_scaled, y_data_extraction, lambda_=0.6, ebfa_limit=0.6, mode='li', surrogate_algo='RandomForestClassifier')

In [None]:
# Top 15 features of the RandomForest target, as the reference ranking for the surrogate below.
features_importance('RandomForestClassifier.joblib', 15)

In [None]:
# Top 15 features of the RandomForest surrogate; the file exists only if the RandomForest-surrogate attack above converged and saved it.
features_importance('aaebfa_limit_0.6_surrogate_model_our_attack_RandomForestClassifier_RandomForestClassifier.joblib_li', 15)

In [None]:
# Our attack on the KNN target: DecisionTree surrogate, linear interpolation, stop at EBFA >= 0.8.
substitute_model_our_attack('KNNClassifier.joblib', X_scaled, y_data_extraction, lambda_=0.6, ebfa_limit=0.8, mode='li', surrogate_algo='DecisionTreeClassifier')