# Adversarial Attacks

The following code loads the Tesseract dataset and trains a baseline classifier, in preparation for an adversarial attack.

In [None]:
import os
import json
import datetime
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.feature_extraction import DictVectorizer
from tesseract import evaluation, temporal 
from sklearn.model_selection import train_test_split

# Make sure the MacTeX binary directory is on PATH.
# NOTE(review): presumably needed so plotting code can find a LaTeX
# installation on macOS -- confirm.
# The update is idempotent: re-running this notebook cell does not append the
# same directory again, and a missing PATH variable does not raise KeyError.
_TEX_BIN_DIR = '/Library/TeX/texbin'
_current_path = os.environ.get("PATH", "")
if _TEX_BIN_DIR not in _current_path.split(os.pathsep):
    os.environ["PATH"] = (
        f"{_current_path}{os.pathsep}{_TEX_BIN_DIR}" if _current_path else _TEX_BIN_DIR
    )

## Loading features
def load_dataset(dataset_path,
                 features_suffix='-X-updated-reduced-10k.json',
                 labels_suffix='-y-updated.json',
                 meta_suffix='-meta-updated.json'):
    """
    Load the features, labels and timestamps of the Tesseract dataset.

    The three JSON files are located by appending a suffix to ``dataset_path``.
    Override the ``*_suffix`` arguments to load a different variant of the
    dataset; the defaults reproduce the file names used by this lab.

    Args:
        dataset_path: Common path prefix of the dataset files (directory plus
            file-name stem, without the suffixes below).
        features_suffix: Suffix of the features file. Its JSON content is fed
            directly to ``DictVectorizer.fit_transform``.
        labels_suffix: Suffix of the labels file (a JSON array).
        meta_suffix: Suffix of the metadata file: a JSON array of objects,
            each with a ``dex_date`` string formatted as either
            ``YYYY-MM-DDTHH:MM:SS`` or ``YYYY-MM-DD HH:MM:SS``.

    Returns:
        A tuple ``(X, y, time_index, feature_names, T)`` where

        * ``X`` is the vectorized feature matrix cast to ``float32``,
        * ``y`` is the labels as a numpy array,
        * ``time_index`` maps ``year -> month -> [sample indices]``,
        * ``feature_names`` are the names reported by the vectorizer,
        * ``T`` is a numpy array of ``datetime.datetime`` objects.

    Raises:
        OSError: If one of the files cannot be opened.
        KeyError: If a metadata entry has no ``dex_date`` key.
        ValueError: If a ``dex_date`` value matches neither timestamp format.
    """
    print(f'Loading dataset from {dataset_path}')

    # JSON is UTF-8 by specification; do not depend on the platform's default
    # text encoding when reading the files.
    with open(f'{dataset_path}{features_suffix}', 'r', encoding='utf-8') as f:
        X = json.load(f)

    print('Loading labels...')
    with open(f'{dataset_path}{labels_suffix}', 'r', encoding='utf-8') as f:
        y = json.load(f)

    print('Loading timestamps...')
    with open(f'{dataset_path}{meta_suffix}', 'r', encoding='utf-8') as f:
        meta = json.load(f)

    # The metadata mixes two timestamp layouts ("T" vs. space separator).
    timestamps = []
    for entry in meta:
        stamp = entry['dex_date']
        fmt = '%Y-%m-%dT%H:%M:%S' if "T" in stamp else '%Y-%m-%d %H:%M:%S'
        timestamps.append(datetime.datetime.strptime(stamp, fmt))
    T = np.array(timestamps)

    # Convert to numpy array and get feature names
    vec = DictVectorizer()
    X = vec.fit_transform(X).astype("float32")
    y = np.asarray(y)
    feature_names = vec.get_feature_names_out()

    # Get time index of each sample for easy reference:
    # time_index[year][month] -> list of sample positions in X / y / T.
    time_index = {}
    for i, t in enumerate(T):
        time_index.setdefault(t.year, {}).setdefault(t.month, []).append(i)

    return X, y, time_index, feature_names, T

# Common path prefix of the three dataset JSON files (features, labels, metadata).
dataset_prefix = '/Users/fabio/work/mlsec-malware-class/labs/extended-features/extended-features'
X, y, time_index, feature_names, T = load_dataset(dataset_prefix)

# Random (not time-aware) train/test split: 33% of the samples are held out,
# with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Linear SVM with C=1, following the DREBIN feature-space setup, trained on
# the training portion only.
clf = LinearSVC(C=1)
clf.fit(X_train, y_train)

# Predictions on the held-out samples.
y_pred = clf.predict(X_test)

## Exercise 1: Evasion of a single point (Linear Classifier)

Generate an adversarial attack for a single point. Find the most important benign feature of the SVM (hint: look at `coef_[0]`), and set it to 1.0 in the sample (the feature space is binary).

Note: you should attack only true positives, i.e., malicious samples that the classifier correctly detects as malicious!

## Exercise 2: Security Evaluation Curves (Linear Classifier)

Create a function that modifies the top-1, top-2, ..., top-N features of all samples, and draws a security evaluation curve as follows: the number of modified features on the X-axis, and the attack success rate on the Y-axis.

Note: you should attack only true positives!