In [1]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

#from sklearn.metrics import roc_curve, roc_auc_score, classification_report, accuracy_score
from sklearn.metrics import accuracy_score, roc_curve
import numpy as np

import sys
import time
repo_root = "/data/quo.vadis/"
sys.path.append(repo_root)
from models import CompositeClassifier

def fit(models, x_trains, y_train, save=False):
    """Fit every model in ``models`` on its own training matrix.

    Parameters
    ----------
    models : dict
        Maps a model name to an object exposing ``fit(x, y)`` and, when
        ``save`` is true, ``save_late_fusion_model(filename=...)``.
    x_trains : dict
        Maps the same names to the training input for that model.
    y_train
        Training labels, shared by all models.
    save : bool, optional
        When true, each fitted model is written to
        ``late_fusion_model_fit/<name>`` relative to the working directory.

    Returns
    -------
    dict
        The same ``models`` mapping that was passed in; its values have been
        fitted in place.
    """
    output_dir = "late_fusion_model_fit"
    if save:
        # Create the target directory once rather than on every iteration.
        os.makedirs(output_dir, exist_ok=True)

    for name, model in models.items():
        model.fit(x_trains[name], y_train)
        if save:
            model.save_late_fusion_model(filename=os.path.join(output_dir, name))
    return models

def get_adversarial_samples(folder):
    """Return the sorted full paths of the non-symlink entries in ``folder``.

    ``folder`` is resolved against the module-level ``repo_root``. Paths are
    assembled with ``os.path.join`` so the result is the same whether or not
    ``folder`` ends with a path separator.

    Parameters
    ----------
    folder : str
        Directory to list, relative to ``repo_root``.

    Returns
    -------
    list of str
        Full paths of every directory entry that is not a symbolic link,
        in ascending order.
    """
    base_dir = os.path.join(repo_root, folder)
    candidates = (os.path.join(base_dir, entry) for entry in os.listdir(base_dir))
    # Symbolic links are skipped; every other directory entry is kept.
    return sorted(path for path in candidates if not os.path.islink(path))

def get_metrics_adv_nonadv(model, x_test, y_test, x_adv, y_adv):
    """Print the accuracy of ``model`` on a reference set and an adversarial set.

    Scores are read from the second column of ``model.predict_proba`` and
    converted to 0/1 predictions with a strict ``> 0.5`` cut-off before being
    compared with the supplied labels.

    Parameters
    ----------
    model
        Object exposing ``predict_proba``.
    x_test, y_test
        Inputs and labels of the reference (non-adversarial) set.
    x_adv, y_adv
        Inputs and labels of the adversarial set.

    Returns
    -------
    tuple
        ``(probs, probs_adv)``: the second-column scores for the reference
        and the adversarial inputs, in that order.
    """
    scores = model.predict_proba(x_test)[:, 1]
    scores_adv = model.predict_proba(x_adv)[:, 1]

    labels = (scores > 0.5).astype(int)
    labels_adv = (scores_adv > 0.5).astype(int)

    # Each row: caption, padding printed after the caption, truth, prediction.
    report_rows = (
        ("Non-Adversarial Set accuracy:", " ", y_test, labels),
        ("Adversarial Set accuracy:", "     ", y_adv, labels_adv),
    )
    for caption, padding, truth, predicted in report_rows:
        print(caption, end=padding)
        print(accuracy_score(truth, predicted))

    return scores, scores_adv

  from pandas import MultiIndex, Int64Index


In [2]:
# Decision cut-off applied to the ember scores loaded in adv_performance_report:
# a score strictly above this value is converted to label 1, anything else to 0
# (label 0 is what the report prints as "classifies as benign").
# NOTE(review): the provenance of 0.8336 is not recorded in this notebook — confirm.
EMBER_THRESHOLD = 0.8336

def adv_performance_report(ADV_SAMPLE_HASHES, ADV_REPORTS, ARRAY_FOLDER):
    """Print emulation statistics and original-vs-adversarial accuracy figures.

    The report has four parts, all written to stdout:

    1. How many adversarial samples have an emulation report.
    2. Accuracy of the thresholded ember scores on the original and the
       adversarial arrays.
    3. Accuracy of one ``CompositeClassifier`` per module combination on the
       same two arrays (printed by ``get_metrics_adv_nonadv``).
    4. For ember and for every composite model, how many samples fall on the
       benign side of the cut-off before and after the attack, and the
       difference between those two counts ("evasive").

    Both arrays are scored against all-ones labels, i.e. every sample in
    them is treated as malware.

    Parameters
    ----------
    ADV_SAMPLE_HASHES : sequence
        Identifiers of all adversarial samples; only its length is used.
    ADV_REPORTS : sequence
        Identifiers that have an emulation report; only its length is used.
    ARRAY_FOLDER : str
        Directory holding the four ``*-gamma-vs-ember-*.arr`` arrays loaded
        below. A trailing path separator is optional.

    Returns
    -------
    None
    """
    total_samples = len(ADV_SAMPLE_HASHES)
    emulated = len(ADV_REPORTS)
    print("Successfull adversarial emulation reports: ", emulated)
    print("Errored adversarial emulation reports: ", total_samples - emulated)
    print("Total adversarial samples: ", total_samples)
    # An empty sample list used to raise ZeroDivisionError here; show NaN instead.
    success_rate = emulated / total_samples * 100 if total_samples else float("nan")
    print(f"Emulation success rate: {success_rate:.2f}%")
    print("="*16)

    def load_adv(filename):
        # os.path.join tolerates an ARRAY_FOLDER with or without a trailing slash.
        return np.load(os.path.join(ARRAY_FOLDER, filename))

    # python3 early_fusion_pass.py
    x_adv_ember = load_adv("X-gamma-vs-ember-early-fusion-pass-only-adv.arr")
    x_orig_ember = load_adv("X-gamma-vs-ember-early-fusion-pass-orig-only-adv.arr")

    # Every row of both arrays is labelled 1.
    y_adv = np.ones(len(x_adv_ember))

    # python3 run_ember_pass.py
    y_ember_orig = load_adv("y-gamma-vs-ember-scores-orig-only-adv.arr")
    y_ember_adv = load_adv("y-gamma-vs-ember-scores-only-adv.arr")

    y_ember_orig_int = (y_ember_orig > EMBER_THRESHOLD).astype(int)
    y_ember_adv_int = (y_ember_adv > EMBER_THRESHOLD).astype(int)

    composite_dir = os.path.join(repo_root, "evaluation/composite")
    x_train = np.load(os.path.join(composite_dir, "X-1647041985-early-fusion-vectors-train.arr"))
    y_train = np.load(os.path.join(composite_dir, "y-1647041985-train.arr"))
    # The validation split (X-1647097165-early-fusion-vectors-val.arr and
    # y-1647097165-val.arr) was previously loaded here but never consumed, so
    # it is no longer read. Load it and pass it to get_metrics_adv_nonadv
    # instead of the original-sample arrays to evaluate against the full set.

    # Note carried over from the original cell:
    # need to modify report path to original dataset
    modulelist = [["malconv"], ["filepaths"], ["emulation"], ["ember"],
                  ["ember", "emulation"],
                  ["ember", "filepaths", "emulation"],
                  ["malconv", "ember", "filepaths", "emulation"]]
    models, x_trains = {}, {}
    x_ember_orig, x_ember_adv = {}, {}

    for modules in modulelist:
        # The four-module combination is reported under the short name "all".
        name = "all" if len(modules) == 4 else "_".join(modules)

        classifier = CompositeClassifier(modules=modules, root=repo_root, late_fusion_model="LogisticRegression")
        models[name] = classifier
        x_trains[name] = classifier.get_modular_x(modules, x_train)
        x_ember_orig[name] = classifier.get_modular_x(modules, x_orig_ember)
        x_ember_adv[name] = classifier.get_modular_x(modules, x_adv_ember)

        if "ember" in modules:
            # Replace the ember column with the separately computed ember scores.
            # NOTE(review): assumes get_modular_x yields one column per module,
            # in the order of `modules` — confirm against CompositeClassifier.
            ember_index = modules.index("ember")
            x_ember_orig[name][:, ember_index] = y_ember_orig
            x_ember_adv[name][:, ember_index] = y_ember_adv

    # Remember: this .fit() really trains only late fusion model
    models = fit(models, x_trains, y_train, save=False)

    print("====== ember (secml) ======")
    print("Non-Adversarial Set accuracy:", end=" ")
    print(accuracy_score(y_adv, y_ember_orig_int))
    print("Adversarial Set accuracy:", end="     ")
    print(accuracy_score(y_adv, y_ember_adv_int))

    probbs, probbs_adv = {}, {}
    for name, model in models.items():
        print("\n", "="*6, name, "="*6)
        probbs[name], probbs_adv[name] = get_metrics_adv_nonadv(
            model, x_ember_orig[name], y_adv, x_ember_adv[name], y_adv
        )

    # The thresholded arrays are already int, so no further cast is needed.
    ember_orig_benign = (y_ember_orig_int == 0).sum()
    ember_adv_benign = (y_ember_adv_int == 0).sum()
    print(f" {ember_orig_benign}: ember (secml) classifies as benign in orig malware set")
    print(f" {ember_adv_benign}: ember (secml) classifies as benign in adversarial malware set")
    # "Evasive" is the difference between the two benign counts, not a
    # per-sample comparison of which predictions flipped.
    evasive = ember_adv_benign - ember_orig_benign
    evasive_ratio = evasive*100/len(y_ember_adv_int)
    print(f" {evasive}, {evasive_ratio:.2f}%: evasive samples and ratio against ember")
    print("="*16)

    for name in models:
        # Count everything that is not strictly above 0.5.
        orig_benign = (~(probbs[name] > 0.5)).sum()
        adv_benign = (~(probbs_adv[name] > 0.5)).sum()
        evasive = adv_benign - orig_benign
        evasive_ratio = evasive*100/len(probbs[name])
        print(f" {orig_benign}: {name} classifies as benign in orig malware set")
        print(f" {adv_benign}: {name} classifies as benign in adversarial malware set")
        print(f" {evasive}, {evasive_ratio:.2f}%: evasive samples and ratio against {name} ")
        print()

# Evaluation of different attacks -- per number of sections

## 15 sections

In [5]:
# Inputs for the 15-sections attack run (all relative to repo_root).
ADVERSARIAL_EMULATED_SET_FOLDER = "data/adversarial.emulated/partial_reports_ember_15sections_10population"
ADVERSARIAL_RAW_SET_FOLDER = "data/adversarial.samples/samples_adversarial_testset_gamma_ember_15sections_10population/"
ARRAY_FOLDER  = repo_root+"evaluation/adversarial/composite_adversarial_evaluation/arrays_ember_15sections_10population/"

# Full paths of the adversarial samples and their bare file names.
ADV_SAMPLES = get_adversarial_samples(ADVERSARIAL_RAW_SET_FOLDER)
ADV_SAMPLE_HASHES = [os.path.basename(x) for x in ADV_SAMPLES]

adversarial_emulated_files = os.listdir(repo_root + ADVERSARIAL_EMULATED_SET_FOLDER)
# Strip only the trailing ".json"; str.replace would also remove that text
# from the middle of a file name.
adversarial_reports = [os.path.splitext(x)[0] for x in adversarial_emulated_files if x.endswith(".json")]

# Set membership keeps this filter linear instead of scanning the whole
# sample list once per report.
_known_hashes = set(ADV_SAMPLE_HASHES)
ADV_REPORTS = [x for x in adversarial_reports if x in _known_hashes]
adv_performance_report(ADV_SAMPLE_HASHES, ADV_REPORTS, ARRAY_FOLDER)

Successfull adversarial emulation reports:  5399
Errored adversarial emulation reports:  3497
Total adversarial samples:  8896
Emulation success rate: 60.69%
Non-Adversarial Set accuracy: 0.9814780514910169
Adversarial Set accuracy:     0.7193924800889053

Non-Adversarial Set accuracy: 0.9855528801629931
Adversarial Set accuracy:     0.9803667345804778

Non-Adversarial Set accuracy: 0.9781441007593998
Adversarial Set accuracy:     0.9781441007593998

Non-Adversarial Set accuracy: 0.9955547323578441
Adversarial Set accuracy:     0.9764771253935914

Non-Adversarial Set accuracy: 1.0
Adversarial Set accuracy:     0.8705315799222079

Non-Adversarial Set accuracy: 0.9990739025745509
Adversarial Set accuracy:     0.9562882015187998

Non-Adversarial Set accuracy: 0.9887016114095203
Adversarial Set accuracy:     0.9851824411928135

Non-Adversarial Set accuracy: 0.9887016114095203
Adversarial Set accuracy:     0.9851824411928135
 100: ember (secml) classifies as benign in orig malware set
 1515

## 10 sections

In [4]:
# Inputs for the 10-sections attack run (all relative to repo_root).
ADVERSARIAL_EMULATED_SET_FOLDER = "data/adversarial.emulated/reports_ember_10sections_10population/"
ADVERSARIAL_RAW_SET_FOLDER = "data/adversarial.samples/samples_adversarial_testset_gamma_ember_sections/10/"
ARRAY_FOLDER  = repo_root + "evaluation/adversarial/composite_adversarial_evaluation/arrays_ember_10sections_10population/"

# Full paths of the adversarial samples and their bare file names.
ADV_SAMPLES = get_adversarial_samples(ADVERSARIAL_RAW_SET_FOLDER)
ADV_SAMPLE_HASHES = [os.path.basename(x) for x in ADV_SAMPLES]

adversarial_emulated_files = os.listdir(repo_root + ADVERSARIAL_EMULATED_SET_FOLDER)
# Strip only the trailing ".json"; str.replace would also remove that text
# from the middle of a file name.
adversarial_reports = [os.path.splitext(x)[0] for x in adversarial_emulated_files if x.endswith(".json")]

# Set membership keeps this filter linear instead of scanning the whole
# sample list once per report.
_known_hashes = set(ADV_SAMPLE_HASHES)
ADV_REPORTS = [x for x in adversarial_reports if x in _known_hashes]
adv_performance_report(ADV_SAMPLE_HASHES, ADV_REPORTS, ARRAY_FOLDER)

Successfull adversarial emulation reports:  5438
Errored adversarial emulation reports:  3557
Total adversarial samples:  8995
Emulation success rate: 60.46%
Non-Adversarial Set accuracy: 0.981059212945936
Adversarial Set accuracy:     0.7774917248988599

Non-Adversarial Set accuracy: 0.9852887090842222
Adversarial Set accuracy:     0.9803236484001471

Non-Adversarial Set accuracy: 0.9766458256712026
Adversarial Set accuracy:     0.9766458256712026

Non-Adversarial Set accuracy: 0.964509010665686
Adversarial Set accuracy:     0.7892607576314822

Non-Adversarial Set accuracy: 1.0
Adversarial Set accuracy:     0.8832291283560132

Non-Adversarial Set accuracy: 1.0
Adversarial Set accuracy:     0.9554983449797719

Non-Adversarial Set accuracy: 0.9874954027215889
Adversarial Set accuracy:     0.9832659065833027

Non-Adversarial Set accuracy: 0.9874954027215889
Adversarial Set accuracy:     0.9832659065833027
 103: ember (secml) classifies as benign in orig malware set
 1210: ember (secml) c

## 5 sections

In [6]:
# Inputs for the 5-sections attack run (all relative to repo_root).
ADVERSARIAL_EMULATED_SET_FOLDER = "data/adversarial.emulated/reports_ember_5sections_10population/"
ADVERSARIAL_RAW_SET_FOLDER = "data/adversarial.samples/samples_adversarial_testset_gamma_ember_sections/5/"
ARRAY_FOLDER  = repo_root + "evaluation/adversarial/composite_adversarial_evaluation/arrays_ember_5sections_10population/"

# Full paths of the adversarial samples and their bare file names.
ADV_SAMPLES = get_adversarial_samples(ADVERSARIAL_RAW_SET_FOLDER)
ADV_SAMPLE_HASHES = [os.path.basename(x) for x in ADV_SAMPLES]

adversarial_emulated_files = os.listdir(repo_root + ADVERSARIAL_EMULATED_SET_FOLDER)
# Strip only the trailing ".json"; str.replace would also remove that text
# from the middle of a file name.
adversarial_reports = [os.path.splitext(x)[0] for x in adversarial_emulated_files if x.endswith(".json")]

# Set membership keeps this filter linear instead of scanning the whole
# sample list once per report.
_known_hashes = set(ADV_SAMPLE_HASHES)
ADV_REPORTS = [x for x in adversarial_reports if x in _known_hashes]
adv_performance_report(ADV_SAMPLE_HASHES, ADV_REPORTS, ARRAY_FOLDER)


Successfull adversarial emulation reports:  5464
Errored adversarial emulation reports:  3556
Total adversarial samples:  9020
Emulation success rate: 60.58%
Non-Adversarial Set accuracy: 0.9806002928257687
Adversarial Set accuracy:     0.7822108345534406

Non-Adversarial Set accuracy: 0.9849926793557833
Adversarial Set accuracy:     0.9809663250366032

Non-Adversarial Set accuracy: 0.9771229868228404
Adversarial Set accuracy:     0.9771229868228404

Non-Adversarial Set accuracy: 0.9659590043923866
Adversarial Set accuracy:     0.7900805270863837

Non-Adversarial Set accuracy: 1.0
Adversarial Set accuracy:     0.8978770131771596

Non-Adversarial Set accuracy: 1.0
Adversarial Set accuracy:     0.9560761346998536

Non-Adversarial Set accuracy: 0.988103953147877
Adversarial Set accuracy:     0.9842606149341142

Non-Adversarial Set accuracy: 0.988103953147877
Adversarial Set accuracy:     0.9842606149341142
 106: ember (secml) classifies as benign in orig malware set
 1190: ember (secml) c