In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt    
import scipy.stats as stats

def read_file(file):
    with open(file, 'r') as f:
        data = json.load(f)
    print("There are {} results in the file: {}".format(len(data['benign']['fb_past']), file))
    return data

baselines = ['benign', 'malicious_uniform']

In [2]:
### Note: we are aggregating ALL the adversarial perturbations here (primary and secondary), and we are also grouping all algorithms (HGB and RF)
### Individual comparisons can also be made by selecting a reduced file list

In [3]:


SCS_malware_list = ['neris', 'rbot', 'virut']
SCS_input_files = ["../results/hgb_secondary_scs.txt", "../results/rf_secondary_scs.txt", "../results/hgb_primary_scs.txt", "../results/rf_primary_scs.txt"]


SCS_data = read_file(SCS_input_files[0])
for SCS_input_file in SCS_input_files[1:]:
    SCS_data_temp = read_file(SCS_input_file)
    for b in baselines:
        for key in SCS_data[b].keys():
            SCS_data[b][key] = SCS_data[b][key] + SCS_data_temp[b][key]
        
    for m in SCS_malware_list:
        for key in SCS_data[m]['recall'].keys():
            SCS_data[m]['recall'][key] = SCS_data[m]['recall'][key] + SCS_data_temp[m]['recall'][key]
            
LCS_malware_list = ['artemis', 'dridex', 'trickbot', 'trickster', 'wannacry']
LCS_input_files = ["../results/hgb_secondary_lcs.txt", "../results/rf_secondary_lcs.txt", "../results/hgb_primary_lcs.txt", "../results/rf_primary_lcs.txt"]

LCS_data = read_file(LCS_input_files[0])
for LCS_input_file in LCS_input_files[1:]:
    LCS_data_temp = read_file(LCS_input_file)
    for b in baselines:
        for key in LCS_data[b].keys():
            LCS_data[b][key] = LCS_data[b][key] + LCS_data_temp[b][key]
        
    for m in LCS_malware_list:
        for key in LCS_data[m]['recall'].keys():
            LCS_data[m]['recall'][key] = LCS_data[m]['recall'][key] + LCS_data_temp[m]['recall'][key]
            
SCS_benign_results = SCS_data['benign']
SCS_uniform_malicious_results = SCS_data['malicious_uniform']
for malware in SCS_malware_list:
    exec(f"SCS_{malware}_results_rec = SCS_data['{malware}']['recall']")
    exec(f"SCS_{malware}_results_miss = SCS_data['{malware}']['miss']")
    
    
LCS_benign_results = LCS_data['benign']
LCS_uniform_malicious_results = LCS_data['malicious_uniform']
for malware in LCS_malware_list:
    exec(f"LCS_{malware}_results_rec = LCS_data['{malware}']['recall']")
    exec(f"LCS_{malware}_results_miss = LCS_data['{malware}']['miss']")

There are 50 results in the file: ../results/hgb_secondary_scs.txt
There are 50 results in the file: ../results/rf_secondary_scs.txt
There are 50 results in the file: ../results/hgb_primary_scs.txt
There are 50 results in the file: ../results/rf_primary_scs.txt
There are 50 results in the file: ../results/hgb_secondary_lcs.txt
There are 50 results in the file: ../results/rf_secondary_lcs.txt
There are 50 results in the file: ../results/hgb_primary_lcs.txt
There are 50 results in the file: ../results/rf_primary_lcs.txt


In [4]:
def statistical_test(p1, p2, name=None, alpha=0.05, prediction=True):
    stat, pvalue = stats.ttest_ind(p1, p2)
  
    if stat > 0:
        pred = "[p1 is likely greater than p2]"
    else:
        pred = "[p1 is likely lower than p2]"
        
    if pvalue < alpha:
        # H0 is rejected, i.e., p1 is NOT EQUAL to p2
        res = "not equal (Yes)"
    else:
        res = "equal (No)"
        pred = ""
    
    if name is not None:
        if prediction is False:
            print("candidates: {}\tstatistic={:.5f},\tp-value={} --> conclusion: {}".format(name,stat,pvalue,res))
        else:
            print("candidates: {}\tstatistic={:.5f},\tp-value={} --> conclusion: {} {}".format(name,stat,pvalue,res,pred))
    return stat, pvalue, res

        

In [5]:
stats.ttest_ind(SCS_benign_results['fb_past'], SCS_benign_results['fb_future'])

Ttest_indResult(statistic=27.98518938957879, pvalue=4.75231895147127e-96)

In [6]:
print("\nIs there a drop in past vs future? [BENIGN concept drift check] (SCS)\n")

statistical_test(SCS_benign_results['fb_past'], SCS_benign_results['fb_future'], 'Full Binary')
statistical_test(SCS_benign_results['ens_past'], SCS_benign_results['ens_future'], 'Ensemble')
for SCS_malware in SCS_malware_list:
    exec(f"statistical_test(SCS_benign_results['{SCS_malware}_past'], SCS_benign_results['{SCS_malware}_future'], '{SCS_malware}')")

## Is there a drop in past vs future (defense)? [concept drift check] 

statistical_test(SCS_benign_results['d_fb_past'], SCS_benign_results['d_fb_future'], 'Full Binary (defense)')
statistical_test(SCS_benign_results['d_ens_past'], SCS_benign_results['d_ens_future'], 'Ensemble (defense)')
for SCS_malware in SCS_malware_list:
    exec(f"statistical_test(SCS_benign_results['d_{SCS_malware}_past'], SCS_benign_results['d_{SCS_malware}_future'], '{SCS_malware} (defense)')")
    

print("\nIs there a drop in past vs future? [BENIGN concept drift check] (LCS)\n")

statistical_test(LCS_benign_results['fb_past'], LCS_benign_results['fb_future'], 'Full Binary')
statistical_test(LCS_benign_results['ens_past'], LCS_benign_results['ens_future'], 'Ensemble')
for LCS_malware in LCS_malware_list:
    exec(f"statistical_test(LCS_benign_results['{LCS_malware}_past'], LCS_benign_results['{LCS_malware}_future'], '{LCS_malware}')")

## Is there a drop in past vs future (defense)? [concept drift check] 

statistical_test(LCS_benign_results['d_fb_past'], LCS_benign_results['d_fb_future'], 'Full Binary (defense)')
statistical_test(LCS_benign_results['d_ens_past'], LCS_benign_results['d_ens_future'], 'Ensemble (defense)')
for LCS_malware in LCS_malware_list:
    exec(f"statistical_test(LCS_benign_results['d_{LCS_malware}_past'], LCS_benign_results['d_{LCS_malware}_future'], '{LCS_malware} (defense)')")
    




Is there a drop in past vs future? [BENIGN concept drift check] (SCS)

candidates: Full Binary	statistic=27.98519,	p-value=4.75231895147127e-96 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: Ensemble	statistic=13.19158,	p-value=3.235358168167447e-33 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: neris	statistic=18.86960,	p-value=3.43726309379291e-57 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: rbot	statistic=1.03590,	p-value=0.3008764111371138 --> conclusion: equal (No) 
candidates: virut	statistic=6.19028,	p-value=1.4958421845585343e-09 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: Full Binary (defense)	statistic=15.95375,	p-value=1.1933461923746116e-44 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: Ensemble (defense)	statistic=9.10852,	p-value=4.115258731856702e-18 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: ner

In [7]:
print("\nIs there a drop in past vs future? [MALICIOUS concept drift check] (SCS)\n")

statistical_test(SCS_uniform_malicious_results['fb_past'], SCS_uniform_malicious_results['fb_future'], 'Full Binary')
statistical_test(SCS_uniform_malicious_results['ens_past'], SCS_uniform_malicious_results['ens_future'], 'Ensemble')
for SCS_malware in SCS_malware_list:
    exec(f"statistical_test(SCS_uniform_malicious_results['{SCS_malware}_past'], SCS_uniform_malicious_results['{SCS_malware}_future'], '{SCS_malware}')")

## Is there a drop in past vs future (defense)? [concept drift check] 

statistical_test(SCS_uniform_malicious_results['d_fb_past'], SCS_uniform_malicious_results['d_fb_future'], 'Full Binary (defense)')
statistical_test(SCS_uniform_malicious_results['d_ens_past'], SCS_uniform_malicious_results['d_ens_future'], 'Ensemble (defense)')
for SCS_malware in SCS_malware_list:
    exec(f"statistical_test(SCS_uniform_malicious_results['d_{SCS_malware}_past'], SCS_uniform_malicious_results['d_{SCS_malware}_future'], '{SCS_malware} (defense)')")
    

print("\nIs there a drop in past vs future? [MALICIOUS concept drift check] (LCS)\n")

statistical_test(LCS_uniform_malicious_results['fb_past'], LCS_uniform_malicious_results['fb_future'], 'Full Binary')
statistical_test(LCS_uniform_malicious_results['ens_past'], LCS_uniform_malicious_results['ens_future'], 'Ensemble')
for LCS_malware in LCS_malware_list:
    exec(f"statistical_test(LCS_uniform_malicious_results['{LCS_malware}_past'], LCS_uniform_malicious_results['{LCS_malware}_future'], '{LCS_malware}')")

## Is there a drop in past vs future (defense)? [concept drift check] 

statistical_test(LCS_uniform_malicious_results['d_fb_past'], LCS_uniform_malicious_results['d_fb_future'], 'Full Binary (defense)')
statistical_test(LCS_uniform_malicious_results['d_ens_past'], LCS_uniform_malicious_results['d_ens_future'], 'Ensemble (defense)')
for LCS_malware in LCS_malware_list:
    exec(f"statistical_test(LCS_uniform_malicious_results['d_{LCS_malware}_past'], LCS_uniform_malicious_results['d_{LCS_malware}_future'], '{LCS_malware} (defense)')")
    




Is there a drop in past vs future? [MALICIOUS concept drift check] (SCS)

candidates: Full Binary	statistic=539.26417,	p-value=0.0 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: Ensemble	statistic=41.61936,	p-value=4.662550736219221e-147 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: neris	statistic=45.05655,	p-value=2.235329845763777e-158 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: rbot	statistic=1551.80290,	p-value=0.0 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: virut	statistic=16.01366,	p-value=6.6450152503675e-45 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: Full Binary (defense)	statistic=326.88218,	p-value=0.0 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: Ensemble (defense)	statistic=53.67754,	p-value=2.325605990166366e-184 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: neris (d

In [8]:
### SHORT TERM CASE STUDY (SCS)

## VANILLA, non-adversarial

# aggregate
fb_future_udp = SCS_data['virut']['recall']['fb_future_udp'] + SCS_data['neris']['recall']['fb_future_udp'] + SCS_data['rbot']['recall']['fb_future_udp']
ens_future_udp = SCS_data['virut']['recall']['ens_future_udp'] + SCS_data['neris']['recall']['ens_future_udp'] + SCS_data['rbot']['recall']['ens_future_udp']
neris_future_udp = SCS_data['neris']['recall']['neris_future_udp']
rbot_future_udp = SCS_data['rbot']['recall']['rbot_future_udp']
virut_future_udp = SCS_data['virut']['recall']['virut_future_udp']

fb_future_tcp = SCS_data['virut']['recall']['fb_future_tcp'] + SCS_data['neris']['recall']['fb_future_tcp'] + SCS_data['rbot']['recall']['fb_future_tcp']
ens_future_tcp = SCS_data['virut']['recall']['ens_future_tcp'] + SCS_data['neris']['recall']['ens_future_tcp'] + SCS_data['rbot']['recall']['ens_future_tcp']
neris_future_tcp = SCS_data['neris']['recall']['neris_future_tcp']
rbot_future_tcp = SCS_data['rbot']['recall']['rbot_future_tcp']
virut_future_tcp = SCS_data['virut']['recall']['virut_future_tcp']

# compute


## VANILLA, adversarial

# aggregate
fb_future_udp_adversarial = SCS_data['virut']['recall']['fb_future_udp_adversarial'] + SCS_data['neris']['recall']['fb_future_udp_adversarial'] + SCS_data['rbot']['recall']['fb_future_udp_adversarial']
ens_future_udp_adversarial = SCS_data['virut']['recall']['ens_future_udp_adversarial'] + SCS_data['neris']['recall']['ens_future_udp_adversarial'] + SCS_data['rbot']['recall']['ens_future_udp_adversarial']
neris_future_udp_adversarial = SCS_data['neris']['recall']['neris_future_udp_adversarial']
rbot_future_udp_adversarial = SCS_data['rbot']['recall']['rbot_future_udp_adversarial']
virut_future_udp_adversarial = SCS_data['virut']['recall']['virut_future_udp_adversarial']

fb_future_tcp_adversarial = SCS_data['virut']['recall']['fb_future_tcp_adversarial'] + SCS_data['neris']['recall']['fb_future_tcp_adversarial'] + SCS_data['rbot']['recall']['fb_future_tcp_adversarial']
ens_future_tcp_adversarial = SCS_data['virut']['recall']['ens_future_tcp_adversarial'] + SCS_data['neris']['recall']['ens_future_tcp_adversarial'] + SCS_data['rbot']['recall']['ens_future_tcp_adversarial']
neris_future_tcp_adversarial = SCS_data['neris']['recall']['neris_future_tcp_adversarial']
rbot_future_tcp_adversarial = SCS_data['rbot']['recall']['rbot_future_tcp_adversarial']
virut_future_tcp_adversarial = SCS_data['virut']['recall']['virut_future_tcp_adversarial']

## DEFENSE, non-adversarial

# aggregate
d_fb_future_udp = SCS_data['virut']['recall']['d_fb_future_udp'] + SCS_data['neris']['recall']['d_fb_future_udp'] + SCS_data['rbot']['recall']['d_fb_future_udp']
d_ens_future_udp = SCS_data['virut']['recall']['d_ens_future_udp'] + SCS_data['neris']['recall']['d_ens_future_udp'] + SCS_data['rbot']['recall']['d_ens_future_udp']
d_neris_future_udp = SCS_data['neris']['recall']['d_neris_future_udp']
d_rbot_future_udp = SCS_data['rbot']['recall']['d_rbot_future_udp']
d_virut_future_udp = SCS_data['virut']['recall']['d_virut_future_udp']


d_fb_future_tcp = SCS_data['virut']['recall']['fb_future_tcp'] + SCS_data['neris']['recall']['fb_future_tcp'] + SCS_data['rbot']['recall']['fb_future_tcp']
d_ens_future_tcp = SCS_data['virut']['recall']['ens_future_tcp'] + SCS_data['neris']['recall']['ens_future_tcp'] + SCS_data['rbot']['recall']['ens_future_tcp']
d_neris_future_tcp = SCS_data['neris']['recall']['d_neris_future_tcp']
d_rbot_future_tcp = SCS_data['rbot']['recall']['d_rbot_future_tcp']
d_virut_future_tcp = SCS_data['virut']['recall']['d_virut_future_tcp']

## DEFENSE, adversarial

# aggregate
d_fb_future_udp_adversarial = SCS_data['virut']['recall']['d_fb_future_udp_adversarial'] + SCS_data['neris']['recall']['d_fb_future_udp_adversarial'] + SCS_data['rbot']['recall']['d_fb_future_udp_adversarial']
d_ens_future_udp_adversarial = SCS_data['virut']['recall']['d_ens_future_udp_adversarial'] + SCS_data['neris']['recall']['d_ens_future_udp_adversarial'] + SCS_data['rbot']['recall']['d_ens_future_udp_adversarial']
d_neris_future_udp_adversarial = SCS_data['neris']['recall']['d_neris_future_udp_adversarial']
d_rbot_future_udp_adversarial = SCS_data['rbot']['recall']['d_rbot_future_udp_adversarial']
d_virut_future_udp_adversarial = SCS_data['virut']['recall']['d_virut_future_udp_adversarial']


d_fb_future_tcp_adversarial = SCS_data['virut']['recall']['fb_future_tcp_adversarial'] + SCS_data['neris']['recall']['fb_future_tcp_adversarial'] + SCS_data['rbot']['recall']['fb_future_tcp_adversarial']
d_ens_future_tcp_adversarial = SCS_data['virut']['recall']['ens_future_tcp_adversarial'] + SCS_data['neris']['recall']['ens_future_tcp_adversarial'] + SCS_data['rbot']['recall']['ens_future_tcp_adversarial']
d_neris_future_tcp_adversarial = SCS_data['neris']['recall']['d_neris_future_tcp_adversarial']
d_rbot_future_tcp_adversarial = SCS_data['rbot']['recall']['d_rbot_future_tcp_adversarial']
d_virut_future_tcp_adversarial = SCS_data['virut']['recall']['d_virut_future_tcp_adversarial']





In [9]:
## We group tcp and udp perturbations

fb_future_nonadversarial = fb_future_udp + fb_future_tcp
ens_future_nonadversarial = ens_future_udp + ens_future_tcp
for SCS_malware in SCS_malware_list:
    exec(f"{SCS_malware}_future_nonadversarial = {SCS_malware}_future_udp + {SCS_malware}_future_tcp")
    
d_fb_future_nonadversarial = d_fb_future_udp + d_fb_future_tcp
d_ens_future_nonadversarial = d_ens_future_udp + d_ens_future_tcp
for SCS_malware in SCS_malware_list:
    exec(f"d_{SCS_malware}_future_nonadversarial = d_{SCS_malware}_future_udp + d_{SCS_malware}_future_tcp")
    
fb_future_adversarial = fb_future_udp_adversarial + fb_future_tcp_adversarial
ens_future_adversarial = ens_future_udp_adversarial + ens_future_tcp_adversarial
for SCS_malware in SCS_malware_list:
    exec(f"{SCS_malware}_future_adversarial = {SCS_malware}_future_udp_adversarial + {SCS_malware}_future_tcp_adversarial")
    
    
d_fb_future_adversarial = d_fb_future_udp_adversarial + d_fb_future_tcp_adversarial
d_ens_future_adversarial = d_ens_future_udp_adversarial + d_ens_future_tcp_adversarial
for SCS_malware in SCS_malware_list:
    exec(f"d_{SCS_malware}_future_adversarial = d_{SCS_malware}_future_udp_adversarial + d_{SCS_malware}_future_tcp_adversarial")
    

In [10]:
print("\nIs there a difference in adversarial vs non-adversarial? (SCS)\n")

statistical_test(fb_future_nonadversarial, fb_future_adversarial, 'Full Binary')
statistical_test(ens_future_nonadversarial, ens_future_adversarial, 'Ensemble')
for SCS_malware in SCS_malware_list:
    exec(f"statistical_test({SCS_malware}_future_nonadversarial, {SCS_malware}_future_adversarial, '{SCS_malware}')")

print("\nIs there a difference in defense vs vanilla (in adversarial settings?) (SCS)\n")

statistical_test(fb_future_adversarial, d_fb_future_adversarial, 'd_Full Binary')
statistical_test(ens_future_adversarial, d_ens_future_adversarial, 'd_Ensemble')
for SCS_malware in SCS_malware_list:
    exec(f"statistical_test({SCS_malware}_future_adversarial, d_{SCS_malware}_future_adversarial, 'd_{SCS_malware}')")


Is there a difference in adversarial vs non-adversarial? (SCS)

candidates: Full Binary	statistic=3.56817,	p-value=0.00036653445338002787 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: Ensemble	statistic=4.38698,	p-value=1.1990319131711031e-05 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: neris	statistic=5.61937,	p-value=2.6486769331944926e-08 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: rbot	statistic=6.13724,	p-value=1.3217827170758674e-09 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: virut	statistic=2.88808,	p-value=0.003980926650161274 --> conclusion: not equal (Yes) [p1 is likely greater than p2]

Is there a difference in defense vs vanilla (in adversarial settings?) (SCS)

candidates: d_Full Binary	statistic=-0.68027,	p-value=0.4964011131303333 --> conclusion: equal (No) 
candidates: d_Ensemble	statistic=12.52935,	p-value=6.246001934665037e-35 --> conclusion: not e

In [11]:
#### LONG TERM CASE STUDY (LCS)

## VANILLA, non-adversarial

# aggregate
fb_future_udp = LCS_data['trickbot']['recall']['fb_future_udp'] + LCS_data['artemis']['recall']['fb_future_udp'] + LCS_data['dridex']['recall']['fb_future_udp'] + LCS_data['trickster']['recall']['fb_future_udp'] + LCS_data['wannacry']['recall']['fb_future_udp']
ens_future_udp = LCS_data['trickbot']['recall']['ens_future_udp'] + LCS_data['artemis']['recall']['ens_future_udp'] + LCS_data['dridex']['recall']['ens_future_udp'] + LCS_data['trickster']['recall']['ens_future_udp'] + LCS_data['wannacry']['recall']['ens_future_udp']
artemis_future_udp = LCS_data['artemis']['recall']['artemis_future_udp']
dridex_future_udp = LCS_data['dridex']['recall']['dridex_future_udp']
trickbot_future_udp = LCS_data['trickbot']['recall']['trickbot_future_udp']
trickster_future_udp = LCS_data['trickster']['recall']['trickster_future_udp']
wannacry_future_udp = LCS_data['wannacry']['recall']['wannacry_future_udp']


fb_future_tcp = LCS_data['trickbot']['recall']['fb_future_tcp'] + LCS_data['artemis']['recall']['fb_future_tcp'] + LCS_data['dridex']['recall']['fb_future_tcp'] + LCS_data['trickster']['recall']['fb_future_tcp'] + LCS_data['wannacry']['recall']['fb_future_tcp']
ens_future_tcp = LCS_data['trickbot']['recall']['ens_future_tcp'] + LCS_data['artemis']['recall']['ens_future_tcp'] + LCS_data['dridex']['recall']['ens_future_tcp'] + LCS_data['trickster']['recall']['ens_future_tcp'] + LCS_data['wannacry']['recall']['ens_future_tcp']
artemis_future_tcp = LCS_data['artemis']['recall']['artemis_future_tcp']
dridex_future_tcp = LCS_data['dridex']['recall']['dridex_future_tcp']
trickbot_future_tcp = LCS_data['trickbot']['recall']['trickbot_future_tcp']
trickster_future_tcp = LCS_data['trickster']['recall']['trickster_future_tcp']
wannacry_future_tcp = LCS_data['wannacry']['recall']['wannacry_future_tcp']


## VANILLA, adversarial

# aggregate
fb_future_udp_adversarial = LCS_data['trickbot']['recall']['fb_future_udp_adversarial'] + LCS_data['artemis']['recall']['fb_future_udp_adversarial'] + LCS_data['dridex']['recall']['fb_future_udp_adversarial'] + LCS_data['trickster']['recall']['fb_future_udp_adversarial'] + LCS_data['wannacry']['recall']['fb_future_udp_adversarial']
ens_future_udp_adversarial = LCS_data['trickbot']['recall']['ens_future_udp_adversarial'] + LCS_data['artemis']['recall']['ens_future_udp_adversarial'] + LCS_data['dridex']['recall']['ens_future_udp_adversarial'] + LCS_data['trickster']['recall']['ens_future_udp_adversarial'] + LCS_data['wannacry']['recall']['ens_future_udp_adversarial']
artemis_future_udp_adversarial = LCS_data['artemis']['recall']['artemis_future_udp_adversarial']
dridex_future_udp_adversarial = LCS_data['dridex']['recall']['dridex_future_udp_adversarial']
trickbot_future_udp_adversarial = LCS_data['trickbot']['recall']['trickbot_future_udp_adversarial']
trickster_future_udp_adversarial = LCS_data['trickster']['recall']['trickster_future_udp_adversarial']
wannacry_future_udp_adversarial = LCS_data['wannacry']['recall']['wannacry_future_udp_adversarial']

fb_future_tcp_adversarial = LCS_data['trickbot']['recall']['fb_future_tcp_adversarial'] + LCS_data['artemis']['recall']['fb_future_tcp_adversarial'] + LCS_data['dridex']['recall']['fb_future_tcp_adversarial'] + LCS_data['trickster']['recall']['fb_future_tcp_adversarial'] + LCS_data['wannacry']['recall']['fb_future_tcp_adversarial']
ens_future_tcp_adversarial = LCS_data['trickbot']['recall']['ens_future_tcp_adversarial'] + LCS_data['artemis']['recall']['ens_future_tcp_adversarial'] + LCS_data['dridex']['recall']['ens_future_tcp_adversarial'] + LCS_data['trickster']['recall']['ens_future_tcp_adversarial'] + LCS_data['wannacry']['recall']['ens_future_tcp_adversarial']
artemis_future_tcp_adversarial = LCS_data['artemis']['recall']['artemis_future_tcp_adversarial']
dridex_future_tcp_adversarial = LCS_data['dridex']['recall']['dridex_future_tcp_adversarial']
trickbot_future_tcp_adversarial = LCS_data['trickbot']['recall']['trickbot_future_tcp_adversarial']
trickster_future_tcp_adversarial = LCS_data['trickster']['recall']['trickster_future_tcp_adversarial']
wannacry_future_tcp_adversarial = LCS_data['wannacry']['recall']['wannacry_future_tcp_adversarial']




## DEFENSE, non-adversarial

# aggregate
d_fb_future_udp = LCS_data['trickbot']['recall']['d_fb_future_udp'] + LCS_data['artemis']['recall']['d_fb_future_udp'] + LCS_data['dridex']['recall']['d_fb_future_udp'] + LCS_data['trickster']['recall']['d_fb_future_udp'] + LCS_data['wannacry']['recall']['d_fb_future_udp']
d_ens_future_udp = LCS_data['trickbot']['recall']['d_ens_future_udp'] + LCS_data['artemis']['recall']['d_ens_future_udp'] + LCS_data['dridex']['recall']['d_ens_future_udp'] + LCS_data['trickster']['recall']['d_ens_future_udp'] + LCS_data['wannacry']['recall']['d_ens_future_udp']
d_artemis_future_udp = LCS_data['artemis']['recall']['d_artemis_future_udp']
d_dridex_future_udp = LCS_data['dridex']['recall']['d_dridex_future_udp']
d_trickbot_future_udp = LCS_data['trickbot']['recall']['d_trickbot_future_udp']
d_trickster_future_udp = LCS_data['trickster']['recall']['d_trickster_future_udp']
d_wannacry_future_udp = LCS_data['wannacry']['recall']['d_wannacry_future_udp']


d_fb_future_tcp = LCS_data['trickbot']['recall']['fb_future_tcp'] + LCS_data['artemis']['recall']['fb_future_tcp'] + LCS_data['dridex']['recall']['fb_future_tcp'] + LCS_data['trickster']['recall']['fb_future_tcp'] + LCS_data['wannacry']['recall']['fb_future_tcp']
d_ens_future_tcp = LCS_data['trickbot']['recall']['ens_future_tcp'] + LCS_data['artemis']['recall']['ens_future_tcp'] + LCS_data['dridex']['recall']['ens_future_tcp'] + LCS_data['trickster']['recall']['ens_future_tcp'] + LCS_data['wannacry']['recall']['ens_future_tcp']
d_artemis_future_tcp = LCS_data['artemis']['recall']['d_artemis_future_tcp']
d_dridex_future_tcp = LCS_data['dridex']['recall']['d_dridex_future_tcp']
d_trickbot_future_tcp = LCS_data['trickbot']['recall']['d_trickbot_future_tcp']
d_trickster_future_tcp = LCS_data['trickster']['recall']['d_trickster_future_tcp']
d_wannacry_future_tcp = LCS_data['wannacry']['recall']['d_wannacry_future_tcp']



## DEFENSE, non-adversarial

# aggregate
d_fb_future_udp_adversarial = LCS_data['trickbot']['recall']['d_fb_future_udp_adversarial'] + LCS_data['artemis']['recall']['d_fb_future_udp_adversarial'] + LCS_data['dridex']['recall']['d_fb_future_udp_adversarial'] + LCS_data['trickster']['recall']['d_fb_future_udp_adversarial'] + LCS_data['wannacry']['recall']['d_fb_future_udp_adversarial']
d_ens_future_udp_adversarial = LCS_data['trickbot']['recall']['d_ens_future_udp_adversarial'] + LCS_data['artemis']['recall']['d_ens_future_udp_adversarial'] + LCS_data['dridex']['recall']['d_ens_future_udp_adversarial'] + LCS_data['trickster']['recall']['d_ens_future_udp_adversarial'] + LCS_data['wannacry']['recall']['d_ens_future_udp_adversarial']
d_artemis_future_udp_adversarial = LCS_data['artemis']['recall']['d_artemis_future_udp_adversarial']
d_dridex_future_udp_adversarial = LCS_data['dridex']['recall']['d_dridex_future_udp_adversarial']
d_trickbot_future_udp_adversarial = LCS_data['trickbot']['recall']['d_trickbot_future_udp_adversarial']
d_trickster_future_udp_adversarial = LCS_data['trickster']['recall']['d_trickster_future_udp_adversarial']
d_wannacry_future_udp_adversarial = LCS_data['wannacry']['recall']['d_wannacry_future_udp_adversarial']


d_fb_future_tcp_adversarial = LCS_data['trickbot']['recall']['fb_future_tcp_adversarial'] + LCS_data['artemis']['recall']['fb_future_tcp_adversarial'] + LCS_data['dridex']['recall']['fb_future_tcp_adversarial'] + LCS_data['trickster']['recall']['fb_future_tcp_adversarial'] + LCS_data['wannacry']['recall']['fb_future_tcp_adversarial']
d_ens_future_tcp_adversarial = LCS_data['trickbot']['recall']['ens_future_tcp_adversarial'] + LCS_data['artemis']['recall']['ens_future_tcp_adversarial'] + LCS_data['dridex']['recall']['ens_future_tcp_adversarial'] + LCS_data['trickster']['recall']['ens_future_tcp_adversarial'] + LCS_data['wannacry']['recall']['ens_future_tcp_adversarial']
d_artemis_future_tcp_adversarial = LCS_data['artemis']['recall']['d_artemis_future_tcp_adversarial']
d_dridex_future_tcp_adversarial = LCS_data['dridex']['recall']['d_dridex_future_tcp_adversarial']
d_trickbot_future_tcp_adversarial = LCS_data['trickbot']['recall']['d_trickbot_future_tcp_adversarial']
d_trickster_future_tcp_adversarial = LCS_data['trickster']['recall']['d_trickster_future_tcp_adversarial']
d_wannacry_future_tcp_adversarial = LCS_data['wannacry']['recall']['d_wannacry_future_tcp_adversarial']

In [12]:
## We group tcp and udp perturbations

fb_future_nonadversarial = fb_future_udp + fb_future_tcp
ens_future_nonadversarial = ens_future_udp + ens_future_tcp
for LCS_malware in LCS_malware_list:
    exec(f"{LCS_malware}_future_nonadversarial = {LCS_malware}_future_udp + {LCS_malware}_future_tcp")
    
d_fb_future_nonadversarial = d_fb_future_udp + d_fb_future_tcp
d_ens_future_nonadversarial = d_ens_future_udp + d_ens_future_tcp
for LCS_malware in LCS_malware_list:
    exec(f"d_{LCS_malware}_future_nonadversarial = d_{LCS_malware}_future_udp + d_{LCS_malware}_future_tcp")
    
fb_future_adversarial = fb_future_udp_adversarial + fb_future_tcp_adversarial
ens_future_adversarial = ens_future_udp_adversarial + ens_future_tcp_adversarial
for LCS_malware in LCS_malware_list:
    exec(f"{LCS_malware}_future_adversarial = {LCS_malware}_future_udp_adversarial + {LCS_malware}_future_tcp_adversarial")
    
    
d_fb_future_adversarial = d_fb_future_udp_adversarial + d_fb_future_tcp_adversarial
d_ens_future_adversarial = d_ens_future_udp_adversarial + d_ens_future_tcp_adversarial
for LCS_malware in LCS_malware_list:
    exec(f"d_{LCS_malware}_future_adversarial = d_{LCS_malware}_future_udp_adversarial + d_{LCS_malware}_future_tcp_adversarial")
    

In [13]:
print("\nIs there a difference in adversarial vs non-adversarial? (LCS)\n")

statistical_test(fb_future_nonadversarial, fb_future_adversarial, 'Full Binary')
statistical_test(ens_future_nonadversarial, ens_future_adversarial, 'Ensemble')
for LCS_malware in LCS_malware_list:
    exec(f"statistical_test({LCS_malware}_future_nonadversarial, {LCS_malware}_future_adversarial, '{LCS_malware}')")

print("\nIs there a difference in defense vs vanilla (in adversarial settings?) (LCS)\n")

statistical_test(fb_future_adversarial, d_fb_future_adversarial, 'd_Full Binary')
statistical_test(ens_future_adversarial, d_ens_future_adversarial, 'd_Ensemble')
for LCS_malware in LCS_malware_list:
    exec(f"statistical_test({LCS_malware}_future_adversarial, d_{LCS_malware}_future_adversarial, 'd_{LCS_malware}')")


Is there a difference in adversarial vs non-adversarial? (LCS)

candidates: Full Binary	statistic=0.83300,	p-value=0.40489341135316337 --> conclusion: equal (No) 
candidates: Ensemble	statistic=6.78468,	p-value=1.3344780989317828e-11 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: artemis	statistic=-1.41519,	p-value=0.15740178991817824 --> conclusion: equal (No) 
candidates: dridex	statistic=12.00482,	p-value=1.2220535727187508e-30 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: trickbot	statistic=18.11912,	p-value=1.0137813385038412e-61 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: trickster	statistic=7.00336,	p-value=5.309581449329273e-12 --> conclusion: not equal (Yes) [p1 is likely greater than p2]
candidates: wannacry	statistic=0.37275,	p-value=0.7094317062750186 --> conclusion: equal (No) 

Is there a difference in defense vs vanilla (in adversarial settings?) (LCS)

candidates: d_Full Binary	stat

In [14]:
## RQ4 (this includes HGB and RF, as well as primary+secondary)

print("Do our TCP perturbations to the traffic by the Artemis malware lead to a different TPR by the Full-binary classifier?\n")

X = LCS_data['artemis']['recall']['fb_future_tcp']
Y = LCS_data['artemis']['recall']['fb_future_tcp_adversarial']

statistical_test(X, Y, 'Full Binary', prediction=True)


print("\n\nDo our TCP perturbations to the traffic by the Artemis malware lead to a different TPR by the Ensemble classifier?\n")

X = LCS_data['artemis']['recall']['ens_future_tcp']
Y = LCS_data['artemis']['recall']['ens_future_tcp_adversarial']

statistical_test(X, Y, 'Ensemble', prediction=True)
print("")

Do our TCP perturbations to the traffic by the Artemis malware lead to a different TPR by the Full-binary classifier?

candidates: Full Binary	statistic=-4.52941,	p-value=7.828390044957843e-06 --> conclusion: not equal (Yes) [p1 is likely lower than p2]


Do our TCP perturbations to the traffic by the Artemis malware lead to a different TPR by the Ensemble classifier?

candidates: Ensemble	statistic=17.05821,	p-value=2.294694791057546e-49 --> conclusion: not equal (Yes) [p1 is likely greater than p2]



In [15]:
## Only RF on Primary

LCS_input_files = ["../results/rf_primary_lcs.txt"]

LCS_data = read_file(LCS_input_files[0])
for LCS_input_file in LCS_input_files[1:]:
    LCS_data_temp = read_file(LCS_input_file)
    for b in baselines:
        for key in LCS_data[b].keys():
            LCS_data[b][key] = LCS_data[b][key] + LCS_data_temp[b][key]
        
    for m in LCS_malware_list:
        for key in LCS_data[m]['recall'].keys():
            LCS_data[m]['recall'][key] = LCS_data[m]['recall'][key] + LCS_data_temp[m]['recall'][key]

    
    
LCS_benign_results = LCS_data['benign']
LCS_uniform_malicious_results = LCS_data['malicious_uniform']
for malware in LCS_malware_list:
    exec(f"LCS_{malware}_results_rec = LCS_data['{malware}']['recall']")
    exec(f"LCS_{malware}_results_miss = LCS_data['{malware}']['miss']")

There are 50 results in the file: ../results/rf_primary_lcs.txt


In [16]:
print("Do our TCP perturbations to the traffic by the Artemis malware lead to a different TPR by the Full-binary classifier?\n")

X = LCS_data['artemis']['recall']['fb_future_tcp']
Y = LCS_data['artemis']['recall']['fb_future_tcp_adversarial']

statistical_test(X, Y, 'Full Binary', prediction=True)


print("\n\nDo our TCP perturbations to the traffic by the Artemis malware lead to a different TPR by the Ensemble classifier?\n")

X = LCS_data['artemis']['recall']['ens_future_tcp']
Y = LCS_data['artemis']['recall']['ens_future_tcp_adversarial']

statistical_test(X, Y, 'Ensemble', prediction=True)
print("")

Do our TCP perturbations to the traffic by the Artemis malware lead to a different TPR by the Full-binary classifier?

candidates: Full Binary	statistic=-5.23520,	p-value=9.404817293139686e-07 --> conclusion: not equal (Yes) [p1 is likely lower than p2]


Do our TCP perturbations to the traffic by the Artemis malware lead to a different TPR by the Ensemble classifier?

candidates: Ensemble	statistic=50.68247,	p-value=4.0778348648027407e-72 --> conclusion: not equal (Yes) [p1 is likely greater than p2]

