# Explaining Deep Learning in Android Malware Detection
The objective of this project is to explain the predictions of deep learning models used for Android malware detection.

Libraries:
- myutils: methods for explaining deep learning.

In [1]:
import os
# Expose only GPU device 7 to CUDA for this kernel.
# NOTE(review): presumably this has to run before TensorFlow is imported (via the
# imports further down) for the restriction to take effect — confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="7"

# Remember the notebook's starting directory (IPython %pwd magic). Later cells build
# data/result paths from homepath and chdir back to it.
homepath=%pwd
print homepath

import json
import re
import sys
from collections import Counter, OrderedDict

import numpy as np
import pandas as pd
from decimal import *

import cPickle as pkl

import glob
from myutils import *

/workspace/codes/lemna


Using TensorFlow backend.

 
  libSM.so.6: cannot open shared object file: No such file or directory



# Load Trained Model
- Models are implemented in keras with tensorflow backend.
    - Word embeddings learn a featurised representation.
    - The training set is used to train the models with a batch size of 100, 32 hidden units and 100 epochs.
    - The validation set is used to evaluate the performance of a given model and to fine-tune its hyperparameters based on the validation F$_1$ score.
    - Based on validation performance, the best model is chosen and then evaluated on the test set.
    - Batch normalisation and dropout reduce overfitting, helping models generalise to the test set.
    
See Appendix A for more information.

In [2]:
# Name of the trained model; reused further down as `modelname` when saving results.
stamp = '2k10_1biLSTM_keras_masked'

# Embedding layer built by the project helper and handed to the classifier below.
emb_layer = pretrained_embedding_layer(homepath,
                                       max_nb_words=MAX_NB_WORDS,
                                       emb_dim=EMBEDDING_DIM)

# Single-layer bidirectional LSTM with input masking enabled.
model = build_model_1birnn_keras(
    nb_class=nb_classes,
    input_shape=(MAX_SEQ_LEN, MAX_SENT_LEN),
    stamp=stamp,
    emb_layer=emb_layer,
    mask=True,
    lstm=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 2000, 10)          0         
_________________________________________________________________
masking_1 (Masking)          (None, 2000, 10)          0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 2000, 10, 20)      415220    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 2000, 200)         0         
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 2000, 200)         0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 64)                59904     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
__________

# Predict 
The dataset is randomly split once into training (44116 samples), validation (5515 samples) and test (5513 samples) sets.

In [21]:
print '\nLoad previous trained model and predict ....'

data = pkl.load(open(homepath+'/data2k/test.sequence.pkl','rb'))
print 'testing data size >> ', len(data)
x_test = load_data(data)


labeldir = list(open(homepath+'/data2k/test.label', 'r').readlines())
labeldir = [item.strip() for item in labeldir]
y_test = loadlabel(labeldir)

print 'shape of testing data and label tensor >> ', x_test.shape, y_test.shape


Load previous trained model and predict ....
testing data size >>  5513
shape of processed data >>>  (5513, 2000, 10)
shape of testing data and label tensor >>  (5513, 2000, 10) (5513, 2)


In [4]:
# Restore the weights of one specific training checkpoint. The path is relative, so
# this relies on the current working directory being the project root (homepath).
model.load_weights('Results/2k10_bilstm_1layer_masked/2k10_1biLSTM_keras_masked 2019-06-14 09:52:29/2k10_1biLSTM_keras_masked-improvement-96-0.963.h5')
# Evaluate on the test set. Judging by the printed metrics, the returned tuple appears
# to be (f1, tpr, fpr, accuracy, precision) — confirm against myutils.predict.
predict(model, x_test, y_test)

all 5513 right 5288 n 2388 p: 3125 tn 2309 tp 2979 fn 146 fp 79
model pool accuracy:  95.92 tpr:  95.33 fpr:  3.31
f1:  96.36 precision:  97.42 recall = tpr:  95.33


(Decimal('96.36098981077147016011644833'),
 Decimal('95.328'),
 Decimal('3.308207705192629815745393635'),
 Decimal('95.91873752947578450934155632'),
 Decimal('97.41661216481360366252452583'))

# Explain
Make sure `lemna_2/bin` is on `PATH` so that the R environment can be found.

In [4]:
os.environ.get("PATH")

'/root/anaconda3/envs/lemna_2/bin:/root/anaconda3/envs/lemna_2/bin:/root/anaconda3/envs/lemna_2/bin/R:/root/anaconda3/bin:/root/anaconda3/envs/networkx/bin/dot:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin'

Run the following cell if the path is not found.

In [None]:
# bin directory of the lemna_2 conda environment that must be on PATH.
R_ENV_BIN = '/root/anaconda3/envs/lemna_2/bin'

# Append only when the entry is missing, so re-running this cell does not keep
# growing PATH with duplicate entries.
current_path = os.environ.get("PATH", "")
if R_ENV_BIN not in current_path.split(os.pathsep):
    os.environ["PATH"] = (current_path + os.pathsep + R_ENV_BIN) if current_path else R_ENV_BIN
os.environ.get("PATH")

Import R libraries for explanation method

In [5]:
import numpy as np
import cPickle as pickle
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from scipy import io
from rpy2 import robjects
from rpy2.robjects.packages import importr
import rpy2.robjects.numpy2ri

# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
np.random.seed(1234)

# Handle to the embedded R interpreter.
r = robjects.r
# Turn on rpy2's NumPy conversion layer.
rpy2.robjects.numpy2ri.activate()

# Load the R packages needed by the explanation step.
# NOTE(review): presumably used by gen_ranking in myutils for the fused lasso /
# trend filter fits — confirm.
importr('genlasso')
importr('gsubfn')

rpy2.robjects.packages.Package as a <module 'gsubfn' (built-in)>

# Generate Synthetic Samples

An input sequence can be perturbed by setting one of its features (or positions) to null. If the prediction output changes, that feature is considered
important to the classifier result. By observing n such synthetic sequences, each with
p features, together with the classifier's prediction value for each
sequence, the p features are ranked by their importance to the classifier result.
- X is the original test input sequence
- y is the softmax predicted output of X in the format [benign, malicious]
- synthetic y is the n × 1 vector of prediction values based on n synthetic sequences. As mentioned in section 3.2, input sequences can have any length and this project sets maximum length to 2000.
    - To generate the first synthetic sequence, only the first feature of the API call sequence is nullified, and the classifier predicts whether this synthetic sequence is malicious or benign. This shall be the first value in the n × 1 vector of prediction values. 
    - Next, to generate the second synthetic sequence, only the second feature of the API call
sequence is set to null, then the classifier predicts and generates the second value
in the n × 1 vector. This process repeats until every position has been nullified
once (n = p = 2000).
- X sequence len is the original length of sequence before it's padded to length 2000.
- Prediction time (equal to elapsed time) is the duration of generating 'synthetic y'.
    - Elapsed time was originally intended to measure the black-box explanation time; however, that measurement was moved to the next step because long sequences caused crashes.
- output is saved for future reuse.

In [7]:
# Build the synthetic (perturbed) sequences for every test sample and record the
# model's predictions for them; this is the expensive step whose output is saved below.
result_df = gen_synthetic_y(model, x_test, data)

# Return to the project root before creating the output directory.
os.chdir(homepath)
# print 'current wd', os.getcwd()

modelname = stamp
# NOTE(review): make_ndir presumably creates a (timestamped) results directory and
# chdirs into it, so the relative to_json path below is written there — confirm in myutils.
make_ndir(modelname+'_XAI_synthetic_y')
result_df.to_json('result_df_json', orient='records')
result_df.head()

Unnamed: 0,seq,X,y,synthetic y,X sequence len,prediction time,elapsed time
0,0,"[[3.0, 1839.0, 134.0, 20760.0, 20760.0, 20760....","[[0.00068575045, 0.9993143]]","[[0.0054127597, 0.9945872], [0.0003411252, 0.9...",53,5.538669,5.53867
0,1,"[[3.0, 20760.0, 20760.0, 20760.0, 20760.0, 207...","[[0.00027286285, 0.9997271]]","[[0.001013361, 0.99898666], [0.00026930144, 0....",33,5.484383,5.484387
0,2,"[[9925.0, 9925.0, 9925.0, 9925.0, 9925.0, 2076...","[[0.0006477182, 0.9993523]]","[[0.0268402, 0.9731598], [0.0006916912, 0.9993...",2000,5.356246,5.356248
0,3,"[[1125.0, 20760.0, 20760.0, 20760.0, 20760.0, ...","[[5.2194497e-05, 0.9999478]]","[[0.00090998545, 0.9990901], [7.9581056e-05, 0...",5,5.374176,5.374179
0,4,"[[3.0, 20760.0, 20760.0, 20760.0, 20760.0, 207...","[[7.91222e-05, 0.99992085]]","[[0.0007140192, 0.999286], [0.00015080118, 0.9...",2000,5.416512,5.416514


# Trendfilter

Fused lasso is suitable for values that are constant for an interval and change in jumps while trendfilter is suitable for values that change gradually. 

2 options:
- trendname = 'fusedlasso'
- trendname = 'trend1'

Method Design:
- Both fused lasso and trendfilter can be generated in one go, but the notebook may crash if it runs for too long. It is safer to generate each method separately.
- Trendfilter of orders 2 and 3 can also be performed, but may be unstable according to the author.
- load_df_json is the saved content from result_df above.

In [None]:
os.chdir(homepath)

# Reload the synthetic-prediction frame saved earlier. The path points at one specific
# timestamped run and has to be updated after regenerating the data.
load_df_json = pd.read_json(homepath+'/Results/2k10_bilstm_1layer_masked/2k10_1biLSTM_keras_masked_XAI_synthetic_y 2019-06-15 23:44:12/result_df_json')

# Regression variant to fit: 'fusedlasso' or 'trend1'.
trendname = 'fusedlasso'
# Rebinds result_df: from here on it holds the '<trendname> coef' / '<trendname> ranking'
# columns rather than the synthetic predictions.
result_df = gen_ranking(load_df_json, trendname)

In [7]:
modelname=stamp
os.chdir(homepath)
# NOTE(review): make_ndir presumably creates a results directory and chdirs into it,
# so the relative to_json path below is written there — confirm in myutils.
make_ndir(modelname+'_gen_ranking_' + trendname)
# Persist the coefficient/ranking frame for reuse by the analysis cells.
result_df.to_json(trendname+'_df_json', orient='records')
result_df.head()

Unnamed: 0,fusedlasso coef,fusedlasso ranking
0,"[0.0054127597, 0.00034112520000000013, 0.00051...","[0, 5, 51, 8, 48, 6, 1508, 1506, 1507, 1510, 1..."
0,"[0.0010133610000000002, 0.00026930140000000003...","[11, 20, 0, 15, 18, 30, 19, 22, 23, 28, 12, 21..."
0,"[0.026840200600000003, 0.0006916912, 0.0007674...","[0, 1998, 1997, 11, 4, 1950, 1959, 1956, 2, 19..."
0,"[0.0009099855, 7.95811e-05, 6.00802e-05, 0.000...","[0, 3, 4, 1, 2, 1508, 1509, 1510, 341, 342, 34..."
0,"[0.0007140192, 0.0001508012, 0.0001499077, 0.0...","[0, 1999, 1996, 1997, 1, 2, 1995, 3, 1993, 199..."


# Load and Analyse
Load saved fused lasso or trendfilter output from above. See Appendix B for more information.

In [None]:
os.chdir(homepath)
print 'current wd', os.getcwd()

In [10]:
# Names of the two columns to copy from the saved ranking frame.
columnsList = [trendname+' coef',trendname+' ranking']
# NOTE: this path is hard-coded to a fusedlasso run while columnsList follows
# trendname; update the path when trendname is changed to 'trend1'.
load_df = pd.read_json(homepath+'/Results/2k10_bilstm_1layer_masked/2k10_1biLSTM_keras_masked_gen_ranking_fusedlasso 2019-06-17 06:21:20/fusedlasso_df_json')

In [11]:
load_df.head()

Unnamed: 0,fusedlasso coef,fusedlasso ranking
0,"[0.0054127597, 0.0003411252, 0.0005129133, 0.0...","[0, 5, 51, 8, 48, 6, 1508, 1506, 1507, 1510, 1..."
1,"[0.001013361, 0.0002693014, 0.0001939829, 0.00...","[11, 20, 0, 15, 18, 30, 19, 22, 23, 28, 12, 21..."
2,"[0.0268402006, 0.0006916912, 0.0007674248, 0.0...","[0, 1998, 1997, 11, 4, 1950, 1959, 1956, 2, 19..."
3,"[0.0009099855, 7.95811e-05, 6.00802e-05, 0.000...","[0, 3, 4, 1, 2, 1508, 1509, 1510, 341, 342, 34..."
4,"[0.0007140192, 0.0001508012, 0.0001499077, 0.0...","[0, 1999, 1996, 1997, 1, 2, 1995, 3, 1993, 199..."


In [12]:
# Attach the coef/ranking columns to the synthetic-prediction frame so each row carries
# both the model inputs/outputs and their explanation. Mutates load_df_json in place.
# NOTE(review): column assignment aligns rows on the index — assumes both frames share
# the same default integer index; confirm.
for c in columnsList:
    load_df_json[c] = load_df[c]

load_df_json.head()

Unnamed: 0,X,X sequence len,elapsed time,prediction time,seq,synthetic y,y,fusedlasso coef,fusedlasso ranking
0,"[[3.0, 1839.0, 134.0, 20760.0, 20760.0, 20760....",53,5.53867,5.538669,0,"[[0.0054127597, 0.994587183], [0.0003411252, 0...","[[0.0006857504, 0.9993143082]]","[0.0054127597, 0.0003411252, 0.0005129133, 0.0...","[0, 5, 51, 8, 48, 6, 1508, 1506, 1507, 1510, 1..."
1,"[[3.0, 20760.0, 20760.0, 20760.0, 20760.0, 207...",33,5.484387,5.484383,1,"[[0.001013361, 0.9989866614], [0.0002693014, 0...","[[0.0002728628, 0.9997270703]]","[0.001013361, 0.0002693014, 0.0001939829, 0.00...","[11, 20, 0, 15, 18, 30, 19, 22, 23, 28, 12, 21..."
2,"[[9925.0, 9925.0, 9925.0, 9925.0, 9925.0, 2076...",2000,5.356248,5.356246,2,"[[0.0268402006, 0.97315979], [0.0006916912, 0....","[[0.0006477182, 0.9993522763]]","[0.0268402006, 0.0006916912, 0.0007674248, 0.0...","[0, 1998, 1997, 11, 4, 1950, 1959, 1956, 2, 19..."
3,"[[1125.0, 20760.0, 20760.0, 20760.0, 20760.0, ...",5,5.374179,5.374176,3,"[[0.0009099855, 0.9990900755], [7.95811e-05, 0...","[[5.21945e-05, 0.9999477863]]","[0.0009099855, 7.95811e-05, 6.00802e-05, 0.000...","[0, 3, 4, 1, 2, 1508, 1509, 1510, 341, 342, 34..."
4,"[[3.0, 20760.0, 20760.0, 20760.0, 20760.0, 207...",2000,5.416514,5.416512,4,"[[0.0007140192, 0.999285996], [0.0001508012, 0...","[[7.91222e-05, 0.999920845]]","[0.0007140192, 0.0001508012, 0.0001499077, 0.0...","[0, 1999, 1996, 1997, 1, 2, 1995, 3, 1993, 199..."


# Analyse by Number
Top k synthetic test provides a fine-grained evaluation.

In [14]:
os.chdir(homepath)
print 'current wd', os.getcwd()
# NOTE(review): make_ndir presumably creates a results directory and chdirs into it,
# which would explain the chdir back to homepath further down — confirm in myutils.
make_ndir(modelname+'_Topk_'+trendname)

# Run the top-k test using the '<trendname> ranking' column; the output log shows one
# block of metrics per value of k.
analyse_df = topk_test(model, trendname+' ranking', load_df_json, y_test )

os.chdir(homepath)
print 'current wd', os.getcwd()

# Save the per-k metrics table in its own results directory.
make_ndir(modelname+'_Synthetic_topk_'+trendname)
analyse_df.to_json(modelname+'_synthetic_topk_df_json_'+trendname, orient='records')
analyse_df.head()

current wd /workspace/codes/lemna
top k: 0
all 5513 right 3125 n 2388 p: 3125 tn 0 tp 3125 fn 0 fp 2388
model pool accuracy:  56.68 tpr:  100.0 fpr:  100.0
f1:  72.35 precision:  56.68 recall = tpr:  100.0
top k: 1
all 5513 right 3832 n 2388 p: 3125 tn 758 tp 3074 fn 51 fp 1630
model pool accuracy:  69.51 tpr:  98.37 fpr:  68.26
f1:  78.53 precision:  65.35 recall = tpr:  98.37
top k: 2
all 5513 right 4567 n 2388 p: 3125 tn 1662 tp 2905 fn 220 fp 726
model pool accuracy:  82.84 tpr:  92.96 fpr:  30.4
f1:  86.0 precision:  80.01 recall = tpr:  92.96
top k: 3
all 5513 right 4746 n 2388 p: 3125 tn 1793 tp 2953 fn 172 fp 595
model pool accuracy:  86.09 tpr:  94.5 fpr:  24.92
f1:  88.51 precision:  83.23 recall = tpr:  94.5
top k: 4
all 5513 right 4967 n 2388 p: 3125 tn 2022 tp 2945 fn 180 fp 366
model pool accuracy:  90.1 tpr:  94.24 fpr:  15.33
f1:  91.52 precision:  88.95 recall = tpr:  94.24
top k: 5
all 5513 right 5043 n 2388 p: 3125 tn 2085 tp 2958 fn 167 fp 303
model pool accuracy:  

all 5513 right 5265 n 2388 p: 3125 tn 2279 tp 2986 fn 139 fp 109
model pool accuracy:  95.5 tpr:  95.55 fpr:  4.56
f1:  96.01 precision:  96.48 recall = tpr:  95.55
top k: 390
all 5513 right 5267 n 2388 p: 3125 tn 2282 tp 2985 fn 140 fp 106
model pool accuracy:  95.54 tpr:  95.52 fpr:  4.44
f1:  96.04 precision:  96.57 recall = tpr:  95.52
top k: 400
all 5513 right 5267 n 2388 p: 3125 tn 2283 tp 2984 fn 141 fp 105
model pool accuracy:  95.54 tpr:  95.49 fpr:  4.4
f1:  96.04 precision:  96.6 recall = tpr:  95.49
top k: 410
all 5513 right 5266 n 2388 p: 3125 tn 2281 tp 2985 fn 140 fp 107
model pool accuracy:  95.52 tpr:  95.52 fpr:  4.48
f1:  96.03 precision:  96.54 recall = tpr:  95.52
top k: 420
all 5513 right 5267 n 2388 p: 3125 tn 2282 tp 2985 fn 140 fp 106
model pool accuracy:  95.54 tpr:  95.52 fpr:  4.44
f1:  96.04 precision:  96.57 recall = tpr:  95.52
top k: 430
all 5513 right 5268 n 2388 p: 3125 tn 2283 tp 2985 fn 140 fp 105
model pool accuracy:  95.56 tpr:  95.52 fpr:  4.4
f1:

all 5513 right 5276 n 2388 p: 3125 tn 2288 tp 2988 fn 137 fp 100
model pool accuracy:  95.7 tpr:  95.62 fpr:  4.19
f1:  96.19 precision:  96.76 recall = tpr:  95.62
top k: 860
all 5513 right 5274 n 2388 p: 3125 tn 2286 tp 2988 fn 137 fp 102
model pool accuracy:  95.66 tpr:  95.62 fpr:  4.27
f1:  96.15 precision:  96.7 recall = tpr:  95.62
top k: 870
all 5513 right 5274 n 2388 p: 3125 tn 2286 tp 2988 fn 137 fp 102
model pool accuracy:  95.66 tpr:  95.62 fpr:  4.27
f1:  96.15 precision:  96.7 recall = tpr:  95.62
top k: 880
all 5513 right 5273 n 2388 p: 3125 tn 2286 tp 2987 fn 138 fp 102
model pool accuracy:  95.65 tpr:  95.58 fpr:  4.27
f1:  96.14 precision:  96.7 recall = tpr:  95.58
top k: 890
all 5513 right 5272 n 2388 p: 3125 tn 2285 tp 2987 fn 138 fp 103
model pool accuracy:  95.63 tpr:  95.58 fpr:  4.31
f1:  96.12 precision:  96.67 recall = tpr:  95.58
top k: 900
all 5513 right 5272 n 2388 p: 3125 tn 2285 tp 2987 fn 138 fp 103
model pool accuracy:  95.63 tpr:  95.58 fpr:  4.31
f1:

all 5513 right 5275 n 2388 p: 3125 tn 2290 tp 2985 fn 140 fp 98
model pool accuracy:  95.68 tpr:  95.52 fpr:  4.1
f1:  96.17 precision:  96.82 recall = tpr:  95.52
top k: 1330
all 5513 right 5276 n 2388 p: 3125 tn 2290 tp 2986 fn 139 fp 98
model pool accuracy:  95.7 tpr:  95.55 fpr:  4.1
f1:  96.18 precision:  96.82 recall = tpr:  95.55
top k: 1340
all 5513 right 5276 n 2388 p: 3125 tn 2290 tp 2986 fn 139 fp 98
model pool accuracy:  95.7 tpr:  95.55 fpr:  4.1
f1:  96.18 precision:  96.82 recall = tpr:  95.55
top k: 1350
all 5513 right 5276 n 2388 p: 3125 tn 2290 tp 2986 fn 139 fp 98
model pool accuracy:  95.7 tpr:  95.55 fpr:  4.1
f1:  96.18 precision:  96.82 recall = tpr:  95.55
top k: 1360
all 5513 right 5276 n 2388 p: 3125 tn 2290 tp 2986 fn 139 fp 98
model pool accuracy:  95.7 tpr:  95.55 fpr:  4.1
f1:  96.18 precision:  96.82 recall = tpr:  95.55
top k: 1370
all 5513 right 5273 n 2388 p: 3125 tn 2287 tp 2986 fn 139 fp 101
model pool accuracy:  95.65 tpr:  95.55 fpr:  4.23
f1:  96.

all 5513 right 5272 n 2388 p: 3125 tn 2287 tp 2985 fn 140 fp 101
model pool accuracy:  95.63 tpr:  95.52 fpr:  4.23
f1:  96.12 precision:  96.73 recall = tpr:  95.52
top k: 1800
all 5513 right 5274 n 2388 p: 3125 tn 2288 tp 2986 fn 139 fp 100
model pool accuracy:  95.66 tpr:  95.55 fpr:  4.19
f1:  96.15 precision:  96.76 recall = tpr:  95.55
top k: 1810
all 5513 right 5273 n 2388 p: 3125 tn 2288 tp 2985 fn 140 fp 100
model pool accuracy:  95.65 tpr:  95.52 fpr:  4.19
f1:  96.14 precision:  96.76 recall = tpr:  95.52
top k: 1820
all 5513 right 5273 n 2388 p: 3125 tn 2288 tp 2985 fn 140 fp 100
model pool accuracy:  95.65 tpr:  95.52 fpr:  4.19
f1:  96.14 precision:  96.76 recall = tpr:  95.52
top k: 1830
all 5513 right 5274 n 2388 p: 3125 tn 2289 tp 2985 fn 140 fp 99
model pool accuracy:  95.66 tpr:  95.52 fpr:  4.15
f1:  96.15 precision:  96.79 recall = tpr:  95.52
top k: 1840
all 5513 right 5273 n 2388 p: 3125 tn 2289 tp 2984 fn 141 fp 99
model pool accuracy:  95.65 tpr:  95.49 fpr:  4

Unnamed: 0,k,f1,tpr,fpr,accuracy,precision
0,0,72.355,100.0,100.0,56.684,56.684
0,1,78.529,98.368,68.258,69.508,65.349
0,2,85.998,92.96,30.402,82.841,80.006
0,3,88.506,94.496,24.916,86.087,83.23
0,4,91.516,94.24,15.327,90.096,88.946


# Analyse by Percent
Top k examines every position and is too fine-grained. Since API call sequences
have various lengths, Top Percentage test normalises them at 0%, 1%, 2%, ...,
100% for comparison.

In [13]:
os.chdir(homepath)
print 'current wd', os.getcwd()
# NOTE(review): make_ndir presumably creates a results directory and chdirs into it,
# which would explain the chdir back to homepath further down — confirm in myutils.
make_ndir(modelname+'_Topk_percent_'+trendname)

# Run the top-percentage test using the '<trendname> ranking' column; the output log
# shows one block of metrics per percentage step.
analyse_percent_df = topk_percent_test(model, trendname+' ranking', load_df_json, y_test )

os.chdir(homepath)
# print 'current wd', os.getcwd()

# Save the per-percentage metrics table in its own results directory.
make_ndir(modelname+'_Synthetic_topk_percent_'+trendname)
analyse_percent_df.to_json(modelname+'_synthetic_df_json_percent_'+trendname, orient='records')
analyse_percent_df.head()

current wd /workspace/codes/lemna
top k percent: 0
all 5513 right 3125 n 2388 p: 3125 tn 0 tp 3125 fn 0 fp 2388
model pool accuracy:  56.68 tpr:  100.0 fpr:  100.0
f1:  72.35 precision:  56.68 recall = tpr:  100.0
top k percent: 1
all 5513 right 4974 n 2388 p: 3125 tn 2015 tp 2959 fn 166 fp 373
model pool accuracy:  90.22 tpr:  94.69 fpr:  15.62
f1:  91.65 precision:  88.81 recall = tpr:  94.69
top k percent: 2
all 5513 right 4997 n 2388 p: 3125 tn 2048 tp 2949 fn 176 fp 340
model pool accuracy:  90.64 tpr:  94.37 fpr:  14.24
f1:  91.96 precision:  89.66 recall = tpr:  94.37
top k percent: 3
all 5513 right 5047 n 2388 p: 3125 tn 2065 tp 2982 fn 143 fp 323
model pool accuracy:  91.55 tpr:  95.42 fpr:  13.53
f1:  92.75 precision:  90.23 recall = tpr:  95.42
top k percent: 4
all 5513 right 5058 n 2388 p: 3125 tn 2072 tp 2986 fn 139 fp 316
model pool accuracy:  91.75 tpr:  95.55 fpr:  13.23
f1:  92.92 precision:  90.43 recall = tpr:  95.55
top k percent: 5
all 5513 right 5091 n 2388 p: 312

all 5513 right 5271 n 2388 p: 3125 tn 2282 tp 2989 fn 136 fp 106
model pool accuracy:  95.61 tpr:  95.65 fpr:  4.44
f1:  96.11 precision:  96.58 recall = tpr:  95.65
top k percent: 46
all 5513 right 5272 n 2388 p: 3125 tn 2282 tp 2990 fn 135 fp 106
model pool accuracy:  95.63 tpr:  95.68 fpr:  4.44
f1:  96.13 precision:  96.58 recall = tpr:  95.68
top k percent: 47
all 5513 right 5272 n 2388 p: 3125 tn 2282 tp 2990 fn 135 fp 106
model pool accuracy:  95.63 tpr:  95.68 fpr:  4.44
f1:  96.13 precision:  96.58 recall = tpr:  95.68
top k percent: 48
all 5513 right 5274 n 2388 p: 3125 tn 2284 tp 2990 fn 135 fp 104
model pool accuracy:  95.66 tpr:  95.68 fpr:  4.36
f1:  96.16 precision:  96.64 recall = tpr:  95.68
top k percent: 49
all 5513 right 5273 n 2388 p: 3125 tn 2283 tp 2990 fn 135 fp 105
model pool accuracy:  95.65 tpr:  95.68 fpr:  4.4
f1:  96.14 precision:  96.61 recall = tpr:  95.68
top k percent: 50
all 5513 right 5274 n 2388 p: 3125 tn 2284 tp 2990 fn 135 fp 104
model pool accur

all 5513 right 5271 n 2388 p: 3125 tn 2287 tp 2984 fn 141 fp 101
model pool accuracy:  95.61 tpr:  95.49 fpr:  4.23
f1:  96.1 precision:  96.73 recall = tpr:  95.49
top k percent: 91
all 5513 right 5270 n 2388 p: 3125 tn 2287 tp 2983 fn 142 fp 101
model pool accuracy:  95.59 tpr:  95.46 fpr:  4.23
f1:  96.09 precision:  96.73 recall = tpr:  95.46
top k percent: 92
all 5513 right 5270 n 2388 p: 3125 tn 2288 tp 2982 fn 143 fp 100
model pool accuracy:  95.59 tpr:  95.42 fpr:  4.19
f1:  96.09 precision:  96.76 recall = tpr:  95.42
top k percent: 93
all 5513 right 5271 n 2388 p: 3125 tn 2288 tp 2983 fn 142 fp 100
model pool accuracy:  95.61 tpr:  95.46 fpr:  4.19
f1:  96.1 precision:  96.76 recall = tpr:  95.46
top k percent: 94
all 5513 right 5270 n 2388 p: 3125 tn 2287 tp 2983 fn 142 fp 101
model pool accuracy:  95.59 tpr:  95.46 fpr:  4.23
f1:  96.09 precision:  96.73 recall = tpr:  95.46
top k percent: 95
all 5513 right 5269 n 2388 p: 3125 tn 2289 tp 2980 fn 145 fp 99
model pool accurac

Unnamed: 0,percent k,f1,tpr,fpr,accuracy,precision
0,0,72.355,100.0,100.0,56.684,56.684
0,1,91.652,94.688,15.62,90.223,88.806
0,2,91.955,94.368,14.238,90.64,89.663
0,3,92.753,95.424,13.526,91.547,90.227
0,4,92.92,95.552,13.233,91.747,90.43


# Appendix A - Load Trained Models
Test set F$_1$ score of various trained models:
- 1-Layer Bi-directional LSTM 96.361
- 2-Layer Bi-directional LSTM 98.031
- 3-Layer Bi-directional LSTM 98.338
- LSTM-based Attention 99.377

## Attention

In [24]:
os.chdir(homepath)

from attention import AttentionWithContext

def load_model_attn(stamp):
    """Rebuild and compile the saved attention model stored under ``stamp``.

    The architecture is read from ``stamp + '.json'`` and deserialised with the
    custom ``AttentionWithContext`` layer registered; the weights are then loaded
    from ``stamp + '.h5'``. The model summary is printed and the model is compiled
    with the Adam optimizer, binary cross-entropy loss and the ``f1_score`` metric.

    Args:
        stamp: Path prefix (without extension) shared by the ``.json`` and
            ``.h5`` files.

    Returns:
        The compiled Keras model.
    """
    # The context manager closes the file even if read() raises.
    with open(stamp+'.json', 'r') as json_file:
        loaded_model_json = json_file.read()

    # The mapping lets Keras resolve the non-built-in attention layer by name.
    model = model_from_json(loaded_model_json, {'AttentionWithContext': AttentionWithContext})

    model.load_weights(stamp+'.h5')
    print("Loaded model from disk")

    model.summary()

    # learning_rate and f1_score are taken from the notebook's global namespace.
    adam = Adam(lr=learning_rate)
    model.compile(loss='binary_crossentropy',
        optimizer=adam,
        metrics=[f1_score])

    return model

# NOTE: this rebinds the global `model` to the attention model; cells that run after
# this one will use it instead of the 1-layer bi-LSTM.
model = load_model_attn(homepath+'/attention_for_lemna/final')   

# Evaluate the attention model on the same test tensors.
predict(model, x_test, y_test)

Loaded model from disk
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 2000, 10)          0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 2000, 64)          433268    
_________________________________________________________________
dropout_1 (Dropout)          (None, 2000, 64)          0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 2000, 64)          25088     
_________________________________________________________________
batch_normalization_1 (Batch (None, 2000, 64)          256       
_________________________________________________________________
attention_with_context_2 (At (None, 64)                4224      
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)              

(Decimal('99.37669809813009429439028288'),
 Decimal('99.488'),
 Decimal('0.9631490787269681742043551089'),
 Decimal('99.29258117177580264828586976'),
 Decimal('99.26564495530012771392081737'))

## 1-Layer Bi-directional LSTM
See code above.

## 2-Layer Bi-directional LSTM
See code in 'TF cpu 2k10 biRNN'.

## 3-Layer Bi-directional LSTM
From 'lstm_fast_nb-bilstm-3layer-keras-predict-MASKED crossval'

In [23]:
# Switch to the 3-layer variant. This rebinds the globals `stamp` and `model`.
stamp = '3bilstm_keras_masked'

# Same embedding layer and input shape as before, with masking enabled.
model = build_model_3biLSTM_keras(
    nb_class=nb_classes,
    input_shape=(MAX_SEQ_LEN, MAX_SENT_LEN),
    stamp=stamp,
    emb_layer=emb_layer,
    mask=True)

# Restore a specific checkpoint (relative path, resolved against the working directory).
model.load_weights('Results/2k10_bilstm_3layer_masked/bi-lstm-improvement-38-0.984.h5')

# Evaluate on the test set.
predict(model, x_test, y_test)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 2000, 10)          0         
_________________________________________________________________
masking_2 (Masking)          (None, 2000, 10)          0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 2000, 10, 20)      415220    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 2000, 200)         0         
_________________________________________________________________
spatial_dropout1d_2 (Spatial (None, 2000, 200)         0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 2000, 64)          59904     
_________________________________________________________________
bidirectional_3 (Bidirection (None, 2000, 64)          25088     
__________

(Decimal('98.33759590792838874680306905'),
 Decimal('98.432'),
 Decimal('2.303182579564489112227805695'),
 Decimal('98.11354979140214039542898603'),
 Decimal('98.24337272436921111465985308'))

# Appendix B - Saved Ranking

## Attention

In [15]:
# Saved outputs for the attention model: synthetic predictions, fused lasso ranking and
# trend1 ranking. NOTE: this rebinds `load_df_json`, which the main analysis cells also use.
load_df_json = pd.read_json(homepath+'/Results/2k10_attention/XAI_2k10 synthetic_y_Attention 2019-05-31 14:35:47/result_df_json')
load_df_json_2 = pd.read_json(homepath+'/Results/2k10_attention//Gen_2k10_attention_fusedlasso 2019-06-01 02:24:49/fusedlasso_df_json')
load_df_json_3 = pd.read_json(homepath+'/Results/2k10_attention/Gen_2k10_attention_trend1 2019-06-02 10:48:02/trend1_df_json')

In [16]:
load_df_json.head(3)

Unnamed: 0,X,X sequence len,elapsed time,prediction time,seq,synthetic y,y
0,"[[3.0, 1839.0, 134.0, 20760.0, 20760.0, 20760....",53,6.462275,6.462261,0,"[[2.9147e-06, 0.999997139], [3.2104e-06, 0.999...","[[2.9724e-06, 0.9999970198]]"
1,"[[3.0, 20760.0, 20760.0, 20760.0, 20760.0, 207...",33,6.468237,6.468213,1,"[[1.10733e-05, 0.9999889135], [1.23101e-05, 0....","[[1.22946e-05, 0.9999877214]]"
2,"[[9925.0, 9925.0, 9925.0, 9925.0, 9925.0, 2076...",2000,6.299239,6.299223,2,"[[1.0533e-06, 0.9999989271], [1.0608e-06, 0.99...","[[1.0587e-06, 0.9999989271]]"


In [17]:
load_df_json_2.head(3)

Unnamed: 0,fusedlasso coef,fusedlasso ranking
0,"[2.9147e-06, 3.2104e-06, 3.1061e-06, 3.0616e-0...","[34, 51, 46, 29, 50, 36, 49, 31, 33, 23, 24, 3..."
1,"[1.10733e-05, 1.23101e-05, 1.37786e-05, 1.4874...","[13, 27, 28, 14, 22, 19, 24, 17, 26, 6, 29, 21..."
2,"[1.0533e-06, 1.0608e-06, 1.0585e-06, 1.0573e-0...","[168, 182, 166, 164, 165, 167, 172, 163, 177, ..."


In [18]:
load_df_json_3.head(3)

Unnamed: 0,trend1 coef,trend1 ranking
0,"[2.9147e-06, 3.2104e-06, 3.1061e-06, 3.0616e-0...","[34, 51, 46, 29, 50, 36, 49, 31, 33, 23, 24, 3..."
1,"[1.10569e-05, 1.23068e-05, 1.35567e-05, 1.4806...","[13, 27, 28, 14, 22, 19, 26, 24, 17, 6, 29, 21..."
2,"[1.0586e-06, 1.0586e-06, 1.0586e-06, 1.0585e-0...","[166, 167, 165, 168, 164, 169, 163, 170, 162, ..."


## Attention Mechanism
From 'lstm_fast_nb-analyse-2k10-attention mechanism comparison'.
- attention.2000: ranking retrieved from Dr Xu Ke

In [28]:
# Synthetic predictions of the attention model (same file as in the section above).
load_df_json = pd.read_json(homepath+'/Results/2k10_attention/XAI_2k10 synthetic_y_Attention 2019-05-31 14:35:47/result_df_json')
# Rankings taken from the attention mechanism itself. The context manager makes sure
# the file handle is closed after loading.
# NOTE: unpickling can execute arbitrary code — only load files from a trusted source.
with open(homepath+'/attention_comparison/attention.2000','rb') as ranking_file:
    load_df = pkl.load(ranking_file)

In [29]:
load_df_json.head(3)

Unnamed: 0,X,X sequence len,elapsed time,prediction time,seq,synthetic y,y
0,"[[3.0, 1839.0, 134.0, 20760.0, 20760.0, 20760....",53,6.462275,6.462261,0,"[[2.9147e-06, 0.999997139], [3.2104e-06, 0.999...","[[2.9724e-06, 0.9999970198]]"
1,"[[3.0, 20760.0, 20760.0, 20760.0, 20760.0, 207...",33,6.468237,6.468213,1,"[[1.10733e-05, 0.9999889135], [1.23101e-05, 0....","[[1.22946e-05, 0.9999877214]]"
2,"[[9925.0, 9925.0, 9925.0, 9925.0, 9925.0, 2076...",2000,6.299239,6.299223,2,"[[1.0533e-06, 0.9999989271], [1.0608e-06, 0.99...","[[1.0587e-06, 0.9999989271]]"


In [42]:
%pprint
load_df[0]

Pretty printing has been turned OFF


[22, 10, 13, 23, 40, 15, 37, 36, 24, 11, 35, 43, 42, 2, 29, 25, 7, 3, 45, 0, 26, 12, 47, 5, 28, 9, 32, 8, 41, 21, 17, 1, 30, 44, 46, 19, 18, 31, 34, 16, 50, 20, 51, 49, 39, 14, 33, 38, 48, 52, 27, 6, 4]

## 1-Layer Bi-directional LSTM
See code above.

## 2-Layer Bi-directional LSTM
From 'TF cpu 2k10 biRNN'. Trendfilter not generated because not used in experiment.

In [25]:
# Saved outputs for the 2-layer bi-LSTM: synthetic predictions and fused lasso ranking.
# NOTE: this rebinds `load_df_json` and `load_df_json_2`.
load_df_json = pd.read_json(homepath+'/Results/2k10_tf2bilstm/2k10_tf_cpu_2bilstm_XAI_2k10 synthetic_y 2019-06-10 15:10:55/result_df_json')
load_df_json_2 = pd.read_json(homepath+'/Results/2k10_tf2bilstm/2k10_tf_cpu_2bilstm_gen_ranking_fusedlasso 2019-06-11 18:16:43/fusedlasso_df_json')

In [26]:
load_df_json.head(3)

Unnamed: 0,X,X sequence len,elapsed time,prediction time,seq,synthetic y,y
0,"[[3.0, 1839.0, 134.0, 20760.0, 20760.0, 20760....",53,1.022923,1.02292,0,"[[0.0008202412, 0.9991797805], [0.0002873832, ...","[[0.0012531201, 0.9987468719]]"
1,"[[3.0, 20760.0, 20760.0, 20760.0, 20760.0, 207...",33,0.839905,0.839891,1,"[[0.0002538302, 0.9997461438], [0.0003885677, ...","[[0.0003537792, 0.9996461868]]"
2,"[[9925.0, 9925.0, 9925.0, 9925.0, 9925.0, 2076...",2000,5.724815,5.724805,2,"[[3.05533e-05, 0.9999694824], [2.8938e-05, 0.9...","[[2.66694e-05, 0.9999732971]]"


In [27]:
load_df_json_2.head(3)

Unnamed: 0,fusedlasso coef,fusedlasso ranking
0,"[0.0008202412, 0.0002873832, 0.0008172597, 0.0...","[42, 51, 40, 45, 52, 43, 22, 38, 29, 48, 26, 3..."
1,"[0.0002538302, 0.0003885677, 0.0004090854, 0.0...","[11, 22, 13, 28, 14, 15, 21, 27, 9, 8, 10, 7, ..."
2,"[3.05533e-05, 2.8938e-05, 2.7954e-05, 2.67422e...","[162, 1998, 164, 1999, 160, 177, 166, 157, 167..."


## 3-Layer Bi-directional LSTM
From 'lstm_fast_nb-analyse-2k10-bilstm-3layer-masked-keras'.

In [13]:
# Saved outputs for the 3-layer bi-LSTM: synthetic predictions and trend1 ranking.
# NOTE(review): the first directory name ends in a space before '/result_df_json';
# presumably that is how the directory was created — confirm on disk.
load_df_json = pd.read_json(homepath+'/Results/2k10_bilstm_3layer_masked/XAI_2k10_bilstm_3layer_masked /result_df_json')
load_df_json_2 = pd.read_json(homepath+'/Results/2k10_bilstm_3layer_masked/XAI_2k10_bilstm_3layer_masked trend1 static 2019-05-22 18:57:47/trend1_df_json')

In [11]:
# In this result file the `coef` and `ranking` columns are stored alongside the
# synthetic predictions (presumably the fused lasso output — confirm).
load_df_json.head(3)

Unnamed: 0,X,coef,elapsed time,prediction time,ranking,seq,synthetic y,y
0,"[[3.0, 1839.0, 134.0, 20760.0, 20760.0, 20760....","[0.0105111282, 0.014664867, 0.0125666643, 0.00...",21.586928,14.456283,"[39, 1, 2, 0, 38, 33, 7, 24, 4, 6, 3, 18, 877,...",0,"[[0.0105111282, 0.9894888997], [0.014664867, 0...","[[0.004624072, 0.9953759909]]"
1,"[[3.0, 20760.0, 20760.0, 20760.0, 20760.0, 207...","[0.0003858794, 0.0002715626, 0.0004641892, 0.0...",16.813192,13.639798,"[28, 13, 74, 72, 73, 71, 70, 76, 75, 66, 67, 6...",1,"[[0.0003858794, 0.9996141195], [0.0002715626, ...","[[0.0006192886, 0.9993807077]]"
2,"[[9925.0, 9925.0, 9925.0, 9925.0, 9925.0, 2076...","[0.0017128468, 0.0001408479, 0.000219571, 0.00...",24.090123,13.427257,"[0, 286, 182, 180, 181, 185, 1875, 287, 285, 2...",2,"[[0.0017128468, 0.9982871413], [0.0001408479, ...","[[0.0002300165, 0.9997699857]]"


In [14]:
load_df_json_2.head(3)

Unnamed: 0,trend1 coef,trend1 ranking
0,"[0.010525087, 0.0146509082, 0.0125387468, 0.00...","[39, 1, 2, 0, 38, 33, 7, 24, 4, 6, 3, 18, 885,..."
1,"[0.0003841664, 0.0002767015, 0.0004607633, 0.0...","[28, 13, 902, 901, 900, 899, 898, 897, 896, 89..."
2,"[0.0011219247, 0.0006910886, 0.0002602525, 0.0...","[0, 1, 181, 182, 180, 183, 179, 184, 178, 185,..."
