In [1]:
import keras
import numpy as np
import pandas as pd
from os.path import join as PJOIN
import os
from keras import optimizers, regularizers
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

Using TensorFlow backend.


In [2]:
DATA_DIR = "../DATA/GENERATED/TRAIN/"
DATA_FILES = ["train_libpng_cal.csv", "train_dealii_cal.csv", "train_server_cal.csv", "handcrafted.csv"]

In [3]:
def get_all_training_data():
    all_files = []
    if DATA_FILES[0] == 'all':
        for file in os.listdir(DATA_DIR):
            if file[:2] == 'X_':
                all_files.append(file[2:])
    else:
        all_files = DATA_FILES
    
    all_x = []
    all_y = []
    for file in all_files:
        train_x = pd.read_csv(PJOIN(DATA_DIR,"X_"+file),header=None)
        all_x.append(np.array(train_x))
        train_y = pd.read_csv(PJOIN(DATA_DIR,"Y_"+file),header=None)
        all_y.append(train_y)
    
    all_x = np.concatenate(all_x)
    all_y = np.concatenate(all_y)
    print(all_x.shape,all_y.shape)
    all_y = all_y.reshape(all_y.shape[0])    
    return all_x, all_y

In [4]:
def normalize_data(x):
    return (x - np.mean(x,axis=0))/np.std(x,axis=0)

In [5]:
train_x, train_y = get_all_training_data()
train_x = normalize_data(train_x)
train_y = train_y-1
print(train_x.shape, train_y.shape)
print(np.sum(train_y==0), np.sum(train_y==1), np.sum(train_y==3))

(10747, 12) (10747, 1)
(10747, 12) (10747,)
8173 416 0


In [6]:
perm = np.random.permutation(len(train_x))
train_x = train_x[perm]
train_y = train_y[perm]

In [209]:
class SimpleMLP(keras.Model):

    def __init__(self, use_bn=False, use_dp=False, num_classes=3):
        super(SimpleMLP, self).__init__(name='mlp')
        self.use_bn = use_bn
        self.use_dp = use_dp
        self.num_classes = num_classes
        
        #self.dense1 = keras.layers.Dense(24, activation='relu',kernel_regularizer=regularizers.l2(0.001))
        self.dense2 = keras.layers.Dense(20, activation='relu',kernel_regularizer=regularizers.l2(0.5))
        self.dense3 = keras.layers.Dense(8, activation='relu',kernel_regularizer=regularizers.l2(0.5))
        self.op = keras.layers.Dense(num_classes, activation='softmax')
        if self.use_dp:
            self.dp1 = keras.layers.Dropout(0.5)
            self.dp2 = keras.layers.Dropout(0.5)
            self.dp3 = keras.layers.Dropout(0.5)
            
        if self.use_bn:
            self.bn1 = keras.layers.BatchNormalization(axis=-1)
            self.bn2 = keras.layers.BatchNormalization(axis=-1)
            self.bn3 = keras.layers.BatchNormalization(axis=-1)
            

    def call(self, inputs):
#         x = self.dense1(inputs)
#         if self.use_dp:
#             x = self.dp1(x)
#         if self.use_bn:
#             x = self.bn1(x)
        x = self.dense2(inputs)
        if self.use_dp:
            x = self.dp2(x)
        if self.use_bn:
            x = self.bn2(x)
        x = self.dense3(x)
        if self.use_dp:
            x = self.dp3(x)
        if self.use_bn:
            x = self.bn3(x)
        return self.op(x)

In [210]:
model = SimpleMLP()

In [211]:
rmsprop = optimizers.rmsprop(lr=1*1e-5)
model.compile(loss='categorical_crossentropy',
                 optimizer=rmsprop,
                 metrics=['acc','categorical_accuracy'])

In [217]:
model.fit(train_x,train_y,epochs=50,class_weight={0:1,1:4,2:2})

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f7e8cad7a20>

In [218]:
preds = model.predict(train_x)
preds = preds.argmax(axis=1)

In [219]:
print(np.sum(preds==0), np.sum(preds==1), np.sum(preds==2))

382 8569 1796


In [220]:
print(np.sum(train_y==0), np.sum(train_y==1), np.sum(train_y==2))

8173 416 2158


In [221]:
test = train_y
print(precision_recall_fscore_support(test, preds))
print(accuracy_score(test,preds))
print(np.sum(test==0), np.sum(test==1), np.sum(test==1))
print(np.sum(preds==0), np.sum(preds==1), np.sum(preds==2))

(array([0.58638743, 0.02450694, 0.31737194]), array([0.02740732, 0.50480769, 0.26413346]), array([0.05236704, 0.04674457, 0.28831563]), array([8173,  416, 2158]))
0.09342141993114357
8173 416 416
382 8569 1796
