In [0]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from keras.models import Model, load_model
from keras.layers import Input, Dense, Dropout
from keras import regularizers
from sklearn.model_selection import train_test_split

import time
import copy
import math

In [0]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [0]:
cols = ['acc x', 'acc y', 'acc z', 'gyro x', 'gyro y', 'gyro z']
data = pd.DataFrame(columns=cols)

for i in range(1,7):
  df = pd.read_csv('/content/drive/My Drive/attack detection/project/drone23-5-19/data' + str(i) + '.csv', delimiter=',', usecols=cols)
  df = df.loc[:, ~df.columns.str.startswith('com')]
  df = df.dropna()
  data = pd.concat([data, df])
  
data = data.reset_index(drop=True)

In [0]:
data.head()

Unnamed: 0,acc x,acc y,acc z,gyro x,gyro y,gyro z
0,0.097519,-0.019211,-0.99856,0.26,-0.69,0.43
1,0.097408,-0.020178,-0.999914,0.29,-0.68,0.44
2,0.098908,-0.019471,-1.000024,0.33,-0.69,0.46
3,0.100166,-0.019388,-0.998979,0.32,-0.71,0.47
4,0.099962,-0.01938,-0.999187,0.22,-0.77,-14372176.9


In [0]:
bad_idx = []
threshold = 1e+03

for col in data.columns:
    idx = (data[data[col].abs() > threshold]).index
    bad_idx = bad_idx + idx.values.tolist()
    
data = data.drop(bad_idx, axis=0)
data = data.reset_index(drop=True)

In [0]:
data.head()

Unnamed: 0,acc x,acc y,acc z,gyro x,gyro y,gyro z
0,0.097519,-0.019211,-0.99856,0.26,-0.69,0.43
1,0.097408,-0.020178,-0.999914,0.29,-0.68,0.44
2,0.098908,-0.019471,-1.000024,0.33,-0.69,0.46
3,0.100166,-0.019388,-0.998979,0.32,-0.71,0.47
4,0.09989,-0.019025,-0.999337,0.15,-0.76,0.61


In [0]:
X_df = data

In [0]:
X_df.shape

(11996, 6)

In [0]:
window = 5
X_df = np.stack([X_df.iloc[i - window: i].values.flatten() for i in range(window, len(X_df))])
X_df = pd.DataFrame(X_df)

y_df = pd.Series(np.zeros(X_df.shape[0]))

In [0]:
y_df.shape

(11991,)

In [0]:
X_df.shape

(11991, 30)

In [0]:
X_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
0,0.097519,-0.019211,-0.99856,0.26,-0.69,0.43,0.097408,-0.020178,-0.999914,0.29,-0.68,0.44,0.098908,-0.019471,-1.000024,0.33,-0.69,0.46,0.100166,-0.019388,-0.998979,0.32,-0.71,0.47,0.09989,-0.019025,-0.999337,0.15,-0.76,0.61
1,0.097408,-0.020178,-0.999914,0.29,-0.68,0.44,0.098908,-0.019471,-1.000024,0.33,-0.69,0.46,0.100166,-0.019388,-0.998979,0.32,-0.71,0.47,0.09989,-0.019025,-0.999337,0.15,-0.76,0.61,0.099534,-0.018312,-0.998102,0.15,-0.74,0.63
2,0.098908,-0.019471,-1.000024,0.33,-0.69,0.46,0.100166,-0.019388,-0.998979,0.32,-0.71,0.47,0.09989,-0.019025,-0.999337,0.15,-0.76,0.61,0.099534,-0.018312,-0.998102,0.15,-0.74,0.63,0.098814,-0.01709,-0.996961,0.16,-0.72,0.66
3,0.100166,-0.019388,-0.998979,0.32,-0.71,0.47,0.09989,-0.019025,-0.999337,0.15,-0.76,0.61,0.099534,-0.018312,-0.998102,0.15,-0.74,0.63,0.098814,-0.01709,-0.996961,0.16,-0.72,0.66,0.098683,-0.017794,-0.996055,0.22,-0.71,0.67
4,0.09989,-0.019025,-0.999337,0.15,-0.76,0.61,0.099534,-0.018312,-0.998102,0.15,-0.74,0.63,0.098814,-0.01709,-0.996961,0.16,-0.72,0.66,0.098683,-0.017794,-0.996055,0.22,-0.71,0.67,0.098517,-0.01905,-0.994932,0.32,-0.71,0.66


In [0]:
y_df.head()

0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
dtype: float64

In [0]:
n_to_change = 1000

idx = X_df.sample(n=n_to_change).index
num_feature_to_change = 3

for i in idx:
  rand_vals = np.random.rand(num_feature_to_change)
  rand_idx = np.random.randint(X_df.shape[1], size=num_feature_to_change)
  for j in range(num_feature_to_change):    
    X_df.iloc[i, rand_idx[j]] = rand_vals[j]
  y_df.iloc[i] = 1

In [0]:
train_idx = y_df[y_df==0.0].index.values

X_train = X_df.iloc[train_idx]
y_train = y_df[train_idx]
X_test = X_df
y_test = y_df

In [0]:
nb_epoch = 500
batch_size = 64


# Input dimension size (first and last autoencoder layer size)
input_dim = X_train.shape[1]

# Setting the auto encoder layers
input_layer = Input(shape=(input_dim, ))

encoder = Dense(20, activation="linear")(input_layer) 

encoder = Dense(10, activation="linear")(encoder) 

decoder = Dense(20, activation="linear")(encoder)

decoder = Dense(input_dim, activation='linear')(decoder) 

autoencoder = Model(inputs=input_layer, outputs=decoder)

autoencoder.summary()

W0712 08:14:45.805614 140653779240832 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0712 08:14:45.820450 140653779240832 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0712 08:14:45.825625 140653779240832 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 30)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 20)                620       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_3 (Dense)              (None, 20)                220       
_________________________________________________________________
dense_4 (Dense)              (None, 30)                630       
Total params: 1,680
Trainable params: 1,680
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Configures the learning process of the network
autoencoder.compile(optimizer='adam',loss='mean_squared_error',metrics=['mse'])

# Train the autoencoder based on the best epoch, returns history object
history = autoencoder.fit(X_train, X_train, epochs=nb_epoch, batch_size=batch_size, verbose=2).history # , shuffle=True, validation_split=0.1

W0712 08:14:46.876780 140653779240832 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0712 08:14:47.023267 140653779240832 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

W0712 08:14:47.141183 140653779240832 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.



Epoch 1/500
 - 0s - loss: 377.3473 - mean_squared_error: 377.3473
Epoch 2/500
 - 0s - loss: 16.8742 - mean_squared_error: 16.8742
Epoch 3/500
 - 0s - loss: 10.8226 - mean_squared_error: 10.8226
Epoch 4/500
 - 0s - loss: 9.6864 - mean_squared_error: 9.6864
Epoch 5/500
 - 0s - loss: 8.8544 - mean_squared_error: 8.8544
Epoch 6/500
 - 0s - loss: 8.2311 - mean_squared_error: 8.2311
Epoch 7/500
 - 0s - loss: 7.6839 - mean_squared_error: 7.6839
Epoch 8/500
 - 0s - loss: 7.2846 - mean_squared_error: 7.2846
Epoch 9/500
 - 0s - loss: 6.8794 - mean_squared_error: 6.8794
Epoch 10/500
 - 0s - loss: 6.5272 - mean_squared_error: 6.5272
Epoch 11/500
 - 0s - loss: 6.1985 - mean_squared_error: 6.1985
Epoch 12/500
 - 0s - loss: 5.8963 - mean_squared_error: 5.8963
Epoch 13/500
 - 0s - loss: 5.5644 - mean_squared_error: 5.5644
Epoch 14/500
 - 0s - loss: 5.2867 - mean_squared_error: 5.2867
Epoch 15/500
 - 0s - loss: 4.9673 - mean_squared_error: 4.9673
Epoch 16/500
 - 0s - loss: 4.6900 - mean_squared_error: 

In [0]:
# Predictions and results for the TEST set

predictions = autoencoder.predict(X_test)
square_errors = np.power(X_test - predictions, 2)
mse = np.mean(square_errors, axis=1)
mse_series = pd.Series(mse)
 
most_anomal_trx = mse_series.sort_values(ascending=False)
columns=["id", "mse_all_columns"]
columns.extend(["squared_error_" + str(x) for x in list(X_test.columns)])
items = []
for x in most_anomal_trx.iteritems():
    item = [x[0], x[1]]
    item.extend(square_errors.loc[x[0]])
    items.append(item)
    
df_anomal_trx = pd.DataFrame(items, columns=columns)
df_anomal_trx.set_index('id', inplace=True)

print('df_anomal_trx size', df_anomal_trx.shape)

top_trx_to_explain = df_anomal_trx.head(1000).index

In [0]:
df_anomal_trx.head()

In [0]:
y_test[top_trx_to_explain].sum()