In [0]:
# Mount Google Drive at /content/drive/ (Colab-only helper); the project files
# read later in this notebook live on that mount.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [0]:
import os
import sys
import time
import numpy as np
import pandas as pd

In [0]:
# Project root on the mounted Drive, given relative to the working directory.
# Every CSV used below is read from its 'data' sub-folder.
rltv_path = 'drive/My Drive/project'

In [0]:
!git clone https://github.com/ymirsky/KitNET-py

fatal: destination path 'KitNET-py' already exists and is not an empty directory.


In [0]:
# Put the cloned repository directory on the import path, then import the
# KitNET module from it.
sys.path.append('KitNET-py')
import KitNET as kit

In [0]:
# Show the files available in the project's data folder.
os.listdir(rltv_path + '/data')

['sync_data_averaging.csv',
 'sync_data_oversampling.csv',
 'sync_data_downsampling.csv',
 'sync_data_averaging_anomal_2_min_max.csv',
 'sync_data_oversampling_anomal_2_min_max.csv',
 'sync_data_downsampling_anomal_2_min_max.csv',
 'sync_data_averaging_anomal_3_min_max.csv',
 'sync_data_oversampling_anomal_3_min_max.csv',
 'sync_data_downsampling_anomal_3_min_max.csv',
 'sync_data_averaging_anomal_3_standard.csv',
 'sync_data_oversampling_anomal_3_standard.csv',
 'sync_data_downsampling_anomal_3_standard.csv']

In [0]:
# Which data variant to evaluate: the value is substituted for the `{}` in the
# 'data/sync_data_{}_anomal_*.csv' file names below. Keep exactly one line active.
# method = 'averaging'
# method = 'oversampling'
method = 'downsampling'

### Malicious samples

In [0]:
def get_anomaly_indices(path):
  """Return the row positions of the samples labelled as anomalies in a CSV.

  Args:
    path: CSV location relative to `rltv_path`. The file's last column is
      read as the label; rows where it equals 1 are reported.

  Returns:
    1-D integer array of 0-based row positions (in file order) whose label is 1.
  """
  # `.values` is the replacement for `DataFrame.as_matrix()`, which was
  # deprecated in pandas 0.23 and removed in pandas 1.0.
  mal_data = pd.read_csv(os.path.join(rltv_path, path)).values
  labels = mal_data[:, -1]
  malicious_labels = np.where(labels == 1)[0]
  return malicious_labels

# Ground-truth anomaly positions for the three shuffled datasets that contain
# anomalies (2 and 3 min-max variants, 3 standard variant), in that order.
mm2_mal, mm3_mal, std_mal = (
  get_anomaly_indices(csv_template.format(method))
  for csv_template in (
    'data/sync_data_{}_anomal_2_min_max.csv',
    'data/sync_data_{}_anomal_3_min_max.csv',
    'data/sync_data_{}_anomal_3_standard.csv',
  )
)

  


### Streaming data

In [0]:
# The clean samples, which are used for training the model.
# NOTE(review): this always loads the 'averaging' file, while the anomaly files
# follow `method` -- confirm that mixing the two variants is intended.
# `.values` replaces `DataFrame.as_matrix()`, which was removed in pandas 1.0.
clean_data = pd.read_csv(os.path.join(rltv_path, 'data/sync_data_averaging.csv')).values

def get_streaming_data(path):
  """Build one stream: every `clean_data` row followed by the rows of a CSV.

  Args:
    path: CSV location relative to `rltv_path`. Its last column is dropped
      before concatenation, so the remaining columns must match the column
      count of `clean_data`.

  Returns:
    2-D array with the `clean_data` rows first and the CSV rows (minus their
    last column) after them.
  """
  # `.values` is the replacement for `DataFrame.as_matrix()`, which was
  # deprecated in pandas 0.23 and removed in pandas 1.0.
  mal_data = pd.read_csv(os.path.join(rltv_path, path)).values
  features_only = mal_data[:, :-1]
  streaming_data = np.concatenate([clean_data, features_only], axis=0)
  return streaming_data

# Concatenate the benign samples with each anomaly-containing dataset
# (2 and 3 min-max variants, 3 standard variant), in that order.
mm2_strm, mm3_strm, std_strm = (
  get_streaming_data(csv_template.format(method))
  for csv_template in (
    'data/sync_data_{}_anomal_2_min_max.csv',
    'data/sync_data_{}_anomal_3_min_max.csv',
    'data/sync_data_{}_anomal_3_standard.csv',
  )
)

  """Entry point for launching an IPython kernel.
  after removing the cwd from sys.path.


### Train

In [0]:
def get_data_RMSEs(steaming_data, n_top=1000, maxAE=4):
  """Train KitNET on the clean prefix of a stream and rank the remaining rows.

  The first `clean_data.shape[0]` rows of the stream are consumed by KitNET's
  two grace periods; only the rows after them are ranked.

  Args:
    steaming_data: 2-D array (rows are samples, columns are features) whose
      leading rows are the clean samples.
    n_top: number of highest-RMSE rows to report. Defaults to 1000, the value
      that used to be hard-coded.
    maxAE: forwarded to KitNET as its second constructor argument (per the
      KitNET README, the maximum size of any autoencoder in the ensemble
      layer). Defaults to 4, the value that used to be hard-coded.

  Returns:
    1-D integer array with the positions of the `n_top` largest RMSE scores,
    ordered by ascending score. Positions are counted from the first row
    after the grace periods (0 is the first non-clean row), so they are
    directly comparable with row indices of the appended dataset.
  """
  clean_data_len = clean_data.shape[0]

  data_samples = steaming_data.shape[0]
  data_features = steaming_data.shape[1]

  # Rows used to train the feature mapper and the anomaly detector
  # respectively; together they span exactly the clean prefix.
  FMgrace = clean_data_len // 25
  ADgrace = clean_data_len - FMgrace

  # Initialize KitNET
  K = kit.KitNET(data_features, maxAE, FMgrace, ADgrace)

  # One RMSE score per row of the stream.
  RMSEs = np.zeros(data_samples)

  # KitNET trains during the grace periods, then executes on all the rest.
  for i, sample in enumerate(steaming_data):
    RMSEs[i] = K.process(sample)

  # Rank only the rows that follow the grace periods.
  test_scores = RMSEs[FMgrace + ADgrace:]
  if n_top <= 0:
    # `[-0:]` would select every row, so handle "report nothing" explicitly.
    return np.empty(0, dtype=np.intp)

  # Positions of the n_top rows with the highest RMSE; these are labelled
  # as anomalies by the caller.
  return test_scores.argsort()[-n_top:]

### Evaluate

In [0]:
def anomalies_detected(pred_indices, true_indices):
  """Report how many predicted indices are also true anomaly indices.

  Args:
    pred_indices: indices flagged as anomalous.
    true_indices: indices of the genuinely anomalous samples.

  Returns:
    The shape tuple of the de-duplicated intersection of the two inputs,
    i.e. a one-element tuple `(count,)` rather than a bare integer.
  """
  hits = np.intersect1d(pred_indices, true_indices)
  return hits.shape

# Score every stream against its ground-truth anomaly positions; the streams
# are processed in the order std, mm2, mm3.
std_detected, mm2_detected, mm3_detected = (
  anomalies_detected(get_data_RMSEs(stream), true_indices)
  for stream, true_indices in (
    (std_strm, std_mal),
    (mm2_strm, mm2_mal),
    (mm3_strm, mm3_mal),
  )
)

Feature-Mapper: train-mode, Anomaly-Detector: off-mode
The Feature-Mapper found a mapping: 6 features to 3 autoencoders.
Feature-Mapper: execute-mode, Anomaly-Detector: train-mode
Feature-Mapper: execute-mode, Anomaly-Detector: exeute-mode
Feature-Mapper: train-mode, Anomaly-Detector: off-mode
The Feature-Mapper found a mapping: 6 features to 3 autoencoders.
Feature-Mapper: execute-mode, Anomaly-Detector: train-mode
Feature-Mapper: execute-mode, Anomaly-Detector: exeute-mode
Feature-Mapper: train-mode, Anomaly-Detector: off-mode
The Feature-Mapper found a mapping: 6 features to 3 autoencoders.
Feature-Mapper: execute-mode, Anomaly-Detector: train-mode
Feature-Mapper: execute-mode, Anomaly-Detector: exeute-mode


In [0]:
# Display the three results side by side; each entry is the `(count,)` shape
# tuple returned by `anomalies_detected`.
mm2_detected, mm3_detected, std_detected

((222,), (295,), (0,))