# SCADA Anomaly Detection

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the raw SCADA recording; later cells rely on its 'Time' and 'ATTACK' columns.
# NOTE(review): the "Cleaning Data" notes mention reading Time as datetime, but no
# parse_dates / to_datetime conversion is applied here -- confirm whether it is needed.
data_with_attacks = pd.read_csv('data/01_Lev_fault_Temp_corr_seed_11_vars_23.csv')

## Cleaning Data

* Fixing column names
* Reading Time as datetime

In [None]:
# Build the column order with the ATTACK label moved to the end.
# list.remove raises ValueError if the ATTACK column is missing.
cols = data_with_attacks.columns.tolist()
cols.remove('ATTACK')
cols.append('ATTACK')

In [None]:
# Apply the new column order (ATTACK last) to the full dataset.
data_with_attacks = data_with_attacks[cols]

In [None]:
# Attack-free subset: keep only the rows whose ATTACK flag equals 0.
data = data_with_attacks.loc[data_with_attacks['ATTACK'] == 0]

In [None]:
# Preview the first two rows of the attack-free subset.
data.head(n=2)

In [None]:
# Preview the first two rows of the full dataset (attack rows included).
data_with_attacks.head(n=2)

In [None]:
# Summary statistics of the attack-free subset, one row per variable.
data.describe().transpose()

In [None]:
# Summary statistics of the full dataset, one row per variable.
data_with_attacks.describe().transpose()

## Visualizations

### Data without attacks

* Plots are saved in the `plots/` folder

In [None]:
# Time-series figure of the attack-free data: one subplot per column, skipping
# the first column (used together with 'Time' as the x axis) and the last one.
n_columns = data.shape[1]
fig, a = plt.subplots(nrows=n_columns - 2, ncols=1,
                      figsize=(12, 7 * n_columns), dpi=200)
for i, ax in enumerate(a, start=1):
    column = data.columns[i]
    data.iloc[:, [0, i]].plot(x='Time', y=column, ax=ax)
    ax.grid()
    ax.set_title(column)
fig.autofmt_xdate()  # rotate the x-axis tick labels and make room for them
plt.suptitle('Data without attacks', fontsize=14)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # keep headroom for the suptitle
plt.savefig('plots/data_timeseries.pdf')
plt.close()

### Data with attacks

In [None]:
# Time-series figure of the full dataset: one subplot per column (skipping the
# first and the last), with the rows flagged ATTACK == 1 overlaid as red crosses.
fig, a = plt.subplots(nrows = data_with_attacks.shape[1]-2, ncols = 1, 
                      figsize = (12, 7 * data_with_attacks.shape[1]), dpi = 200)
# The attack mask does not depend on the plotted column, so compute it once.
attack_rows = data_with_attacks['ATTACK'] == 1
for i in range(1, data_with_attacks.shape[1]-1):
    # Take the label from the frame being plotted rather than from `data`,
    # so this cell does not depend on the attack-free subset.
    column = data_with_attacks.columns[i]
    data_with_attacks.iloc[:,[0,i]]\
    .plot(x = 'Time', y = column, ax = a[i-1], color = 'blue', label = 'Normal')
    to_plot = data_with_attacks.iloc[:,[0,i]].loc[attack_rows]
    a[i-1].scatter(x = to_plot['Time'], y = to_plot[column], color = 'red', 
                   label = 'Attack', marker = 'x')
    a[i-1].grid()
    a[i-1].set_title(column)
    a[i-1].legend(loc="upper left")
plt.suptitle('Data with attacks',fontsize=14)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # keep headroom for the suptitle
plt.savefig('plots/data_with_attack_timeseries.pdf')
plt.close()

## Bivariate Analysis

* Correlation Plot of all the continuous variables

In [None]:
# Correlation heatmap of every column except Time (the ATTACK label included),
# showing only the lower triangle.
plt.figure(figsize=(16, 9), dpi = 200)
corr_plot = data_with_attacks.drop(['Time'], axis = 1).corr()
# Use the builtin bool: the np.bool alias was deprecated in NumPy 1.20 and
# removed in 1.24, where it raises AttributeError.
mask = np.zeros_like(corr_plot, dtype=bool)
# Hide the upper triangle and the diagonal.
mask[np.triu_indices_from(mask)] = True
sns.heatmap(corr_plot,
            square=False, 
            linewidths=.1,  # seaborn's documented keyword for the cell-separator width
            vmin=-1,
            vmax=1,
            cmap='coolwarm',
            annot=True,
            mask = mask)
plt.show()

In [None]:
# Correlation of each variable with the ATTACK label, sorted from lowest to highest.
corr_plot['ATTACK'].sort_values(ascending=True)