In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

# Display settings for this notebook: do not truncate long cell contents and
# do not cap the number of rows shown, so per-column listings such as
# df.isnull().sum() are printed in full.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

---

In [2]:
# Read the first CSV into `df`; this is the frame the scaler is later fitted on
# and that is exported as "wadi_train.npy".
file_path = "WADI.A2_19 Nov 2019/WADI_14days_new.csv"
df = pd.read_csv(file_path)

In [3]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,Row,Date,Time,1_AIT_001_PV,1_AIT_002_PV,1_AIT_003_PV,1_AIT_004_PV,1_AIT_005_PV,1_FIT_001_PV,1_LS_001_AL,...,3_MV_001_STATUS,3_MV_002_STATUS,3_MV_003_STATUS,3_P_001_STATUS,3_P_002_STATUS,3_P_003_STATUS,3_P_004_STATUS,LEAK_DIFF_PRESSURE,PLANT_START_STOP_LOG,TOTAL_CONS_REQUIRED_FLOW
0,1,9/25/2017,00:00.0,171.155,0.619473,11.5759,504.645,0.318319,0.001157,0,...,1,1,1,1,1,1,1,67.9651,1,0.68
1,2,9/25/2017,00:01.0,171.155,0.619473,11.5759,504.645,0.318319,0.001157,0,...,1,1,1,1,1,1,1,67.9651,1,0.68
2,3,9/25/2017,00:02.0,171.155,0.619473,11.5759,504.645,0.318319,0.001157,0,...,1,1,1,1,1,1,1,67.9651,1,0.68
3,4,9/25/2017,00:03.0,171.155,0.607477,11.5725,504.673,0.318438,0.001207,0,...,1,1,1,1,1,1,1,67.1948,1,0.68
4,5,9/25/2017,00:04.0,171.155,0.607477,11.5725,504.673,0.318438,0.001207,0,...,1,1,1,1,1,1,1,67.1948,1,0.68


In [4]:
# Column dtypes followed by the (rows, columns) shape, one per line.
print(df.dtypes, df.shape, sep="\n")

Row                           int64
Date                         object
Time                         object
1_AIT_001_PV                float64
1_AIT_002_PV                float64
1_AIT_003_PV                float64
1_AIT_004_PV                float64
1_AIT_005_PV                float64
1_FIT_001_PV                float64
1_LS_001_AL                   int64
1_LS_002_AL                   int64
1_LT_001_PV                 float64
1_MV_001_STATUS               int64
1_MV_002_STATUS               int64
1_MV_003_STATUS               int64
1_MV_004_STATUS               int64
1_P_001_STATUS                int64
1_P_002_STATUS                int64
1_P_003_STATUS                int64
1_P_004_STATUS                int64
1_P_005_STATUS                int64
1_P_006_STATUS                int64
2_DPIT_001_PV               float64
2_FIC_101_CO                float64
2_FIC_101_PV                float64
2_FIC_101_SP                float64
2_FIC_201_CO                float64
2_FIC_201_PV                

In [5]:
# Row is a running counter and Date/Time are stored as objects (see the dtypes
# listing); remove them so only the remaining measurement columns are kept.
df.drop(columns=["Row", "Date", "Time"], inplace=True)
df.head()

Unnamed: 0,1_AIT_001_PV,1_AIT_002_PV,1_AIT_003_PV,1_AIT_004_PV,1_AIT_005_PV,1_FIT_001_PV,1_LS_001_AL,1_LS_002_AL,1_LT_001_PV,1_MV_001_STATUS,...,3_MV_001_STATUS,3_MV_002_STATUS,3_MV_003_STATUS,3_P_001_STATUS,3_P_002_STATUS,3_P_003_STATUS,3_P_004_STATUS,LEAK_DIFF_PRESSURE,PLANT_START_STOP_LOG,TOTAL_CONS_REQUIRED_FLOW
0,171.155,0.619473,11.5759,504.645,0.318319,0.001157,0,0,47.8911,1,...,1,1,1,1,1,1,1,67.9651,1,0.68
1,171.155,0.619473,11.5759,504.645,0.318319,0.001157,0,0,47.8911,1,...,1,1,1,1,1,1,1,67.9651,1,0.68
2,171.155,0.619473,11.5759,504.645,0.318319,0.001157,0,0,47.8911,1,...,1,1,1,1,1,1,1,67.9651,1,0.68
3,171.155,0.607477,11.5725,504.673,0.318438,0.001207,0,0,47.7503,1,...,1,1,1,1,1,1,1,67.1948,1,0.68
4,171.155,0.607477,11.5725,504.673,0.318438,0.001207,0,0,47.7503,1,...,1,1,1,1,1,1,1,67.1948,1,0.68


In [6]:
# Shape first, then the number of missing values per column.
print(df.shape)
df.isna().sum()

(784571, 127)


1_AIT_001_PV                     0
1_AIT_002_PV                    12
1_AIT_003_PV                     0
1_AIT_004_PV                     6
1_AIT_005_PV                     0
1_FIT_001_PV                     0
1_LS_001_AL                      0
1_LS_002_AL                      0
1_LT_001_PV                      0
1_MV_001_STATUS                  0
1_MV_002_STATUS                  0
1_MV_003_STATUS                  0
1_MV_004_STATUS                  0
1_P_001_STATUS                   0
1_P_002_STATUS                   0
1_P_003_STATUS                   0
1_P_004_STATUS                   0
1_P_005_STATUS                   0
1_P_006_STATUS                   0
2_DPIT_001_PV                    0
2_FIC_101_CO                     0
2_FIC_101_PV                     0
2_FIC_101_SP                     0
2_FIC_201_CO                     0
2_FIC_201_PV                     0
2_FIC_201_SP                     0
2_FIC_301_CO                     0
2_FIC_301_PV                     0
2_FIC_301_SP        

In [7]:
# Keep only the columns that have non-null values in at least 60% of the rows.
df.dropna(axis="columns", thresh=len(df)*0.6, inplace=True)

print(df.shape)
df.isna().sum()

(784571, 123)


1_AIT_001_PV                 0
1_AIT_002_PV                12
1_AIT_003_PV                 0
1_AIT_004_PV                 6
1_AIT_005_PV                 0
1_FIT_001_PV                 0
1_LS_001_AL                  0
1_LS_002_AL                  0
1_LT_001_PV                  0
1_MV_001_STATUS              0
1_MV_002_STATUS              0
1_MV_003_STATUS              0
1_MV_004_STATUS              0
1_P_001_STATUS               0
1_P_002_STATUS               0
1_P_003_STATUS               0
1_P_004_STATUS               0
1_P_005_STATUS               0
1_P_006_STATUS               0
2_DPIT_001_PV                0
2_FIC_101_CO                 0
2_FIC_101_PV                 0
2_FIC_101_SP                 0
2_FIC_201_CO                 0
2_FIC_201_PV                 0
2_FIC_201_SP                 0
2_FIC_301_CO                 0
2_FIC_301_PV                 0
2_FIC_301_SP                 0
2_FIC_401_CO                 0
2_FIC_401_PV                 0
2_FIC_401_SP                 0
2_FIC_50

In [8]:
# Discard every row that still contains at least one missing value.
df.dropna(axis="index", how="any", inplace=True)

print(df.shape)
df.isna().sum()

(784537, 123)


1_AIT_001_PV                0
1_AIT_002_PV                0
1_AIT_003_PV                0
1_AIT_004_PV                0
1_AIT_005_PV                0
1_FIT_001_PV                0
1_LS_001_AL                 0
1_LS_002_AL                 0
1_LT_001_PV                 0
1_MV_001_STATUS             0
1_MV_002_STATUS             0
1_MV_003_STATUS             0
1_MV_004_STATUS             0
1_P_001_STATUS              0
1_P_002_STATUS              0
1_P_003_STATUS              0
1_P_004_STATUS              0
1_P_005_STATUS              0
1_P_006_STATUS              0
2_DPIT_001_PV               0
2_FIC_101_CO                0
2_FIC_101_PV                0
2_FIC_101_SP                0
2_FIC_201_CO                0
2_FIC_201_PV                0
2_FIC_201_SP                0
2_FIC_301_CO                0
2_FIC_301_PV                0
2_FIC_301_SP                0
2_FIC_401_CO                0
2_FIC_401_PV                0
2_FIC_401_SP                0
2_FIC_501_CO                0
2_FIC_501_

In [9]:
def get_highly_correlated_features(correlation_matrix, threshold):
  """Return the column pairs whose correlation magnitude exceeds `threshold`.

  Only the strict lower triangle of `correlation_matrix` is scanned, so each
  unordered pair is reported once and the diagonal is skipped.

  Parameters:
    correlation_matrix: square DataFrame of pairwise correlations whose row
      order matches its column order.
    threshold: a pair is kept when abs(coefficient) > threshold.

  Returns:
    A list of ((column_i, column_j), coefficient) tuples with i > j, sorted by
    the stored (signed) coefficient in descending order.
  """
  names = correlation_matrix.columns
  hits = [
    ((names[row], names[col]), correlation_matrix.iloc[row, col])
    for row in range(len(names))
    for col in range(row)
    if abs(correlation_matrix.iloc[row, col]) > threshold
  ]
  # In-place stable sort; ties keep their scan order, as with sorted(..., reverse=True).
  hits.sort(key=lambda entry: entry[1], reverse=True)
  return hits

In [10]:
# Absolute pairwise correlations between all remaining columns, then the
# pairs whose magnitude is above 0.95 (strongest first).
corr_matrix = abs(df.corr())
correlation_list = get_highly_correlated_features(correlation_matrix=corr_matrix, threshold=0.95)

correlation_list[:10]

[(('1_P_003_STATUS', '1_P_001_STATUS'), np.float64(1.0)),
 (('2_FQ_501_PV', '2_FIC_501_PV'), np.float64(0.9994612556122823)),
 (('2_FQ_201_PV', '2_FIC_201_PV'), np.float64(0.9992881349503935)),
 (('1_P_001_STATUS', '1_FIT_001_PV'), np.float64(0.9992508314216384)),
 (('1_P_003_STATUS', '1_FIT_001_PV'), np.float64(0.9992508314216384)),
 (('2_FQ_301_PV', '2_FIC_301_PV'), np.float64(0.9992261450611793)),
 (('2_FQ_601_PV', '2_FIC_601_PV'), np.float64(0.9991457587647133)),
 (('2_PIT_003_PV', '2_PIC_003_PV'), np.float64(0.9990773256818822)),
 (('2_FQ_401_PV', '2_FIC_401_PV'), np.float64(0.999067708119473)),
 (('2_FQ_101_PV', '2_FIC_101_PV'), np.float64(0.9989998446430937))]

In [11]:
# Greedy pruning: walk the pairs from most to least correlated and, whenever
# neither member has been marked yet, mark the second member for removal.
f2drop = []
for (kept, redundant), _ in correlation_list:
  if kept in f2drop or redundant in f2drop:
    continue  # this pair is already broken up by an earlier decision
  f2drop.append(redundant)

f2drop

['1_P_001_STATUS',
 '2_FIC_501_PV',
 '2_FIC_201_PV',
 '1_FIT_001_PV',
 '2_FIC_301_PV',
 '2_FIC_601_PV',
 '2_PIC_003_PV',
 '2_FIC_401_PV',
 '2_FIC_101_PV',
 '1_MV_001_STATUS',
 '2_DPIT_001_PV',
 '2_P_003_SPEED',
 '1_P_005_STATUS',
 '1_AIT_001_PV',
 '2A_AIT_001_PV',
 '2_MV_006_STATUS',
 '2_FIC_301_CO',
 '2_FIC_601_CO']

In [12]:
# Remove the redundant (highly correlated) columns selected in f2drop.
df.drop(columns=f2drop, inplace=True)
print(df.shape)
df.head()

(784537, 105)


Unnamed: 0,1_AIT_002_PV,1_AIT_003_PV,1_AIT_004_PV,1_AIT_005_PV,1_LS_001_AL,1_LS_002_AL,1_LT_001_PV,1_MV_002_STATUS,1_MV_003_STATUS,1_MV_004_STATUS,...,3_MV_001_STATUS,3_MV_002_STATUS,3_MV_003_STATUS,3_P_001_STATUS,3_P_002_STATUS,3_P_003_STATUS,3_P_004_STATUS,LEAK_DIFF_PRESSURE,PLANT_START_STOP_LOG,TOTAL_CONS_REQUIRED_FLOW
0,0.619473,11.5759,504.645,0.318319,0,0,47.8911,1,1,1,...,1,1,1,1,1,1,1,67.9651,1,0.68
1,0.619473,11.5759,504.645,0.318319,0,0,47.8911,1,1,1,...,1,1,1,1,1,1,1,67.9651,1,0.68
2,0.619473,11.5759,504.645,0.318319,0,0,47.8911,1,1,1,...,1,1,1,1,1,1,1,67.9651,1,0.68
3,0.607477,11.5725,504.673,0.318438,0,0,47.7503,1,1,1,...,1,1,1,1,1,1,1,67.1948,1,0.68
4,0.607477,11.5725,504.673,0.318438,0,0,47.7503,1,1,1,...,1,1,1,1,1,1,1,67.1948,1,0.68


In [13]:
# Learn the per-column min/max on df, then rescale df with those statistics.
# The fitted `scaler` is reused further down to transform the attack frame.
scaler = MinMaxScaler().fit(df)
scaled_data = scaler.transform(df)

np.save("wadi_train.npy", scaled_data)

---

In [14]:
# Read the labelled attack CSV into `df_attack`; its last column holds the
# attack label (1: no attack, -1: attack, per the column name).
attack_file_path = "WADI.A2_19 Nov 2019/WADI_attackdataLABLE.csv"
df_attack = pd.read_csv(attack_file_path)

In [15]:
# Shape and first rows of the attack frame as loaded.
print(df_attack.shape)
df_attack.head()

(172803, 131)


Unnamed: 0,Row,Date,Time,1_AIT_001_PV,1_AIT_002_PV,1_AIT_003_PV,1_AIT_004_PV,1_AIT_005_PV,1_FIT_001_PV,1_LS_001_AL,...,3_MV_002_STATUS,3_MV_003_STATUS,3_P_001_STATUS,3_P_002_STATUS,3_P_003_STATUS,3_P_004_STATUS,LEAK_DIFF_PRESSURE,PLANT_START_STOP_LOG,TOTAL_CONS_REQUIRED_FLOW,"Attack LABLE (1:No Attack, -1:Attack)"
0,1.0,10/9/17,00:00.0,164.21,0.529486,11.9972,482.48,0.331167,0.001273,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.6226,1.0,0.39,1
1,2.0,10/9/17,00:01.0,164.21,0.529486,11.9972,482.48,0.331167,0.001273,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.6226,1.0,0.39,1
2,3.0,10/9/17,00:02.0,164.21,0.529486,11.9972,482.48,0.331167,0.001273,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.6226,1.0,0.39,1
3,4.0,10/9/17,00:03.0,164.21,0.529486,11.9972,482.48,0.331167,0.001273,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.6226,1.0,0.39,1
4,5.0,10/9/17,00:04.0,164.21,0.529486,11.9972,482.48,0.331167,0.001273,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.6226,1.0,0.39,1


In [16]:
# Same column removal as applied to df: drop Row, Date and Time.
df_attack.drop(columns=["Row", "Date", "Time"], inplace=True)

print(df_attack.shape)
df_attack.isna().sum()

(172803, 128)


1_AIT_001_PV                                  2
1_AIT_002_PV                                  2
1_AIT_003_PV                                  2
1_AIT_004_PV                                  2
1_AIT_005_PV                                  2
1_FIT_001_PV                                  2
1_LS_001_AL                                   2
1_LS_002_AL                                   2
1_LT_001_PV                                   2
1_MV_001_STATUS                               2
1_MV_002_STATUS                               2
1_MV_003_STATUS                               2
1_MV_004_STATUS                               2
1_P_001_STATUS                                2
1_P_002_STATUS                                2
1_P_003_STATUS                                2
1_P_004_STATUS                                2
1_P_005_STATUS                                2
1_P_006_STATUS                                2
2_DPIT_001_PV                                 2
2_FIC_101_CO                            

In [17]:
# Keep only the columns that have non-null values in at least 60% of the rows.
# NOTE(review): the kept columns are decided from df_attack's own null counts,
# not from df's; the later scaler.transform relies on both ending up identical.
df_attack.dropna(axis="columns", thresh=len(df_attack)*0.6, inplace=True)

print(df_attack.shape)
df_attack.isna().sum()

(172803, 124)


1_AIT_001_PV                             2
1_AIT_002_PV                             2
1_AIT_003_PV                             2
1_AIT_004_PV                             2
1_AIT_005_PV                             2
1_FIT_001_PV                             2
1_LS_001_AL                              2
1_LS_002_AL                              2
1_LT_001_PV                              2
1_MV_001_STATUS                          2
1_MV_002_STATUS                          2
1_MV_003_STATUS                          2
1_MV_004_STATUS                          2
1_P_001_STATUS                           2
1_P_002_STATUS                           2
1_P_003_STATUS                           2
1_P_004_STATUS                           2
1_P_005_STATUS                           2
1_P_006_STATUS                           2
2_DPIT_001_PV                            2
2_FIC_101_CO                             2
2_FIC_101_PV                             2
2_FIC_101_SP                             2
2_FIC_201_C

In [18]:
# Discard every row that still contains at least one missing value.
df_attack.dropna(axis="index", how="any", inplace=True)

print(df_attack.shape)
df_attack.isna().sum()

(172801, 124)


1_AIT_001_PV                             0
1_AIT_002_PV                             0
1_AIT_003_PV                             0
1_AIT_004_PV                             0
1_AIT_005_PV                             0
1_FIT_001_PV                             0
1_LS_001_AL                              0
1_LS_002_AL                              0
1_LT_001_PV                              0
1_MV_001_STATUS                          0
1_MV_002_STATUS                          0
1_MV_003_STATUS                          0
1_MV_004_STATUS                          0
1_P_001_STATUS                           0
1_P_002_STATUS                           0
1_P_003_STATUS                           0
1_P_004_STATUS                           0
1_P_005_STATUS                           0
1_P_006_STATUS                           0
2_DPIT_001_PV                            0
2_FIC_101_CO                             0
2_FIC_101_PV                             0
2_FIC_101_SP                             0
2_FIC_201_C

In [19]:
# Recode the label column (the last column): attack (-1) becomes 0 and every
# other value becomes 1.
# Values that are already 0 are kept as 0, so re-running this cell does not
# flip previously recoded attack rows to "normal" (the original
# `0 if x == -1 else 1` mapping turned every label into 1 on a second run).
attack_labels = df_attack.iloc[:, -1]
df_attack.iloc[:, -1] = (~attack_labels.isin([-1, 0])).astype(int)

df_attack.sample(5)

Unnamed: 0,1_AIT_001_PV,1_AIT_002_PV,1_AIT_003_PV,1_AIT_004_PV,1_AIT_005_PV,1_FIT_001_PV,1_LS_001_AL,1_LS_002_AL,1_LT_001_PV,1_MV_001_STATUS,...,3_MV_002_STATUS,3_MV_003_STATUS,3_P_001_STATUS,3_P_002_STATUS,3_P_003_STATUS,3_P_004_STATUS,LEAK_DIFF_PRESSURE,PLANT_START_STOP_LOG,TOTAL_CONS_REQUIRED_FLOW,"Attack LABLE (1:No Attack, -1:Attack)"
162060,176.346,0.571482,11.9256,477.597,0.331401,0.001191,0.0,0.0,45.9107,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,59.8531,1.0,0.25,1
128512,179.487,0.679466,11.8537,473.696,0.325057,0.00107,0.0,0.0,47.243,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,59.5892,1.0,0.26,1
31510,170.983,0.631472,11.9491,435.204,0.234879,0.001161,0.0,0.0,66.79,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,64.2029,1.0,0.69,1
99150,181.312,0.697464,11.9192,435.877,0.234838,1.91272,0.0,0.0,55.3337,2.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.975,1.0,0.64,1
75726,180.383,0.649469,12.0327,440.154,0.228129,1.93495,0.0,0.0,39.4927,2.0,...,1.0,1.0,1.0,1.0,1.0,1.0,61.123,1.0,0.71,1


In [20]:
# Drop the same redundant columns (f2drop) that were removed from df, so the
# attack features line up with what the scaler was fitted on.
df_attack.drop(columns=f2drop, inplace=True)
print(df_attack.shape)
df_attack.head()

(172801, 106)


Unnamed: 0,1_AIT_002_PV,1_AIT_003_PV,1_AIT_004_PV,1_AIT_005_PV,1_LS_001_AL,1_LS_002_AL,1_LT_001_PV,1_MV_002_STATUS,1_MV_003_STATUS,1_MV_004_STATUS,...,3_MV_002_STATUS,3_MV_003_STATUS,3_P_001_STATUS,3_P_002_STATUS,3_P_003_STATUS,3_P_004_STATUS,LEAK_DIFF_PRESSURE,PLANT_START_STOP_LOG,TOTAL_CONS_REQUIRED_FLOW,"Attack LABLE (1:No Attack, -1:Attack)"
0,0.529486,11.9972,482.48,0.331167,0.0,0.0,48.482,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.6226,1.0,0.39,1
1,0.529486,11.9972,482.48,0.331167,0.0,0.0,48.482,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.6226,1.0,0.39,1
2,0.529486,11.9972,482.48,0.331167,0.0,0.0,48.482,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.6226,1.0,0.39,1
3,0.529486,11.9972,482.48,0.331167,0.0,0.0,48.482,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.6226,1.0,0.39,1
4,0.529486,11.9972,482.48,0.331167,0.0,0.0,48.482,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,62.6226,1.0,0.39,1


In [21]:
# Split the attack frame into features and labels, scale the features with the
# scaler fitted on df, and glue the labels back on.
label_col = 'Attack LABLE (1:No Attack, -1:Attack)'
df_features = df_attack.drop(columns=[label_col])
df_labels = df_attack[label_col]
scaled_data = scaler.transform(df_features)

# scaler.transform returns a bare ndarray, so the row index has to be restored
# explicitly. pd.concat(axis=1) aligns on the index: without `index=` the scaled
# frame would get a fresh 0..N-1 index while df_labels keeps the post-dropna
# index, and any gap left by dropna would misalign rows and introduce NaNs.
df_features = pd.DataFrame(scaled_data, columns=df_features.columns, index=df_features.index)
attack_data = pd.concat([df_features, df_labels], axis=1)

attack_data.head()

Unnamed: 0,1_AIT_002_PV,1_AIT_003_PV,1_AIT_004_PV,1_AIT_005_PV,1_LS_001_AL,1_LS_002_AL,1_LT_001_PV,1_MV_002_STATUS,1_MV_003_STATUS,1_MV_004_STATUS,...,3_MV_002_STATUS,3_MV_003_STATUS,3_P_001_STATUS,3_P_002_STATUS,3_P_003_STATUS,3_P_004_STATUS,LEAK_DIFF_PRESSURE,PLANT_START_STOP_LOG,TOTAL_CONS_REQUIRED_FLOW,"Attack LABLE (1:No Attack, -1:Attack)"
0,0.257117,0.99866,0.916341,0.575661,0.0,0.0,0.294758,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.169052,0.0,0.172566,1
1,0.257117,0.99866,0.916341,0.575661,0.0,0.0,0.294758,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.169052,0.0,0.172566,1
2,0.257117,0.99866,0.916341,0.575661,0.0,0.0,0.294758,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.169052,0.0,0.172566,1
3,0.257117,0.99866,0.916341,0.575661,0.0,0.0,0.294758,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.169052,0.0,0.172566,1
4,0.257117,0.99866,0.916341,0.575661,0.0,0.0,0.294758,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.169052,0.0,0.172566,1


In [22]:
# Class balance after recoding (0 = attack, 1 = no attack).
df_labels.value_counts()

Attack LABLE (1:No Attack, -1:Attack)
1    162824
0      9977
Name: count, dtype: int64

In [23]:
# Work on a clean 0..N-1 index and record every row's position in an "order"
# column so the original sequence can be restored after recombining the parts.
attack_data = attack_data.reset_index(drop=True)
attack_data["order"] = np.arange(len(attack_data))

# Rows labelled 0 are the anomalies (attacks).
label_col = 'Attack LABLE (1:No Attack, -1:Attack)'
is_anomaly = attack_data[label_col] == 0
anomaly_indices = attack_data.index[is_anomaly].tolist()

# Context window: `buffer` rows before and after each anomaly are always kept.
# Positions outside the frame are discarded and duplicates collapse in the set.
buffer = 2
n_rows = len(attack_data)
buffer_indices = list({
    neighbour
    for idx in anomaly_indices
    for neighbour in range(idx - buffer, idx + buffer + 1)
    if 0 <= neighbour < n_rows
})
in_buffer = attack_data.index.isin(buffer_indices)

# Three disjoint groups: anomalies, their non-anomalous neighbours, the rest.
anomalies = attack_data[is_anomaly]
buffer_points = attack_data[in_buffer & ~is_anomaly]
normal_points = attack_data[~in_buffer & ~is_anomaly]

# Thin out the remaining normal rows: keep every `downsample_factor`-th one.
downsample_factor = 10
normal_downsampled = normal_points.iloc[::downsample_factor, :]

# Reassemble, restore the original row order, and drop the helper column.
combined = pd.concat([anomalies, buffer_points, normal_downsampled])
downsampled_attack_data = combined.sort_values("order").drop(columns=["order"])

# Size check and resulting class balance.
print(f"Original size: {len(attack_data)} | Downsampled size: {len(downsampled_attack_data)}")

downsampled_attack_data[label_col].value_counts()

Original size: 172801 | Downsampled size: 26310


Attack LABLE (1:No Attack, -1:Attack)
1    16333
0     9977
Name: count, dtype: int64

In [24]:
# Label counts of the full (non-downsampled) attack_data, for comparison with
# the downsampled counts.
attack_data['Attack LABLE (1:No Attack, -1:Attack)'].value_counts()

Attack LABLE (1:No Attack, -1:Attack)
1    162824
0      9977
Name: count, dtype: int64

In [25]:
# Per-column extremes of the downsampled attack data. Values can fall outside
# [0, 1] because the scaler's min/max were learned on df, not on this frame.
min_values, max_values = downsampled_attack_data.min(), downsampled_attack_data.max()

max_values

1_AIT_002_PV                               2.913583
1_AIT_003_PV                               1.008033
1_AIT_004_PV                               0.920870
1_AIT_005_PV                               0.669821
1_LS_001_AL                                0.000000
1_LS_002_AL                                0.000000
1_LT_001_PV                                1.109644
1_MV_002_STATUS                            1.000000
1_MV_003_STATUS                            1.000000
1_MV_004_STATUS                            1.000000
1_P_002_STATUS                             0.000000
1_P_003_STATUS                             1.000000
1_P_004_STATUS                             0.000000
1_P_006_STATUS                             1.000000
2_FIC_101_CO                               1.000000
2_FIC_101_SP                               0.976190
2_FIC_201_CO                               1.000000
2_FIC_201_SP                               0.837209
2_FIC_301_SP                               0.953488
2_FIC_401_CO

In [27]:
# Remove the helper "order" column that the downsampling cell added to
# attack_data. errors='ignore' keeps this cell re-runnable: once the column is
# gone, a second execution is a no-op instead of raising KeyError.
attack_data.drop(columns=['order'], inplace=True, errors='ignore')
attack_data.head()

Unnamed: 0,1_AIT_002_PV,1_AIT_003_PV,1_AIT_004_PV,1_AIT_005_PV,1_LS_001_AL,1_LS_002_AL,1_LT_001_PV,1_MV_002_STATUS,1_MV_003_STATUS,1_MV_004_STATUS,...,3_MV_002_STATUS,3_MV_003_STATUS,3_P_001_STATUS,3_P_002_STATUS,3_P_003_STATUS,3_P_004_STATUS,LEAK_DIFF_PRESSURE,PLANT_START_STOP_LOG,TOTAL_CONS_REQUIRED_FLOW,"Attack LABLE (1:No Attack, -1:Attack)"
0,0.257117,0.99866,0.916341,0.575661,0.0,0.0,0.294758,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.169052,0.0,0.172566,1
1,0.257117,0.99866,0.916341,0.575661,0.0,0.0,0.294758,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.169052,0.0,0.172566,1
2,0.257117,0.99866,0.916341,0.575661,0.0,0.0,0.294758,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.169052,0.0,0.172566,1
3,0.257117,0.99866,0.916341,0.575661,0.0,0.0,0.294758,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.169052,0.0,0.172566,1
4,0.257117,0.99866,0.916341,0.575661,0.0,0.0,0.294758,0.0,0.0,0.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.169052,0.0,0.172566,1


In [28]:
# Export the scaled attack features plus the label column as one ndarray.
# np.asarray converts the DataFrame on the first run and returns the array
# unchanged on later runs, so the cell can be re-executed; the original
# `attack_data.to_numpy()` raised AttributeError once attack_data had already
# been rebound to an ndarray.
attack_data = np.asarray(attack_data)
np.save("wadi_attack.npy", attack_data)

In [None]:
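# Disabled alternative export: save the downsampled attack set instead of the full one.
# Note: it writes to the same file name ("wadi_attack.npy") as the export cell above,
# so enabling it would overwrite the full attack array.
# downsampled_attack_data = downsampled_attack_data.to_numpy()
# np.save("wadi_attack.npy", downsampled_attack_data)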