In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from reservoirpy import ESN

In [5]:
df = pd.read_csv('SWaT.csv')
df.columns = df.columns.str.strip()
df['Normal/Attack'] = (df['Normal/Attack'] == 'Attack').astype(int)
X_raw = df.drop(['Timestamp', 'Normal/Attack'], axis=1)
y = df['Normal/Attack']

scaler = MinMaxScaler()
X_raw = pd.DataFrame(scaler.fit_transform(X_raw), columns=X_raw.columns)
X_train, X_test, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, stratify=y, random_state=42
)
print('done - preprocessing and split')

done - preprocessing and split


In [6]:
print('Running: Correlation-based feature selection ...')

corr_matrix = X_train.corrwith(y_train).abs()
top_10_corr = corr_matrix.nlargest(10).index
print('Top 10 correlated features:', list(top_10_corr))

X_train_corr = X_train[top_10_corr]
X_test_corr = X_test[top_10_corr]

Running: Correlation-based feature selection ...


  c /= stddev[:, None]
  c /= stddev[None, :]


Top 10 correlated features: ['FIT401', 'FIT504', 'FIT503', 'UV401', 'P501', 'PIT501', 'FIT501', 'PIT503', 'FIT502', 'P402']


In [7]:
# Remove constant columns
X_train_clean = X_train_corr.loc[:, X_train_corr.std() > 0]
X_test_clean = X_test_corr[X_train_clean.columns]

# Remove duplicate columns if any (often not needed with top correlation, but safe)
X_train_clean = X_train_clean.T.drop_duplicates().T
X_test_clean = X_test_clean[X_train_clean.columns]

In [8]:
y_train_arr = np.array(y_train.values).reshape(-1, 1)

In [9]:
reservoir_sizes = list(range(10, 60, 10))
spectral_radii = [0.1, 0.3, 0.5, 0.7, 0.9,1.0]
input_scalings = [0.01, 0.1, 0.5, 0.58, 1.0]

In [10]:
esn_results = []

for res_size in reservoir_sizes:
    for spec_rad in spectral_radii:
        for in_scale in input_scalings:
            print(f"Running ESN: Reservoir={res_size}, Spectral Radius={spec_rad}, Input Scaling={in_scale}")
            esn = ESN(
                units=res_size,
                sr=spec_rad,
                input_scaling=in_scale,
                ridge=1e-6,    # Regularization avoids singular matrix error!
                seed=42
            )
            esn = esn.fit(X_train_clean.values, y_train_arr)
            y_pred_test = esn.run(X_test_clean.values)
            y_pred_test = (y_pred_test > 0.5).astype(int).ravel()

            acc = accuracy_score(y_test.values, y_pred_test)
            f1 = f1_score(y_test.values, y_pred_test, average='weighted')
            print(f"\tAccuracy: {acc:.4f}  F1: {f1:.4f}")

            esn_results.append({
                'reservoir_size': res_size,
                'spectral_radius': spec_rad,
                'input_scaling': in_scale,
                'accuracy': acc,
                'f1': f1
            })

print('All ESN runs complete.')

best_run = max(esn_results, key=lambda x: x['f1'])
print(f"Best ESN F1: {best_run['f1']:.4f} with Reservoir={best_run['reservoir_size']}, Spectral Radius={best_run['spectral_radius']}, Input Scaling={best_run['input_scaling']}")

Running ESN: Reservoir=10, Spectral Radius=0.1, Input Scaling=0.01
	Accuracy: 0.9531  F1: 0.9482
Running ESN: Reservoir=10, Spectral Radius=0.1, Input Scaling=0.1
	Accuracy: 0.9531  F1: 0.9482
Running ESN: Reservoir=10, Spectral Radius=0.1, Input Scaling=0.5
	Accuracy: 0.9531  F1: 0.9482
Running ESN: Reservoir=10, Spectral Radius=0.1, Input Scaling=0.58
	Accuracy: 0.9531  F1: 0.9482
Running ESN: Reservoir=10, Spectral Radius=0.1, Input Scaling=1.0
	Accuracy: 0.9531  F1: 0.9482
Running ESN: Reservoir=10, Spectral Radius=0.29, Input Scaling=0.01
	Accuracy: 0.9531  F1: 0.9482
Running ESN: Reservoir=10, Spectral Radius=0.29, Input Scaling=0.1
	Accuracy: 0.9531  F1: 0.9482
Running ESN: Reservoir=10, Spectral Radius=0.29, Input Scaling=0.5
	Accuracy: 0.9523  F1: 0.9471
Running ESN: Reservoir=10, Spectral Radius=0.29, Input Scaling=0.58
	Accuracy: 0.9523  F1: 0.9471
Running ESN: Reservoir=10, Spectral Radius=0.29, Input Scaling=1.0
	Accuracy: 0.9523  F1: 0.9472
Running ESN: Reservoir=10, Spec