In [1]:
############################
# 1. Imports and File Check
############################
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif, VarianceThreshold
from sklearn.ensemble import RandomForestClassifier
from reservoirpy import ESN

# Check available files and set correct filename
print("Files in current directory:", os.listdir('.'))
csv_name = "SWaT.csv"  # <-- Change if needed!

if not os.path.isfile(csv_name):
    raise FileNotFoundError(f"Could not find '{csv_name}'. Is the filename or path correct?")

###########################
# 2. Load & Preprocess Data
###########################
df = pd.read_csv(csv_name)
df.columns = df.columns.str.strip()
df['Normal/Attack'] = (df['Normal/Attack'] == 'Attack').astype(int)
X_raw = df.drop(['Timestamp', 'Normal/Attack'], axis=1)
y = df['Normal/Attack']

scaler = MinMaxScaler()
X_raw = pd.DataFrame(scaler.fit_transform(X_raw), columns=X_raw.columns)
X_train, X_test, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, stratify=y, random_state=42
)
y_train_arr = np.array(y_train.values).reshape(-1, 1)

##############################
# 3. Feature Selection Methods
##############################

k = 10  # Number of features to select

# Correlation
corr_matrix = X_train.corrwith(y_train).abs()
top_corr = corr_matrix.nlargest(k).index
X_train_corr = X_train[top_corr]
X_test_corr = X_test[top_corr]

# Mutual Information
mi_selector = SelectKBest(mutual_info_classif, k=k)
mi_selector.fit(X_train, y_train)
mi_features = X_train.columns[mi_selector.get_support()]
X_train_mi = X_train[mi_features]
X_test_mi = X_test[mi_features]

# ANOVA F-score
f_selector = SelectKBest(f_classif, k=k)
f_selector.fit(X_train, y_train)
f_features = X_train.columns[f_selector.get_support()]
X_train_f = X_train[f_features]
X_test_f = X_test[f_features]

# Variance Threshold
threshold = 0.01
vt_selector = VarianceThreshold(threshold)
vt_selector.fit(X_train)
vt_features = X_train.columns[vt_selector.get_support()]
X_train_vt = X_train[vt_features]
X_test_vt = X_test[vt_features]
# For consistency, take only first k:
X_train_vt = X_train_vt.iloc[:, :k]
X_test_vt = X_test_vt.iloc[:, :k]

# Random Forest Importance
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
importances = rf.feature_importances_
indices = np.argsort(importances)[::-1][:k]
rf_features = X_train.columns[indices]
X_train_rf = X_train[rf_features]
X_test_rf = X_test[rf_features]

#####################
# 4. ESN Evaluation
#####################
def run_esn(X_train_sel, X_test_sel, y_train_arr, y_test, name):
    esn = ESN(units=10, sr=0.1, input_scaling=1.0, ridge=1e-6, seed=42)
    esn = esn.fit(X_train_sel.values, y_train_arr)
    y_pred_test = esn.run(X_test_sel.values)
    y_pred_test = (y_pred_test > 0.5).astype(int).ravel()
    acc = accuracy_score(y_test.values, y_pred_test)
    f1 = f1_score(y_test.values, y_pred_test, average='weighted')
    print(f"{name:20} | Accuracy: {acc:.4f} | F1: {f1:.4f}")
    return acc, f1

print('\nESN Results by Feature Selection Method:')
results_all = []
for features, X_tr, X_te, label in [
    (list(top_corr), X_train_corr, X_test_corr, "Correlation"),
    (list(mi_features), X_train_mi, X_test_mi, "Mutual Info"),
    (list(f_features), X_train_f, X_test_f, "ANOVA F"),
    (list(vt_features[:k]), X_train_vt, X_test_vt, "VarianceThres"),
    (list(rf_features), X_train_rf, X_test_rf, "RF Importance"),
]:
    acc, f1 = run_esn(X_tr, X_te, y_train_arr, y_test, label)
    results_all.append({'Method': label, 'Features': features, 'Accuracy': acc, 'F1': f1})

#########################
# 5. View as Summary Table
#########################
import pandas as pd
summary_df = pd.DataFrame(results_all)
display(summary_df)

Files in current directory: ['.ipynb_checkpoints', 'Correlation+ESN_bestvalues_resource_constraint.ipynb', 'Correlation-shuffle-value-try.ipynb', 'correlationONLYresults.csv', 'Correlation_ESN_with_custom_values.ipynb', 'ESN-PSO-Builtin.ipynb', 'otherfeaturemethods-toconfirmcorrelationworksornot.ipynb', 'particle-swarm-master', 'ParticleSwarm.py', 'ParticleSwarmUtility.py', 'PSO+individual+feature_testrun3.ipynb', 'PSO+individual+feature_testrun4.ipynb', 'PSO+individual+feature_testrun5.ipynb', 'PSO+individual_feature+testrun6_chisquare.ipynb', 'PSO+individual_feature+testrun7_RFE.ipynb', 'PSO+individual_feature+testrun8_RF_Feature importance+PSO.ipynb', 'PSO+individual_feature+testrun9_Correlation+PSO.ipynb', 'PSO_on_SWAT_feature.ipynb', 'report.log', 'SWAT+Correlation.ipynb', 'SWaT.csv', 'SWAT_Other_features_PSO_testrun2.ipynb', 'SWAT_other_feature_engg_PSO.ipynb', 'Testrun-summ.xlsx', '__pycache__']


  c /= stddev[:, None]
  c /= stddev[None, :]
  f = msb / msw



ESN Results by Feature Selection Method:
Correlation          | Accuracy: 0.9531 | F1: 0.9482
Mutual Info          | Accuracy: 0.9523 | F1: 0.9471
ANOVA F              | Accuracy: 0.9531 | F1: 0.9482
VarianceThres        | Accuracy: 0.8784 | F1: 0.8218
RF Importance        | Accuracy: 0.9531 | F1: 0.9482


Unnamed: 0,Method,Features,Accuracy,F1
0,Correlation,"[FIT401, FIT504, FIT503, UV401, P501, PIT501, ...",0.953092,0.948201
1,Mutual Info,"[AIT201, LIT301, AIT402, P402, UV401, AIT501, ...",0.952258,0.947123
2,ANOVA F,"[FIT401, P402, UV401, FIT501, FIT502, FIT503, ...",0.953092,0.948192
3,VarianceThres,"[FIT101, LIT101, MV101, P101, AIT201, AIT203, ...",0.878412,0.821802
4,RF Importance,"[PIT503, PIT502, PIT501, FIT401, FIT501, P501,...",0.953136,0.948247
