In [1]:
!pip install pyswarms scikit-learn pandas

Collecting pyswarms
  Downloading pyswarms-1.3.0-py2.py3-none-any.whl.metadata (33 kB)
Collecting future (from pyswarms)
  Downloading future-1.0.0-py3-none-any.whl.metadata (4.0 kB)
Downloading pyswarms-1.3.0-py2.py3-none-any.whl (104 kB)
Downloading future-1.0.0-py3-none-any.whl (491 kB)
Installing collected packages: future, pyswarms

   ---------------------------------------- 0/2 [future]
   ---------------------------------------- 0/2 [future]
   ---------------------------------------- 0/2 [future]
   -------------------- ------------------- 1/2 [pyswarms]
   ---------------------------------------- 2/2 [pyswarms]

Successfully installed future-1.0.0 pyswarms-1.3.0


In [7]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load data
swat_df = pd.read_csv('SWaT.csv')

#The column "Timestamp" has a space infront of it and pandas is case sensitive. So, I'm just stripping off unwanted spaces
swat_df.columns = swat_df.columns.str.strip()

# Drop timestamp and label columns, replace as needed for your dataset structure
X = swat_df.drop(['Timestamp', 'Normal/Attack'], axis=1) 
y = swat_df['Normal/Attack']

# Normalize features
gen_scaler = MinMaxScaler()
X_scaled = gen_scaler.fit_transform(X)

# Optional: Drop constant columns
X_scaled = pd.DataFrame(X_scaled, columns=X.columns)
constant_cols = [c for c in X.columns if X[c].nunique() <= 1]
X_scaled = X_scaled.drop(constant_cols, axis=1)

In [8]:
print(X_scaled.shape)
print(X_scaled.head())

(449919, 44)
     FIT101    LIT101  MV101  P101  P102    AIT201    AIT202    AIT203  \
0  0.879322  0.452962    1.0   1.0   0.0  0.942783  0.876785  0.436773   
1  0.886285  0.453015    1.0   1.0   0.0  0.942783  0.876785  0.436773   
2  0.901833  0.452962    1.0   1.0   0.0  0.942783  0.876081  0.436773   
3  0.918195  0.453122    1.0   1.0   0.0  0.942783  0.876081  0.436773   
4  0.930842  0.453816    1.0   1.0   0.0  0.942783  0.876081  0.436773   

     FIT201  MV201  ...    FIT501    FIT502   FIT503    FIT504  P501  \
0  0.865044    1.0  ...  0.982970  0.940678  0.99154  0.987464   1.0   
1  0.865044    1.0  ...  0.982970  0.953861  0.99154  0.987464   1.0   
2  0.863956    1.0  ...  0.982751  0.951224  0.99154  0.990135   1.0   
3  0.863956    1.0  ...  0.982751  0.941808  0.99154  0.990135   1.0   
4  0.864228    1.0  ...  0.982751  0.941808  0.99154  0.990135   1.0   

     PIT501    PIT502    PIT503    FIT601  P602  
0  0.985802  0.837398  0.987359  0.000071   0.0  
1  0.9858

In [18]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

def pso_fitness(mask):
    fitness = []
    for particle in mask:
        cols = [i for i, bit in enumerate(particle) if bit]
        # Avoid empty selection
        if len(cols) == 0:
            fitness.append(0)
            continue
        clf = RandomForestClassifier(n_estimators=30, random_state=42)
        clf.fit(X_train[:, cols], y_train)
        preds = clf.predict(X_val[:, cols])
        fitness.append(f1_score(y_val, preds, average='weighted'))
    return -1 * np.array(fitness)  # negative f1 to minimize for PSO

In [20]:
import numpy as np
# Create a fake mask of 2 particles (try random masks of 0s and 1s, of length 44)
test_mask = np.random.randint(0, 2, (2, X_train.shape[1]))
print('Test mask shape:', test_mask.shape)
# Call the function
print(pso_fitness(test_mask))

Test mask shape: (2, 44)
[-0.99972215 -0.99968877]


In [14]:
print(pso_fitness(test_mask))

[-0.99972215 -0.99969989]


In [21]:
from pyswarms.discrete import BinaryPSO

n_features = X_scaled.shape[1]
options = {'c1': 1.5, 'c2': 1.5, 'w': 0.8, 'k': 2, 'p': 2}  # Add k and p
pso = BinaryPSO(n_particles=10, dimensions=n_features, options=options)
cost, pos = pso.optimize(pso_fitness, iters=10)

2025-11-14 13:57:21,885 - pyswarms.discrete.binary - INFO - Optimize for 10 iters with {'c1': 1.5, 'c2': 1.5, 'w': 0.8, 'k': 2, 'p': 2}
pyswarms.discrete.binary: 100%|██████████|10/10, best_cost=-1
2025-11-14 14:35:33,819 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: -0.9998333069884934, best pos: [0 0 1 0 1 1 1 0 1 1 1 1 0 1 0 0 1 0 0 1 0 1 0 1 1 1 1 1 0 0 1 1 1 1 1 1 0
 0 1 1 1 1 0 1]


In [22]:
selected_features = [col for col, use in zip(X_scaled.columns, pos) if use]
print("Selected features:", selected_features)

Selected features: ['MV101', 'P102', 'AIT201', 'AIT202', 'FIT201', 'MV201', 'P201', 'P203', 'P205', 'FIT301', 'MV302', 'MV304', 'AIT401', 'AIT402', 'FIT401', 'LIT401', 'P402', 'AIT501', 'AIT502', 'AIT503', 'AIT504', 'FIT501', 'FIT502', 'P501', 'PIT501', 'PIT502', 'PIT503', 'P602']
