<a href="https://colab.research.google.com/github/asheta66/Machine-Learning-2024/blob/main/ELM/PSO_FFNN_ELM_Prediction_Enhanced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# PSO Optimization: FFNN vs ELM  
This notebook provides a **fair comparison** between:
- **PSO-FFNN**: Optimizes all weights & biases of a Feedforward Neural Network (1 hidden layer).  
- **PSO-ELM**: Optimizes input-to-hidden weights & biases, with output weights solved via ridge regression (Extreme Learning Machine).  

We train both models on the dataset `air_quality_o3.csv`, where the **last column** is the target and all preceding columns are inputs.  

## Features:
- Automatic dataset loading and preprocessing  
- PSO optimization with a per-iteration best-fitness history (used for the convergence curves)  
- Metrics (MAE, RMSE, R², MAPE)  
- Figures:
  - Actual vs Estimated (Train/Test)  
  - PSO Convergence curves  


In [13]:

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Single seed reused by the train/test split and as the default seed of pso_optimize.
RANDOM_STATE = 123
# Also seed NumPy's legacy global RNG for any code that draws from np.random directly.
np.random.seed(RANDOM_STATE)


In [14]:

# =========================
# Config
# =========================
# --- Data ---------------------------------------------------------------
CSV_PATH = "air_quality_o3.csv"   # dataset (last col = target)
TEST_SIZE = 0.2                   # fraction of rows held out for testing

# --- Network ------------------------------------------------------------
HIDDEN_NEURONS = 64               # width of the single hidden layer
ACTIVATION = "relu"               # "relu" | "tanh" | "sigmoid" | "linear"
ELM_RIDGE_ALPHA = 1e-2            # ridge penalty used when solving the ELM output weights

# --- PSO hyperparameters ------------------------------------------------
PSO_PARTICLES = 50                # swarm size
PSO_ITERS = 200                   # number of PSO iterations
PSO_W = 0.72                      # inertia weight
PSO_C1 = 1.49                     # cognitive (personal-best) coefficient
PSO_C2 = 1.49                     # social (global-best) coefficient
PSO_POS_BOUND = 1.0               # positions are clipped to [-bound, +bound]
PSO_VEL_BOUND = 0.3               # velocities are clipped to [-bound, +bound]

# --- Fitness ------------------------------------------------------------
FITNESS_USE_LOG = True            # If True, fitness = log(MSE), else raw MSE
EPS = 1e-12                       # added to the MSE before taking the log


In [15]:

# =========================
# Load and preprocess data
# =========================
# Read the CSV and strip stray whitespace from the header names.
df = pd.read_csv(CSV_PATH)
df.columns = [name.strip() for name in df.columns]

# Convention: the last column is the target, everything before it is an input.
target_col = df.columns[-1]
feature_cols = df.columns[:-1]
print("Target:", target_col)
print("Features:", feature_cols.tolist())

X_raw = df[feature_cols].copy()
y_raw = df[target_col].astype(float).to_numpy()

# Split BEFORE fitting any transformer so test rows never influence
# the imputation medians or the scaling statistics.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X_raw,
    y_raw,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Inputs: median imputation followed by standardisation (fit on train only).
x_pipe = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)
X_train = x_pipe.fit_transform(X_train_raw)
X_test = x_pipe.transform(X_test_raw)

# Target: standardised as well; the scaler wants a column vector, the models a 1-D vector.
y_scaler = StandardScaler()
y_train_column = y_train_raw.reshape(-1, 1)
y_test_column = y_test_raw.reshape(-1, 1)
y_train = y_scaler.fit_transform(y_train_column).ravel()
y_test = y_scaler.transform(y_test_column).ravel()

n_samples, n_features = X_train.shape
print("Train shape:", X_train.shape, "Test shape:", X_test.shape)


Target: O3
Features: ['AMP_TMP', 'CO', 'NO', 'NO2', 'Nox', 'RH', 'SO2', 'WD', 'WS', 'PM10']
Train shape: (876, 10) Test shape: (219, 10)


In [16]:

# =========================
# Utilities
# =========================
def activation_forward(Z, kind):
    """Apply the element-wise activation named by ``kind`` to ``Z``.

    Parameters
    ----------
    Z : array-like of floats
        Pre-activation values.
    kind : str
        One of ``"relu"``, ``"tanh"``, ``"sigmoid"`` or ``"linear"``.

    Returns
    -------
    Activated values with the same shape as ``Z``.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported names.
    """
    if kind == "relu":
        return np.maximum(0.0, Z)
    if kind == "tanh":
        return np.tanh(Z)
    if kind == "sigmoid":
        # Overflow-free logistic: exp is only ever evaluated on a non-positive
        # argument, so large |Z| cannot overflow.
        #   Z >= 0 : 1 / (1 + exp(-Z))
        #   Z <  0 : exp(Z) / (1 + exp(Z))
        e = np.exp(-np.abs(Z))
        return np.where(Z >= 0, 1.0, e) / (1.0 + e)
    if kind == "linear":
        return Z
    raise ValueError("Unknown activation")

def regression_report(y_true, y_pred):
    """Compute MAE, RMSE, R2 and MAPE (in percent) for a set of predictions.

    Both inputs are first converted to float NumPy arrays so that the MAPE
    arithmetic behaves the same as the scikit-learn metrics: plain Python
    lists are accepted and pandas objects are compared positionally rather
    than being aligned on their index.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted values with matching shapes.

    Returns
    -------
    dict
        Keys ``"MAE"``, ``"RMSE"``, ``"R2"`` and ``"MAPE_%"``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    # Clip the denominator away from zero so exact-zero targets do not divide by zero.
    denom = np.clip(np.abs(y_true), 1e-8, None)
    mape = np.mean(np.abs((y_true - y_pred) / denom)) * 100
    return {"MAE": mae, "RMSE": rmse, "R2": r2, "MAPE_%": mape}

def fitness(val):
    """Turn a raw error value into the scalar fitness minimised by PSO.

    Returns ``log(val + EPS)`` when ``FITNESS_USE_LOG`` is true, otherwise
    ``val`` itself; the result is always a plain Python float.
    """
    if FITNESS_USE_LOG:
        return float(np.log(val + EPS))
    return float(val)


In [17]:

# =========================
# Parameter encoding
# =========================
def ffnn_param_size(d, H):
    """Number of trainable scalars in a d-input, H-hidden, 1-output network."""
    hidden_weights = d * H
    hidden_biases = H
    output_weights = H
    output_bias = 1
    return hidden_weights + hidden_biases + output_weights + output_bias
def unpack_ffnn(theta, d, H):
    """Slice a flat parameter vector into the FFNN weights and biases.

    Layout of ``theta``: ``d*H`` hidden weights, ``H`` hidden biases,
    ``H`` output weights, then one output bias.

    Returns
    -------
    (W1, b1, W2, b2)
        ``W1`` has shape ``(d, H)``, ``b1`` shape ``(H,)``, ``W2`` shape
        ``(H, 1)`` and ``b2`` is the single element that follows them.
    """
    w1_end = d * H
    b1_end = w1_end + H
    w2_end = b1_end + H

    W1 = theta[:w1_end].reshape(d, H)
    b1 = theta[w1_end:b1_end]
    W2 = theta[b1_end:w2_end].reshape(H, 1)
    b2 = theta[w2_end]
    return W1, b1, W2, b2
def ffnn_forward(X, theta, act, d, H):
    """Forward pass of the one-hidden-layer network encoded by ``theta``.

    ``theta`` is decoded with ``unpack_ffnn``; the hidden layer uses the
    activation named by ``act`` and the output layer is affine.
    Returns the predictions flattened to 1-D.
    """
    W1, b1, W2, b2 = unpack_ffnn(theta, d, H)
    hidden = activation_forward(X @ W1 + b1, act)
    output = hidden @ W2 + b2
    return output.ravel()

def elm_param_size(d, H):
    """Number of PSO-tuned scalars for the ELM: hidden weights plus hidden biases."""
    hidden_weights = d * H
    hidden_biases = H
    return hidden_weights + hidden_biases
def unpack_elm(theta, d, H):
    """Slice a flat parameter vector into the ELM hidden-layer parameters.

    The first ``d*H`` entries become ``W1`` with shape ``(d, H)``; the next
    ``H`` entries become the hidden bias ``b1``.
    """
    n_weights = d * H
    W1 = theta[:n_weights].reshape(d, H)
    b1 = theta[n_weights:n_weights + H]
    return W1, b1
def elm_fit_predict(Xtr, ytr, Xev, theta, act, d, H, alpha):
    """Fit the ELM output layer on ``(Xtr, ytr)`` and predict for ``Xev``.

    The hidden layer is fixed by ``theta`` (decoded with ``unpack_elm``).
    The output weights are the ridge solution of
    ``(A^T A + alpha * I) beta = A^T ytr``, where ``A`` is the hidden
    activation matrix with a trailing column of ones. The identity spans
    every column of ``A``, so the constant column is penalised too.

    Returns the predictions for ``Xev`` flattened to 1-D.
    """
    W1, b1 = unpack_elm(theta, d, H)

    def hidden_design(X):
        # Hidden activations plus a constant column acting as the output bias.
        hidden = activation_forward(X @ W1 + b1, act)
        ones = np.ones((hidden.shape[0], 1))
        return np.hstack([hidden, ones])

    A_train = hidden_design(Xtr)
    gram = A_train.T @ A_train + alpha * np.eye(A_train.shape[1])
    beta = np.linalg.solve(gram, A_train.T @ ytr)

    A_eval = hidden_design(Xev)
    return (A_eval @ beta).ravel()


In [18]:

# =========================
# PSO optimizer
# =========================
def pso_optimize(loss_fn, dim, swarm_size=PSO_PARTICLES, iters=PSO_ITERS,
                 w=PSO_W,c1=PSO_C1,c2=PSO_C2,
                 pos_bound=PSO_POS_BOUND, vel_bound=PSO_VEL_BOUND, seed=RANDOM_STATE):
    """Minimise ``loss_fn`` over a ``dim``-dimensional box with global-best PSO.

    Parameters
    ----------
    loss_fn : callable
        Maps a 1-D position vector of length ``dim`` to a scalar loss.
    dim : int
        Dimensionality of the search space.
    swarm_size : int
        Number of particles.
    iters : int
        Number of velocity/position updates.
    w, c1, c2 : float
        Inertia weight, cognitive coefficient and social coefficient.
    pos_bound, vel_bound : float
        Positions and velocities are clipped to ``[-bound, +bound]``.
    seed : int
        Seed for the private ``numpy.random.default_rng`` generator.

    Returns
    -------
    (gbest, gval, hist)
        Best position found, its loss, and an array of length ``iters + 1``
        holding the best loss after initialisation and after every iteration.

    Notes
    -----
    A NaN returned by ``loss_fn`` is treated as ``+inf``. Without this,
    ``argmin`` would select the NaN and every later ``<`` comparison against
    it would be False, freezing the search on an invalid particle.
    """
    def evaluate(swarm):
        # One loss evaluation per particle; NaN is mapped to +inf (see Notes).
        vals = np.array([loss_fn(x) for x in swarm], dtype=float)
        vals[np.isnan(vals)] = np.inf
        return vals

    rng = np.random.default_rng(seed)
    Xp = rng.uniform(-pos_bound, pos_bound, (swarm_size, dim))
    V = rng.uniform(-vel_bound, vel_bound, (swarm_size, dim))

    # Personal bests start at the initial positions.
    pbest = Xp.copy()
    pval = evaluate(Xp)
    gidx = int(np.argmin(pval))
    gbest, gval = pbest[gidx].copy(), float(pval[gidx])
    hist = [gval]

    for _ in range(iters):
        # Independent random factors for the cognitive and social pulls.
        rp = rng.random((swarm_size, dim))
        rg = rng.random((swarm_size, dim))
        V = w * V + c1 * rp * (pbest - Xp) + c2 * rg * (gbest - Xp)
        V = np.clip(V, -vel_bound, vel_bound)
        Xp = np.clip(Xp + V, -pos_bound, pos_bound)

        vals = evaluate(Xp)
        improved = vals < pval
        pbest[improved] = Xp[improved]
        pval[improved] = vals[improved]

        gidx = int(np.argmin(pval))
        if pval[gidx] < gval:
            gval = float(pval[gidx])
            gbest = pbest[gidx].copy()
        hist.append(gval)

    return gbest, gval, np.array(hist)


In [19]:

# =========================
# Define losses
# =========================
# Length of the parameter vector each PSO particle carries for the two models.
FFNN_DIM = ffnn_param_size(d=n_features, H=HIDDEN_NEURONS)
ELM_DIM = elm_param_size(d=n_features, H=HIDDEN_NEURONS)

def ffnn_train_loss(theta):
    """PSO objective for the FFNN: fitness of the training MSE for ``theta``."""
    predictions = ffnn_forward(X_train, theta, ACTIVATION, n_features, HIDDEN_NEURONS)
    train_mse = mean_squared_error(y_train, predictions)
    return fitness(train_mse)

def elm_train_loss(theta):
    """PSO objective for the ELM: fitness of the training MSE for ``theta``.

    The output weights are re-solved on the training set for every candidate
    hidden layer, and the error is measured on that same training set.
    """
    predictions = elm_fit_predict(
        X_train, y_train, X_train, theta,
        ACTIVATION, n_features, HIDDEN_NEURONS, ELM_RIDGE_ALPHA,
    )
    train_mse = mean_squared_error(y_train, predictions)
    return fitness(train_mse)


In [20]:

# Both searches use pso_optimize's defaults (same swarm size, iterations, bounds and seed).
print("Optimizing FFNN...")
best_ffnn, loss_ffnn, hist_ffnn = pso_optimize(loss_fn=ffnn_train_loss, dim=FFNN_DIM)

print("Optimizing ELM...")
best_elm, loss_elm, hist_elm = pso_optimize(loss_fn=elm_train_loss, dim=ELM_DIM)


Optimizing FFNN...
Optimizing ELM...


In [21]:

# =========================
# Predictions
# =========================
def _to_original_units(y_scaled):
    """Undo the target standardisation and return a 1-D array."""
    return y_scaler.inverse_transform(y_scaled.reshape(-1, 1)).ravel()

# FFNN: forward pass with the best parameter vector found by PSO.
ytr_ffnn_s = ffnn_forward(X_train, best_ffnn, ACTIVATION, n_features, HIDDEN_NEURONS)
yte_ffnn_s = ffnn_forward(X_test, best_ffnn, ACTIVATION, n_features, HIDDEN_NEURONS)
ytr_ffnn = _to_original_units(ytr_ffnn_s)
yte_ffnn = _to_original_units(yte_ffnn_s)

# ELM: output weights are always fitted on the training split, then applied to each split.
ytr_elm_s = elm_fit_predict(
    X_train, y_train, X_train, best_elm,
    ACTIVATION, n_features, HIDDEN_NEURONS, ELM_RIDGE_ALPHA,
)
yte_elm_s = elm_fit_predict(
    X_train, y_train, X_test, best_elm,
    ACTIVATION, n_features, HIDDEN_NEURONS, ELM_RIDGE_ALPHA,
)
ytr_elm = _to_original_units(ytr_elm_s)
yte_elm = _to_original_units(yte_elm_s)


In [22]:

# Score every model/split pair against the original (unscaled) target values.
# pandas is already imported as `pd` in the notebook's import cell.
ffnn_train = regression_report(y_train_raw, ytr_ffnn)
ffnn_test = regression_report(y_test_raw, yte_ffnn)
elm_train = regression_report(y_train_raw, ytr_elm)
elm_test = regression_report(y_test_raw, yte_elm)

# One row per model/split, one column per metric, rounded to 4 decimals.
metrics = pd.DataFrame({"FFNN-Train": ffnn_train, "FFNN-Test": ffnn_test,
                        "ELM-Train": elm_train, "ELM-Test": elm_test}).T.round(4)
metrics


Unnamed: 0,MAE,RMSE,R2,MAPE_%
FFNN-Train,4.3407,5.5753,0.7112,20.1903
FFNN-Test,4.9342,6.3762,0.6456,22.2623
ELM-Train,3.0835,3.8993,0.8587,13.7692
ELM-Test,4.304,5.5815,0.7284,19.1045


In [23]:

# =========================
# Figures: Actual vs Estimated
# =========================
def plot_actual_vs_est(ytr,ytr_pred,yte,yte_pred,name,path):
    """Save a two-panel figure of actual vs estimated values.

    The top panel shows the training series and the bottom panel the test
    series; the two panels share a y-axis. The figure is written to ``path``
    at 150 dpi and then closed, so nothing is displayed inline.
    """
    fig, axes = plt.subplots(2, 1, figsize=(12, 5), sharey=True)

    panels = (("Train", ytr, ytr_pred), ("Test", yte, yte_pred))
    for ax, (split, actual, estimated) in zip(axes, panels):
        ax.plot(actual, label="Actual")
        ax.plot(estimated, label="Estimated")
        ax.set_title(f"{name} {split}")
        ax.grid(True)
        ax.legend()

    fig.tight_layout()
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print("Saved", path)

# The output folder must exist before any figure is written into it.
os.makedirs("figs", exist_ok=True)

for model_name, train_pred, test_pred, out_path in (
    ("FFNN", ytr_ffnn, yte_ffnn, "figs/ffnn.png"),
    ("ELM", ytr_elm, yte_elm, "figs/elm.png"),
):
    plot_actual_vs_est(y_train_raw, train_pred, y_test_raw, test_pred, model_name, out_path)


Saved figs/ffnn.png
Saved figs/elm.png


In [24]:

def _shared_ylim(h1, h2):
    """Return ``(lo, hi)`` y-limits covering both histories, or ``None`` if no finite data."""
    values = np.concatenate([np.ravel(h1), np.ravel(h2)])
    values = values[np.isfinite(values)]
    if values.size == 0:
        return None
    ymin, ymax = float(values.min()), float(values.max())
    if ymin >= 0 and ymax > 0:
        # Non-negative fitness (e.g. raw MSE): anchor the axis at zero.
        return 0.0, ymax * 1.05
    # Fitness can be negative (e.g. log(MSE) with MSE < 1): pad around the data range
    # instead of clipping everything below zero out of view.
    span = ymax - ymin
    pad = 0.05 * span if span > 0 else 0.05 * max(abs(ymax), 1.0)
    return ymin - pad, ymax + pad


def plot_pso(hist_ffnn,hist_elm,path):
    """Save side-by-side PSO convergence curves for the FFNN and the ELM.

    Both panels share the same y-limits, derived from the finite values of
    the two histories so that negative fitness values remain visible.
    The figure is written to ``path`` at 150 dpi and then closed.
    """
    h1 = np.asarray(hist_ffnn, dtype=float)
    h2 = np.asarray(hist_elm, dtype=float)
    limits = _shared_ylim(h1, h2)

    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    for ax, hist, title in zip(axes, (h1, h2), ("FFNN", "ELM")):
        ax.plot(hist)
        ax.set_title(title)
        if limits is not None:
            ax.set_ylim(*limits)
        ax.set_xlabel("Iter")
        ax.set_ylabel("Fitness")

    fig.suptitle("PSO Convergence")
    fig.tight_layout()
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print("Saved", path)

# Written into the "figs" folder, which the previous cell creates.
plot_pso(hist_ffnn=hist_ffnn, hist_elm=hist_elm, path="figs/convergence.png")


Saved figs/convergence.png
