In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the raw dataset from the working directory.
df = pd.read_csv('xAPI-Edu-Data.csv')

# Encode the target labels as the integers 0, 1, 2.
target_map = {'L': 0, 'M': 1, 'H': 2}
class_codes = df['Class'].map(target_map)

# Series.map yields NaN for any label that is missing from target_map.
# Fail here with a clear message instead of letting NaN targets surface as an
# obscure error inside a downstream model fit.
unmapped = class_codes.isna()
if unmapped.any():
    bad_labels = sorted(df.loc[unmapped, 'Class'].astype(str).unique())
    raise ValueError(f"Unexpected values in 'Class' column: {bad_labels}")
df['Class'] = class_codes.astype('int64')

# Integer-encode the listed columns in place. Names that are not present in the
# frame are skipped; any column that is still non-numeric afterwards is
# one-hot encoded by get_dummies below.
le = LabelEncoder()
binary_cols = ['Gender', 'Semester', 'ParentschoolSatisfaction', 'StudentAbsenceDays', 'Relation', 'ParentAnsweringSurvey']
for col in binary_cols:
    if col in df.columns:
        df[col] = le.fit_transform(df[col])

# One-hot encode the remaining categorical columns, dropping the first level
# of each to avoid a redundant indicator.
df = pd.get_dummies(df, drop_first=True)

X = df.drop('Class', axis=1)
y = df['Class']

# Stratify on the target so the train and test sets keep the same class
# proportions; with a dataset this small an unstratified split can skew them.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training split only, then reuse its statistics for
# the test split so no test information leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Preprocessing complete. X_train shape:", X_train_scaled.shape)

Preprocessing complete. X_train shape: (384, 60)


In [None]:
import numpy as np
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

from ASHO import ASHOFeatureSelector
from ESNN import EvolutionarySNN

# ---------------------------------------------------------------------------
# Base model 1: RBF-kernel SVM trained on every scaled feature.
# ---------------------------------------------------------------------------
print("Training SVM...")
svm_model = SVC(kernel='rbf', probability=True, random_state=42)
svm_model.fit(X_train_scaled, y_train)

# ---------------------------------------------------------------------------
# Base model 2: XGBoost trained on the feature subset chosen by ASHO.
# ---------------------------------------------------------------------------
print("Running ASHO Feature Selection for XGBoost...")
xgb_settings = {'eval_metric': 'mlogloss', 'random_state': 42}
xgb_base = XGBClassifier(**xgb_settings)

asho_selector = ASHOFeatureSelector(classifier=xgb_base, n_agents=10, max_iter=5)
asho_selector.fit(X_train_scaled, y_train)

# Apply the fitted selector to both splits (train first, then test).
X_train_asho, X_test_asho = (
    asho_selector.transform(split) for split in (X_train_scaled, X_test_scaled)
)

print(f"ASHO selected {X_train_asho.shape[1]} features out of {X_train_scaled.shape[1]}.")

# A fresh classifier with the same settings is trained on the reduced matrix.
xgb_final = XGBClassifier(**xgb_settings)
xgb_final.fit(X_train_asho, y_train)

# ---------------------------------------------------------------------------
# Base model 3: Evolutionary SNN trained on every scaled feature.
# ---------------------------------------------------------------------------
print("Training Evolutionary SNN...")

n_features = X_train_scaled.shape[1]
n_classes = len(np.unique(y_train))

esnn_settings = {
    'n_inputs': n_features,
    'n_outputs': n_classes,
    'pop_size': 10,
    'generations': 10,
    'device': 'cpu',
}
esnn_model = EvolutionarySNN(**esnn_settings)
esnn_model.fit(X_train_scaled, y_train)

# ---------------------------------------------------------------------------
# Evaluation: per-model accuracy, then soft voting over averaged probabilities.
# ---------------------------------------------------------------------------
print("Calculating Ensemble Predictions...")

# Class-probability matrices on the test split; XGBoost gets the ASHO subset.
prob_svm = svm_model.predict_proba(X_test_scaled)
prob_xgb = xgb_final.predict_proba(X_test_asho)
prob_snn = esnn_model.predict_proba(X_test_scaled)

# Hard predictions are the most probable column of each matrix.
pred_svm, pred_xgb, pred_snn = (
    np.argmax(probs, axis=1) for probs in (prob_svm, prob_xgb, prob_snn)
)

acc_svm, acc_xgb, acc_snn = (
    accuracy_score(y_test, preds) for preds in (pred_svm, pred_xgb, pred_snn)
)

print(f"\nIndividual Model Performance:")
print(f"SVM Accuracy:      {acc_svm:.4f}")
print(f"XGBoost Accuracy:  {acc_xgb:.4f} (using {X_test_asho.shape[1]} features)")
print(f"ESNN Accuracy:     {acc_snn:.4f}")

# Soft voting: unweighted mean of the three probability matrices.
avg_probs = (prob_svm + prob_xgb + prob_snn) / 3
y_pred_ensemble = np.argmax(avg_probs, axis=1)
accuracy = accuracy_score(y_test, y_pred_ensemble)

print(f"--------------------------------------------------")
print(f"Final Ensemble Accuracy: {accuracy:.4f}")
print(f"--------------------------------------------------")

Training SVM...
Running ASHO Feature Selection for XGBoost...
ASHO Iteration 1/5 - Best Fitness: 0.7188
ASHO Iteration 2/5 - Best Fitness: 0.7266
ASHO Iteration 3/5 - Best Fitness: 0.7266
ASHO Iteration 4/5 - Best Fitness: 0.7266
ASHO Iteration 5/5 - Best Fitness: 0.7266
ASHO selected 32 features out of 60.
Training Evolutionary SNN...
Initialized ESNN Population: 10 agents, 4099 parameters each.
ESNN Gen 1/10 - Best Acc: 0.4089
ESNN Gen 2/10 - Best Acc: 0.4245
ESNN Gen 3/10 - Best Acc: 0.4271
ESNN Gen 4/10 - Best Acc: 0.4297
ESNN Gen 5/10 - Best Acc: 0.4427
ESNN Gen 6/10 - Best Acc: 0.4427
ESNN Gen 7/10 - Best Acc: 0.4427
ESNN Gen 8/10 - Best Acc: 0.4635
ESNN Gen 9/10 - Best Acc: 0.4661
ESNN Gen 10/10 - Best Acc: 0.4661
Calculating Ensemble Predictions...

Individual Model Performance:
SVM Accuracy:      0.6875
XGBoost Accuracy:  0.7188 (using 32 features)
ESNN Accuracy:     0.5208
--------------------------------------------------
Final Ensemble Accuracy: 0.7083
---------------------

In [None]:
from sklearn.linear_model import LogisticRegression

# Imported here because they are only needed for the out-of-fold procedure below.
from sklearn.base import clone
from sklearn.model_selection import StratifiedKFold

print("Generating meta-features for Stacking...")

# The meta-learner has to be trained on OUT-OF-FOLD base-model probabilities.
# Calling predict_proba on the very rows a base model was fitted on yields
# over-confident probabilities, so the meta-learner would learn weights from
# inputs that look nothing like what it receives at test time (target leakage).
n_train_rows = X_train_scaled.shape[0]
n_stack_classes = len(np.unique(y_train))
X_train_asho_arr = np.asarray(X_train_asho)

prob_train_svm = np.zeros((n_train_rows, n_stack_classes))
prob_train_xgb = np.zeros((n_train_rows, n_stack_classes))
prob_train_snn = np.zeros((n_train_rows, n_stack_classes))

# Stratified folds keep every class present in each training fold, so each
# fold model emits one probability column per class.
stack_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for fit_idx, hold_idx in stack_cv.split(X_train_scaled, y_train):
    y_fit = y_train.iloc[fit_idx]

    # Unfitted copies with the same hyperparameters as the full-train models.
    fold_svm = clone(svm_model)
    fold_svm.fit(X_train_scaled[fit_idx], y_fit)
    prob_train_svm[hold_idx] = fold_svm.predict_proba(X_train_scaled[hold_idx])

    # NOTE: the ASHO feature subset was selected on the full training split,
    # so these XGBoost predictions are only approximately leak-free.
    fold_xgb = clone(xgb_final)
    fold_xgb.fit(X_train_asho_arr[fit_idx], y_fit)
    prob_train_xgb[hold_idx] = fold_xgb.predict_proba(X_train_asho_arr[hold_idx])

    # EvolutionarySNN may not support sklearn's clone(), so a fresh instance is
    # built with the same hyperparameters that were used for esnn_model.
    fold_snn = EvolutionarySNN(
        n_inputs=X_train_scaled.shape[1],
        n_outputs=n_stack_classes,
        pop_size=10,
        generations=10,
        device='cpu'
    )
    fold_snn.fit(X_train_scaled[fit_idx], y_fit)
    prob_train_snn[hold_idx] = np.asarray(fold_snn.predict_proba(X_train_scaled[hold_idx]))

X_train_stack = np.hstack([prob_train_svm, prob_train_xgb, prob_train_snn])

# Test-time meta-features come from the base models fitted on the full
# training split; those models never saw the test rows.
prob_test_svm = svm_model.predict_proba(X_test_scaled)
prob_test_xgb = xgb_final.predict_proba(X_test_asho)
prob_test_snn = esnn_model.predict_proba(X_test_scaled)

X_test_stack = np.hstack([prob_test_svm, prob_test_xgb, prob_test_snn])

print("Training Meta-Learner (Logistic Regression)...")
meta_learner = LogisticRegression(random_state=42)
meta_learner.fit(X_train_stack, y_train)

y_pred_stacking = meta_learner.predict(X_test_stack)
stacking_accuracy = accuracy_score(y_test, y_pred_stacking)

# `accuracy` is the soft-voting accuracy computed in the previous cell.
print("SCENARIO 2 RESULTS: Soft Voting vs Stacking")
print(f"Soft Voting Accuracy: {accuracy:.4f}")
print(f"Stacking Accuracy:    {stacking_accuracy:.4f}")


Generating meta-features for Stacking...
Training Meta-Learner (Logistic Regression)...
SCENARIO 2 RESULTS: Soft Voting vs Stacking
Soft Voting Accuracy: 0.7083
Stacking Accuracy:    0.7396


In [None]:
from sklearn.decomposition import FastICA
from sklearn.metrics import mean_squared_error
import math

# One classifier object is reused for all three cases; each call to fit()
# retrains it from scratch on the matrix it is given.
xgb_fixed = XGBClassifier(eval_metric='mlogloss', random_state=42)


def _fit_and_score(model, X_fit, X_eval):
    """Fit `model` on (X_fit, y_train) and score it on (X_eval, y_test).

    Returns a tuple (predictions, accuracy, rmse, n_columns), where
    n_columns is the number of columns in X_fit. The RMSE is computed on the
    integer class codes, so it treats the labels as ordinal and penalises a
    two-step error more than a one-step error.
    """
    model.fit(X_fit, y_train)
    preds = model.predict(X_eval)
    acc = accuracy_score(y_test, preds)
    rmse = math.sqrt(mean_squared_error(y_test, preds))
    return preds, acc, rmse, X_fit.shape[1]


print("="*60)
print("SCENARIO 3: Analysis of Feature Engineering Impact")
print("="*60)

# --- Case A: every scaled feature -------------------------------------------
# Report the real column count rather than a hard-coded number, which goes
# stale as soon as the preprocessing (e.g. one-hot encoding) changes.
print(f"\n[Case A] Training Baseline (All {X_train_scaled.shape[1]} Features)...")

pred_a, acc_a, rmse_a, dim_a = _fit_and_score(xgb_fixed, X_train_scaled, X_test_scaled)

print(f"   -> Accuracy: {acc_a:.4f} | RMSE: {rmse_a:.4f} | Features: {dim_a}")

# --- Case B: feature extraction with ICA ------------------------------------
print("\n[Case B] Training with ICA (Feature Extraction)...")

ica = FastICA(n_components=10, random_state=42, whiten='unit-variance')

# Fit the ICA projection on the training split only, then apply it to test.
X_train_ica = ica.fit_transform(X_train_scaled)
X_test_ica = ica.transform(X_test_scaled)

pred_b, acc_b, rmse_b, dim_b = _fit_and_score(xgb_fixed, X_train_ica, X_test_ica)

print(f"   -> Accuracy: {acc_b:.4f} | RMSE: {rmse_b:.4f} | Components: {dim_b}")

# --- Case C: feature selection with ASHO ------------------------------------
print("\n[Case C] Training with ASHO (Feature Selection)...")

asho_scenario3 = ASHOFeatureSelector(
    classifier=XGBClassifier(eval_metric='mlogloss', random_state=42),
    n_agents=10,
    max_iter=10
)

asho_scenario3.fit(X_train_scaled, y_train)

X_train_asho_s3 = asho_scenario3.transform(X_train_scaled)
X_test_asho_s3 = asho_scenario3.transform(X_test_scaled)

pred_c, acc_c, rmse_c, dim_c = _fit_and_score(xgb_fixed, X_train_asho_s3, X_test_asho_s3)

print(f"   -> Accuracy: {acc_c:.4f} | RMSE: {rmse_c:.4f} | Selected Features: {dim_c}")

# --- Summary table ----------------------------------------------------------
print("\n" + "="*60)
print(f"{'Method':<20} | {'Dimensions':<10} | {'Accuracy':<10} | {'RMSE':<10}")
print("-" * 60)
print(f"{'Baseline (All)':<20} | {dim_a:<10} | {acc_a:.4f}     | {rmse_a:.4f}")
print(f"{'Extraction (ICA)':<20} | {dim_b:<10} | {acc_b:.4f}     | {rmse_b:.4f}")
print(f"{'Selection (ASHO)':<20} | {dim_c:<10} | {acc_c:.4f}     | {rmse_c:.4f}")
print("="*60)

# --- Conclusion -------------------------------------------------------------
# Pick the method with the highest accuracy across ALL three cases. Comparing
# only ICA against ASHO could announce an ICA "win" even when the baseline
# beats both, and ties need to be reported as ties.
accuracy_by_method = {
    'Baseline (All Features)': acc_a,
    'ICA (Extraction)': acc_b,
    'ASHO (Selection)': acc_c,
}
best_accuracy = max(accuracy_by_method.values())
best_methods = [
    method for method, acc in accuracy_by_method.items() if acc == best_accuracy
]

if len(best_methods) == 1:
    print(f"Conclusion: {best_methods[0]} provided the best performance.")
else:
    print(f"Conclusion: {' and '.join(best_methods)} tied for the best performance.")

SCENARIO 3: Analysis of Feature Engineering Impact

[Case A] Training Baseline (All 16 Features)...
   -> Accuracy: 0.8229 | RMSE: 0.4208 | Features: 60

[Case B] Training with ICA (Feature Extraction)...
   -> Accuracy: 0.6562 | RMSE: 0.6374 | Components: 10

[Case C] Training with ASHO (Feature Selection)...
ASHO Iteration 1/10 - Best Fitness: 0.7188
ASHO Iteration 2/10 - Best Fitness: 0.7266
ASHO Iteration 3/10 - Best Fitness: 0.7266
ASHO Iteration 4/10 - Best Fitness: 0.7266
ASHO Iteration 5/10 - Best Fitness: 0.7266
ASHO Iteration 6/10 - Best Fitness: 0.7266
ASHO Iteration 7/10 - Best Fitness: 0.7266
ASHO Iteration 8/10 - Best Fitness: 0.7266
ASHO Iteration 9/10 - Best Fitness: 0.7266
ASHO Iteration 10/10 - Best Fitness: 0.7266
   -> Accuracy: 0.7188 | RMSE: 0.5303 | Selected Features: 32

Method               | Dimensions | Accuracy   | RMSE      
------------------------------------------------------------
Baseline (All)       | 60         | 0.8229     | 0.4208
Extraction (ICA) 