In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# 1. Load the raw dataset and show its schema.
df = pd.read_csv('synthetic_student_performance.csv')
df.info()

# 'StudentID' is only a row identifier, not a predictor; remove it when present.
df = df.drop(columns=['StudentID'], errors='ignore')

# 2. Target column.
# 'GradeClass' is already stored as an integer column, so no label mapping
# is applied before it is used as y.
target_col = 'GradeClass'

# 3. Feature encoding.
# The remaining columns are numeric already. 'Ethnicity' is treated as a
# nominal category: it is cast to str and one-hot encoded, dropping the first
# level so the dummy columns are not perfectly collinear.
df = pd.get_dummies(
    df.astype({'Ethnicity': str}),
    columns=['Ethnicity'],
    drop_first=True,
)

# 4. Hold-out split: 80% train / 20% test with a fixed seed.
# NOTE(review): 'GPA' stays in X. If GradeClass is derived from GPA this is
# target leakage and would explain near-perfect accuracies - confirm.
y = df[target_col]
X = df.drop(columns=[target_col])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 5. Z-score normalisation. Statistics are learned on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Preprocessing complete. X_train shape:", X_train_scaled.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   StudentID          5000 non-null   int64  
 1   Age                5000 non-null   int64  
 2   Gender             5000 non-null   int64  
 3   Ethnicity          5000 non-null   int64  
 4   ParentalEducation  5000 non-null   int64  
 5   StudyTimeWeekly    5000 non-null   int64  
 6   Absences           5000 non-null   int64  
 7   Tutoring           5000 non-null   int64  
 8   ParentalSupport    5000 non-null   int64  
 9   Extracurricular    5000 non-null   int64  
 10  Sports             5000 non-null   int64  
 11  Music              5000 non-null   int64  
 12  Volunteering       5000 non-null   int64  
 13  GPA                5000 non-null   float64
 14  GradeClass         5000 non-null   int64  
dtypes: float64(1), int64(14)
memory usage: 586.1 KB
Preprocessing complete. 

In [None]:
import numpy as np
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Import your custom classes from the files you created
from ASHO import ASHOFeatureSelector
from ESNN import EvolutionarySNN

# --- 1. Define & Train SVM (Standard) ---
print("Training SVM...")
# probability=True is required so that predict_proba is available for soft voting
svm_model = SVC(kernel='rbf', probability=True, random_state=42)
svm_model.fit(X_train_scaled, y_train)


# --- 2. Define & Train ASHO-XGBoost ---
print("Running ASHO Feature Selection for XGBoost...")
# Base estimator handed to the selector
xgb_base = XGBClassifier(eval_metric='mlogloss', random_state=42)

# ASHO feature selector wrapping the XGBoost base estimator
asho_selector = ASHOFeatureSelector(
    classifier=xgb_base,
    n_agents=10,
    max_iter=5  # Adjust iterations as needed
)

# Fit ASHO on the training split only, then apply it to both splits
asho_selector.fit(X_train_scaled, y_train)
X_train_asho = asho_selector.transform(X_train_scaled)
X_test_asho = asho_selector.transform(X_test_scaled)

print(f"ASHO selected {X_train_asho.shape[1]} features out of {X_train_scaled.shape[1]}.")

# Train the FINAL XGBoost model on the selected features
xgb_final = XGBClassifier(eval_metric='mlogloss', random_state=42)
xgb_final.fit(X_train_asho, y_train)


# --- 3. Define & Train Evolutionary SNN ---
print("Training Evolutionary SNN...")
# Input size = all scaled features; output size = number of distinct labels
# present in y_train (one output per class).
n_features = X_train_scaled.shape[1]
class_labels = np.unique(y_train)  # sorted distinct labels
n_classes = len(class_labels)

esnn_model = EvolutionarySNN(
    n_inputs=n_features,
    n_outputs=n_classes,
    pop_size=10,
    generations=10,
    device='cpu' # Change to 'cuda' if GPU is available
)

esnn_model.fit(X_train_scaled, y_train)


# --- 4. Soft Voting Ensemble ---
print("Calculating Ensemble Predictions...")

# Class probabilities from each model.
# XGBoost must see the ASHO subset; the other two use the full feature set.
prob_svm = svm_model.predict_proba(X_test_scaled)
prob_xgb = xgb_final.predict_proba(X_test_asho)
prob_snn = esnn_model.predict_proba(X_test_scaled)

# Averaging only makes sense when every matrix is (n_test, n_classes); fail
# loudly instead of letting NumPy broadcast a mismatched shape.
expected_shape = (len(y_test), n_classes)
for model_name, probs in (("SVM", prob_svm), ("XGBoost", prob_xgb), ("ESNN", prob_snn)):
    if tuple(np.shape(probs)) != expected_shape:
        raise ValueError(
            f"{model_name} returned probabilities of shape {tuple(np.shape(probs))}, "
            f"expected {expected_shape}"
        )

# --- Individual accuracies ---
# argmax yields a COLUMN INDEX, not a label, so map it back through the sorted
# label array. For labels 0..k-1 this is the identity.
# NOTE(review): assumes ESNN orders its output columns by sorted label, as
# scikit-learn estimators do - confirm in ESNN.py.
pred_svm = class_labels[np.argmax(prob_svm, axis=1)]
pred_xgb = class_labels[np.argmax(prob_xgb, axis=1)]
pred_snn = class_labels[np.argmax(prob_snn, axis=1)]

acc_svm = accuracy_score(y_test, pred_svm)
acc_xgb = accuracy_score(y_test, pred_xgb)
acc_snn = accuracy_score(y_test, pred_snn)

print(f"\nIndividual Model Performance:")
print(f"SVM Accuracy:      {acc_svm:.4f}")
print(f"XGBoost Accuracy:  {acc_xgb:.4f}")
print(f"ESNN Accuracy:     {acc_snn:.4f}")

# Soft voting: unweighted mean of the three probability matrices
# P_final = (P_XGB + P_SNN + P_SVM) / 3
avg_probs = (prob_svm + prob_xgb + prob_snn) / 3
y_pred_ensemble = class_labels[np.argmax(avg_probs, axis=1)]

# --- 5. Final Evaluation ---
accuracy = accuracy_score(y_test, y_pred_ensemble)
print(f"--------------------------------------------------")
print(f"Final Ensemble Accuracy: {accuracy:.4f}")
print(f"--------------------------------------------------")

Training SVM...
Running ASHO Feature Selection for XGBoost...
ASHO Iteration 1/5 - Best Fitness: 0.9958
ASHO Iteration 2/5 - Best Fitness: 0.9958
ASHO Iteration 3/5 - Best Fitness: 0.9958
ASHO Iteration 4/5 - Best Fitness: 0.9958
ASHO Iteration 5/5 - Best Fitness: 0.9958
ASHO selected 8 features out of 13.
Training Evolutionary SNN...
Initialized ESNN Population: 10 agents, 1156 parameters each.
ESNN Gen 1/10 - Best Acc: 0.4622
ESNN Gen 2/10 - Best Acc: 0.4692
ESNN Gen 3/10 - Best Acc: 0.4715
ESNN Gen 4/10 - Best Acc: 0.4793
ESNN Gen 5/10 - Best Acc: 0.4793
ESNN Gen 6/10 - Best Acc: 0.4793
ESNN Gen 7/10 - Best Acc: 0.4793
ESNN Gen 8/10 - Best Acc: 0.4793
ESNN Gen 9/10 - Best Acc: 0.4793
ESNN Gen 10/10 - Best Acc: 0.4793
Calculating Ensemble Predictions...

Individual Model Performance:
SVM Accuracy:      0.9500
XGBoost Accuracy:  0.9960 (using 8 features)
ESNN Accuracy:     0.4680
--------------------------------------------------
Final Ensemble Accuracy: 0.9970
-----------------------

In [8]:
from sklearn.linear_model import LogisticRegression

# --- 1. Build Level-1 (meta) features for Stacking ---
# Each fitted base model contributes its class-probability matrix; the
# matrices are placed side by side and become the meta-learner's inputs.

print("Generating meta-features for Stacking...")

# A. Training-side meta-features.
# CAVEAT: these are in-sample predictions from models already fitted on this
# same data, so the meta-learner sees optimistic inputs. Out-of-fold
# (cross-validated) predictions would be the rigorous alternative.
prob_train_svm = svm_model.predict_proba(X_train_scaled)   # full feature set
prob_train_xgb = xgb_final.predict_proba(X_train_asho)     # ASHO-selected subset
prob_train_snn = esnn_model.predict_proba(X_train_scaled)  # full feature set

# Resulting width is 3 models x (probability columns per model).
X_train_stack = np.concatenate(
    (prob_train_svm, prob_train_xgb, prob_train_snn), axis=1
)


# B. Test-side meta-features, built the same way and in the same column order.
prob_test_svm = svm_model.predict_proba(X_test_scaled)
prob_test_xgb = xgb_final.predict_proba(X_test_asho)
prob_test_snn = esnn_model.predict_proba(X_test_scaled)

X_test_stack = np.concatenate(
    (prob_test_svm, prob_test_xgb, prob_test_snn), axis=1
)


# --- 2. Train Meta-Learner (Logistic Regression) ---
print("Training Meta-Learner (Logistic Regression)...")
meta_learner = LogisticRegression(random_state=42)
meta_learner.fit(X_train_stack, y_train)


# --- 3. Evaluate Stacking Performance ---
y_pred_stacking = meta_learner.predict(X_test_stack)
stacking_accuracy = accuracy_score(y_test, y_pred_stacking)


# --- 4. Compare Results (Scenario 2 Analysis) ---
# `accuracy` is the soft-voting score computed by the ensemble cell, which
# must have been executed before this one.

print("SCENARIO 2 RESULTS: Soft Voting vs Stacking")
print(f"Soft Voting Accuracy: {accuracy:.4f}") # 'accuracy' from previous cell
print(f"Stacking Accuracy:    {stacking_accuracy:.4f}")


Generating meta-features for Stacking...
Training Meta-Learner (Logistic Regression)...
SCENARIO 2 RESULTS: Soft Voting vs Stacking
Soft Voting Accuracy: 0.9970
Stacking Accuracy:    0.9970


In [9]:
from sklearn.decomposition import FastICA
from sklearn.metrics import mean_squared_error
import math

# One XGBoost estimator with fixed seed/hyperparameters, re-fitted for each
# case so that only the feature representation differs between cases.
xgb_fixed = XGBClassifier(eval_metric='mlogloss', random_state=42)


def _fit_and_score(model, X_tr, X_te):
    """Fit `model` on (X_tr, y_train) and score it on (X_te, y_test).

    Returns a tuple (predictions, accuracy, rmse, n_columns) where n_columns
    is the width of X_tr.
    """
    model.fit(X_tr, y_train)
    preds = model.predict(X_te)
    acc = accuracy_score(y_test, preds)
    # RMSE over class labels treats the labels as ordinal distances.
    # NOTE(review): only meaningful if the label order is meaningful - confirm.
    rmse = math.sqrt(mean_squared_error(y_test, preds))
    return preds, acc, rmse, X_tr.shape[1]


print("="*60)
print("SCENARIO 3: Analysis of Feature Engineering Impact")
print("="*60)

# ---------------------------------------------------------
# Case A: Baseline (All Original Features)
# ---------------------------------------------------------
# The feature count is read from the data rather than hard-coded, so the banner
# always agrees with the "Features:" value printed below it.
print(f"\n[Case A] Training Baseline (All {X_train_scaled.shape[1]} Features)...")

pred_a, acc_a, rmse_a, dim_a = _fit_and_score(xgb_fixed, X_train_scaled, X_test_scaled)

print(f"   -> Accuracy: {acc_a:.4f} | RMSE: {rmse_a:.4f} | Features: {dim_a}")


# ---------------------------------------------------------
# Case B: Feature Extraction (ICA)
# ---------------------------------------------------------
print("\n[Case B] Training with ICA (Feature Extraction)...")

# n_components=10 is a representative reduction; ICA is fitted on the training
# split only and then applied to the test split.
ica = FastICA(n_components=10, random_state=42, whiten='unit-variance')
X_train_ica = ica.fit_transform(X_train_scaled)
X_test_ica = ica.transform(X_test_scaled)

pred_b, acc_b, rmse_b, dim_b = _fit_and_score(xgb_fixed, X_train_ica, X_test_ica)

print(f"   -> Accuracy: {acc_b:.4f} | RMSE: {rmse_b:.4f} | Components: {dim_b}")


# ---------------------------------------------------------
# Case C: Feature Selection (ASHO)
# ---------------------------------------------------------
print("\n[Case C] Training with ASHO (Feature Selection)...")

asho_scenario3 = ASHOFeatureSelector(
    classifier=XGBClassifier(eval_metric='mlogloss', random_state=42),
    n_agents=10,
    max_iter=10
)

# Run the optimisation on the training split, then subset both splits
asho_scenario3.fit(X_train_scaled, y_train)
X_train_asho_s3 = asho_scenario3.transform(X_train_scaled)
X_test_asho_s3 = asho_scenario3.transform(X_test_scaled)

pred_c, acc_c, rmse_c, dim_c = _fit_and_score(xgb_fixed, X_train_asho_s3, X_test_asho_s3)

print(f"   -> Accuracy: {acc_c:.4f} | RMSE: {rmse_c:.4f} | Selected Features: {dim_c}")


# ---------------------------------------------------------
# Final Comparison Summary
# ---------------------------------------------------------
print("\n" + "="*60)
print(f"{'Method':<20} | {'Dimensions':<10} | {'Accuracy':<10} | {'RMSE':<10}")
print("-" * 60)
summary_rows = [
    ('Baseline (All)', dim_a, acc_a, rmse_a),
    ('Extraction (ICA)', dim_b, acc_b, rmse_b),
    ('Selection (ASHO)', dim_c, acc_c, rmse_c),
]
for method, dim, acc, rmse in summary_rows:
    print(f"{method:<20} | {dim:<10} | {acc:.4f}     | {rmse:.4f}")
print("="*60)

# Name the method with the strictly highest accuracy; report ties explicitly
# instead of silently attributing them to one method.
conclusions = {
    'Selection (ASHO)': "Conclusion: ASHO (Selection) provided the best performance.",
    'Extraction (ICA)': "Conclusion: ICA (Extraction) provided the best performance.",
    'Baseline (All)': "Conclusion: Baseline (All Features) performed best.",
}
best_acc = max(acc for _, _, acc, _ in summary_rows)
winners = [method for method, _, acc, _ in summary_rows if acc == best_acc]

if len(winners) == 1:
    print(conclusions[winners[0]])
else:
    print(f"Conclusion: {' and '.join(winners)} tied for the best accuracy ({best_acc:.4f}).")

SCENARIO 3: Analysis of Feature Engineering Impact

[Case A] Training Baseline (All 16 Features)...
   -> Accuracy: 0.9950 | RMSE: 0.0707 | Features: 13

[Case B] Training with ICA (Feature Extraction)...
   -> Accuracy: 0.7460 | RMSE: 0.5523 | Components: 10

[Case C] Training with ASHO (Feature Selection)...
ASHO Iteration 1/10 - Best Fitness: 0.9958
ASHO Iteration 2/10 - Best Fitness: 0.9958
ASHO Iteration 3/10 - Best Fitness: 0.9958
ASHO Iteration 4/10 - Best Fitness: 0.9958
ASHO Iteration 5/10 - Best Fitness: 0.9958
ASHO Iteration 6/10 - Best Fitness: 0.9958
ASHO Iteration 7/10 - Best Fitness: 0.9958
ASHO Iteration 8/10 - Best Fitness: 0.9958
ASHO Iteration 9/10 - Best Fitness: 0.9958
ASHO Iteration 10/10 - Best Fitness: 0.9958
   -> Accuracy: 0.9960 | RMSE: 0.0632 | Selected Features: 8

Method               | Dimensions | Accuracy   | RMSE      
------------------------------------------------------------
Baseline (All)       | 13         | 0.9950     | 0.0707
Extraction (ICA)  