In [42]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import VotingClassifier, StackingClassifier
from imblearn.over_sampling import SMOTE
import warnings
from mealpy.swarm_based.GWO import OriginalGWO

warnings.filterwarnings('ignore')

# Load and Preprocess Data
data = pd.read_csv('StudentsPerformance_with_headers.csv')
label_encoder = LabelEncoder()
for column in data.select_dtypes(include=['object']).columns:
    data[column] = label_encoder.fit_transform(data[column])

# Define target and features
X = data.drop(columns=['GRADE'])
y = data['GRADE'].apply(lambda x: 1 if x > 2 else 0)

# Balance dataset with SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Scale numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Evaluation Function
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return accuracy, precision, recall, f1

# Define objective function for GWO optimization for SVM
def svm_objective_function(solution):
    C, gamma = solution
    model = SVC(C=C, gamma=gamma, kernel='rbf', probability=True)
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    return 1 - np.mean(scores)  # Minimize error (1 - accuracy)
# GWO for SVM
problem_svm = {
    "obj_func": svm_objective_function,
    "lb": [0.1, 0.001],  # Lower bounds for C and gamma
    "ub": [10, 1],       # Upper bounds for C and gamma
    "minmax": "min",
    "verbose": True,
    "epoch": 100,         # Set epoch here
    "pop_size": 20        # Set pop_size here
}

# Now initialize GWO
gwo_svm = OriginalGWO(problem_svm)
best_params_svm, best_accuracy_svm = gwo_svm.solve()

print("Best SVM Parameters:", best_params_svm)
print("Best cross-validated accuracy for SVM:", 1 - best_accuracy_svm)



# Define objective function for GWO optimization for Random Forest
def rf_objective_function(solution):
    n_estimators, max_depth = int(solution[0]), int(solution[1])
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    return 1 - np.mean(scores)  # Minimize error (1 - accuracy)

# GWO for Random Forest
problem_rf = {
    "obj_func": rf_objective_function,
    "lb": [50, 5],      # Lower bounds for n_estimators and max_depth
    "ub": [300, 20],    # Upper bounds for n_estimators and max_depth
    "minmax": "min",
    "verbose": True,
}
gwo_rf = OriginalGWO(problem_rf, epoch=100, pop_size=20)
best_params_rf, best_accuracy_rf = gwo_rf.solve()
print("Best Random Forest Parameters:", best_params_rf)
print("Best cross-validated accuracy for Random Forest:", 1 - best_accuracy_rf)

# Optimized Models
optimized_svm = SVC(C=best_params_svm[0], gamma=best_params_svm[1], kernel='rbf', probability=True)
optimized_svm.fit(X_train, y_train)

optimized_rf = RandomForestClassifier(n_estimators=int(best_params_rf[0]), max_depth=int(best_params_rf[1]), random_state=42)
optimized_rf.fit(X_train, y_train)

# Model Evaluation
svm_metrics = evaluate_model(optimized_svm, X_test, y_test)
rf_metrics = evaluate_model(optimized_rf, X_test, y_test)

# Voting and Stacking Ensembles
voting_ensemble = VotingClassifier(estimators=[
    ('SVM', optimized_svm), 
    ('RandomForest', optimized_rf),
    ('ExtraTrees', ExtraTreesClassifier(n_estimators=100, max_depth=10, random_state=42)),
    ('GradientBoosting', GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42))
], voting='soft')
voting_ensemble.fit(X_train, y_train)

stacking_ensemble = StackingClassifier(
    estimators=[('SVM', optimized_svm), ('RandomForest', optimized_rf)],
    final_estimator=SVC(C=1, gamma=0.1, probability=True, random_state=42)
)
stacking_ensemble.fit(X_train, y_train)

# Model Comparison
model_names = ["Optimized SVM", "Optimized Random Forest", "Voting Ensemble", "Stacking Ensemble"]
model_metrics = [
    svm_metrics, 
    rf_metrics, 
    evaluate_model(voting_ensemble, X_test, y_test), 
    evaluate_model(stacking_ensemble, X_test, y_test)
]

for name, metrics in zip(model_names, model_metrics):
    accuracy, precision, recall, f1 = metrics
    print(f"{name} - Accuracy: {accuracy*100:.2f}%, Precision: {precision*100:.2f}%, Recall: {recall*100:.2f}%, F1 Score: {f1*100:.2f}%")

# Function for collecting user input and making predictions
def user_predict_cgpa(model, scaler):
    print("\nProvide details to predict your CGPA:")
   # Initialize user_data
    user_data = {}
    
    # Define the questions and corresponding keys for user input
    questions = [
        ("Student Age (1: 18-21, 2: 22-25, 3: above 26): ", "Student Age"),
        ("Sex (1: female, 2: male): ", "Sex"),
        ("High School Type (1: private, 2: state, 3: other): ", "High School Type"),
        ("Scholarship (1: None, 2: 25%, 3: 50%, 4: 75%, 5: Full): ", "Scholarship"),
        ("Additional Work (1: Yes, 2: No): ", "Additional Work"),
        ("Artistic/Sports Activity (1: Yes, 2: No): ", "Artistic/Sports Activity"),
        ("Partner (1: Yes, 2: No): ", "Partner"),
        ("Total Salary (1: 135-200, 2: 201-270, 3: 271-340, 4: 341-410, 5: above 410): ", "Total Salary"),
        ("Transportation (1: Bus, 2: Car/Taxi, 3: Bicycle, 4: Other): ", "Transportation"),
        ("Accommodation (1: Rental, 2: Dormitory, 3: Family, 4: Other): ", "Accommodation"),
        ("Mother’s Education (1: primary, 2: secondary, 3: high school, 4: university, 5: MSc, 6: PhD): ", "Mother’s Education"),
        ("Father’s Education (1: primary, 2: secondary, 3: high school, 4: university, 5: MSc, 6: PhD): ", "Father’s Education"),
        ("Siblings (1: 1, 2: 2, 3: 3, 4: 4, 5: 5+): ", "Siblings"),
        ("Parental Status (1: married, 2: divorced, 3: died): ", "Parental Status"),
        ("Mother’s Occupation (1: retired, 2: housewife, 3: gov. officer, 4: private, 5: self-employed, 6: other): ", "Mother’s Occupation"),
        ("Father’s Occupation (1: retired, 2: gov. officer, 3: private, 4: self-employed, 5: other): ", "Father’s Occupation"),
        ("Weekly Study Hours (1: None, 2: <5, 3: 6-10, 4: 11-20, 5: >20): ", "Weekly Study Hours"),
        ("Non-Scientific Reading (1: None, 2: Sometimes, 3: Often): ", "Non-Scientific Reading"),
        ("Scientific Reading (1: None, 2: Sometimes, 3: Often): ", "Scientific Reading"),
        ("Seminar Attendance (1: Yes, 2: No): ", "Seminar Attendance"),
        ("Impact of Projects (1: positive, 2: negative, 3: neutral): ", "Impact of Projects"),
        ("Class Attendance (1: always, 2: sometimes, 3: never): ", "Class Attendance"),
        ("Midterm 1 Prep (1: alone, 2: friends, 3: n/a): ", "Midterm 1 Prep"),
        ("Midterm 2 Prep (1: close to exam, 2: regular, 3: never): ", "Midterm 2 Prep"),
        ("Taking Notes (1: never, 2: sometimes, 3: always): ", "Taking Notes"),
        ("Listening in Class (1: never, 2: sometimes, 3: always): ", "Listening in Class"),
        ("Discussion Improves Success (1: never, 2: sometimes, 3: always): ", "Discussion Improves Success"),
        ("Flip-Classroom (1: not useful, 2: useful, 3: n/a): ", "Flip-Classroom"),
    ]
    for prompt, key in questions:
        while True:
            try:
                user_data[key] = int(input(prompt))
                break
            except ValueError:
                print("Invalid input. Please enter a valid number.")
    user_input_df = pd.DataFrame([user_data])
    missing_cols = [col for col in X.columns if col not in user_input_df.columns]
    for col in missing_cols:
        user_input_df[col] = 0
    user_input_scaled = scaler.transform(user_input_df)
    prediction = model.predict(user_input_scaled)
    print("\nPrediction:", "Pass" if prediction[0] == 1 else "Fail")

# Use voting ensemble for predictions
user_predict_cgpa(voting_ensemble, scaler)


ValueError: 'epoch' is an integer and value should be in range: [1, 100000].

['GWO_WOA', 'IGWO', 'Optimizer', 'OriginalGWO', 'RW_GWO', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'np']
