In [5]:
!pip install xgboost scikit-learn imbalanced-learn



In [6]:
# Colab only: attach Google Drive so the files referenced under
# /content/drive/MyDrive by this notebook can be read and written.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
import warnings

import joblib
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
warnings.filterwarnings("ignore")

# ------------------------------
# 1. Load Dataset
# ------------------------------
# The CSV is semicolon-delimited and is read from the mounted Drive folder.
# NOTE(review): the printed header shows the column
# 'Daytime/evening attendance\t' with a trailing tab — consider normalising
# column names upstream; they are left untouched here.
path = "/content/drive/MyDrive/data.csv"   # point this at your own copy if needed
df = pd.read_csv(path, delimiter=';')

# Quick sanity check: dimensions first, then the leading rows.
n_rows_cols = df.shape
print("Dataset Shape:", n_rows_cols)
print(df.head(5))

# ------------------------------
# 2. Features and Target
# ------------------------------
# Everything except the "Target" column is used as model input.
target_column = df["Target"]
X = df.drop(columns=["Target"])

# Convert the target labels to integer codes; `le` keeps the fitted mapping
# so codes can be translated back to the original labels later.
le = LabelEncoder()
y = le.fit(target_column).transform(target_column)

# ------------------------------
# 3. Train-Test Split
# ------------------------------
# Hold out 20% of the rows, stratified on the encoded target so both parts
# keep the same class proportions; the fixed seed makes the split repeatable.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# ==============================
# 4. Handle Class Imbalance (inside cross-validation)
# ==============================
# SMOTE is deliberately NOT applied to X_train / y_train up front.
# Oversampling before the search lets synthetic points interpolated from a
# validation fold's rows end up in the matching training folds, which
# inflates the cross-validated scores used to pick hyperparameters.
# Placing SMOTE in an imbalanced-learn pipeline makes it run on the training
# part of each fold only; validation folds and X_test are scored untouched.
# X_train / y_train therefore keep the original (un-resampled) training rows.
smote = SMOTE(random_state=42)

# ==============================
# 5. Define XGBoost Model
# ==============================
# NOTE(review): `use_label_encoder` is a legacy flag in recent XGBoost
# releases — confirm whether the installed version still needs it.
model = XGBClassifier(
    objective='multi:softmax',
    eval_metric='mlogloss',
    use_label_encoder=False,
    random_state=42
)

# Resample first, then fit the classifier. At predict time the sampler step
# is skipped, so only the classifier is applied to new data.
resampling_pipeline = ImbPipeline(steps=[("smote", smote), ("model", model)])

# ==============================
# 6. Hyperparameter Search Space
# ==============================
# Keys are plain XGBClassifier parameter names; they are prefixed with the
# pipeline step name only when handed to the search below.
param_dist = {
    "n_estimators": [200, 300, 400, 500],
    "max_depth": [3, 4, 5, 6, 7, 8],
    "learning_rate": [0.01, 0.05, 0.1, 0.2],
    "subsample": [0.7, 0.8, 0.9, 1.0],
    "colsample_bytree": [0.7, 0.8, 0.9, 1.0],
    "gamma": [0, 0.1, 0.2, 0.3],
    "min_child_weight": [1, 3, 5]
}

# ==============================
# 7. Randomized Search
# ==============================
# 30 sampled configurations x 5 folds, scored by accuracy. With the default
# refit, the best pipeline is refitted on the whole training split (SMOTE
# applied once to all training rows) after the search.
random_search = RandomizedSearchCV(
    resampling_pipeline,
    param_distributions={
        f"model__{name}": values for name, values in param_dist.items()
    },
    n_iter=30,
    scoring='accuracy',
    cv=5,
    verbose=1,
    random_state=42,
    n_jobs=-1
)

random_search.fit(X_train, y_train)

print("Best Parameters Found:")
# Strip the "model__" step prefix so the printout shows plain XGBoost names.
print({
    name.split("__", 1)[1]: value
    for name, value in random_search.best_params_.items()
})

# Expose the fitted classifier itself (not the pipeline) so downstream
# evaluation and saving keep working with a plain XGBClassifier.
best_model = random_search.best_estimator_.named_steps["model"]

# ==============================
# 8. Evaluation
# ==============================
# Score the tuned model on the held-out split.
y_pred = best_model.predict(X_test)

# Report against the full list of encoded classes and show the original
# label text, so rows/columns are readable and stay aligned even if a class
# happens to be missing from y_test or y_pred.
class_ids = np.arange(len(le.classes_))
class_names = [str(label) for label in le.classes_]

print("\nTest Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n")
print(classification_report(
    y_test, y_pred, labels=class_ids, target_names=class_names
))
print("\nConfusion Matrix:\n")
# Rows are true classes and columns are predicted classes, in class_ids order.
print(confusion_matrix(y_test, y_pred, labels=class_ids))

# ==============================
# 9. Save Model
# ==============================
joblib.dump(best_model, "/content/drive/MyDrive/student_xgboost_model.pkl")
# The model predicts encoded integers; persist the fitted LabelEncoder next
# to it so a reloaded model's output can be decoded with
# le.inverse_transform(...).
joblib.dump(le, "/content/drive/MyDrive/student_label_encoder.pkl")
print("\nModel Saved Successfully!")

Dataset Shape: (4424, 37)
   Marital status  Application mode  Application order  Course  \
0               1                17                  5     171   
1               1                15                  1    9254   
2               1                 1                  5    9070   
3               1                17                  2    9773   
4               2                39                  1    8014   

   Daytime/evening attendance\t  Previous qualification  \
0                             1                       1   
1                             1                       1   
2                             1                       1   
3                             1                       1   
4                             0                       1   

   Previous qualification (grade)  Nacionality  Mother's qualification  \
0                           122.0            1                      19   
1                           160.0            1                       1   
