# IMPORT DATA DAN LIBRARY

In [43]:
import numpy as np
import pandas as pd

# split & CV
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score

# base & transformers
from sklearn.base import BaseEstimator, TransformerMixin, clone
from sklearn.compose import ColumnTransformer, make_column_selector as selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# imbalanced
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.combine import SMOTETomek

# model & metrics
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.tree import DecisionTreeClassifier

In [10]:
# Load the modelling dataset; the path is relative to the notebook's working
# directory. Presumably this CSV is already cleaned upstream (judging by its
# name) — verify against the preprocessing notebook.
df = pd.read_csv(filepath_or_buffer="DataFrame_processed/DataFrame_processed.csv")

In [13]:
# Target vector is the "Attrition" column; every other column is a feature.
y = df["Attrition"]
X = df.drop(columns="Attrition")

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# PIPELINE FULL

In [45]:
# Column-wise preprocessing: standardise numeric columns and one-hot encode all
# non-numeric ones. Categories unseen during fit are ignored at transform time,
# and any column matched by neither selector is dropped.
prep = ColumnTransformer(
    [
        (
            "num",
            StandardScaler(),
            selector(dtype_include=np.number),
        ),
        (
            "cat",
            OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
            selector(dtype_exclude=np.number),
        ),
    ],
    remainder="drop",
)

# Full modelling pipeline: preprocess -> resample with SMOTETomek -> classify.
# The "clf" step can be extended or swapped for another model.
pipe = ImbPipeline(
    [
        ("prep", prep),
        ("smote", SMOTETomek(random_state=42)),
        ("clf", RandomForestClassifier(n_estimators=400, n_jobs=-1, random_state=42)),
    ]
)

> ## Untuk output score

In [47]:
# Evaluation template — feel free to copy-paste this cell for other pipelines.
# Fit on the training split, then score the held-out test split.
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
y_proba = pipe.predict_proba(X_test)[:, 1]  # second column of predict_proba

print("\n=== TEST REPORT ===")
print(classification_report(y_test, y_pred, digits=4))
# Built-in round() works whether roc_auc_score returns a Python float or a
# NumPy scalar; the `.round()` method exists only on the latter.
print("Test ROC-AUC:", round(roc_auc_score(y_test, y_proba), 4))


=== TEST REPORT ===
              precision    recall  f1-score   support

           0     0.8856    0.9717    0.9266       247
           1     0.6957    0.3404    0.4571        47

    accuracy                         0.8707       294
   macro avg     0.7906    0.6560    0.6919       294
weighted avg     0.8552    0.8707    0.8516       294

Test ROC-AUC: 0.8154


---

---

# Model 1 - Logistic Regression

In [64]:
# Logistic regression with library-default hyperparameters (only the seed is set).
logreg = LogisticRegression(random_state=42)

# clone() gives this pipeline its own unfitted copy of the preprocessor.
# A pipeline fits its steps in place, so passing `prep` itself would let this
# fit overwrite the fitted state of every other pipeline holding that instance.
pipe_log_reg = ImbPipeline(steps=[
    ("prep", clone(prep)),
    ("smote", SMOTETomek(random_state=42)),
    ("clf", logreg),
])

In [74]:
# Fit the logistic-regression pipeline on the training split, then score the
# held-out test split.
pipe_log_reg.fit(X_train, y_train)
y_pred = pipe_log_reg.predict(X_test)
y_proba = pipe_log_reg.predict_proba(X_test)[:, 1]  # second column of predict_proba

print("\n=== TEST REPORT ===")
print(classification_report(y_test, y_pred, digits=4))
# Built-in round() works whether roc_auc_score returns a Python float or a
# NumPy scalar; the `.round()` method exists only on the latter.
print("Test ROC-AUC:", round(roc_auc_score(y_test, y_proba), 4))


=== TEST REPORT ===
              precision    recall  f1-score   support

           0     0.9317    0.7733    0.8451       247
           1     0.3708    0.7021    0.4853        47

    accuracy                         0.7619       294
   macro avg     0.6512    0.7377    0.6652       294
weighted avg     0.8420    0.7619    0.7876       294

Test ROC-AUC: 0.7941


> ## HyperParameter Tuning

# Model 2 - Decision Tree Classifier

In [66]:
# Single decision tree with library-default hyperparameters (only the seed is set).
dec_tree_clf = DecisionTreeClassifier(random_state=42)

# clone() gives this pipeline its own unfitted copy of the preprocessor.
# A pipeline fits its steps in place, so passing `prep` itself would let this
# fit overwrite the fitted state of every other pipeline holding that instance.
pipe_tree = ImbPipeline(steps=[
    ("prep", clone(prep)),
    ("smote", SMOTETomek(random_state=42)),
    ("clf", dec_tree_clf),
])

In [76]:
# Fit the decision-tree pipeline on the training split, then score the
# held-out test split.
pipe_tree.fit(X_train, y_train)
y_pred = pipe_tree.predict(X_test)
y_proba = pipe_tree.predict_proba(X_test)[:, 1]  # second column of predict_proba

print("\n=== TEST REPORT ===")
print(classification_report(y_test, y_pred, digits=4))
# Built-in round() works whether roc_auc_score returns a Python float or a
# NumPy scalar; the `.round()` method exists only on the latter.
print("Test ROC-AUC:", round(roc_auc_score(y_test, y_proba), 4))


=== TEST REPORT ===
              precision    recall  f1-score   support

           0     0.8577    0.8300    0.8436       247
           1     0.2364    0.2766    0.2549        47

    accuracy                         0.7415       294
   macro avg     0.5471    0.5533    0.5493       294
weighted avg     0.7584    0.7415    0.7495       294

Test ROC-AUC: 0.5533


> ## HyperParameter Tuning

# Model 3 - Bagging Classifier

In [70]:
# Bagging ensemble with library-default hyperparameters (only the seed is set).
bagging_base = BaggingClassifier(random_state=42)

# clone() gives this pipeline its own unfitted copy of the preprocessor.
# A pipeline fits its steps in place, so passing `prep` itself would let this
# fit overwrite the fitted state of every other pipeline holding that instance.
pipe_bagging = ImbPipeline(steps=[
    ("prep", clone(prep)),
    ("smote", SMOTETomek(random_state=42)),
    ("clf", bagging_base),
])

In [78]:
# Fit the bagging pipeline on the training split, then score the held-out
# test split.
pipe_bagging.fit(X_train, y_train)
y_pred = pipe_bagging.predict(X_test)
y_proba = pipe_bagging.predict_proba(X_test)[:, 1]  # second column of predict_proba

print("\n=== TEST REPORT ===")
print(classification_report(y_test, y_pred, digits=4))
# Built-in round() works whether roc_auc_score returns a Python float or a
# NumPy scalar; the `.round()` method exists only on the latter.
print("Test ROC-AUC:", round(roc_auc_score(y_test, y_proba), 4))


=== TEST REPORT ===
              precision    recall  f1-score   support

           0     0.8889    0.9393    0.9134       247
           1     0.5455    0.3830    0.4500        47

    accuracy                         0.8503       294
   macro avg     0.7172    0.6611    0.6817       294
weighted avg     0.8340    0.8503    0.8393       294

Test ROC-AUC: 0.7865


> ## HyperParameter Tuning

# Model 4 - Ada Boost Classifier 

In [86]:
# Base learner for boosting: a decision tree with no depth limit set.
# NOTE(review): a fully grown tree may fit the resampled training data
# perfectly, in which case AdaBoost stops after the first round and behaves
# like a single tree — consider limiting max_depth when tuning. TODO confirm.
best_estimator = DecisionTreeClassifier(random_state=42)
boost_model = AdaBoostClassifier(
    estimator=best_estimator,
    algorithm="SAMME",
    random_state=42,
)

# clone() gives this pipeline its own unfitted copy of the preprocessor.
# A pipeline fits its steps in place, so passing `prep` itself would let this
# fit overwrite the fitted state of every other pipeline holding that instance.
pipe_boost = ImbPipeline(steps=[
    ("prep", clone(prep)),
    ("smote", SMOTETomek(random_state=42)),
    ("clf", boost_model),
])

In [88]:
# Fit the AdaBoost pipeline on the training split, then score the held-out
# test split.
pipe_boost.fit(X_train, y_train)
y_pred = pipe_boost.predict(X_test)
y_proba = pipe_boost.predict_proba(X_test)[:, 1]  # second column of predict_proba

print("\n=== TEST REPORT ===")
print(classification_report(y_test, y_pred, digits=4))
# Built-in round() works whether roc_auc_score returns a Python float or a
# NumPy scalar; the `.round()` method exists only on the latter.
print("Test ROC-AUC:", round(roc_auc_score(y_test, y_proba), 4))


=== TEST REPORT ===
              precision    recall  f1-score   support

           0     0.8689    0.8583    0.8635       247
           1     0.3000    0.3191    0.3093        47

    accuracy                         0.7721       294
   macro avg     0.5844    0.5887    0.5864       294
weighted avg     0.7779    0.7721    0.7749       294

Test ROC-AUC: 0.5887


> ## HyperParameter Tuning

# Model 5 - Ensemble Stacking

> ## HyperParameter Tuning