# IMPORT MODELING LIBRARIES

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# LOAD DATASET

In [2]:
# Read the feature table and the target table from CSV files; the relative
# paths are resolved against the current working directory.
features_csv, target_csv = "X_preprocessing_ready.csv", "y_target.csv"
X, y = pd.read_csv(features_csv), pd.read_csv(target_csv)

In [3]:
# Flatten the target into a 1-D array. np.asarray accepts both the DataFrame
# loaded above and an already-flattened ndarray, so re-running this cell is
# harmless (the previous `y.values` lookup failed on a second run because an
# ndarray has no `.values` attribute).
y = np.asarray(y).ravel()

# ENCODING TARGET

In [4]:
# Encode the class labels as integers. LabelEncoder is already imported in the
# first cell, so it is not imported again here.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Show which integer each original label received: a label's position in
# le.classes_ is its encoded value.
print("Mapping label:")
for i, label in enumerate(le.classes_):
    print(f"{label} → {i}")


Mapping label:
Approved → 0
Rejected → 1


In [5]:
# Sanity check: display the distinct values present in the encoded target.
np.unique(y_encoded)

array([0, 1])

# Modelling 

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

## Split Data

In [7]:
# Hold out 20% of the rows for testing. Stratifying on the encoded target keeps
# the class proportions approximately equal in both partitions, and the fixed
# seed makes the split repeatable. train_test_split is already imported in the
# first cell, so it is not imported again here.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,   # the ALREADY-encoded target
    test_size=0.2,
    random_state=42,
    stratify=y_encoded
)

In [8]:
# Report the dimensions of each partition: features first, then targets.
for split_part in (X_train, X_test, y_train, y_test):
    print(split_part.shape)

(3415, 11)
(854, 11)
(3415,)
(854,)


### LOGISTIC REGRESSION

In [9]:
# Fit logistic regression (iteration cap raised to 1000, fixed seed) on the
# training partition. fit() returns the estimator itself, so the call chains.
lr = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# Predicted class codes for the held-out rows.
y_pred_lr = lr.predict(X_test)

In [10]:
# NOTE: this cell used to contain an exact copy of the Random Forest training
# code from the "Random Forest" section further down. It was removed so the
# forest is fitted only once; `rf` and `y_pred_rf` are defined in that section
# before their first use.

In [11]:
# Per-class precision, recall and F1 for the logistic-regression predictions.
lr_report = classification_report(
    y_test,
    y_pred_lr,
    target_names=['Approved', 'Rejected'],
)
print(lr_report)

              precision    recall  f1-score   support

    Approved       0.93      0.95      0.94       531
    Rejected       0.92      0.88      0.90       323

    accuracy                           0.92       854
   macro avg       0.92      0.91      0.92       854
weighted avg       0.92      0.92      0.92       854



### Random Forest

In [12]:
# Random forest of 200 trees with no depth cap and a fixed seed, fitted on the
# training partition. fit() returns the estimator itself, so the call chains.
rf = RandomForestClassifier(
    n_estimators=200, max_depth=None, random_state=42
).fit(X_train, y_train)

# Predicted class codes for the held-out rows.
y_pred_rf = rf.predict(X_test)

In [13]:
# Per-class precision, recall and F1 for the random-forest predictions.
rf_report = classification_report(
    y_test,
    y_pred_rf,
    target_names=['Approved', 'Rejected'],
)
print(rf_report)

              precision    recall  f1-score   support

    Approved       0.98      0.99      0.99       531
    Rejected       0.99      0.97      0.98       323

    accuracy                           0.98       854
   macro avg       0.99      0.98      0.98       854
weighted avg       0.98      0.98      0.98       854



### XGBoost

In [14]:
# Hyperparameters for the gradient-boosted model, collected in one place so
# they can be read (and tuned) at a glance.
xgb_params = dict(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric='logloss',
    random_state=42,
)

# Fit on the training partition, then predict class codes for the held-out rows.
xgb = XGBClassifier(**xgb_params)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

In [15]:
# Per-class precision, recall and F1 for the XGBoost predictions.
xgb_report = classification_report(
    y_test,
    y_pred_xgb,
    target_names=['Approved', 'Rejected'],
)
print(xgb_report)

              precision    recall  f1-score   support

    Approved       0.98      0.99      0.98       531
    Rejected       0.98      0.96      0.97       323

    accuracy                           0.98       854
   macro avg       0.98      0.98      0.98       854
weighted avg       0.98      0.98      0.98       854



### EVALUASI KETIGA MODEL 

In [16]:
# Test-set accuracy of the three models, printed in the order they were trained.
for heading, predictions in (
    ("Logistic Regression Accuracy :", y_pred_lr),
    ("Random Forest Accuracy       :", y_pred_rf),
    ("XGBoost Accuracy             :", y_pred_xgb),
):
    print(heading, accuracy_score(y_test, predictions))

Logistic Regression Accuracy : 0.9227166276346604
Random Forest Accuracy       : 0.9836065573770492
XGBoost Accuracy             : 0.9789227166276346


In [17]:
import joblib

# Save all three fitted models, each to its own pickle file in the working directory.
model_files = (
    (lr, 'logistic_regression_model.pkl'),
    (rf, 'random_forest_model.pkl'),
    (xgb, 'xgboost_model.pkl'),
)
for fitted_model, target_file in model_files:
    joblib.dump(fitted_model, target_file)

# Confirmation messages; they are reached only if none of the dumps raised.
for message in (
    "✓ Model Logistic Regression tersimpan sebagai: logistic_regression_model.pkl",
    "✓ Model Random Forest tersimpan sebagai: random_forest_model.pkl",
    "✓ Model XGBoost tersimpan sebagai: xgboost_model.pkl",
):
    print(message)


✓ Model Logistic Regression tersimpan sebagai: logistic_regression_model.pkl
✓ Model Random Forest tersimpan sebagai: random_forest_model.pkl
✓ Model XGBoost tersimpan sebagai: xgboost_model.pkl
