# Pinjaman Online Model Training
This notebook trains models to predict:
- Kemampuan Bayar (Classification)
- Total Pinjaman Disetujui (Regression)
- Lama Pinjaman (Regression)

##### LOAD DATASET

In [2]:
import pandas as pd

# Read the dataset CSV into a DataFrame and preview its first rows.
DATASET_PATH = "pinjaman_online_dataset.csv"
df = pd.read_csv(DATASET_PATH)
df.head()


Unnamed: 0,name,age,salary,tgl_pinjam,tgl_gajian,pengajuan_pinjaman,kemampuan_bayar,total_pinjaman_disetujui,lama_pinjaman
0,Intan,32,10366097,2025-01-11,2025-01-19,7040178,0,3520089,9
1,Dedi,30,12361781,2025-01-24,2025-01-16,2986055,1,2986055,12
2,Joko,54,12563752,2025-02-18,2025-01-07,8004943,0,4002471,6
3,Joko,27,10548153,2025-01-08,2025-01-09,9053767,0,4526883,6
4,Citra,46,13270028,2025-02-16,2025-01-10,2199889,1,2199889,3


##### CLEANING & FEATURE ENGINEERING

In [3]:
# Parse the two date columns once; they are only needed to derive a feature
# and are dropped from the frame afterwards.
loan_date = pd.to_datetime(df["tgl_pinjam"])
payday_date = pd.to_datetime(df["tgl_gajian"])

df = (
    df.assign(
        # Signed day count from tgl_gajian to tgl_pinjam; negative when the
        # loan date is earlier than the payday date.
        days_since_gajian=(loan_date - payday_date).dt.days,
        # Requested loan amount expressed as a fraction of salary.
        loan_to_salary_ratio=df["pengajuan_pinjaman"] / df["salary"],
    )
    # Remove the name and the raw date columns, which are not used as features.
    .drop(columns=["name", "tgl_pinjam", "tgl_gajian"])
)
df.head()

Unnamed: 0,age,salary,pengajuan_pinjaman,kemampuan_bayar,total_pinjaman_disetujui,lama_pinjaman,days_since_gajian,loan_to_salary_ratio
0,32,10366097,7040178,0,3520089,9,-8,0.679154
1,30,12361781,2986055,1,2986055,12,8,0.241555
2,54,12563752,8004943,0,4002471,6,42,0.637146
3,27,10548153,9053767,0,4526883,6,-1,0.858327
4,46,13270028,2199889,1,2199889,3,37,0.165779


##### TRAIN/TEST SPLIT

In [9]:
from sklearn.model_selection import train_test_split

# Features are every column except the three prediction targets.
X = df.drop(columns=["kemampuan_bayar", "total_pinjaman_disetujui", "lama_pinjaman"])
y_class = df["kemampuan_bayar"]            # classification target
y_reg_total = df["total_pinjaman_disetujui"]  # regression target
y_reg_lama = df["lama_pinjaman"]           # regression target

# Split the features and all three targets in a single call. train_test_split
# applies one shuffle to every array it is given, so the train/test rows of
# X and each target are aligned by construction instead of relying on three
# separate calls sharing the same random_state and test_size.
(
    X_train, X_test,
    y_train_class, y_test_class,
    y_train_total, y_test_total,
    y_train_lama, y_test_lama,
) = train_test_split(
    X, y_class, y_reg_total, y_reg_lama,
    test_size=0.2,     # hold out 20% of the rows for evaluation
    random_state=42,   # fixed seed so the partition is reproducible
)

##### Classification Model (KEMAMPUAN BAYAR)

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Fit a random-forest classifier for kemampuan_bayar and print per-class
# precision / recall / F1 on the held-out split.
# random_state pins the forest's internal randomness (same seed value as the
# train/test split) so a fresh Restart & Run All reproduces the same model
# and the same report.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train_class)
y_pred_class = clf.predict(X_test)
print(classification_report(y_test_class, y_pred_class))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00        13

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



##### Regression Model (TOTAL PINJAMAN)


In [5]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Fit a random-forest regressor for total_pinjaman_disetujui and report the
# mean squared error on the held-out split (units are the target's squared).
# random_state pins the forest's internal randomness so the reported MSE is
# reproducible across kernel restarts.
reg_total = RandomForestRegressor(random_state=42)
reg_total.fit(X_train, y_train_total)
y_pred_total = reg_total.predict(X_test)
print("MSE Total Pinjaman:", mean_squared_error(y_test_total, y_pred_total))


MSE Total Pinjaman: 12764476793.42207


##### Regression Model (LAMA PINJAMAN)

In [6]:
# Fit a random-forest regressor for lama_pinjaman and report the mean squared
# error on the held-out split.
# random_state pins the forest's internal randomness so the reported MSE is
# reproducible across kernel restarts.
reg_lama = RandomForestRegressor(random_state=42)
reg_lama.fit(X_train, y_train_lama)
y_pred_lama = reg_lama.predict(X_test)
print("MSE Lama Pinjaman:", mean_squared_error(y_test_lama, y_pred_lama))


MSE Lama Pinjaman: 12.069225


##### Save Models

In [10]:
import joblib

# Persist each fitted estimator to its own pickle file, in a fixed order.
model_artifacts = {
    "model_kemampuan_bayar.pkl": clf,
    "model_total_disetujui.pkl": reg_total,
    "model_lama_pinjaman.pkl": reg_lama,
}
for artifact_path, fitted_model in model_artifacts.items():
    # joblib.dump returns the list of filenames it wrote.
    written_files = joblib.dump(fitted_model, artifact_path)

# Show the filenames from the final dump as the cell's output.
written_files


['model_lama_pinjaman.pkl']