# 📊 Credit Scoring Model Training (GrameenLoan)

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib
import os


### ✅ Step 1: Generate synthetic training data

In [None]:
# Seed NumPy's global generator so the synthetic dataset below is repeatable.
np.random.seed(42)
n_samples = 5000

# Draw each applicant feature in a fixed order (the order of draws determines
# the values produced under the seed); continuous features are bounded.
income = np.clip(np.random.normal(loc=20000, scale=8000, size=n_samples), 5000, 100000)
mobile_spend = np.clip(np.random.normal(loc=300, scale=100, size=n_samples), 50, 1000)
utility_spend = np.clip(np.random.normal(loc=1500, scale=700, size=n_samples), 300, 5000)
household = np.random.randint(low=1, high=10, size=n_samples)  # high is exclusive: 1..9
self_employed = np.random.choice([0, 1], size=n_samples, p=[0.7, 0.3])

data = pd.DataFrame(
    {
        "monthly_income": income,
        "monthly_mobile_spend": mobile_spend,
        "monthly_utility_spend": utility_spend,
        "household_size": household,
        "is_self_employed": self_employed,
    }
)

# Target: a linear combination of the features plus Gaussian noise (sd 10),
# accumulated term by term, then bounded to 300..850 and truncated to int.
raw_score = 600 + 0.0025 * data["monthly_income"]
raw_score = raw_score - 0.005 * data["monthly_mobile_spend"]
raw_score = raw_score - 0.004 * data["monthly_utility_spend"]
raw_score = raw_score - 3 * data["household_size"]
raw_score = raw_score + 20 * data["is_self_employed"]
raw_score = raw_score + np.random.normal(0, 10, n_samples)
data["credit_score"] = raw_score.clip(300, 850).astype(int)

data.head()


### ✅ Step 2: Train-Test Split

In [None]:
# Separate the feature matrix from the regression target.
X = data.drop("credit_score", axis=1)
y = data["credit_score"]

# Hold out 20% of the rows for evaluation. The explicit random_state makes the
# partition identical on every run, even when this cell is re-executed on its
# own (without it the split depends on the global NumPy RNG state).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


### ✅ Step 3: Train the Model

In [None]:
# Gradient-boosted regression trees: 100 boosting stages, each tree limited to
# depth 5. random_state is fixed so that refitting on the same training data
# yields the same model regardless of the global RNG state.
model = GradientBoostingRegressor(n_estimators=100, max_depth=5, random_state=42)
model.fit(X_train, y_train)


### ✅ Step 4: Evaluate the Model

In [None]:
# Score the held-out rows and report the root-mean-squared error.
preds = model.predict(X_test)
# Take the square root explicitly: the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas sqrt(MSE) works on every version.
rmse = np.sqrt(mean_squared_error(y_test, preds))
print(f"RMSE: {rmse:.2f}")


### ✅ Step 5: Save the Model

In [None]:
# Persist the fitted estimator with joblib, creating the output directory
# first if it does not exist yet.
model_dir = "ai_model"
model_path = "ai_model/credit_model.pkl"

os.makedirs(model_dir, exist_ok=True)
joblib.dump(model, model_path)
print("Model saved to ai_model/credit_model.pkl")
