In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import xgboost as xgb

In [3]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.2-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.2-py3-none-win_amd64.whl (150.0 MB)
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/150.0 MB 330.3 kB/s eta 0:07:34
   ---------------------------------------- 0.1/150.0 MB 409.6 kB/s eta 0:06:06
   ---------------------------------------- 0.1/150.0 MB 476.3 kB/s eta 0:05:15
   ---------------------------------------- 0.1/150.0 MB 568.9 kB/s eta 0:04:24
   ---------------------------------------- 0.2/150.0 MB 850.1 kB/s eta 0:02:57
   ---------------------------------------- 0.3/150.0 MB 827.5 kB/s eta 0:03:01
   ---------------------------------------- 0.3/150.0 MB 827.5 kB/s eta 0:03:01
   ---------------------------------------- 0.3/150.0 MB 731.4 kB/s eta 0:03:25
   ---------------------------------------- 0.4/150.0 MB 836.4 kB/s et

In [42]:
pip install scikit-learn==1.7.0

Collecting scikit-learn==1.7.0Note: you may need to restart the kernel to use updated packages.

  Downloading scikit_learn-1.7.0-cp311-cp311-win_amd64.whl.metadata (14 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn==1.7.0)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.7.0-cp311-cp311-win_amd64.whl (10.7 MB)
   ---------------------------------------- 0.0/10.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/10.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/10.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/10.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/10.7 MB ? eta -:--:--
   ---------------------------------------- 0.1/10.7 MB 302.7 kB/s eta 0:00:36
   ---------------------------------------- 0.1/10.7 MB 302.7 kB/s eta 0:00:36
   ---------------------------------------- 0.1/10.7 MB 361.0 kB/s eta 0:00:30
    --------------------------------------- 0

In [43]:
# Read the credit-risk CSV and keep only the rows that have no missing values.
df = pd.read_csv('credit_risk_dataset.csv').dropna()

In [44]:
# Separate the 'loan_status' target from the remaining feature columns.
y = df['loan_status']
X = df.drop(columns=['loan_status'])

In [45]:
# Categorical features are listed by name; numeric features are taken from X's dtypes.
categorical_cols = [
    'person_home_ownership',
    'loan_intent',
    'loan_grade',
    'cb_person_default_on_file',
]
numerical_cols = list(X.select_dtypes(include=np.number).columns)

# Scale the numeric columns and one-hot encode the categorical ones.
# handle_unknown='ignore' keeps categories unseen during fit from raising at transform time.
numeric_step = ('num', StandardScaler(), numerical_cols)
categorical_step = ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

In [46]:
# Chain the preprocessing step and the XGBoost classifier so they are fitted
# and applied together as one estimator.
classifier = xgb.XGBClassifier(eval_metric='logloss', random_state=42)
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', classifier),
])

In [47]:
# Reserve 20% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [48]:
# Fit the pipeline on the training split, then predict labels for the held-out rows.
# Pipeline.fit returns the fitted pipeline itself, so the two calls can be chained.
y_pred = model.fit(X_train, y_train).predict(X_test)

In [49]:
# Compute the evaluation metrics on the held-out split first, then print them.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.9354050279329609
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.99      0.96      4443
           1       0.96      0.74      0.84      1285

    accuracy                           0.94      5728
   macro avg       0.95      0.87      0.90      5728
weighted avg       0.94      0.94      0.93      5728



In [41]:
import os

# Directory the model bundle is written to. It defaults to the original location,
# but can be redirected with the CREDIT_RISK_MODEL_DIR environment variable so the
# notebook is not tied to one machine's directory layout.
save_dir = os.environ.get("CREDIT_RISK_MODEL_DIR", r"C:\Users\anurag\CreditFraud")
os.makedirs(save_dir, exist_ok=True)  # no error if the directory already exists

# Bundle the fitted pipeline together with the feature/column name lists.
model_bundle = {
    'model': model,
    'feature_names': list(X.columns),
    'categorical_cols': categorical_cols,
    'numerical_cols': numerical_cols,
}

# Kept as the final expression so the cell displays the file path(s) joblib wrote.
joblib.dump(model_bundle, os.path.join(save_dir, "credit_risk_model.joblib"))


['C:\\Users\\anurag\\CreditFraud\\credit_risk_model.joblib']

In [None]:
import os
import joblib

# Directory for the saved artifact. It defaults to the original location; set the
# CREDIT_RISK_MODEL_DIR environment variable to write somewhere else.
save_dir = os.environ.get("CREDIT_RISK_MODEL_DIR", r"C:\Users\anurag\CreditFraud")
os.makedirs(save_dir, exist_ok=True)  # Create directory if it doesn't exist

save_path = os.path.join(save_dir, "model_data.joblib")

# Save the fitted pipeline and the column metadata. No separate scaler/encoder
# object is stored: those transformers are part of the pipeline under 'model'.
model_data = {
    'model': model,
    'feature_names': list(X.columns),
    'categorical_cols': categorical_cols,
    'numerical_cols': numerical_cols
}

joblib.dump(model_data, save_path)