In [1]:
import pandas as pd
from google.colab import files
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, accuracy_score

uploaded = files.upload()

data = pd.read_csv("credit_risk_dataset.csv")

data.head()

X = data[["person_age", "person_income", "person_emp_length", "loan_amnt", "loan_int_rate", "person_home_ownership", "loan_intent","cb_person_default_on_file"]]
y = data["loan_status"]
X.drop(columns=["loan_amnt"], inplace=True)
numeric_features = ["person_age","person_income","person_emp_length","loan_int_rate"]
categorical_features = ["person_home_ownership","loan_intent","cb_person_default_on_file"]
numeric_transformer = Pipeline(steps=[
    ('imputer',SimpleImputer(strategy='median')),
    ('scaler',StandardScaler())
])
categorical_transformer = Pipeline(steps=[
    ('imputer',SimpleImputer(strategy='most_frequent')),
    ('onehot',OneHotEncoder(handle_unknown='ignore'))
])
preprocessor = ColumnTransformer(
    transformers= [
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)
X_processed = preprocessor.fit_transform(X)
categorical_columns = preprocessor.transformers_[1][1]['onehot'].get_feature_names_out(categorical_features)
all_columns = numeric_features + list(categorical_columns)

X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)
random_forest_classifier = RandomForestClassifier(random_state=42)
random_forest_classifier.fit(X_train,y_train)
y_pred = random_forest_classifier.predict(X_test)
print("Accuracy:", accuracy_score(y_test,y_pred))
print("\nClassification Report:")
print(classification_report(y_test,y_pred))
categories = {
    "person_home_ownership": ['RENT', 'OWN', 'MORTGAGE', 'OTHER'],
    "loan_intent": ['PERSONAL', 'EDUCATION', 'MEDICAL', 'VENTURE', 'HOMEIMPROVEMENT',
       'DEBTCONSOLIDATION'],
    "cb_person_default_on_file": ['Y', 'N']
}

def predict_loan_default():
  print("\nEnter applicant details for credit risk assessment:")
  age = float(input("Applicant's age:"))
  income = float(input("Applicant's income:"))
  emp_length = float(input("Applicant's employment length (in years):"))
  loan_int_rate = float(input("Loan interest rate:"))
  loan_amnt = float(input("Loan amount requested:"))
  loan_percent_income = loan_amnt/income
  home_ownership = input("Home ownership (RENT/MORTGAGE/OWN/OTHER):").upper()
  loan_intent = input("Loan intent (DEBTCONSOLIDATION/EDUCATION/HOMEIMPROVEMENT/MEDICAL/PERSONAL/VENTURE):").upper()
  default_history = input("Has the applicant defaulted before (Y/N):").upper()

  input_data = {
      "person_age": [age],
      "person_income": [income],
      "person_emp_length": [emp_length],
      "loan_int_rate": [loan_int_rate]
  }
  for category, value in zip(["person_home_ownership","loan_intent","cb_person_default_on_file"],[home_ownership,loan_intent,default_history]):
    for val in categories[category]:
      input_data[f"{category}_{val}"] = [1 if value == val else 0]
  input_data = pd.DataFrame(input_data)

  for col in all_columns:
    if col not in input_data.columns:
      input_data[col] = 0
  input_data = input_data[all_columns]
  prediction = random_forest_classifier.predict(input_data)[0]
  if prediction == 1:
    print("\nBased on the information provided, the applicant is predicted to be more likely to default on the loan")
  else:
    print("\nBased on the information provided, the applicant is predicted to be less likely to default on the loan")

predict_loan_default()

Saving credit_risk_dataset.csv to credit_risk_dataset.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.drop(columns=["loan_amnt"], inplace=True)


Accuracy: 0.8540739604112322

Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.95      0.91      5072
           1       0.75      0.51      0.61      1445

    accuracy                           0.85      6517
   macro avg       0.81      0.73      0.76      6517
weighted avg       0.85      0.85      0.84      6517


Enter applicant details for credit risk assessment:
Applicant's age:23
Applicant's income:20000
Applicant's employment length (in years):2
Loan interest rate:10
Loan amount requested:500000
Home ownership (RENT/MORTGAGE/OWN/OTHER):OWN
Loan intent (DEBTCONSOLIDATION/EDUCATION/HOMEIMPROVEMENT/MEDICAL/PERSONAL/VENTURE):PERSONAL
Has the applicant defaulted before (Y/N):N

Based on the information provided, the applicant is predicted to be less likely to default on the loan




In [2]:
pip install gradio




In [3]:
import gradio as gr

def gradio_predict_loan_default(age,income,emp_length,loan_int_rate,loan_amnt,home_ownership,loan_intent,default_history):
  loan_percent_income = loan_amnt/income
  input_data = {
      "person_age": [age],
      "person_income": [income],
      "person_emp_length": [emp_length],
      "loan_int_rate": [loan_int_rate]
  }
  for category, value in zip(["person_home_ownership","loan_intent","cb_person_default_on_file"],[home_ownership,loan_intent,default_history]):
    for val in categories[category]:
      input_data[f"{category}_{val}"] = [1 if value == val else 0]
  input_data = pd.DataFrame(input_data)

  for col in all_columns:
    if col not in input_data.columns:
      input_data[col] = 0
  input_data = input_data[all_columns]
  prediction = random_forest_classifier.predict(input_data)[0]
  if prediction == 1:
    return "\nBased on the information provided, the applicant is predicted to be more likely to default on the loan"
  else:
    return "\nBased on the information provided, the applicant is predicted to be less likely to default on the loan"

inputs = [
    gr.Number(label="Applicant's age"),
    gr.Number(label="Applicant's income"),
    gr.Number(label="Applicant's emplyment length (in years)"),
    gr.Number(label="Loan interest rate"),
    gr.Number(label="Loan amount requested"),
    gr.Dropdown(choices=['RENT','MORTGAGE','OWN','OTHER'], label="Home ownership status"),
    gr.Dropdown(choices=['DEBTCONSOLIDATION','EDUCATION','HOMEIMPROVEMENT','MEDICAL','PERSONAL','VENTURE'], label= "Loan intent"),
    gr.Dropdown(choices=['Y','N'], label="Has the applicant defaulted before?")
]

import gradio as gr
import pandas as pd

def gradio_predict_loan_default(age,income,emp_length,loan_int_rate,loan_amnt,home_ownership,loan_intent,default_history):
  loan_percent_income = loan_amnt/income
  input_data = {
      "person_age": [age],
      "person_income": [income],
      "person_emp_length": [emp_length],
      "loan_int_rate": [loan_int_rate]
  }
  for category, value in zip(["person_home_ownership","loan_intent","cb_person_default_on_file"],[home_ownership,loan_intent,default_history]):
    for val in categories[category]:
      input_data[f"{category}_{val}"] = [1 if value == val else 0]
  input_data = pd.DataFrame(input_data)

  for col in all_columns:
    if col not in input_data.columns:
      input_data[col] = 0
  input_data = input_data[all_columns]
  prediction = random_forest_classifier.predict(input_data)[0]
  if prediction == 1:
    return "\nBased on the information provided, the applicant is predicted to be more likely to default on the loan"
  else:
    return "\nBased on the information provided, the applicant is predicted to be less likely to default on the loan"

inputs = [
    gr.Number(label="Applicant's age"),
    gr.Number(label="Applicant's income"),
    gr.Number(label="Applicant's emplyment length (in years)"),
    gr.Number(label="Loan interest rate"),
    gr.Number(label="Loan amount requested"),
    gr.Dropdown(choices=['RENT','MORTGAGE','OWN','OTHER'], label="Home ownership status"),
    gr.Dropdown(choices=['DEBTCONSOLIDATION','EDUCATION','HOMEIMPROVEMENT','MEDICAL','PERSONAL','VENTURE'], label= "Loan intent"),
    gr.Dropdown(choices=['Y','N'], label="Has the applicant defaulted before?")
]

outputs = gr.Textbox()
gr.Interface(fn=gradio_predict_loan_default,inputs=inputs, outputs=outputs, title="Credit Risk Assessment Model").launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://92a45d93be2a7900fa.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


