###   RiskWiz(Credit Score Classifier)
***Wizardry in predicting loan risk: low, medium, or high!***

This script:
- Loads and preprocesses a **financial dataset**
- Trains a **Decision Tree Classifier** to classify loan applicants *(low risk, Medium risk and High risk)*
- Evaluates model performance
- Accepts **input** to predict their risk category interactively


In [1]:
#  Import  libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:

# ------------------------------------------
# Load and Preprocess the Dataset
# ------------------------------------------

# Load dataset
df = pd.read_csv("data.csv")

# Drop irrelevant columns if present
columns_to_drop = ["ApplicationDate", "RiskScore"]
df = df.drop(columns=[col for col in columns_to_drop if col in df.columns])

# Generate 3-class RiskLabel from CreditScore
# RiskLabel from CreditScore (but do NOT use CreditScore as a feature)
if 'RiskLabel' not in df.columns:
    df['RiskLabel'] = df['CreditScore'].apply(lambda x: 'High Risk' if x < 580 else 'Low Risk')

In [3]:

# ------------------------------------------
#  Feature Selection
# ------------------------------------------

# Select relevant features for model training
features = ['Age', 'Experience', 'JobTenure', 
            'MonthlyIncome', 'SavingsAccountBalance', 'CheckingAccountBalance', 
            'LoanAmount', 'LoanDuration', 'MonthlyLoanPayment', 'BaseInterestRate', 'InterestRate', 
            'MonthlyDebtPayments', 'TotalLiabilities', 'DebtToIncomeRatio', 'TotalDebtToIncomeRatio', 
            'CreditCardUtilizationRate', 'NumberOfOpenCreditLines', 
            'TotalAssets', 'NetWorth',
            'PaymentHistory', 'UtilityBillsPaymentHistory']

df = df[features + ['RiskLabel']]

In [4]:
# ------------------------------------------
#  Train-Test Split
# ------------------------------------------

X = df[features]
y = df['RiskLabel']

# Split into training and testing sets (80%-20%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [5]:

# ------------------------------------------
#  Model Training
# ------------------------------------------

model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# ------------------------------------------
#  Model Evaluation
# ------------------------------------------

y_pred = model.predict(X_test)

# Print evaluation metrics
print("\n Model Accuracy:", accuracy_score(y_test, y_pred))
print("\n Classification Report:\n", classification_report(y_test, y_pred))


 Model Accuracy: 0.98815

 Classification Report:
               precision    recall  f1-score   support

   High Risk       0.99      0.99      0.99     20532
    Low Risk       0.99      0.99      0.99     19468

    accuracy                           0.99     40000
   macro avg       0.99      0.99      0.99     40000
weighted avg       0.99      0.99      0.99     40000



In [6]:

# ------------------------------------------
#  Predict with User Input
# ------------------------------------------

print("\n===  Predict Risk Category ===")

def get_user_input():
    """
    Collects user input for features used in prediction.
    Returns:
        pd.DataFrame: Single-row DataFrame containing user inputs.
    """
    user_data = {}

    # Personal & Employment Information
    user_data['Age'] = int(input("Age(Years): "))
    user_data['Experience'] = int(input("Work Experience (years): "))
    user_data['JobTenure'] = int(input("Job Tenure (years): "))

    # Income & Account Balances
    user_data['MonthlyIncome'] = float(input("Monthly Income: "))
    user_data['SavingsAccountBalance'] = float(input("Savings Account Balance: "))
    user_data['CheckingAccountBalance'] = float(input("Checking Account Balance: "))

    # Loan Information
    user_data['LoanAmount'] = float(input("Loan Amount: "))
    user_data['LoanDuration'] = int(input("Loan Duration (months): "))
    user_data['MonthlyLoanPayment'] = float(input("Monthly Loan Payment: "))
    user_data['BaseInterestRate'] = float(input("Base Interest Rate (e.g., 0.05 for 5%): "))
    user_data['InterestRate'] = float(input("Interest Rate (e.g., 0.05): "))
    
    # Debt & Liabilities
    user_data['MonthlyDebtPayments'] = float(input("Monthly Debt Payments: "))
    user_data['TotalLiabilities'] = float(input("Total Liabilities: "))
    user_data['DebtToIncomeRatio'] = float(input("Debt to Income Ratio (e.g., 0.3): "))
    user_data['TotalDebtToIncomeRatio'] = float(input("Total Debt to Income Ratio (e.g., 0.4): "))
    
    # Credit Information
    user_data['CreditCardUtilizationRate'] = float(input("Credit Card Utilization Rate (0 to 1): "))
    user_data['NumberOfOpenCreditLines'] = int(input("Number Of Open Credit Lines: "))

    # Assets & Net Worth
    user_data['TotalAssets'] = float(input("Total Assets: "))
    user_data['NetWorth'] = float(input("Net Worth: "))

    # Payment History
    user_data['PaymentHistory'] = float(input("Payment History Score: "))
    user_data['UtilityBillsPaymentHistory'] = float(input("Utility Bills Payment History(0.0 - 1.0): "))
    
    return pd.DataFrame([user_data])

# Collect input and make prediction
user_df = get_user_input()
prediction = model.predict(user_df)
print("\nRisk Category:", prediction[0])



===  Predict Risk Category ===

Risk Category: Low Risk
