In [6]:
# ---------------------------------------------
# 1. Import Libraries
# ---------------------------------------------
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# ---------------------------------------------
# 2. Load & Preprocess the Dataset
# ---------------------------------------------
df = pd.read_csv("data.csv")

# Drop irrelevant columns
columns_to_drop = ["ApplicationDate", "RiskScore"]
df = df.drop(columns=[col for col in columns_to_drop if col in df.columns])

# Generate 3-class RiskLabel from CreditScore
if 'RiskLabel' not in df.columns:
    def get_risk_label(score):
        if score < 580:
            return 'High Risk'
        elif score < 670:
            return 'Medium Risk'
        else:
            return 'Low Risk'
    df['RiskLabel'] = df['CreditScore'].apply(get_risk_label)

# Define feature columns (EXCLUDE CreditScore to prevent leakage)
features = ['Age', 'Experience', 'JobTenure', 
            'MonthlyIncome', 'SavingsAccountBalance', 'CheckingAccountBalance', 
            'LoanAmount', 'LoanDuration', 'MonthlyLoanPayment', 'BaseInterestRate', 'InterestRate', 
            'MonthlyDebtPayments', 'TotalLiabilities', 'DebtToIncomeRatio', 'TotalDebtToIncomeRatio', 
            'CreditCardUtilizationRate', 'NumberOfOpenCreditLines', 
            'TotalAssets', 'NetWorth',
            'PaymentHistory', 'UtilityBillsPaymentHistory']

df = df[features + ['RiskLabel']]

# ---------------------------------------------
# 3. Split Dataset
# ---------------------------------------------
X = df[features]
y = df['RiskLabel']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ---------------------------------------------
# 4. Train Model
# ---------------------------------------------
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# ---------------------------------------------
# 5. Evaluate Model
# ---------------------------------------------
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# ---------------------------------------------
# 6. User Input Prediction (Only Risk Category)
# ---------------------------------------------
def get_user_input():
    user_data = {}
    user_data['Age'] = int(input("Age (Years): "))
    user_data['Experience'] = int(input("Work Experience (years): "))
    user_data['JobTenure'] = int(input("Job Tenure (years): "))

    # Income & Account Balances
    user_data['MonthlyIncome'] = float(input("Monthly Income: "))
    user_data['SavingsAccountBalance'] = float(input("Savings Account Balance: "))
    user_data['CheckingAccountBalance'] = float(input("Checking Account Balance: "))

    # Loan Info
    user_data['LoanAmount'] = float(input("Loan Amount: "))
    user_data['LoanDuration'] = int(input("Loan Duration (months): "))
    user_data['MonthlyLoanPayment'] = float(input("Monthly Loan Payment: "))
    user_data['BaseInterestRate'] = float(input("Base Interest Rate (0.05 for 5%): "))
    user_data['InterestRate'] = float(input("Interest Rate (e.g., 0.08): "))

    # Debt Info
    user_data['MonthlyDebtPayments'] = float(input("Monthly Debt Payments: "))
    user_data['TotalLiabilities'] = float(input("Total Liabilities: "))
    user_data['DebtToIncomeRatio'] = float(input("Debt to Income Ratio (e.g., 0.3): "))
    user_data['TotalDebtToIncomeRatio'] = float(input("Total Debt to Income Ratio (e.g., 0.4): "))

    # Credit Behavior
    user_data['CreditCardUtilizationRate'] = float(input("Credit Card Utilization Rate (0 to 1): "))
    user_data['NumberOfOpenCreditLines'] = int(input("Number of Open Credit Lines: "))

    # Assets
    user_data['TotalAssets'] = float(input("Total Assets: "))
    user_data['NetWorth'] = float(input("Net Worth: "))

    # Payment History
    user_data['PaymentHistory'] = float(input("Payment History Score: "))
    user_data['UtilityBillsPaymentHistory'] = float(input("Utility Bills Payment History (0.0 - 1.0): "))

    return pd.DataFrame([user_data])

# Predict and show risk category
user_df = get_user_input()
prediction = model.predict(user_df)
print("\nRisk Category:", prediction[0])


Accuracy: 0.98445

Classification Report:
               precision    recall  f1-score   support

   High Risk       0.99      0.99      0.99     20532
    Low Risk       0.74      0.73      0.73       318
 Medium Risk       0.98      0.98      0.98     19150

    accuracy                           0.98     40000
   macro avg       0.90      0.90      0.90     40000
weighted avg       0.98      0.98      0.98     40000


Risk Category: Low Risk
