In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("data.csv")

# Drop irrelevant columns if they exist
columns_to_drop = ["ApplicationDate", "RiskScore"]
df = df.drop(columns=[col for col in columns_to_drop if col in df.columns])

# Create binary RiskLabel if not already in binary form
if 'RiskLabel' not in df.columns:
    def risk_category(score):
        if score < 670:  # High + Medium = Risky
            return "Risky"
        else:
            return "Not Risky"
    df['RiskLabel'] = df['CreditScore'].apply(risk_category)
else:
    df['RiskLabel'] = df['RiskLabel'].replace({
        'High Risk': 'Risky',
        'Medium Risk': 'Risky',
        'Low Risk': 'Not Risky'
    })

# Feature columns
features = [
    'Age', 'Experience', 'JobTenure', 
    'MonthlyIncome', 'SavingsAccountBalance', 'CheckingAccountBalance', 
    'LoanAmount', 'LoanDuration', 'MonthlyLoanPayment', 'BaseInterestRate', 'InterestRate', 
    'MonthlyDebtPayments', 'TotalLiabilities', 'DebtToIncomeRatio', 'TotalDebtToIncomeRatio', 
    'CreditCardUtilizationRate', 'NumberOfOpenCreditLines', 
    'TotalAssets', 'NetWorth',
    'PaymentHistory', 'UtilityBillsPaymentHistory'
]

df = df[features + ['RiskLabel']]

# Split into features and label
X = df[features]
y = df['RiskLabel']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Decision Tree Classifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluate model
y_pred = model.predict(X_test_scaled)
print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Input section for user
def get_user_input():
    """
    Collects user input for features used in prediction.
    Returns:
        pd.DataFrame: Single-row DataFrame containing user inputs.
    """
    user_data = {}

    # Personal & Employment Information
    user_data['Age'] = int(input("Age(Years): "))
    user_data['Experience'] = int(input("Work Experience (years): "))
    user_data['JobTenure'] = int(input("Job Tenure (years): "))

    # Income & Account Balances
    user_data['MonthlyIncome'] = float(input("Monthly Income: "))
    user_data['SavingsAccountBalance'] = float(input("Savings Account Balance: "))
    user_data['CheckingAccountBalance'] = float(input("Checking Account Balance: "))

    # Loan Information
    user_data['LoanAmount'] = float(input("Loan Amount: "))
    user_data['LoanDuration'] = int(input("Loan Duration (months): "))
    user_data['MonthlyLoanPayment'] = float(input("Monthly Loan Payment: "))
    user_data['BaseInterestRate'] = float(input("Base Interest Rate (e.g., 0.05 for 5%): "))
    user_data['InterestRate'] = float(input("Interest Rate (e.g., 0.05): "))
    
    # Debt & Liabilities
    user_data['MonthlyDebtPayments'] = float(input("Monthly Debt Payments: "))
    user_data['TotalLiabilities'] = float(input("Total Liabilities: "))
    user_data['DebtToIncomeRatio'] = float(input("Debt to Income Ratio (e.g., 0.3): "))
    user_data['TotalDebtToIncomeRatio'] = float(input("Total Debt to Income Ratio (e.g., 0.4): "))
    
    # Credit Information
    user_data['CreditCardUtilizationRate'] = float(input("Credit Card Utilization Rate (0 to 1): "))
    user_data['NumberOfOpenCreditLines'] = int(input("Number Of Open Credit Lines: "))

    # Assets & Net Worth
    user_data['TotalAssets'] = float(input("Total Assets: "))
    user_data['NetWorth'] = float(input("Net Worth: "))

    # Payment History
    user_data['PaymentHistory'] = float(input("Payment History Score: "))
    user_data['UtilityBillsPaymentHistory'] = float(input("Utility Bills Payment History(0.0 - 1.0): "))
    
    return pd.DataFrame([user_data])

# Collect input and make prediction
user_df = get_user_input()
user_prediction = model.predict(user_df)[0]
print(f"\n Predicted Credit Score Category: **{user_prediction}**")


Accuracy: 0.995275

Classification Report:
               precision    recall  f1-score   support

   Not Risky       0.69      0.72      0.71       318
       Risky       1.00      1.00      1.00     39682

    accuracy                           1.00     40000
   macro avg       0.85      0.86      0.85     40000
weighted avg       1.00      1.00      1.00     40000


 Predicted Credit Score Category: **Risky**


