In [None]:
!pip install xgboost scikit-learn pandas numpy


In [None]:
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Toy loan records. Each tuple in _ROWS is one borrower, with fields in the
# order given by _COLUMNS. The final field, 'Risk', is the label:
# 0 = No risk, 1 = Risk of fraud/repayment failure.
_COLUMNS = (
    'Current Loan Amount',
    'Term',
    'Credit Score',
    'Years in current job',
    'Home Ownership',
    'Annual Income',
    'Monthly Debt',
    'Years of Credit History',
    'Months since last delinquent',
    'Number of Open Accounts',
    'Number of Credit Problems',
    'Current Credit Balance',
    'Maximum Open Credit',
    'Bankruptcies',
    'Tax Liens',
    'Risk',
)
_ROWS = (
    (5000, 'Short', 650, 2, 'Rent', 40000, 1000, 5, 6, 5, 1, 2000, 10000, 0, 0, 0),
    (15000, 'Long', 700, 5, 'Own', 60000, 1200, 8, 0, 3, 0, 5000, 20000, 1, 0, 1),
    (20000, 'Short', 750, 10, 'Home Mortgage', 80000, 500, 10, 12, 7, 0, 10000, 30000, 0, 0, 0),
    (10000, 'Long', 600, 3, 'Rent', 30000, 1100, 4, 3, 4, 3, 1500, 5000, 2, 1, 1),
)

# Pivot the row-wise records into a {column name: list of values} mapping.
data = {name: list(values) for name, values in zip(_COLUMNS, zip(*_ROWS))}

# Tabular view of the records; column order follows _COLUMNS.
df = pd.DataFrame(data)

# Integer codes for the text-valued columns, applied in place so that every
# column of the frame is numeric afterwards.
_CATEGORY_CODES = {
    'Term': {'Short': 0, 'Long': 1},
    'Home Ownership': {'Rent': 0, 'Own': 1, 'Home Mortgage': 2},
}
for _column, _codes in _CATEGORY_CODES.items():
    df[_column] = df[_column].map(_codes)

# Separate the target column from the remaining feature columns.
y = df['Risk']
X = df.drop('Risk', axis=1)

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the XGBoost classifier, using log loss as the evaluation metric.
# `use_label_encoder` is deliberately not passed: the argument is deprecated
# and current XGBoost releases only warn that it is unused.
model = xgb.XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)

# Predict class labels for the held-out rows.
y_pred = model.predict(X_test)

# Report the fraction of held-out rows classified correctly, as a percentage.
# NOTE(review): with only a handful of sample rows the held-out set is tiny,
# so treat this figure as a smoke test rather than a real quality estimate.
accuracy = accuracy_score(y_test, y_pred)
print(f"XGBoost Model Accuracy: {accuracy * 100:.2f}%")
