<a href="https://colab.research.google.com/github/Yashasvi-30/Predictive-Analysis/blob/main/DecisionTree-8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
# Replace 'loan_default_data.csv' with your dataset path
data = pd.read_csv('loan_default_data.csv')

# Overview of the dataset: first rows, then the per-column summary
print("Dataset Overview:")
print(data.head())
print("\nData Summary:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
data.info()

# Impute missing values (if any), in two in-place passes over `data`.
# Pass 1: numeric columns receive their own column median.
numeric_medians = data.median(numeric_only=True)
data.fillna(value=numeric_medians, inplace=True)
# Pass 2: anything still missing (e.g. in categorical columns) receives the
# first mode of its column, computed after the numeric pass.
most_frequent_values = data.mode().iloc[0]
data.fillna(value=most_frequent_values, inplace=True)

# Split dataset into features (X) and target (y)
# Replace 'default' with your target column
target_column = 'default'
y = data[target_column]

# Feature engineering
# Convert categorical feature columns to numerical indicator columns.
# The target is separated *before* encoding: running get_dummies on the whole
# frame would replace a categorical target with renamed indicator columns
# (e.g. 'default_yes'), making the lookup of the target column fail.
# Note: `data` itself is left un-encoded; only X holds the encoded features.
X = pd.get_dummies(data.drop(target_column, axis=1), drop_first=True)

# Split into train and test sets (70% train / 30% test, fixed seed for
# reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a depth-limited Decision Tree classifier on the training split.
# fit() returns the fitted estimator, so construction and training are chained.
decision_tree = DecisionTreeClassifier(
    random_state=42,
    max_depth=4,
    criterion='gini',
).fit(X_train, y_train)

# Class predictions for the held-out test split
y_pred = decision_tree.predict(X_test)

# Report test-set performance: confusion matrix first, then the
# classification report. Each heading and its body are separated by a newline.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

# Visualizing the Decision Tree
plt.figure(figsize=(20, 10))
plot_tree(
    decision_tree,
    # Pass a plain list of names: some scikit-learn releases reject a pandas
    # Index for this parameter during input validation.
    feature_names=list(X.columns),
    # NOTE(review): these labels are matched to the fitted classes in ascending
    # order, so this assumes exactly two classes with "no default" sorting
    # first (e.g. 0/1) -- confirm against the dataset's target values.
    class_names=['No Default', 'Default'],
    filled=True,
    rounded=True,
    fontsize=10,
)
plt.title("Decision Tree Visualization")
plt.show()

# Feature Importance
# Pair every feature column with the importance score reported by the fitted
# tree, then rank from most to least important.
feature_importance = pd.DataFrame(
    {'Feature': X.columns, 'Importance': decision_tree.feature_importances_}
)
feature_importance = feature_importance.sort_values('Importance', ascending=False)

print("\nFeature Importance:", feature_importance, sep="\n")

# Horizontal bar chart of the ranked importances
plt.figure(figsize=(10, 6))
sns.barplot(data=feature_importance, y='Feature', x='Importance')
plt.ylabel('Feature')
plt.xlabel('Importance')
plt.title('Feature Importance')
plt.show()
