In [None]:
# Importing required libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt

# Load the Iris dataset: feature matrix in X, integer class labels in y
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create a shallow Decision Tree classifier (Gini impurity, depth capped at 3)
clf = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=42)

# Train the model on the training split only
clf.fit(X_train, y_train)

# Test the model: score() reports mean accuracy on the held-out split
accuracy = clf.score(X_test, y_test)
print(f"Accuracy: {accuracy:.4f}")

# Visualizing the Decision Tree
# iris.target_names is a NumPy array; it is converted to a plain list because
# some scikit-learn releases validate `class_names` as a list and reject arrays.
plt.figure(figsize=(15, 7))
plot_tree(
    clf,
    feature_names=iris.feature_names,
    class_names=iris.target_names.tolist(),
    filled=True,
)
plt.title("Decision Tree Visualization")
plt.show()

In [None]:
#Effect of Maximum Depth

# Imports are local to this cell so it runs on a fresh kernel: numpy in
# particular was previously only imported by a later cell.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

# Generate dataset
# n_informative / n_redundant are set explicitly: the defaults (2 informative +
# 2 redundant) exceed n_features=2 and make_classification raises ValueError.
X, y = make_classification(
    n_samples=500,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Mesh grid covering the data range (padded by 1 on each side, step 0.1).
# It does not depend on the tree depth, so it is built once outside the loop.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
grid_points = np.c_[xx.ravel(), yy.ravel()]

# Train decision trees with varying depths
depths = [1, 3, 5, 10]
plt.figure(figsize=(12, 8))

for i, depth in enumerate(depths):
    model = DecisionTreeClassifier(max_depth=depth, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate on the held-out split
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Depth {depth}: Accuracy = {acc:.2f}")

    # Predict a class for every grid point, then restore the grid shape for contourf
    Z = model.predict(grid_points).reshape(xx.shape)

    # One panel per depth in a 2x2 layout; points are all samples coloured by true label
    plt.subplot(2, 2, i + 1)
    plt.contourf(xx, yy, Z, alpha=0.8, cmap='coolwarm')
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k', cmap='coolwarm')
    plt.title(f"Max Depth = {depth}")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")

plt.tight_layout()
plt.show()


In [None]:
 # Decision Tree Classification on Simple Data

from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
import numpy as np

# Generate synthetic dataset
# n_informative / n_redundant are set explicitly: the defaults (2 informative +
# 2 redundant) exceed n_features=2 and make_classification raises ValueError.
X, y = make_classification(
    n_samples=200,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    random_state=42,
)

# Train decision tree classifier (fit on the full dataset; depth capped at 3)
model = DecisionTreeClassifier(max_depth=3, random_state=42)
model.fit(X, y)

# Plot decision boundary
# Build a grid over the data range (padded by 1, step 0.1), predict a class for
# every grid point, and reshape the predictions back to the grid for contourf.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.8, cmap='coolwarm')
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k', cmap='coolwarm')
plt.title("Decision Tree Decision Boundary")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Plot tree structure
plt.figure(figsize=(10, 6))
plot_tree(model, filled=True, feature_names=["Feature 1", "Feature 2"], class_names=["Class 0", "Class 1"])
plt.title("Decision Tree Structure")
plt.show()


In [None]:
#Handling Imbalanced Classes

from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split

# Generate an imbalanced dataset (class weights requested as 90% / 10%)
# n_informative / n_redundant are set explicitly: the defaults (2 informative +
# 2 redundant) exceed n_features=2 and make_classification raises ValueError.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42,
)

# Hold out 30% for evaluation. stratify=y keeps the class ratio the same in both
# splits, so the minority class is represented in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Train decision tree on the training split only
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Plot confusion matrix on the held-out split. Scoring an unpruned tree on the
# data it was fit on would look near-perfect and hide minority-class errors.
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, cmap='Blues')
plt.title("Confusion Matrix for Imbalanced Data")
plt.show()

# Decision boundary for imbalanced data
# Grid over the full data range (padded by 1, step 0.1); predictions are
# reshaped back to the grid so contourf can draw the class regions.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Scatter shows all samples (train and test) coloured by their true label
plt.contourf(xx, yy, Z, alpha=0.8, cmap='coolwarm')
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k', cmap='coolwarm')
plt.title("Decision Tree Decision Boundary (Imbalanced Data)")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()


In [None]:
# Feature Importance


from sklearn.datasets import make_classification
import pandas as pd

# Synthetic binary classification problem with five feature columns;
# display names are derived from the actual column count of X.
X, y = make_classification(n_samples=200, n_features=5, n_classes=2, random_state=42)
feature_names = [f"Feature {i+1}" for i in range(X.shape[1])]

# Fit an unconstrained decision tree on the full dataset (fit returns the estimator)
model = DecisionTreeClassifier(random_state=42).fit(X, y)

# Pair each feature name with its importance score and order from most to
# least important so the bar chart reads left-to-right by relevance.
importances = model.feature_importances_
importance_df = (
    pd.DataFrame({'Feature': feature_names, 'Importance': importances})
    .sort_values(by='Importance', ascending=False)
)

# Bar chart of the sorted importances, drawn through the explicit Axes API
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(importance_df['Feature'], importance_df['Importance'], color='teal')
ax.set_title("Feature Importance")
ax.set_xlabel("Features")
ax.set_ylabel("Importance Score")
plt.show()



# **THE END.**