In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report

# 1. Load the Dataset
# Iris is the classic classification dataset bundled with scikit-learn.
data = load_iris()
y = data["target"]
X = pd.DataFrame(data=data["data"], columns=data["feature_names"])

# Sanity check: show the feature column names and preview the first rows.
print("Feature Names:", data["feature_names"])
print("First 5 rows of data:\n", X.head(5))

# 2. Split the Data
# Hold out 20% of the samples for testing and train on the remaining 80%.
# The fixed random_state makes the shuffle (and thus the split) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 3. Build the Decision Tree Model
# Configure a Gini-impurity tree capped at depth 3 (seeded for repeatable
# results) and train it on the training split in one step; fit() returns
# the fitted estimator itself.
clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

# 4. Make Predictions and Evaluate
# Predict labels for the held-out test samples.
y_pred = clf.predict(X_test)

# Report overall accuracy first, then the per-class breakdown labelled with
# the dataset's class names.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\nModel Accuracy: {accuracy:.2f}")
print(
    "\nClassification Report:\n",
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=data.target_names,
    ),
)

# 5. Visualize the Decision Tree
# Draw the fitted tree with nodes coloured by majority class and rounded boxes.
plt.figure(figsize=(12, 8))
plot_tree(
    clf,
    # Pass plain lists: `data.target_names` is a NumPy array, and some
    # scikit-learn releases (1.2.x) only accept `list` or None for these
    # arguments and raise InvalidParameterError otherwise.
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
    filled=True,
    rounded=True,
)
plt.title("Decision Tree Visualization")
plt.show()

: 