In [None]:
import pandas as pd

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
from sklearn.tree import DecisionTreeClassifier, plot_tree

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

In [None]:
import matplotlib.pyplot as plt

In [None]:
import seaborn as sns

In [None]:
# Load the dataset into `df`.
# A missing file is surfaced as a FileNotFoundError (chained to the original)
# instead of calling exit(): exit() would end the interpreter/kernel without an
# error status or traceback, whereas raising stops execution at this cell and
# tells the reader exactly what went wrong.
try:
    df = pd.read_csv('data.csv')
except FileNotFoundError as err:
    raise FileNotFoundError(
        "'data.csv' not found. Please ensure the file is in the correct directory."
    ) from err
else:
    # Only reached when read_csv returned without raising.
    print("Dataset loaded successfully.")


In [None]:
# First look at the loaded frame: a preview of the top rows, the column
# summary from DataFrame.info(), and the row count per 'species' value.
print(
    "\n--- Initial Data Inspection ---",
    "First 5 rows of the dataset:",
    df.head(),
    sep="\n",
)

print("\nDataset Info:")
df.info()  # info() writes its report itself, so it is not wrapped in print()

print(
    "\nValue counts for 'species':",
    df['species'].value_counts(),
    sep="\n",
)

In [None]:
# Target is the 'species' column; every other column is kept as a feature.
y = df['species']
X = df.drop(columns='species')

In [None]:
# Encode the species labels as integers. The fitted encoder is kept in `le`,
# and its `classes_` attribute (the original labels, indexed by encoded value)
# is kept in `target_names` for use in reports and plots.
le = LabelEncoder()
y_encoded, target_names = le.fit_transform(y), le.classes_

In [None]:
# Show the label set next to a sample of the encoded values.
print(
    f"\nOriginal species labels: {target_names}",
    f"Encoded species labels (first 5): {y_encoded[:5]}",
    sep="\n",
)

In [None]:
# Hold out 30% of the rows for testing. `stratify` is given the encoded labels
# and `random_state` is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.3,
    random_state=42,
    stratify=y_encoded,
)

In [None]:
# Report the (rows, columns) shape of each feature split.
print(
    f"\nTraining set shape: {X_train.shape}",
    f"Testing set shape: {X_test.shape}",
    sep="\n",
)

In [None]:
print("\n--- Training Initial (Unpruned) Decision Tree ---")

# Baseline model: only `random_state` is set, so every other hyperparameter
# (including depth and pruning controls) stays at the library default.
dt_classifier_unpruned = DecisionTreeClassifier(random_state=42)

# Left as the final expression so the fitted estimator is the cell's output.
dt_classifier_unpruned.fit(X=X_train, y=y_train)

In [None]:
# Status message only: this prints whenever the cell runs and does not itself
# check that the model was fitted.
print("Unpruned Decision Tree model trained successfully.")

In [None]:
# Score the unpruned tree on the held-out split.
y_pred_unpruned = dt_classifier_unpruned.predict(X_test)

accuracy_unpruned = accuracy_score(y_true=y_test, y_pred=y_pred_unpruned)

# 'weighted' averaging is used for the multi-class F1 score.
f1_unpruned = f1_score(
    y_true=y_test,
    y_pred=y_pred_unpruned,
    average='weighted',
)

conf_matrix_unpruned = confusion_matrix(y_true=y_test, y_pred=y_pred_unpruned)

class_report_unpruned = classification_report(
    y_true=y_test,
    y_pred=y_pred_unpruned,
    target_names=target_names,
)

In [None]:
# Headline scores for the unpruned model, one per line.
print(
    f"\nUnpruned Model Accuracy: {accuracy_unpruned:.4f}",
    f"Unpruned Model F1-Score (weighted): {f1_unpruned:.4f}",
    sep="\n",
)

# Each heading ends in a newline and print() then inserts its default
# single-space separator before the value.
print(
    "\nUnpruned Model Confusion Matrix:\n",
    conf_matrix_unpruned,
)
print(
    "\nUnpruned Model Classification Report:\n",
    class_report_unpruned,
)

In [None]:
# Draw the full (unpruned) tree.
print("\n--- Visualizing Unpruned Tree ---")
plt.figure(figsize=(20, 15))
plot_tree(dt_classifier_unpruned,
          feature_names=X.columns.tolist(),
          # Pass class names as a plain list of str, mirroring feature_names:
          # `target_names` is the encoder's ndarray, which some scikit-learn
          # releases reject for this parameter, and str() also covers label
          # columns that are not already strings.
          class_names=[str(name) for name in target_names],
          filled=True,
          rounded=True,
          fontsize=10)
plt.title("Unpruned Decision Tree for Iris Classification", fontsize=16)
plt.show()
print("Unpruned tree visualization displayed.")

In [None]:
print(
    "\n--- Pruning the Decision Tree ---",
    "\nPruning with max_depth=3:",
    sep="\n",
)

# Same seed as the unpruned model; the only difference is the depth cap of 3.
dt_classifier_pruned_depth = DecisionTreeClassifier(random_state=42, max_depth=3)

# Left as the final expression so the fitted estimator is the cell's output.
dt_classifier_pruned_depth.fit(X=X_train, y=y_train)

In [None]:
# Score the depth-limited tree on the same held-out split, using the same
# metrics as the unpruned model.
y_pred_pruned_depth = dt_classifier_pruned_depth.predict(X_test)

accuracy_pruned_depth = accuracy_score(
    y_true=y_test,
    y_pred=y_pred_pruned_depth,
)
f1_pruned_depth = f1_score(
    y_true=y_test,
    y_pred=y_pred_pruned_depth,
    average='weighted',
)

In [None]:
# Headline scores for the max_depth=3 model, one per line.
print(
    f"Pruned (max_depth=3) Model Accuracy: {accuracy_pruned_depth:.4f}",
    f"Pruned (max_depth=3) Model F1-Score (weighted): {f1_pruned_depth:.4f}",
    sep="\n",
)

# The per-class report is computed inline here rather than stored in a variable.
print(
    "\nPruned (max_depth=3) Model Classification Report:\n",
    classification_report(
        y_true=y_test,
        y_pred=y_pred_pruned_depth,
        target_names=target_names,
    ),
)

In [None]:
# Draw the depth-limited tree.
plt.figure(figsize=(15, 10))
plot_tree(dt_classifier_pruned_depth,
          feature_names=X.columns.tolist(),
          # Pass class names as a plain list of str, mirroring feature_names:
          # `target_names` is the encoder's ndarray, which some scikit-learn
          # releases reject for this parameter, and str() also covers label
          # columns that are not already strings.
          class_names=[str(name) for name in target_names],
          filled=True,
          rounded=True,
          fontsize=10)
plt.title("Decision Tree Pruned with max_depth=3", fontsize=16)
plt.show()
print("Depth-pruned tree visualization displayed.")

In [None]:
# Section heading for the cost-complexity (ccp_alpha) pruning steps that follow.
print("\nPruning with ccp_alpha (finding optimal alpha):")

In [None]:
# Compute the cost-complexity pruning path on the training data and keep its
# two arrays under their own names.
path = dt_classifier_unpruned.cost_complexity_pruning_path(X_train, y_train)
ccp_alphas = path.ccp_alphas
impurities = path.impurities

In [None]:
# Fit one tree per candidate alpha from the pruning path. `clfs[i]` corresponds
# to `ccp_alphas[i]`, so the two sequences stay the same length and order.
clfs = []
for ccp_alpha in ccp_alphas:
    # Floating-point round-off in the pruning path can yield a tiny negative
    # alpha, which DecisionTreeClassifier rejects; clamp at zero. Non-negative
    # alphas pass through unchanged.
    clf = DecisionTreeClassifier(random_state=42, ccp_alpha=max(ccp_alpha, 0.0))
    clf.fit(X_train, y_train)
    clfs.append(clf)
