## Decision tree classifier/regressor

In [2]:

from src import trees

from sklearn.datasets import load_iris
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#### Decision tree classifier example with Iris dataset

In [6]:
# Iris demo: fit the project's decision tree classifier and inspect the result.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(f"Data shapes - X_train: {X_train.shape}, X_test: {X_test.shape},  y_train: {y_train.shape},  y_test: {y_test.shape}\n")

# Fit a tree capped at depth 3 on the training split, then score its
# predictions against the held-out labels.
clf = trees.DecisionTreeClassifier(max_depth=3)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Show the learned splits; without feature names the tree is printed using
# positional Feature[i] labels.
print("\nTree Structure:\n")
clf.print_tree()



Data shapes - X_train: (120, 4), X_test: (30, 4),  y_train: (120,),  y_test: (30,)

Accuracy: 1.00

Tree Structure:

If Feature[2] <= 1.95:
  Leaf: 0
Else:
  If Feature[2] <= 4.75:
    If Feature[3] <= 1.65:
      Leaf: 1
    Else:
      Leaf: 2
  Else:
    If Feature[3] <= 1.75:
      Leaf: 1
    Else:
      Leaf: 2


#### Decision tree classifier example with Wine dataset

In [7]:
# Wine demo: same workflow as a plain fit/predict/score run, but the tree is
# printed with the dataset's own column names.
wine = load_wine()
X, y, names = wine.data, wine.target, wine.feature_names

# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(f"Data shapes - X_train: {X_train.shape}, X_test: {X_test.shape},  y_train: {y_train.shape},  y_test: {y_test.shape}\n")

# Fit a tree capped at depth 5 on the training split, then score its
# predictions against the held-out labels.
clf = trees.DecisionTreeClassifier(max_depth=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Show the learned splits, labelling each test with its feature name rather
# than a positional index.
print("\nTree Structure:\n")
clf.print_tree(feature_names=names)



Data shapes - X_train: (142, 13), X_test: (36, 13),  y_train: (142,),  y_test: (36,)

Accuracy: 0.94

Tree Structure:

If color_intensity <= 3.81:
  If proline <= 1002.50:
    If ash <= 2.93:
      Leaf: 1
    Else:
      Leaf: 0
  Else:
    Leaf: 0
Else:
  If flavanoids <= 1.40:
    Leaf: 2
  Else:
    If proline <= 724.50:
      If alcohol <= 13.14:
        Leaf: 1
      Else:
        Leaf: 0
    Else:
      Leaf: 0
