# Decision Tree Classifier Playground 

In [5]:
!pip install pandas



In [3]:
import pandas as pd

from decision_tree import DecisionTreeClassifier

## Loading dataset.

Original raw dataset
```csv
#name,legs,color
Lion,4,0
Monkey,4,1
Parrot,2,2
Snake,0,2
Bear,4,1
```

Normalized dataset (animal names mapped to integer class ids)

```csv
#name,legs,color
0,4,0
1,4,1
2,2,2
3,0,2
4,4,1
```

Class mapping.

```
Lion:0
Monkey:1
Parrot:2
Snake:3
Bear:4
```

In [4]:
# Read the normalized animals table (animal names already encoded as integer ids).
df = pd.read_csv('dataset/normalized_animals.csv')

# Feature matrix: the `legs` and `color` columns.
X = df.loc[:, ['legs', 'color']].values
# Target vector: the encoded `name` column.
y = df.loc[:, 'name'].values

## Train DecisionTreeClassifier

In [5]:
# Build the classifier with its default constructor arguments, then fit it on
# the feature matrix X and the target vector y loaded from the CSV.
clf = DecisionTreeClassifier()
clf.fit(X, y)

In [6]:
# Maps each integer class id back to its animal name (ids follow list order).
labels = dict(enumerate(['Lion', 'Monkey', 'Parrot', 'Snake', 'Bear']))


def print_cls(cls):
    """Print the animal name for class id ``cls``.

    Prints ``'Unknown'`` when ``cls`` is None. Any other value is looked up
    in ``labels``; an id missing from ``labels`` raises ``KeyError``.
    """
    name = 'Unknown' if cls is None else labels[cls]
    print(name)

In [7]:
# Print the fitted tree; `labels` is passed so class ids are shown as animal names.
clf.print_tree(labels)

 - Feature: 0 < 2
  - Class: Snake Leaf: True
  - Feature: 0 < 4
   - Class: Parrot Leaf: True
   - Feature: 1 < 1
    - Class: Lion Leaf: True
    - Class: Unknown Leaf: True


## Test Prediction.

Explainer: in the training data, `Monkey` and `Bear` share exactly the same feature vector `[4, 1]` (4 legs, color 1). Because the two classes are identical in every feature, no split on `legs` or `color` can separate them, so these features are not discriminative for distinguishing `Monkey` from `Bear`. The tree therefore ends in a leaf with no single class, and predicting `[4, 1]` yields `Unknown` instead of an accurate classification.

In [11]:
# Probe the tree with the feature rows of the training set, in dataset order
# (Lion, Monkey, Parrot, Snake, Bear). Monkey and Bear share the row [4, 1].
test_cases = [[4, 0], [4, 1], [2, 2], [0, 2], [4, 1]]

for features in test_cases:
    # print_cls shows 'Unknown' when the prediction is None.
    predictions = clf.predict(features)
    print_cls(predictions)

Lion
Unknown
Parrot
Snake
Unknown


In [18]:
def print_tree(node, depth=0, label_names=None):
    """Recursively print a decision tree, one node per line.

    Every level of nesting is indented by one additional space.

    Args:
        node: Tree node as a dict. A leaf has a truthy ``'leaf'`` entry and a
            ``'label'`` entry; an internal node has a falsy ``'leaf'`` entry
            plus ``'feature_idx'``, ``'threshold'``, ``'left'`` and ``'right'``.
        depth: Nesting level of ``node``; the root is 0.
        label_names: Optional mapping from class id to display name. When
            omitted, the notebook-level ``labels`` dict is used, so existing
            calls such as ``print_tree(clf.tree)`` behave as before.

    Raises:
        KeyError: If a leaf label is not None and is missing from the mapping.
    """
    indent = depth * ' '
    if node['leaf']:
        label_idx = node['label']
        if label_idx is None:
            # A leaf without a class is rendered as 'Unknown'.
            label = 'Unknown'
        else:
            # Resolve the mapping only when it is needed, so an explicit
            # `label_names` never touches the global `labels`.
            names = labels if label_names is None else label_names
            label = names[label_idx]
        print(f"{indent} - Class: {label} Leaf: {node['leaf']}")
    else:
        print(f"{indent} - Feature: {node['feature_idx']} < {node['threshold']}")
        print_tree(node['left'], depth + 1, label_names)
        print_tree(node['right'], depth + 1, label_names)

In [19]:
# Raw training data, for reference when reading the printed tree:
#   name,legs,color
#   Lion,4,0
#   Monkey,4,1
#   Parrot,2,2
#   Snake,0,2
#   Bear,4,1
print_tree(clf.tree)

 - Feature: 0 < 2
  - Class: Snake Leaf: True
  - Feature: 0 < 4
   - Class: Parrot Leaf: True
   - Feature: 1 < 1
    - Class: Lion Leaf: True
    - Class: Unknown Leaf: True
