<a href="https://colab.research.google.com/github/Trizzole/HelloAI/blob/main/DecisionTree2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow_decision_forests

In [2]:
import numpy as np
import pandas as pd
import tensorflow_decision_forests as tfdf

In [17]:
path = "/content/car_evaluation.csv"
pandas_dataset = pd.read_csv(path)

# Display the first 3 examples.
pandas_dataset.head(3)

Unnamed: 0,vhigh,vhigh.1,2,2.1,small,low,unacc
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc


In [22]:
label = "unacc"

classes = list(pandas_dataset[label].unique())
print(f"Label classes: {classes}")
# >> Label classes: ['unacc', 'acc', 'good', 'vgood']

pandas_dataset[label] = pandas_dataset[label].map(classes.index)

Label classes: ['unacc', 'acc', 'vgood', 'good']


In [23]:
np.random.seed(1)
# Use the ~10% of the examples as the testing set
# and the remaining ~90% of the examples as the training set.
test_indices = np.random.rand(len(pandas_dataset)) < 0.1
pandas_train_dataset = pandas_dataset[~test_indices]
pandas_test_dataset = pandas_dataset[test_indices]

print("Training examples: ", len(pandas_train_dataset))
# >> Training examples: 309

print("Testing examples: ", len(pandas_test_dataset))
# >> Testing examples: 35

Training examples:  1554
Testing examples:  173


In [24]:
tf_train_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(pandas_train_dataset, label=label)
model = tfdf.keras.CartModel()
model.fit(tf_train_dataset)

Use /tmp/tmpn2tmfjld as temporary training directory
Reading training dataset...
Training dataset read in 0:00:00.440315. Found 1554 examples.
Training model...
Model trained in 0:00:00.092248
Compiling model...
Model compiled.


<keras.callbacks.History at 0x7f8216cb7f70>

In [7]:
tfdf.model_plotter.plot_model_in_colab(model, max_depth=10)

In [25]:
model.compile("accuracy")
print("Train evaluation: ", model.evaluate(tf_train_dataset, return_dict=True))
# >> Train evaluation:  {'loss': 0.0, 'accuracy': 0.96116}

tf_test_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(pandas_test_dataset, label=label)
print("Test evaluation: ", model.evaluate(tf_test_dataset, return_dict=True))
# >> Test evaluation:  {'loss': 0.0, 'accuracy': 0.97142}

Train evaluation:  {'loss': 0.0, 'accuracy': 0.9472329616546631}
Test evaluation:  {'loss': 0.0, 'accuracy': 0.9653179049491882}


In [26]:
!pip install keras-tuner

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [27]:
import keras_tuner as kt

def build_model(hp):
  model = tfdf.keras.CartModel(
      min_examples=hp.Choice("min_examples",
          # Try four possible values for "min_examples" hyperparameter.
          # min_examples=10 would limit the growth of the decision tree,
          # while min_examples=1 would lead to deeper decision trees.
         [1, 2, 5, 10]),
      validation_ratio=hp.Choice("validation_ratio",
         # Three possible values for the "validation_ratio" hyperparameter.
         [0.0, 0.05, 0.10]),
      )
  model.compile("accuracy")
  return model

tuner = kt.RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=10,
    directory="/tmp/tuner",
    project_name="tune_cart")

tuner.search(x=tf_train_dataset, validation_data=tf_test_dataset)
best_model = tuner.get_best_models()[0]

print("Best hyperparameters: ", tuner.get_best_hyperparameters()[0].values)
# >> Best hyperparameters:  {'min_examples': 2, 'validation_ratio': 0.0}

Use /tmp/tmpv5meoojc as temporary training directory
Best hyperparameters:  {'min_examples': 1, 'validation_ratio': 0.05}


In [28]:
model = tfdf.keras.CartModel(min_examples=2, validation_ratio=0.0)
model.fit(tf_train_dataset)

model.compile("accuracy")
print("Test evaluation: ", model.evaluate(tf_test_dataset, return_dict=True))
# >> Test evaluation:  {'loss': 0.0, 'accuracy': 1.0}

Use /tmp/tmpgwr51h2g as temporary training directory
Reading training dataset...
Training dataset read in 0:00:00.747770. Found 1554 examples.
Training model...
Model trained in 0:00:00.221031
Compiling model...
Model compiled.
Test evaluation:  {'loss': 0.0, 'accuracy': 0.9768785834312439}


In [29]:
tfdf.model_plotter.plot_model_in_colab(model, max_depth=10)