This notebook combines the usage example from

https://github.com/hchiam/decision-forests#usage-example

and

https://www.tensorflow.org/decision_forests/tutorials/beginner_colab
https://colab.research.google.com/drive/1y5JQxgP2eQKPno5rhWEP-pEiGgy1NByO#scrollTo=BRKLWIWNuOZ1

The hidden code cell below lets you limit the output height in colab with

`%set_cell_height 100`

In [2]:
#@title

from IPython.core.magic import register_line_magic
from IPython.display import Javascript
from IPython.display import display as ipy_display

# Some of the model training logs can cover the full
# screen if not compressed to a smaller viewport.
# This magic allows setting a max height for a cell.
@register_line_magic
def set_cell_height(size):
  """Line magic that caps the output height of the current Colab cell.

  Usage: `%set_cell_height 100`.

  Args:
    size: Value passed as `maxHeight` to Colab's `setIframeHeight` call.
  """
  script = f"google.colab.output.setIframeHeight(0, true, {{maxHeight: {size}}})"
  ipy_display(Javascript(script))

In [21]:
# silence output:
%%capture

!pip install tensorflow
!pip install tensorflow_decision_forests
!pip install pandas
!pip install numpy
!pip install 'tensorflowjs>=4.4.0'

import tensorflow as tf
import tensorflow_decision_forests as tfdf
import pandas as pd
import numpy as np
import tensorflowjs as tfjs
from google.colab import files


The following cell is just setup to get the CSV data into `train_ds` and `test_ds` and set up `label`:

In [9]:
# Download the dataset
!wget -q https://storage.googleapis.com/download.tensorflow.org/data/palmer_penguins/penguins.csv -O /tmp/penguins.csv

# Load a dataset into a Pandas Dataframe.
dataset_df = pd.read_csv("/tmp/penguins.csv")

# Display the first 3 examples.
dataset_df.head(3)

# Split the dataset into a training and a testing dataset.

def split_dataset(dataset, test_ratio=0.30, seed=None):
  """Randomly splits a pandas DataFrame into a training and a testing part.

  Each row is assigned to the test part independently with probability
  `test_ratio`, so the resulting sizes are only approximately proportional.

  Args:
    dataset: DataFrame to split.
    test_ratio: Probability of a row landing in the test part.
    seed: Optional seed for a reproducible split. When None, NumPy's global
      random state is used (the previous behaviour).

  Returns:
    A `(train, test)` tuple of DataFrames that together hold every row of
    `dataset` exactly once.
  """
  if seed is None:
    draws = np.random.rand(len(dataset))
  else:
    # A dedicated generator keeps the split repeatable without touching the
    # global random state.
    draws = np.random.default_rng(seed).random(len(dataset))
  test_mask = draws < test_ratio
  return dataset[~test_mask], dataset[test_mask]


train_ds_pd, test_ds_pd = split_dataset(dataset_df)
print(f"{len(train_ds_pd)} examples in training, "
      f"{len(test_ds_pd)} examples for testing.")

# Column holding the class to predict.
label = "species"
# Convert both Pandas frames into TensorFlow datasets.
train_ds, test_ds = (
    tfdf.keras.pd_dataframe_to_tf_dataset(frame, label=label)
    for frame in (train_ds_pd, test_ds_pd)
)

250 examples in training, 94 examples for testing.


In [16]:
%set_cell_height 400

# # Load the dataset in a Pandas dataframe.
# train_df = pd.read_csv("project/train.csv")
# test_df = pd.read_csv("project/test.csv")

# # Convert the dataset into a TensorFlow dataset.
# train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="my_label")
# test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(test_df, label="my_label")

# Train the model
model = tfdf.keras.RandomForestModel()
model.fit(train_ds)

# Look at the model.
model.summary()

# Evaluate the model.
model.evaluate(test_ds)

<IPython.core.display.Javascript object>

Use /tmp/tmp9cc06pxn as temporary training directory
Reading training dataset...




Training dataset read in 0:00:00.236018. Found 250 examples.
Training model...
Model trained in 0:00:00.084901
Compiling model...




Model compiled.




Model: "random_forest_model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
Total params: 1 (1.00 Byte)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 1 (1.00 Byte)
_________________________________________________________________
Type: "RANDOM_FOREST"
Task: CLASSIFICATION
Label: "__LABEL"

Input Features (7):
	bill_depth_mm
	bill_length_mm
	body_mass_g
	flipper_length_mm
	island
	sex
	year

No weights

Variable Importance: INV_MEAN_MIN_DEPTH:
    1.    "bill_length_mm"  0.476350 ################
    2. "flipper_length_mm"  0.406154 ###########
    3.     "bill_depth_mm"  0.332945 #####
    4.            "island"  0.309721 ####
    5.       "body_mass_g"  0.283886 ##
    6.               "sex"  0.252181 
    7.              "year"  0.251575 

Variable Importance: NUM_AS_ROOT:
    1.    "bill_length_mm" 124.000000 ################
    2. "flipper_length_mm" 122.000000 ###############
    3.    





0.0

In [18]:
# so it's easier to tell that the colab actually saved a new project/model
!rm -rf project/model

In [24]:
# # Export to a TensorFlow SavedModel.
# # Note: the model is compatible with Yggdrasil Decision Forests.
# model.save("project/model")

In [23]:
# Save the model in the SavedModel format
tf.saved_model.save(model, "./saved_model")

# Convert the SavedModel to TensorFlow.js and save as a zip file
tfjs.converters.tf_saved_model_conversion_v2.convert_tf_saved_model("./saved_model", "./tfjs_model")

# Download the converted TFJS model
!zip -r tfjs_model.zip tfjs_model/
files.download("tfjs_model.zip")

'NoneType' object has no attribute 'name'


weight StatefulPartitionedCall/random_forest_model_4/StatefulPartitionedCall/RaggedConstant/Const with shape (1,) and dtype int64 was auto converted to the type int32
weight StatefulPartitionedCall/random_forest_model_4/StatefulPartitionedCall/RaggedConstant/Const_1 with shape (1,) and dtype int64 was auto converted to the type int32
  adding: tfjs_model/ (stored 0%)
  adding: tfjs_model/group1-shard1of1.bin (deflated 36%)
  adding: tfjs_model/model.json (deflated 91%)
  adding: tfjs_model/assets.zip (stored 0%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [39]:
# A single hand-written example to run through the trained model.
example_penguin = pd.DataFrame({
    'island': tf.constant(["Torgersen"]),
    'bill_length_mm': tf.constant([39.1]),
    'bill_depth_mm': tf.constant([17.3]),
    'flipper_length_mm': tf.constant([3.1]),
    'body_mass_g': tf.constant([1000.0]),
    'sex': tf.constant(["Female"]),
    'year': tf.constant([2007], dtype=tf.int32),
    # Placeholder target value so the frame can be converted with a label column.
    'label': [0],
})

tf_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(example_penguin, label="label")

# Do not reuse `label` as the loop variable: it is the notebook-level name of
# the target column ('species') and would otherwise be overwritten by a tensor.
for example_features, example_label in tf_dataset:
  print("Features:", example_features)
  print("label:", example_label)

predictions = model.predict(tf_dataset, verbose=0)

print(predictions)

Features: {'island': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Torgersen'], dtype=object)>, 'bill_length_mm': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([39.1], dtype=float32)>, 'bill_depth_mm': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([17.3], dtype=float32)>, 'flipper_length_mm': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([3.1], dtype=float32)>, 'body_mass_g': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([1000.], dtype=float32)>, 'sex': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Female'], dtype=object)>, 'year': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([2007], dtype=int32)>}
label: tf.Tensor([0], shape=(1,), dtype=int64)
[[0.9933325  0.00666667 0.        ]]


In [53]:
# Index of the most probable class for the first (and only) example.
index = tf.math.argmax(predictions, axis=1).numpy()[0]
print(f'Predicted index: {index}')
# pd_dataframe_to_tf_dataset encodes string labels as indices into the
# alphabetically sorted class names, so rebuild that ordering from the training
# data instead of hard-coding it (Adelie, Gentoo, Chinstrap would swap the
# last two classes).
labelClasses = sorted(train_ds_pd["species"].unique())
print(f'Predicted class: {labelClasses[index]}')

Predicted index: 0
Predicted class: Adelie
