In [420]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import _tree
import tensorflow as tf
from tensorflow.keras import layers, models
import pandas as pd
from pathlib import Path
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split

In [421]:
import tensorflow_decision_forests as tfdf
from pathlib import Path
import pandas as pd
import numpy as np

# Function to split dataset into training and testing datasets
def split_dataset(dataset, test_ratio=0.1, seed=None):
    """Randomly split a pandas DataFrame into training and testing subsets.

    Every row is assigned independently: one uniform draw in [0, 1) is made
    per row and the row goes to the test subset when its draw is below
    ``test_ratio``. The subset sizes are therefore only approximately
    ``(1 - test_ratio) * len(dataset)`` and ``test_ratio * len(dataset)``.

    Args:
        dataset: pandas DataFrame to split.
        test_ratio: Probability that a given row lands in the test subset.
        seed: Optional seed for a dedicated random generator. Passing the
            same seed (with the same number of rows) reproduces the same
            split. When ``None`` (the default) the draws come from NumPy's
            global random state, exactly as before this parameter existed,
            so a prior ``np.random.seed(...)`` call is still honoured.

    Returns:
        A ``(train, test)`` tuple of DataFrames that together contain every
        row of ``dataset`` exactly once, with original index labels kept.
    """
    n_rows = len(dataset)
    if seed is None:
        # Legacy path: shared global state, reproducible only if the caller
        # seeded it beforehand.
        draws = np.random.rand(n_rows)
    else:
        # Isolated generator: does not read or disturb the global state.
        draws = np.random.default_rng(seed).random(n_rows)
    test_indices = draws < test_ratio
    return dataset[~test_indices], dataset[test_indices]

# Load the dataset
data = Path('../Resources/clean_fetal_health.csv')
dataset_df = pd.read_csv(data)

# split_dataset draws from NumPy's global random state, so seed it here;
# otherwise the train/test membership changes on every kernel restart.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# Split the dataset into training and testing datasets
train_ds_pd, test_ds_pd = split_dataset(dataset_df, test_ratio=0.1)

# Sanity check: the two parts must account for every loaded row.
assert len(train_ds_pd) + len(test_ds_pd) == len(dataset_df)
print("{} examples in training, {} examples for testing.".format(len(train_ds_pd), len(test_ds_pd)))

# Function to duplicate the training dataset
def duplicate_dataset(dataset, num_duplicates=2):
    """Return ``dataset`` stacked on top of itself ``num_duplicates`` times.

    ``num_duplicates`` is the total number of copies in the result, not the
    number of extra copies: 1 yields a frame with the same rows as the
    input, 2 doubles the row count, and so on.

    The original index labels are kept, so for ``num_duplicates > 1`` the
    returned frame contains repeated index labels.

    Args:
        dataset: pandas DataFrame to replicate.
        num_duplicates: Total number of copies to concatenate (>= 1).

    Returns:
        A new DataFrame with ``len(dataset) * num_duplicates`` rows.

    Raises:
        ValueError: If ``num_duplicates`` is less than 1. Without this check
            the empty list would reach ``pd.concat`` and fail with the less
            helpful "No objects to concatenate".
    """
    if num_duplicates < 1:
        raise ValueError(
            "num_duplicates must be at least 1, got {!r}".format(num_duplicates)
        )
    duplicated_datasets = [dataset] * num_duplicates
    return pd.concat(duplicated_datasets)

# Optionally enlarge the training set by stacking copies of it.
# num_duplicates is the total number of copies kept, so 1 leaves the
# training set at its current size.
num_duplicates = 1
train_ds_pd = duplicate_dataset(train_ds_pd, num_duplicates=num_duplicates)

# Report the training size after the duplication step
duplicated_train_size = len(train_ds_pd)
print("{} examples in new training after duplication.".format(duplicated_train_size))

# Wrap both pandas frames as TensorFlow datasets, using the same label column
# for each (training frame first, then the held-out test frame).
label = "fetal_health"
train_ds, test_ds = (
    tfdf.keras.pd_dataframe_to_tf_dataset(frame, label=label)
    for frame in (train_ds_pd, test_ds_pd)
)

# Number of attributes tried at each split: every column of the training
# frame except one (presumably the label column), capped at 50.
non_label_column_count = len(train_ds_pd.columns) - 1
num_candidate_attributes = min(50, non_label_column_count)

# Random forest hyper-parameters, gathered in one place for readability
rf_hyperparameters = dict(
    num_candidate_attributes=num_candidate_attributes,
    growing_strategy="BEST_FIRST_GLOBAL",
    max_depth=8,
    min_examples=5,
    verbose=2,
)
model_1 = tfdf.keras.RandomForestModel(**rf_hyperparameters)

# Fit the forest on the training dataset
model_1.fit(train_ds)


1910 examples in training, 216 examples for testing.
1910 examples in new training after duplication.
Use 11 thread(s) for training
Use /var/folders/qq/8hq4nv8x5rz6hyggl1ylc_kh0000gn/T/tmpkd9o846c as temporary training directory
Reading training dataset...
Training tensor examples:
Features: {'baseline_FHR_bpm': <tf.Tensor 'data:0' shape=(None,) dtype=float64>, 'accelerations': <tf.Tensor 'data_1:0' shape=(None,) dtype=float64>, 'fetal_movement': <tf.Tensor 'data_2:0' shape=(None,) dtype=float64>, 'uterine_contractions': <tf.Tensor 'data_3:0' shape=(None,) dtype=float64>, 'light_decelerations': <tf.Tensor 'data_4:0' shape=(None,) dtype=float64>, 'severe_decelerations': <tf.Tensor 'data_5:0' shape=(None,) dtype=float64>, 'prolonged_decelerations': <tf.Tensor 'data_6:0' shape=(None,) dtype=float64>, 'abnorm_ST_Var_Perc': <tf.Tensor 'data_7:0' shape=(None,) dtype=float64>, 'mean_ST_Var': <tf.Tensor 'data_8:0' shape=(None,) dtype=float64>, 'abnorm_LT_Var_Perc': <tf.Tensor 'data_9:0' shape=

[INFO 24-04-05 00:42:58.9038 EDT kernel.cc:771] Start Yggdrasil model training
[INFO 24-04-05 00:42:58.9046 EDT kernel.cc:772] Collect training examples
[INFO 24-04-05 00:42:58.9046 EDT kernel.cc:785] Dataspec guide:
column_guides {
  column_name_pattern: "^__LABEL$"
  type: CATEGORICAL
  categorial {
    min_vocab_frequency: 0
    max_vocab_count: -1
  }
}
default_column_guide {
  categorial {
    max_vocab_count: 2000
  }
  discretized_numerical {
    maximum_num_bins: 255
  }
}
ignore_columns_without_guides: false
detect_numerical_as_discretized_numerical: false

[INFO 24-04-05 00:42:58.9048 EDT kernel.cc:391] Number of batches: 2
[INFO 24-04-05 00:42:58.9048 EDT kernel.cc:392] Number of examples: 1910
[INFO 24-04-05 00:42:58.9051 EDT kernel.cc:792] Training dataset:
Number of records: 1910
Number of columns: 22

Number of columns by type:
	NUMERICAL: 21 (95.4545%)
	CATEGORICAL: 1 (4.54545%)

Columns:

NUMERICAL: 21 (95.4545%)
	0: "FHR_hist_max" NUMERICAL mean:164.1 min:122 max:238 

Model trained in 0:00:00.173624
Compiling model...
Model compiled.


<tf_keras.src.callbacks.History at 0x34be6abd0>

In [422]:
# Compile the model with the metric to report during evaluation
model_1.compile(metrics=["accuracy"])

# Evaluate the model once on the test dataset
evaluation = model_1.evaluate(test_ds, return_dict=True)
print()

# Print evaluation metrics
for name, value in evaluation.items():
    print(f"{name}: {value:.4f}")

# Iterate through the test dataset and print prediction probabilities.
# The per-batch labels are not needed here; they are bound to a throwaway
# name so the loop does not overwrite the global `label` column-name string.
for features, _batch_labels in test_ds:
    predictions = model_1.predict(features)  # Get predictions for the current batch
    print("Prediction probabilities:")
    print(predictions)



loss: 0.0000
accuracy: 0.9537

loss: 0.0000
accuracy: 0.9537
Prediction probabilities:
[[0.         0.11666662 0.10666663 0.77666605]
 [0.         0.02       0.03333334 0.9466659 ]
 [0.         0.00333333 0.         0.99666584]
 [0.         0.9466659  0.02       0.03333334]
 [0.         0.         0.12666662 0.8733326 ]
 [0.         0.8499993  0.14999993 0.        ]
 [0.         0.99999917 0.         0.        ]
 [0.         0.81999934 0.1799999  0.        ]
 [0.         0.9933325  0.00666667 0.        ]
 [0.         0.816666   0.18333323 0.        ]
 [0.         0.99999917 0.         0.        ]
 [0.         0.99999917 0.         0.        ]
 [0.         0.00333333 0.         0.99666584]
 [0.         0.77666605 0.2233332  0.        ]
 [0.         0.00666667 0.9933325  0.        ]
 [0.         0.99999917 0.         0.        ]
 [0.         0.826666   0.17333324 0.        ]
 [0.         0.6833328  0.31666645 0.        ]
 [0.         0.99999917 0.         0.        ]
 [0.         0.9999

2024-04-05 00:42:59.438426: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [423]:
# Export the trained model: save it as a SavedModel, then convert it for TensorFlow.js
import tensorflow as tf
import tensorflowjs as tfjs

# Locations for the exported artifacts
saved_model_dir = "./my_saved_model"
tfjs_output_dir = "./tfjs_model"

# Write the trained model to disk in the SavedModel format
tf.saved_model.save(model_1, saved_model_dir)

# Convert that SavedModel to TensorFlow.js format, targeting tfjs_output_dir
tfjs.converters.tf_saved_model_conversion_v2.convert_tf_saved_model(
    saved_model_dir, tfjs_output_dir
)

INFO:tensorflow:Assets written to: ./my_saved_model/assets


INFO:tensorflow:Assets written to: ./my_saved_model/assets


'NoneType' object has no attribute 'name'


[INFO 24-04-05 00:42:59.6572 EDT kernel.cc:1233] Loading model from path ./my_saved_model/assets/ with prefix ff4d3ef6d3d74b56
[INFO 24-04-05 00:42:59.6805 EDT decision_forest.cc:734] Model loaded with 300 root(s), 18300 node(s), and 20 input feature(s).
[INFO 24-04-05 00:42:59.6805 EDT kernel.cc:1061] Use fast generic engine
'NoneType' object has no attribute 'name'


weight StatefulPartitionedCall/random_forest_model_36/StatefulPartitionedCall/RaggedConstant/Const with shape (1,) and dtype int64 was auto converted to the type int32
weight StatefulPartitionedCall/random_forest_model_36/StatefulPartitionedCall/RaggedConstant/Const_1 with shape (1,) and dtype int64 was auto converted to the type int32
