# Random Forest with ZKML


## Create and export model via TF -> TFLite

In [None]:
# Detect Google Colab and, only there, install the extra packages this
# notebook relies on. Outside Colab nothing is installed.
import subprocess
import sys

try:
    import google.colab  # noqa: F401  (imported only to probe for Colab)
except ImportError:
    # Not running in Colab: nothing to install here.
    pass
else:
    for package in ("onnx", "hummingbird-ml"):
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        except subprocess.CalledProcessError as err:
            # Installation stays best-effort, but the failure is reported
            # instead of being silently swallowed.
            print(f"Failed to install {package}: {err}", file=sys.stderr)

import tensorflow as tf
import os
import sys
from sklearn.ensemble import RandomForestClassifier as Rf
import pandas as pd
from torch import nn
import numpy as np
sys.path.insert(0, "../../..")
from zkml.python.converter import Converter
from sklearn.model_selection import train_test_split

# Base name shared by every artifact this notebook reads or writes.
MODEL_NAME = "random_forest"


def _in_cwd(relative):
    """Return *relative* joined onto the current working directory."""
    return os.path.join(os.getcwd(), relative)


# Artifacts produced by the notebook, all placed in the working directory.
TFLITE_PATH = _in_cwd(f"{MODEL_NAME}.tflite")
OUT_MODEL_PATH = _in_cwd(f"{MODEL_NAME}_model.msgpack")
OUT_CONFIG_PATH = _in_cwd(f"{MODEL_NAME}_config.json")
OUT_INPUT_NPY_PATH = _in_cwd(f"{MODEL_NAME}_input.npy")
OUT_INPUT_MSG_PATH = _in_cwd(f"{MODEL_NAME}_input.msgpack")
PB_PATH = _in_cwd(f"{MODEL_NAME}.pb")
ONNX_PATH = _in_cwd(f"{MODEL_NAME}.onnx")

# Locations of the zkml tooling, relative to the working directory.
INPUT_CONVERTER_PATH = _in_cwd("../../src/zkml/python/input_converter.py")
TIME_CIRCUIT_PATH = _in_cwd("../../src/zkml/target/release/time_circuit")

# CSV files holding the iris features and class labels.
filepath_iris_input = "iris_input_data.csv"
filepath_iris_classes = "iris_classes.csv"
# Install TensorFlow and TensorFlow Decision Forests (TF-DF) with the IPython
# `%pip` magic; this line only works inside a notebook/IPython session.
%pip install tensorflow tensorflow_decision_forests

# Load TF-DF (aliased `tfdf`) and pandas, which are used below to build the
# datasets and train the model.
import tensorflow_decision_forests as tfdf
import pandas as pd

# Column layout of the training frames: four feature columns plus the label.
FEATURE_COLUMNS = ["sepal.length", "sepal.width", "petal.length", "petal.width"]
LABEL_COLUMN = "variety"


def _labelled_frame(features, labels):
    """Wrap a feature array in a DataFrame and attach the label column."""
    frame = pd.DataFrame(features, columns=FEATURE_COLUMNS)
    frame[LABEL_COLUMN] = labels
    return frame


# Read the features as a float32 array and the class labels from their own
# CSV (squeezed, so a single-column file yields a one-dimensional array).
features_frame = pd.read_csv(filepath_iris_input)
iris_data = features_frame.values.astype(np.float32)
labels_frame = pd.read_csv(filepath_iris_classes)
iris_labels = labels_frame.squeeze().values

# Split into train/test parts. No split size or seed is passed, so the
# library defaults apply.
X_train, X_test, y_train, y_test = train_test_split(iris_data, iris_labels)

# Rebuild labelled pandas DataFrames from the split arrays.
X_train = _labelled_frame(X_train, y_train)
X_test = _labelled_frame(X_test, y_test)

# Turn both frames into TensorFlow datasets keyed on the label column.
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(X_train, label=LABEL_COLUMN)
test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(X_test, label=LABEL_COLUMN)

# Fit a Random Forest with TF-DF's default settings.
model = tfdf.keras.RandomForestModel()
model.fit(train_ds)

# Print the structure of the trained model.
model.summary()

# Score the model on the held-out test dataset.
model.evaluate(test_ds)

# Export the trained model as a TensorFlow SavedModel, then convert that
# SavedModel to a TFLite flatbuffer.
# NOTE(review): TFLite may not support the ops used by TF-DF models — confirm
# that this conversion is expected to succeed.
model.save(MODEL_NAME, save_format="tf")
converter = tf.lite.TFLiteConverter.from_saved_model(MODEL_NAME)
try:
    tflite_model = converter.convert()
except Exception as e:
    # Catch broadly: no converter-specific error class is imported in this
    # notebook. Report the failure and re-raise so that no empty .tflite file
    # is written and later cells do not run without a converted model.
    print(f"Conversion failed with error: {e}")
    raise

# Save the TFLite model (only reached when the conversion succeeded).
with open(TFLITE_PATH, 'wb') as f:
    f.write(tflite_model)


## Convert model to msgpack using ZKML

In [None]:
# Build the zkml converter for the exported TFLite model.
converter = Converter(
    model_path=TFLITE_PATH,
    expose_output=False,
    commit=False,
    scale_factor=7,
    k=6,
    num_cols=2,
    num_randoms=64,
    use_selectors=True,
)

# Pack the model and its config; the end layer is set far above the start
# layer (presumably so that every layer is included — confirm).
packed = converter.to_msgpack(start_layer=0, end_layer=10000)
model_packed, config_packed = packed
if model_packed is None:
    raise Exception('Failed to convert model')

# Write the packed model first, then the packed config, as raw bytes.
for out_path, payload in (
    (OUT_MODEL_PATH, model_packed),
    (OUT_CONFIG_PATH, config_packed),
):
    with open(out_path, 'wb') as f:
        f.write(payload)

## Create msgpack from input

In [None]:
with open (OUT_INPUT_NPY_PATH, 'wb') as f:
    np.save(f, np.array(x[0]).reshape(-1, 1))

!python {INPUT_CONVERTER_PATH} --model_config {OUT_MODEL_PATH} --inputs {OUT_INPUT_NPY_PATH} --output {OUT_INPUT_MSG_PATH}


## Generate the proof

In [None]:
# Run the zkml `test_circuit` binary on the packed model and the packed input
# via an IPython shell escape. The equivalent `time_circuit` invocation is
# kept on the next line, disabled.
# NOTE(review): the trailing `kzg` argument presumably selects the commitment
# scheme — confirm against the zkml binaries.
#!../../src/zkml/target/release/time_circuit {OUT_MODEL_PATH} {OUT_INPUT_MSG_PATH} kzg
!../../src/zkml/target/release/test_circuit {OUT_MODEL_PATH} {OUT_INPUT_MSG_PATH} kzg
