# Data Preparation

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

from mqt.predictor import ml

# Seed NumPy's global random number generator.
np.random.seed(10)

figure_of_merit = "expected_fidelity"
predictor = ml.Predictor()

# Request the prepared training data for the chosen figure of merit.
training_data = predictor.get_prepared_training_data(
    figure_of_merit=figure_of_merit,
    save_non_zero_indices=True,
)

# Expose the individual fields under short names for the following cells.
X_train, X_test = training_data.X_train, training_data.X_test
y_train, y_test = training_data.y_train, training_data.y_test
indices_train, indices_test = training_data.indices_train, training_data.indices_test
names_list = training_data.names_list
scores_list = training_data.scores_list

# selected device: no 0 -> take entry 0 of the score row of every sample
test_scores = [scores_list[sample_idx][0] for sample_idx in indices_test]
train_scores = [scores_list[sample_idx][0] for sample_idx in indices_train]

performance = []

In [None]:
# Print the number of training samples followed by the number of test samples.
print(*map(len, (X_train, X_test)))

# Random Forest

In [None]:
# Base estimator whose hyperparameters are tuned by the grid search below.
base_regressor = RandomForestRegressor(random_state=0)

# Hyperparameter grid explored exhaustively by GridSearchCV.
tree_param = [
    {
        "n_estimators": [100, 200, 500],
        # Regression split criteria. The classifier-only values
        # ("gini", "entropy", "log_loss") are rejected by RandomForestRegressor,
        # which makes every candidate fit fail.
        "criterion": ["squared_error", "absolute_error", "friedman_mse"],
        "max_depth": list(range(8, 30, 6)),
        "min_samples_split": list(range(2, 20, 6)),
        "min_samples_leaf": list(range(2, 20, 6)),
        "min_weight_fraction_leaf": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
        "max_features": [None, "sqrt", "log2"],
        "max_leaf_nodes": [None, 10, 20, 30, 40, 50],
        "min_impurity_decrease": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
        "bootstrap": [True, False],
    },
]

# 5-fold cross-validated search on 8 workers; the targets are the scores of the
# selected device (train_scores). `clf` is the fitted search object that the
# following cells use.
clf = GridSearchCV(base_regressor, tree_param, cv=5, n_jobs=8).fit(X_train, train_scores)

# Predictions of the best model (refit by GridSearchCV) on the held-out test set.
y_pred = np.asarray(clf.predict(X_test))

In [None]:
# Display the hyperparameter combination selected by the grid search.
clf.best_params_

In [None]:
# Scatter the predicted scores against the actual test scores; the red diagonal
# marks where prediction and actual value coincide.
plt.figure(figsize=(10, 6))
# Both artists carry a label so that plt.legend() has entries to show
# (without labels it only emits a warning and draws an empty legend).
plt.scatter(test_scores, y_pred, color="blue", marker="x", label="Predictions")
plt.plot([0, 1], [0, 1], color="red", linewidth=2, label="Ideal (predicted = actual)")
plt.legend()
plt.xlim(0, 1)
plt.title("Predicted vs Actual Values")
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.show()

In [None]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Convert the fitted model to ONNX.
# One float input tensor: unspecified first dimension, X_train.shape[1] columns.
n_features = X_train.shape[1]
input_spec = ("float_input", FloatTensorType([None, n_features]))
initial_type = [input_spec]
onnx_model = convert_sklearn(clf, initial_types=initial_type)

# Write the serialized ONNX model to disk.
with open("model.onnx", "wb") as onnx_file:
    payload = onnx_model.SerializeToString()
    onnx_file.write(payload)