# Data Preparation

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import glob
from qiskit import QuantumCircuit
from mqt.predictor import ml
from mqt.predictor import reward

# Read in quantum circuits and (calculate) scores
training_circuit_pattern = "/Users/di93vaw/mqt-predictor/src/mqt/predictor/ml/training_data/training_circuits/*.qasm"

# glob.glob returns matches in arbitrary order; sort them so that the
# positional train/test split below is reproducible across runs and machines.
qasm_files = sorted(glob.glob(training_circuit_pattern))
if not qasm_files:
    # Fail early with a clear message instead of letting the grid search
    # crash later on an empty data set.
    msg = f"No .qasm files found matching {training_circuit_pattern!r}"
    raise FileNotFoundError(msg)

# X collects one feature vector per circuit, Y the corresponding score.
X, Y = [], []
for qasm_file in qasm_files:
    qc = QuantumCircuit.from_qasm_file(qasm_file)
    # Only the values of the feature dict are used, in the dict's own order.
    feat_dict = ml.helper.create_feature_dict(qc)
    score = reward.crit_depth(qc)
    X.append(list(feat_dict.values()))
    Y.append(score)

# Split data into training and test set: the first 80% of the (sorted)
# samples are used for training, the remainder for testing.
# NOTE(review): the split is positional and not shuffled -- confirm that is intended.
split_idx = int(len(X) * 0.8)
X_train, X_test = X[:split_idx], X[split_idx:]
Y_train, Y_test = Y[:split_idx], Y[split_idx:]

In [None]:
# Report how many samples ended up in the training and the test set.
print(f"{len(X_train)} {len(X_test)}")

# Random Forest

In [None]:
# Hyper-parameter grid that is searched exhaustively for the random forest.
tree_param = [
    {
        "n_estimators": [100, 200, 500],
        "max_depth": [8, 14, 20, 26],
        "min_samples_split": [2, 8, 14],
        "min_samples_leaf": [2, 8, 14],
        "bootstrap": [True, False],
    },
]

# 3-fold cross-validated grid search over a seeded random-forest regressor,
# using all available cores. After fitting, `clf` is the fitted search object.
clf = GridSearchCV(
    estimator=RandomForestRegressor(random_state=0),
    param_grid=tree_param,
    cv=3,
    n_jobs=-1,
)
clf.fit(X_train, Y_train)

# Predictions of the fitted search object for the held-out test samples.
y_pred = np.array([*clf.predict(X_test)])

In [None]:
# Display the hyper-parameter combination selected by the grid search.
clf.best_params_

In [None]:
# Scatter plot of predicted against actual scores for the test set.
plt.figure(figsize=(10, 6))
# Both artists get a label; without labels plt.legend() has nothing to show
# and only emits a warning.
plt.scatter(Y_test, y_pred, color="blue", marker="x", label="Predictions")
# Diagonal reference line: points on it are predicted exactly.
plt.plot([0, 1], [0, 1], color="red", linewidth=2, label="Ideal (predicted = actual)")
plt.legend()
plt.xlim(0, 1)
plt.title("Predicted vs Actual Values")
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.show()

In [None]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# New code to convert the model to ONNX
# X_train is a plain Python list of feature vectors, so it has no `.shape`;
# take the number of features from the length of the first sample instead.
num_features = len(X_train[0])
# One float input tensor with a variable batch dimension and `num_features` columns.
initial_type = [("float_input", FloatTensorType([None, num_features]))]
onnx_model = convert_sklearn(clf, initial_types=initial_type)

# Save the model
with open("model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())