In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import torch
from joblib import parallel_config
from sklearn.model_selection import GridSearchCV
from torch_geometric.data import Dataset
from torch_geometric.utils import to_networkx

from mqt.predictor import ml
from mqt.predictor.ml import GNNClassifier, MultiGNNClassifier

# Predictor instance used below for training-data preparation, evaluation and plotting.
predictor = ml.Predictor()
# Figure of merit handed to the predictor's data-preparation calls; the trailing comment
# names the alternative value.
figure_of_merit = "critical_depth"  # "fidelity"
# Hex colors passed to the predictor's plotting helpers and to matplotlib.
color1 = "#21918c"
color2 = "#440154"

Create training data

In [None]:
from pathlib import Path

# Resolve the circuit folders relative to the installed ``mqt.predictor.ml`` package instead of
# a machine-specific absolute path, so the notebook is not tied to one user's checkout.
# (With an editable install this is <repo>/src/mqt/predictor/ml/training_data.)
training_data_dir = Path(ml.__file__).resolve().parent / "training_data"
source_path = training_data_dir / "training_circuits"
target_path = training_data_dir / "training_circuits_compiled"

# uncomment only on first run

# with parallel_config(backend="threading", n_jobs=-1):
#    training_data, name_list, scores_list = predictor.generate_trainingdata_from_qasm_files(
#        figure_of_merit, path_uncompiled_circuits=source_path, path_compiled_circuits=target_path
#    )
# ml.helper.save_training_data(training_data, name_list, scores_list, figure_of_merit)

In [None]:
# Load the prepared train/test split for the chosen figure of merit. ``graph_only=True`` is
# requested here because the following sections feed the samples to graph neural networks.
training_data = predictor.get_prepared_training_data(
    figure_of_merit=figure_of_merit,
    save_non_zero_indices=True,
    graph_only=True,
)

# Unpack the fields that the rest of the notebook refers to by name.
X_train, X_test = training_data.X_train, training_data.X_test
y_train, y_test = training_data.y_train, training_data.y_test
indices_train, indices_test = training_data.indices_train, training_data.indices_test
names_list, scores_list = training_data.names_list, training_data.scores_list

# Scores and names restricted to the test samples, in the order given by ``indices_test``.
scores_filtered = [scores_list[idx] for idx in indices_test]
names_filtered = [names_list[idx] for idx in indices_test]

# Each evaluated classifier appends one result tuple to this list.
performance = []

In [None]:
def plot_hisogram(y1, y2, y1_label="y1", y2_label="y2"):
    """Overlay the histograms of two samples in a single figure and show it.

    Args:
        y1: Values of the first sample.
        y2: Values of the second sample.
        y1_label: Legend entry for ``y1``.
        y2_label: Legend entry for ``y2``.
    """
    plt.figure(figsize=(10, 5))
    # Identical binning and transparency for both samples so the overlap stays readable.
    for values, legend_entry in ((y1, y1_label), (y2, y2_label)):
        plt.hist(values, bins=30, alpha=0.5, label=legend_entry)
    plt.xlabel("Value")
    plt.ylabel("Frequency")
    plt.legend(loc="upper right")
    plt.show()


# Compare the label distributions of the training and the test split.
plot_hisogram(y_train, y_test, "y_train", "y_test")

# Accuracy of a baseline classifier that only learns the class probability distribution

In [None]:
# Baseline: a "classifier" that ignores its input and draws every prediction from the
# empirical class distribution of ``y_train``.
class_counts = np.bincount(y_train)
relative_frequencies = class_counts / len(y_train)

# Take the number of candidate labels from the data instead of hard-coding it: ``p`` must
# contain exactly one probability per candidate label, otherwise ``choice`` raises ValueError.
num_classes = len(relative_frequencies)

num_iterations = 1000
# Dedicated, seeded generator so the reported baseline is reproducible and does not
# disturb NumPy's global random state.
rng = np.random.default_rng(10)
# The true labels never change, so convert them once outside the loop.
labels = np.asarray(y_test)
total = len(labels)

scores = []
for _ in range(num_iterations):
    # Sample instances according to the relative frequencies in y_train
    samples = rng.choice(num_classes, size=total, p=relative_frequencies)
    # Fraction of sampled labels that happen to coincide with the true test labels.
    correct = int(np.count_nonzero(samples == labels))
    scores.append(correct / total)

# Calculate the average score
average_score = sum(scores) / num_iterations
print(average_score)

In [None]:
class MyDataset(Dataset):
    """In-memory dataset that serves a list of graphs and stores a score tensor on each as ``y``.

    Args:
        data_list: Graph objects to serve. Each object's ``y`` attribute is overwritten in place.
        scores_list: One score sequence per graph, matched to ``data_list`` by position.
        transform: Forwarded unchanged to the base ``Dataset`` constructor.
        pre_transform: Forwarded unchanged to the base ``Dataset`` constructor.
    """

    def __init__(self, data_list, scores_list, transform=None, pre_transform=None):
        super().__init__(".", transform, pre_transform)
        self.data = data_list
        for graph, graph_scores in zip(self.data, scores_list):
            target = torch.tensor(graph_scores)
            # Entries equal to -1 are mapped to 0 before the cast to float.
            # NOTE: score 0,1 might be easier to predict -> sigmoid
            target[target == -1] = 0
            graph.y = target.float()

    @property
    def raw_file_names(self):
        """No raw files: the samples are handed over in memory."""
        return []

    @property
    def processed_file_names(self):
        """No processed files: nothing is cached on disk by this class."""
        return []

    def len(self):
        """Return the number of stored graphs."""
        return len(self.data)

    def get(self, idx):
        """Return the graph stored at position ``idx``."""
        return self.data[idx]

    def process(self):
        """Intentionally a no-op; the data is already in its final form."""


# Create the dataset: select the score entries that belong to each split, then wrap the
# graphs together with their scores.
train_scores = [scores_list[idx] for idx in indices_train]
test_scores = [scores_list[idx] for idx in indices_test]
train_dataset = MyDataset(X_train, train_scores)
test_dataset = MyDataset(X_test, test_scores)

In [None]:
# Lookup table from the integer gate id stored on a graph node to the gate's name.
# The position of a name in the list is its id (0 -> "u3", ..., 42 -> "msr").
num2ops = dict(
    enumerate([
        "u3",
        "u2",
        "u1",
        "cx",
        "id",
        "u0",
        "u",
        "p",
        "x",
        "y",
        "z",
        "h",
        "s",
        "sdg",
        "t",
        "tdg",
        "rx",
        "ry",
        "rz",
        "sx",
        "sxdg",
        "cz",
        "cy",
        "swap",
        "ch",
        "ccx",
        "cswap",
        "crx",
        "cry",
        "crz",
        "cu1",
        "cp",
        "cu3",
        "csx",
        "cu",
        "rxx",
        "rzz",
        "rccx",
        "rc3x",
        "c3x",
        "c3sqrtx",
        "c4x",
        "msr",
    ])
)

In [None]:
def draw_graph(data):
    """Draw a circuit graph with gate names on the nodes and ctrl/target labels on the edges.

    Args:
        data: Graph object accepted by ``to_networkx`` that also provides ``x`` (one integer
            gate id per node, looked up in ``num2ops``), ``edge_index`` and ``edge_attr``.
            Assumes ``edge_index`` has shape (2, num_edges) and ``edge_attr`` holds one
            scalar per edge, in the same order as the columns of ``edge_index`` — TODO confirm.
    """
    plt.figure(figsize=(10, 5))  # Create a new figure with a specified size (10x5)

    G = to_networkx(data, to_undirected=True)  # Convert to a networkx graph
    G = nx.Graph(G)  # Convert to a simple graph

    # Create a dictionary of node labels (node -> gate name)
    node_labels = {node: num2ops[data.x[node].item()] for node in G.nodes()}

    # Use planar layout
    pos = nx.planar_layout(G)

    nx.draw(G, pos, labels=node_labels, arrows=False)

    # Key every label by the endpoints of its edge. Pairing ``data.edge_attr`` with
    # ``G.edges()`` by position is unreliable: the undirected simple graph may list its edges
    # in a different order (and merge parallel/reverse edges), which would attach labels to
    # the wrong edges.
    edge_labels_dict = {}
    for (source, target), attr in zip(data.edge_index.t().tolist(), data.edge_attr):
        if not G.has_edge(source, target):
            continue
        # Normalize the key so both directions of an edge share one label; the first wins.
        edge_key = (min(source, target), max(source, target))
        edge_labels_dict.setdefault(edge_key, "ctrl" if attr == 1 else "target")

    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels_dict)

    plt.show()


# Visualize the first training graph as a sanity check.
draw_graph(X_train[0])

# GNN Classifier

In [None]:
# Base estimator for the hyperparameter search; the fixed arguments describe the data
# (see the inline comments), everything else is varied through ``param_grid``.
clf = GNNClassifier(
    num_node_categories=43,  # distinct gate types (incl. 'id' and 'meas')
    num_edge_categories=2,  # wire features (control/target)
    output_dim=7,  # number of classes (devices)
)

# Only the "GAT" grid is active; the "TransformerConv" grid is kept commented out as an
# alternative. The active grid spans 2**7 = 128 combinations (seven parameters with two
# candidate values each), i.e. 384 fits with cv=3.
param_grid = [
    # {
    #    "model": ["TransformerConv"],
    #    #--------------------------------
    #    "optimizer": ["adam"],
    #    "learning_rate": [1e-3],
    #    "batch_size": [32],
    #    "epochs": [25],
    #    #--------------------------------
    #    "node_embedding_dim": [4],
    #    "edge_embedding_dim": [None],
    #    "num_layers": [4],
    #    "hidden_dim": [4],
    #    "dropout": [0.],
    #    "batch_norm": [False],
    #    "activation": ["relu"],
    #    "readout": ["node-attention"],
    #    "heads": [1, 2],
    #    "concat": [True],
    #    "beta": [False],
    #    "bias": [True],
    #    "root_weight": [True],
    # },
    {
        "model": ["GAT"],
        # --------------------------------
        "optimizer": ["adam"],
        "learning_rate": [1e-3],
        "batch_size": [32],
        "epochs": [25],
        # --------------------------------
        "node_embedding_dim": [None, 4],
        "edge_embedding_dim": [None],
        "num_layers": [2, 3],
        "hidden_dim": [4, 8],
        "dropout": [0.0, 0.33],
        "batch_norm": [False, True],
        "activation": ["relu"],
        "readout": ["node-attention", "feat-attention"],
        "jk": [None],
        "v2": [True, False],
    },
]
# Run the search with joblib's threading backend. ``clf`` is rebound to the fitted
# GridSearchCV object; only the dataset is passed to ``fit`` (no separate ``y``), the
# targets are stored on the graphs by ``MyDataset``.
with parallel_config(backend="threading", n_jobs=-1):
    clf = GridSearchCV(clf, param_grid, cv=3, n_jobs=-1, verbose=3).fit(train_dataset)

In [None]:
# Show the hyperparameter combination selected by the grid search.
clf.best_params_

In [None]:
# Hard-coded parameter set for the "GAT" model.
# NOTE(review): presumably a snapshot of ``clf.best_params_`` from an earlier run — confirm.
# It is not referenced by any other cell visible in this notebook.
clf_best_params_ = {
    "activation": "relu",
    "batch_norm": True,
    "batch_size": 32,
    "dropout": 0.33,
    "edge_embedding_dim": None,
    "epochs": 25,
    "hidden_dim": 4,
    "jk": None,
    "learning_rate": 0.001,
    "model": "GAT",
    "node_embedding_dim": None,
    "num_layers": 3,
    "optimizer": "adam",
    "readout": "feat-attention",
    "v2": False,
}

Train GNN on most promising parameter settings

In [None]:
# Manually chosen constructor arguments for a single MultiGNNClassifier run.
# NOTE(review): "heads", "concat", "beta", "bias" and "root_weight" also appear in the
# commented-out "TransformerConv" grid of the search cell — confirm that they are accepted
# (or ignored) when "model" is "GAT".
best_params_ = {
    "optimizer": "adam",
    "learning_rate": 1e-3,
    "batch_size": 64,
    "epochs": 250,
    "num_node_categories": 43,
    "num_edge_categories": 2,
    "node_embedding_dim": None,
    "edge_embedding_dim": None,
    "num_layers": 2,
    "hidden_dim": 4,
    "output_dim": 7,
    "dropout": 0.0,
    "batch_norm": False,
    "activation": "relu",
    "readout": "node-attention",
    "heads": 2,
    "concat": True,
    "beta": 1.0,
    "bias": False,
    "root_weight": False,
    "model": "GAT",
    "jk": "last",
    "v2": True,
}

# Overrides the 250 epochs configured in the dict above: this run trains for 50 epochs.
best_params_.update({"epochs": 50})
gnn = MultiGNNClassifier(**best_params_)

# Train on the training graphs; the last expression displays the score on the test graphs.
gnn.fit(train_dataset)
gnn.score(test_dataset)

In [None]:
# Display the parameters the manually configured model was built with.
gnn.get_params()

In [None]:
# Evaluate the grid-searched GNN (``clf``) on the held-out graphs.
y_pred = np.array(list(clf.predict(test_dataset)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="GNNClassifier", color=color1)

rel_goodness = np.round(np.mean(rel_scores), decimals=4)
rel_goodness_std = np.round(np.std(rel_scores), decimals=4)
# Share of test samples whose entry in ``res`` is 1, 2 or 3.
top3 = sum(res.count(rank) for rank in (1, 2, 3)) / len(res)

print("Best Accuracy: ", clf.best_score_)  # averaged over N folds
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)

# Record the summary row for the final results table.
gnn_summary = ("GNN", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std)
performance.append(gnn_summary)

# GNN Classifier (one GNN for each class/device)

In [None]:
# Base estimator: one GNN per class/device; the fixed arguments describe the data.
clf = MultiGNNClassifier(
    num_node_categories=43,  # distinct gate types (incl. 'id' and 'meas')
    num_edge_categories=2,  # wire features (control/target)
    output_dim=7,  # number of classes (devices)
)

# Single-point grid: GridSearchCV is used here only for its 5-fold cross-validated score.
# NOTE(review): the GNNClassifier grid uses "node-attention"/"feat-attention" as readout
# values — confirm that plain "attention" is a valid option for MultiGNNClassifier.
param_grid = {
    "optimizer": ["adam"],
    "learning_rate": [1e-3],
    "batch_size": [32],
    "epochs": [25],
    "node_embedding_dim": [4],
    "edge_embedding_dim": [None],
    "num_layers": [3],
    "hidden_dim": [4],
    "dropout": [0.33],
    "batch_norm": [True],
    "activation": ["relu"],
    "readout": ["attention"],
}

clf = GridSearchCV(clf, param_grid, cv=5, n_jobs=-1).fit(train_dataset)

y_pred = np.array(list(clf.predict(test_dataset)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
# Use a filename of its own: the copy-pasted "MLPClassifier" collided with the histogram
# written by the MLPClassifier section further down.
predictor.plot_eval_histogram(res, filename="MultiGNNClassifier", color=color1)

In [None]:
# Summarize the MultiGNN evaluation computed in the previous cell.
rel_goodness = np.round(np.mean(rel_scores), decimals=4)
rel_goodness_std = np.round(np.std(rel_scores), decimals=4)
# Share of test samples whose entry in ``res`` is 1, 2 or 3.
top3 = sum(res.count(rank) for rank in (1, 2, 3)) / len(res)

print("Best Accuracy: ", clf.best_score_)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)

# Record the summary row for the final results table.
multi_gnn_summary = ("MultiGNN", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std)
performance.append(multi_gnn_summary)

# Data Preparation

In [None]:
import numpy as np
from sklearn import svm, tree
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

from mqt.predictor import ml

# Seed NumPy's global generator so the estimators below that are created without an
# explicit ``random_state`` behave reproducibly.
np.random.seed(10)

predictor = ml.Predictor()
figure_of_merit = "critical_depth"

# Same split request as in the GNN part, but without ``graph_only``: the classical models
# below are fitted on ``X_train``/``y_train`` directly.
training_data = predictor.get_prepared_training_data(figure_of_merit=figure_of_merit, save_non_zero_indices=True)

X_train = training_data.X_train
X_test = training_data.X_test
y_train = training_data.y_train
y_test = training_data.y_test
indices_train = training_data.indices_train
indices_test = training_data.indices_test
names_list = training_data.names_list
scores_list = training_data.scores_list

# Scores and names restricted to the test samples, in the order given by ``indices_test``.
scores_filtered = [scores_list[i] for i in indices_test]
names_filtered = [names_list[i] for i in indices_test]

# Keep the rows already collected by the GNN sections above. Unconditionally resetting the
# list here silently dropped the "GNN" and "MultiGNN" results from the CSV written at the end.
# A fresh list is only created when this part is run on its own.
if "performance" not in globals():
    performance = []

In [None]:
# Plot colors; same values as the ones defined at the top of the notebook.
color1 = "#21918c"
color2 = "#440154"

In [None]:
# Inspect the training inputs used by the classical models.
print(X_train)

In [None]:
# Number of training and test samples.
print(len(X_train), len(X_test))

# Random Forest

In [None]:
# 5-fold grid search over the random-forest hyperparameters.
tree_param = [
    {
        "n_estimators": [100, 200, 500],
        "max_depth": [8, 14, 20, 26],
        "min_samples_split": [2, 8, 14],
        "min_samples_leaf": [2, 8, 14],
        "bootstrap": [True, False],
    },
]
clf = GridSearchCV(RandomForestClassifier(random_state=0), tree_param, cv=5, n_jobs=8)
clf = clf.fit(X_train, y_train)

# Evaluate the refitted best model on the held-out samples.
y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="RandomForestClassifier", color=color1)

rel_goodness = np.round(np.mean(rel_scores), decimals=4)
rel_goodness_std = np.round(np.std(rel_scores), decimals=4)
# Share of test samples whose entry in ``res`` is 1, 2 or 3.
top3 = sum(res.count(rank) for rank in (1, 2, 3)) / len(res)

print("Best Accuracy: ", clf.best_score_)
print("Top 3: ", top3)
print("Feature Importance: ", clf.best_estimator_.feature_importances_)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)

# Record the summary row for the final results table.
forest_summary = ("Random Forest", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std)
performance.append(forest_summary)

In [None]:
# Plot the predictor's detailed evaluation for the current ``y_pred``, i.e. the predictions
# of the Random Forest cell directly above when the notebook is run top to bottom.
predictor.plot_eval_all_detailed_compact_normed(
    names_filtered, scores_filtered, y_pred, y_test, color_all=color1, color_pred=color2
)

In [None]:
# Display all parameters of the best estimator found by the preceding grid search.
clf.best_estimator_.get_params()

### Feature Importances

In [None]:
# Load the indices of the features that were kept for training.
# NOTE(review): ``allow_pickle=True`` can execute arbitrary code when the file is loaded;
# only use it on files produced by this project.
path = ml.helper.get_path_trained_model(figure_of_merit, return_non_zero_indices=True)
non_zero_indices = np.load(str(path), allow_pickle=True)

# Feature names: one entry per OpenQASM gate followed by the seven circuit-level features,
# then restricted to the features that were kept.
openqasm_qc_list = ml.helper.get_openqasm_gates()
feature_names = list(openqasm_qc_list)
feature_names += [
    "num qubits",
    "depth",
    "prog. comm.",
    "crit. dept",
    "entang. ratio",
    "parallelism",
    "liveness",
]
feature_names = [feature_names[i] for i in non_zero_indices]

# Importances of the best forest and their spread across its individual trees. The loop
# variable is not called ``tree`` so it cannot be confused with the imported ``tree`` module.
importances = clf.best_estimator_.feature_importances_
std = np.std([estimator.feature_importances_ for estimator in clf.best_estimator_.estimators_], axis=0)

# Order the features from most to least important.
idx = np.argsort(-importances)
sorted_names = np.array(feature_names)[idx]
sorted_importances = np.array(importances)[idx]
sorted_std = np.array(std)[idx]

plt.figure(figsize=(8, 6))
plt.bar(sorted_names, sorted_importances, color=color1, width=0.9)
plt.errorbar(
    sorted_names,
    sorted_importances,
    sorted_std,
    fmt="o",
    color=color2,
)
plt.xticks(rotation=90, fontsize=18)
plt.yticks(fontsize=18)
plt.ylabel("Relative feature importance", fontsize=18)
plt.tight_layout()
# ``savefig`` does not create missing directories, so make sure the target folder exists.
Path("results").mkdir(parents=True, exist_ok=True)
plt.savefig("results/feature_importances.pdf")
plt.show()

#### Check the relative importances per feature

In [None]:
# Print every feature with its importance (rounded to three decimals), most important first.
summary = list(zip(np.array(feature_names)[idx], np.array(importances)[idx]))
for feature, importance in summary:
    print(feature, np.round(importance, 3))

# GradientBoostingClassifier

In [None]:
# 5-fold grid search over the learning rate of a gradient-boosting classifier.
param_grid = {"learning_rate": [0.01, 0.1, 1]}
clf = GridSearchCV(GradientBoostingClassifier(), param_grid, cv=5, n_jobs=8)
clf = clf.fit(X_train, y_train)

# Evaluate the refitted best model on the held-out samples.
y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="GradientBoostingClassifier", color=color1)

rel_goodness = np.round(np.mean(rel_scores), decimals=4)
rel_goodness_std = np.round(np.std(rel_scores), decimals=4)
# Share of test samples whose entry in ``res`` is 1, 2 or 3.
top3 = sum(res.count(rank) for rank in (1, 2, 3)) / len(res)

print("Best Accuracy: ", clf.best_score_)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)

# Record the summary row for the final results table.
boosting_summary = ("Gradient Boosting", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std)
performance.append(boosting_summary)

# Decision Tree Classifier

In [None]:
# 5-fold grid search over the decision-tree hyperparameters. ``max_features`` is bounded by
# the number of kept features (``non_zero_indices`` comes from the feature-importance cell).
tree_param = [
    {
        "criterion": ["entropy", "gini"],
        "max_depth": list(range(1, 15)),
        "min_samples_split": [2, 6, 10, 14, 18],
        "min_samples_leaf": [2, 6, 10, 14, 18],
        "max_leaf_nodes": [2, 42, 82, 122, 162],
        "max_features": list(range(1, len(non_zero_indices), 10)),
    },
]
clf = GridSearchCV(tree.DecisionTreeClassifier(random_state=5), tree_param, cv=5, n_jobs=8)
clf = clf.fit(X_train, y_train)

# Evaluate the refitted best model on the held-out samples.
y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="DecisionTreeClassifier", color=color1)

rel_goodness = np.round(np.mean(rel_scores), decimals=4)
rel_goodness_std = np.round(np.std(rel_scores), decimals=4)
# Share of test samples whose entry in ``res`` is 1, 2 or 3.
top3 = sum(res.count(rank) for rank in (1, 2, 3)) / len(res)

print("Best Accuracy: ", clf.best_score_)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)
print("Feature Importance: ", clf.best_estimator_.feature_importances_)

# Record the summary row for the final results table.
tree_summary = ("Decision Tree", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std)
performance.append(tree_summary)

# Nearest Neighbor

In [None]:
# 5-fold grid search over the number of neighbors (1 to 9).
param_grid = {"n_neighbors": range(1, 10)}
clf = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, n_jobs=8)
clf = clf.fit(X_train, y_train)

# Evaluate the refitted best model on the held-out samples.
y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="KNeighborsClassifier", color=color1)

rel_goodness = np.round(np.mean(rel_scores), decimals=4)
rel_goodness_std = np.round(np.std(rel_scores), decimals=4)
# Share of test samples whose entry in ``res`` is 1, 2 or 3.
top3 = sum(res.count(rank) for rank in (1, 2, 3)) / len(res)

print("Best Accuracy: ", clf.best_score_)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)

# Record the summary row for the final results table.
knn_summary = ("Nearest Neighbor", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std)
performance.append(knn_summary)

# MLPClassifier

In [None]:
# 5-fold grid search over architecture, activation, solver, regularization and
# learning-rate schedule of a multilayer perceptron.
param_grid = {
    "hidden_layer_sizes": [(50, 50, 50), (50, 100, 50), (100,)],
    "activation": ["tanh", "relu"],
    "solver": ["sgd", "adam"],
    "alpha": [0.0001, 0.05],
    "learning_rate": ["constant", "adaptive"],
}
clf = GridSearchCV(MLPClassifier(max_iter=1000), param_grid, cv=5, n_jobs=8)
clf = clf.fit(X_train, y_train)

# Evaluate the refitted best model on the held-out samples.
y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="MLPClassifier", color=color1)

rel_goodness = np.round(np.mean(rel_scores), decimals=4)
rel_goodness_std = np.round(np.std(rel_scores), decimals=4)
# Share of test samples whose entry in ``res`` is 1, 2 or 3.
top3 = sum(res.count(rank) for rank in (1, 2, 3)) / len(res)

print("Best Accuracy: ", clf.best_score_)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)

# Record the summary row for the final results table.
mlp_summary = ("Multilayer Perceptron", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std)
performance.append(mlp_summary)

# SVM

In [None]:
# 5-fold grid search over regularization strength, kernel coefficient and kernel type.
param_grid = {
    "C": [0.1, 1, 10],
    "gamma": [1, 0.1, 0.01],
    "kernel": ["rbf", "sigmoid"],
}
clf = GridSearchCV(svm.SVC(), param_grid, cv=5, n_jobs=8)
clf = clf.fit(X_train, y_train)

# Evaluate the refitted best model on the held-out samples.
y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="SVM", color=color1)

rel_goodness = np.round(np.mean(rel_scores), decimals=4)
rel_goodness_std = np.round(np.std(rel_scores), decimals=4)
# Share of test samples whose entry in ``res`` is 1, 2 or 3.
top3 = sum(res.count(rank) for rank in (1, 2, 3)) / len(res)

print("Best Accuracy: ", clf.best_score_)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)

# Record the summary row for the final results table.
svm_summary = ("Support Vector Machine", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std)
performance.append(svm_summary)

# Naive Bayes

In [None]:
# 5-fold grid search over the variance smoothing (100 values, log-spaced from 1 to 1e-9).
param_grid = {"var_smoothing": np.logspace(0, -9, num=100)}
clf = GridSearchCV(GaussianNB(), param_grid, cv=5, n_jobs=8)
clf = clf.fit(X_train, y_train)

# Evaluate the refitted best model on the held-out samples.
y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="GaussianNB", color=color1)

rel_goodness = np.round(np.mean(rel_scores), decimals=4)
rel_goodness_std = np.round(np.std(rel_scores), decimals=4)
# Share of test samples whose entry in ``res`` is 1, 2 or 3.
top3 = sum(res.count(rank) for rank in (1, 2, 3)) / len(res)

print("Best Accuracy: ", clf.best_score_)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)

# Record the summary row for the final results table.
bayes_summary = ("Naive Bayes", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std)
performance.append(bayes_summary)

# Save Performance Results

In [None]:
print(performance)
from pathlib import Path

# Write one CSV row per evaluated classifier, in the order the results were collected.
file = Path("results/performances_" + figure_of_merit + ".csv")
# Opening a file for writing does not create missing directories, so create the target
# folder first; otherwise the export fails after all the expensive training has finished.
file.parent.mkdir(parents=True, exist_ok=True)
with file.open("w", encoding="utf-8") as f:
    f.write("Classifier, Accuracy, Top3, Worst Rank, Eval. Score Diff., Std\n")
    for sublist in performance:
        line = f"{sublist[0]}, {sublist[1]}, {sublist[2]}, {sublist[3]}, {sublist[4]}, {sublist[5]} \n"
        f.write(line)