# Data Preparation

In [6]:
from __future__ import absolute_import
from mqt.predictor import ml
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn import tree, svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
import numpy as np

np.random.seed(10)

predictor = ml.Predictor()
figure_of_merit="expected_fidelity"

training_data = predictor.get_prepared_training_data(figure_of_merit=figure_of_merit, save_non_zero_indices=True)

X_train = training_data.X_train
X_test = training_data.X_test
y_train = training_data.y_train
y_test = training_data.y_test
indices_train = training_data.indices_train
indices_test= training_data.indices_test
names_list= training_data.names_list
scores_list= training_data.scores_list

scores_filtered = [scores_list[i] for i in indices_test]
names_filtered = [names_list[i] for i in indices_test]

performance = []

ModuleNotFoundError: No module named 'numpy'

In [7]:
color1 = "#21918c"
color2 = "#440154"

In [8]:
X_train

NameError: name 'X_train' is not defined

In [9]:
print(len(X_train), len(X_test))

NameError: name 'X_train' is not defined

# Random Forest

In [10]:
clf = RandomForestClassifier(random_state=0)
tree_param = [
    {
        "n_estimators": [100, 200, 500],
        "max_depth": list(range(8, 30, 6)),
        "min_samples_split": list(range(2, 20, 6)),
        "min_samples_leaf": list(range(2, 20, 6)),
        "bootstrap": [True, False],
    },
]
clf = GridSearchCV(clf, tree_param, cv=5, n_jobs=8).fit(X_train, y_train)

y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="RandomForestClassifier", color=color1)
rel_goodness = np.round(np.mean(rel_scores),4)
rel_goodness_std = np.round(np.std(rel_scores),4)
print("Best Accuracy: ", clf.best_score_)
top3 = (res.count(1) + res.count(2) + res.count(3)) / len(res)
print("Top 3: ", top3)
print("Feature Importance: ", clf.best_estimator_.feature_importances_)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)
performance.append(("Random Forest", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std))

NameError: name 'RandomForestClassifier' is not defined

In [11]:
predictor.plot_eval_all_detailed_compact_normed(
    names_filtered, scores_filtered, y_pred, y_test, color_all=color1, color_pred=color2
)

NameError: name 'predictor' is not defined

In [12]:
clf.best_estimator_.get_params()

NameError: name 'clf' is not defined

### Feature Importances

In [13]:
path = ml.helper.get_path_trained_model(figure_of_merit, return_non_zero_indices=True)
non_zero_indices = np.load(str(path), allow_pickle=True)
        
openqasm_qc_list = ml.helper.get_openqasm_gates()
feature_names = [openqasm_qc_list[i] for i in range(0, len(openqasm_qc_list))]
feature_names.append("num qubits")
feature_names.append("depth")
feature_names.append("prog. comm.")
feature_names.append("crit. dept")
feature_names.append("entang. ratio")
feature_names.append("parallelism")
feature_names.append("liveness")
feature_names = [feature_names[i] for i in non_zero_indices]

importances = clf.best_estimator_.feature_importances_
std = np.std(
    [tree.feature_importances_ for tree in clf.best_estimator_.estimators_], axis=0
)

idx = np.argsort(-importances)

plt.figure(figsize=(8, 6))
plt.bar(np.array(feature_names)[idx], np.array(importances)[idx], color=color1, width=0.9)
plt.errorbar(
    np.array(feature_names)[idx],
    np.array(importances)[idx],
    np.array(std)[idx],
    fmt="o",
    color=color2,
)
plt.xticks(rotation=90, fontsize=18)
plt.yticks(fontsize=18)
plt.ylabel("Relative feature importance", fontsize=18)
plt.tight_layout()
plt.savefig("results/feature_importances.pdf")
plt.show()

NameError: name 'ml' is not defined

#### Check the relative importances per feature

In [14]:
summary = zip(np.array(feature_names)[idx], np.array(importances)[idx])
for feature, importance in list(summary):
    print(feature, np.round(importance,3))

NameError: name 'np' is not defined

# GradientBoostingClassifier

In [15]:
clf = GradientBoostingClassifier()

param_grid = {
    "learning_rate": [0.01, 0.1, 1],
}

clf = GridSearchCV(clf, param_grid, cv=5, n_jobs=8).fit(X_train, y_train)

y_pred = np.array(list(clf.predict(X_test)))

res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="GradientBoostingClassifier", color=color1)
rel_goodness = np.round(np.mean(rel_scores),4)
rel_goodness_std = np.round(np.std(rel_scores),4)

print("Best Accuracy: ", clf.best_score_)
top3 = (res.count(1) + res.count(2) + res.count(3)) / len(res)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)
performance.append(("Gradient Boosting", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std))

NameError: name 'GradientBoostingClassifier' is not defined

# Decision Tree Classifier

In [16]:
clf = tree.DecisionTreeClassifier(random_state=5)

tree_param = [
    {
        "criterion": ["entropy", "gini"],
        "max_depth": list(range(1, 15, 1)),
        "min_samples_split": list(range(2, 20, 4)),
        "min_samples_leaf": list(range(2, 20, 4)),
        "max_leaf_nodes": list(range(2, 200, 40)),
        "max_features": list(range(1, len(non_zero_indices), 10)),
    },
]
clf = GridSearchCV(clf, tree_param, cv=5, n_jobs=8).fit(X_train, y_train)
y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="DecisionTreeClassifier", color=color1)
rel_goodness = np.round(np.mean(rel_scores),4)
rel_goodness_std = np.round(np.std(rel_scores),4)

print("Best Accuracy: ", clf.best_score_)
top3 = (res.count(1) + res.count(2) + res.count(3)) / len(res)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)
print("Feature Importance: ", clf.best_estimator_.feature_importances_)
performance.append(("Decision Tree", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std))

NameError: name 'tree' is not defined

# Nearest Neighbor

In [17]:
clf = KNeighborsClassifier()
param_grid = dict(n_neighbors=range(1, 10, 1))
clf = GridSearchCV(clf, param_grid, cv=5, n_jobs=8).fit(X_train, y_train)

y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="KNeighborsClassifier", color=color1)
rel_goodness = np.round(np.mean(rel_scores),4)
rel_goodness_std = np.round(np.std(rel_scores),4)

print("Best Accuracy: ", clf.best_score_)
top3 = (res.count(1) + res.count(2) + res.count(3)) / len(res)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)
performance.append(("Nearest Neighbor", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std))

NameError: name 'KNeighborsClassifier' is not defined

# MLPClassifier

In [18]:
clf = MLPClassifier(max_iter=1000)

param_grid = {
    "hidden_layer_sizes": [(50, 50, 50), (50, 100, 50), (100,)],
    "activation": ["tanh", "relu"],
    "solver": ["sgd", "adam"],
    "alpha": [0.0001, 0.05],
    "learning_rate": ["constant", "adaptive"],
}

clf = GridSearchCV(clf, param_grid, cv=5, n_jobs=8).fit(X_train, y_train)

y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="MLPClassifier", color=color1)
rel_goodness = np.round(np.mean(rel_scores),4)
rel_goodness_std = np.round(np.std(rel_scores),4)

print("Best Accuracy: ", clf.best_score_)
top3 = (res.count(1) + res.count(2) + res.count(3)) / len(res)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)
performance.append(("Multilayer Perceptron", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std))

NameError: name 'MLPClassifier' is not defined

# SVM

In [19]:
clf = svm.SVC()
param_grid = {"C": [0.1, 1, 10], "gamma": [1, 0.1, 0.01], "kernel": ["rbf", "sigmoid"]}
clf = GridSearchCV(clf, param_grid, cv=5, n_jobs=8).fit(X_train, y_train)

y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="SVM", color=color1)
rel_goodness = np.round(np.mean(rel_scores),4)
rel_goodness_std = np.round(np.std(rel_scores),4)

print("Best Accuracy: ", clf.best_score_)
top3 = (res.count(1) + res.count(2) + res.count(3)) / len(res)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)
performance.append(("Support Vector Machine", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std))

NameError: name 'svm' is not defined

# Naive Bayes

In [20]:
clf = GaussianNB()
param_grid = {"var_smoothing": np.logspace(0, -9, num=100)}
clf = GridSearchCV(clf, param_grid, cv=5, n_jobs=8).fit(X_train, y_train)

y_pred = np.array(list(clf.predict(X_test)))
res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)
predictor.plot_eval_histogram(res, filename="GaussianNB", color=color1)
rel_goodness = np.round(np.mean(rel_scores),4)
rel_goodness_std = np.round(np.std(rel_scores),4)

print("Best Accuracy: ", clf.best_score_)
top3 = (res.count(1) + res.count(2) + res.count(3)) / len(res)
print("Top 3: ", top3)
print("Rel Goodness: ", rel_goodness)
print("Rel Goodness Std: ", rel_goodness_std)
performance.append(("Naive Bayes", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std))

NameError: name 'GaussianNB' is not defined

# Save Performance Results

In [21]:
print(performance)

filename = "results/performances_" + figure_of_merit + ".csv"
with open(filename, "w") as f:
    f.write("Classifier, Accuracy, Top3, Worst Rank, Eval. Score Diff., Std\n")
    for sublist in performance:
        line = "{}, {}, {}, {}, {}, {} \n".format(
            sublist[0], sublist[1], sublist[2], sublist[3], sublist[4], sublist[5]
        )
        f.write(line)

NameError: name 'performance' is not defined