# Model Predictions (cavity_claw_RouteMeander_eigenmode)

## Configuration

In [1]:
# The parameter file is where the hyperparameters are set. 
# It's recommended to look at that file first; it's interesting, and you can set things there.

from parameters import *

## Library

In [2]:
# Quiet TensorFlow's console logging. The environment variables are assigned
# before TensorFlow is imported so they are already in place when it starts up.
# Comment out the two os.environ lines if you prefer to see the warnings.
import os

os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Standard library
import gc
from pathlib import Path

# Third-party
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model

## Dataset

### Load

In [3]:
# Load the train / validation / test arrays saved by the previous notebook
# (or downloaded from the drive).

def _load_npy_splits(encoding_tag, augmented):
    """Load the nine ``.npy`` arrays stored under ``DATA_DIR/npy`` for one encoding.

    Parameters
    ----------
    encoding_tag : str
        Encoding part of the file names: ``'one_hot'`` or ``'linear'``.
    augmented : bool
        When truthy, the ``*_augmented.npy`` variants are read instead.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, X_val, X_test,
        y_value_train, y_value_val, y_value_test,
        y_exists_train, y_exists_val, y_exists_test)``
    """
    suffix = f"{encoding_tag}_encoding" + ("_augmented" if augmented else "")
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
    # can execute arbitrary code -- only point DATA_DIR at files you trust.
    return tuple(
        np.load(f"{DATA_DIR}/npy/{prefix}_{split}_{suffix}.npy", allow_pickle=True)
        for prefix in ("x", "y_value", "y_exists")
        for split in ("train", "val", "test")
    )


if 'Try Both' in ENCODING_TYPE:
    # one-hot branch
    (
        X_train_one_hot_encoding, X_val_one_hot_encoding, X_test_one_hot_encoding,
        y_value_train_one_hot_encoding, y_value_val_one_hot_encoding, y_value_test_one_hot_encoding,
        y_exists_train_one_hot_encoding, y_exists_val_one_hot_encoding, y_exists_test_one_hot_encoding,
    ) = _load_npy_splits('one_hot', DATA_AUGMENTATION)

    # linear branch
    (
        X_train_linear_encoding, X_val_linear_encoding, X_test_linear_encoding,
        y_value_train_linear_encoding, y_value_val_linear_encoding, y_value_test_linear_encoding,
        y_exists_train_linear_encoding, y_exists_val_linear_encoding, y_exists_test_linear_encoding,
    ) = _load_npy_splits('linear', DATA_AUGMENTATION)
else:
    # 'one hot' is tested first, then 'Linear'.
    if 'one hot' in ENCODING_TYPE:
        _encoding_tag = 'one_hot'
    elif 'Linear' in ENCODING_TYPE:
        _encoding_tag = 'linear'
    else:
        # Fail here with a clear message instead of leaving the arrays undefined.
        raise ValueError(
            f"Unsupported ENCODING_TYPE {ENCODING_TYPE!r}: expected it to contain "
            "'one hot', 'Linear' or 'Try Both'."
        )

    # Tag read by later cells to build CSV / scaler file names. It is assigned
    # whether or not DATA_AUGMENTATION is enabled so those cells always find it.
    # NOTE(review): for 'Linear' this keeps the capital 'L'; confirm the header
    # CSV / scaler files on disk use the same capitalisation.
    encoding = ENCODING_TYPE.replace(' ', '_')

    (
        X_train, X_val, X_test,
        y_value_train, y_value_val, y_value_test,
        y_exists_train, y_exists_val, y_exists_test,
    ) = _load_npy_splits(_encoding_tag, DATA_AUGMENTATION)


### Visualize

In [6]:
# Model checkpoint that will be evaluated
chosen_path = "model/best_keras_model_one_hot_encoding.keras"

# Encoding tag that appears in the CSV / scaler file names, e.g. "one_hot"
y_encoding_format_name = encoding

# Test inputs and both target sets (value + exists) as ndarrays
X_test_cur, y_value_test_cur, y_exists_test_cur = (
    np.asarray(arr) for arr in (X_test, y_value_test, y_exists_test)
)

# Column labels for the y targets: the comma-separated first line of the header CSV
y_headers_csv = "y_characteristics_{}_encoding.csv".format(y_encoding_format_name)
with open(y_headers_csv, "r") as f:
    first_line = f.readline()
headers = first_line.strip().split(",")


In [7]:
# Reset Keras state and collect garbage before loading the model
tf.keras.backend.clear_session()
gc.collect()
try:
    tf.config.experimental.reset_memory_stats('GPU:0')
except Exception:
    # Any failure here is ignored (presumably e.g. when no 'GPU:0' device
    # exists -- TODO confirm); inference below is pinned to the CPU anyway.
    pass

# Load the chosen model (uncompiled) and predict on the CPU
with tf.device('/CPU:0'):
    chosen_model = load_model(chosen_path, compile=False)
    pred = chosen_model.predict(X_test_cur, verbose=0)

# The model output is either a dict keyed by output name or a (value, exists) pair
if isinstance(pred, dict):
    y_value_pred, y_exists_pred = pred['value_out'], pred['exists_out']
else:
    y_value_pred, y_exists_pred = pred
y_value_pred = np.asarray(y_value_pred)
y_exists_pred = np.asarray(y_exists_pred)

# Exists head: keep the raw probabilities and a 0/1 mask thresholded at 0.5
y_exists_pred_prob = np.asarray(y_exists_pred, dtype=float)
y_exists_pred_mask = np.where(y_exists_pred_prob >= 0.5, 1.0, 0.0)

model_file_name = os.path.basename(chosen_path)
print(f"\n—— {model_file_name} ——")
chosen_model.summary()
print(
    f"Samples: {len(X_test_cur)} | "
    f"Value targets dim: {y_value_test_cur.shape[1]} | "
    f"Exists targets dim: {y_exists_test_cur.shape[1]}"
)



—— best_keras_model_one_hot_encoding.keras ——


I0000 00:00:1768504305.210812  112877 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Samples: 183 | Value targets dim: 16 | Exists targets dim: 16


# Scaled

In [8]:
# How many test samples go into the CSV and the printed tables; lower it for a smaller view
N_SAMPLES_TO_SHOW = 3

n_samples = min(N_SAMPLES_TO_SHOW, len(X_test_cur))
n_params = y_value_test_cur.shape[1]

# Errors in scaled units (value head only)
value_residual = y_value_test_cur - y_value_pred
sq_errors = value_residual ** 2
abs_errors = np.abs(value_residual)

# Parameters whose ground-truth exists flag is not 1 are "not defined": report NaN for them
is_defined = y_exists_test_cur == 1.0
sq_errors_masked = np.where(is_defined, sq_errors, np.nan)
abs_errors_masked = np.where(is_defined, abs_errors, np.nan)

# One row per (sample, parameter) pair; columns 0 and 1 of X are reported as
# cavity_frequency and kappa
rows = [
    {
        "sample_idx": i,
        "cavity_frequency": X_test_cur[i, 0],
        "kappa": X_test_cur[i, 1],
        "param": headers[j],
        "exists_true": float(y_exists_test_cur[i, j]),
        "exists_pred_prob": float(y_exists_pred_prob[i, j]),
        "exists_pred_mask": float(y_exists_pred_mask[i, j]),
        "ref": float(y_value_test_cur[i, j]),
        "pred": float(y_value_pred[i, j]),
        "abs_error": float(abs_errors_masked[i, j]),
        "sq_error": float(sq_errors_masked[i, j]),
    }
    for i in range(n_samples)
    for j in range(n_params)
]
df = pd.DataFrame(rows)

# Write the scaled predictions to disk
out_csv = Path(f"predictions_and_errors_{y_encoding_format_name}.csv")
df.to_csv(out_csv, index=False, float_format="%.6g")
print(f"\nSaved CSV -> {out_csv.resolve()}\n")

# Per-sample table (scaled units)
display_columns = [
    "param",
    "exists_true",
    "exists_pred_prob",
    "exists_pred_mask",
    "ref",
    "pred",
    "abs_error",
    "sq_error",
]
for i in range(n_samples):
    sub = df.loc[df["sample_idx"] == i, display_columns]
    header_line = (
        f"— Sample {i} — "
        f"X: cavity_frequency={X_test_cur[i,0]:.6g}, kappa={X_test_cur[i,1]:.6g}"
    )
    print(header_line)
    print(sub.to_string(index=False))
    print()

# Summary statistics over every defined parameter of every test sample
print("Global scaled error stats (defined parameters only):")
for stat_label, stat_fn in (("min", np.nanmin), ("median", np.nanmedian), ("max", np.nanmax)):
    print(f"  {stat_label} abs_error:", float(stat_fn(abs_errors_masked)))
print("\nHere onehot/linear encoding and the MLP which maps categorical data to 1s and 0s is probably throwing off the global average. These will be rounded in the future and will probably always round to the right number to reconstruct the correct category-- but for now it might throw off the overall average error. In the future we might want to just have it consider the non-categorical data when finding an overall average and reporting that number.\n")



Saved CSV -> /home/olivias/ML_qubit_design/model_predict_cavity_claw_RouteMeander_eigenmode/predictions_and_errors_one_hot.csv

— Sample 0 — X: cavity_frequency=0.0208384, kappa=0.16982
                                                          param  exists_true  exists_pred_prob  exists_pred_mask      ref      pred  abs_error  sq_error
   design_options.claw_opts.connection_pads.readout.claw_length          1.0          0.999959               1.0 0.403941  0.420213   0.016272  0.000265
design_options.claw_opts.connection_pads.readout.ground_spacing          1.0          0.999962               1.0 0.000000  0.097638   0.097638  0.009533
                                 design_options.claw_opts.pos_x          1.0          0.999958               1.0 0.000000 -0.002075   0.002075  0.000004
                          design_options.claw_opts.cross_length          1.0          0.999960               1.0 0.000000 -0.008900   0.008900  0.000079
                           design_options.claw_o

# Unscaled

In [9]:
# Names of the X features, one per line; they select the per-feature X scaler files
with open('X_names', 'r') as f:
    X_index_names = f.read().splitlines()

# NOTE(review): joblib.load unpickles the scaler files, which can execute
# arbitrary code -- only load scalers that come from a trusted source.

# Unscale X. Each feature has its own single-column scaler, so the scaler is
# loaded once per column and applied to the whole column at once (instead of
# re-reading the file for every sample). The copy is float64, matching the y
# arrays below, so large unscaled values are not truncated by a narrower dtype.
X_test_unscaled = np.array(X_test_cur, dtype=float)
for j in range(X_test_unscaled.shape[1]):
    scaler = joblib.load(f'scalers/scaler_X_{X_index_names[j]}.save')
    X_test_unscaled[:, j] = np.asarray(
        scaler.inverse_transform(X_test_unscaled[:, [j]])
    )[:, 0]

# Unscale the y VALUES (references and predictions); the exists flags /
# probabilities are not scaled and stay as they are.
y_value_test_unscaled = np.array(y_value_test_cur, dtype=float)
y_value_pred_unscaled = np.array(y_value_pred, dtype=float)
n_params = y_value_test_unscaled.shape[1]

for j in range(n_params):
    scaler = joblib.load(f'scalers/scaler_y_value__{headers[j]}_{y_encoding_format_name}_encoding.save')
    y_value_test_unscaled[:, j] = np.asarray(
        scaler.inverse_transform(y_value_test_unscaled[:, [j]])
    )[:, 0]
    y_value_pred_unscaled[:, j] = np.asarray(
        scaler.inverse_transform(y_value_pred_unscaled[:, [j]])
    )[:, 0]

# Errors in original (unscaled) units, value head only
sq_errors_unscaled  = (y_value_test_unscaled - y_value_pred_unscaled) ** 2
abs_errors_unscaled = np.abs(y_value_test_unscaled - y_value_pred_unscaled)

# Parameters whose ground-truth exists flag is not 1 are "not defined": report NaN for them
sq_errors_unscaled_masked  = np.where(y_exists_test_cur == 1.0, sq_errors_unscaled,  np.nan)
abs_errors_unscaled_masked = np.where(y_exists_test_cur == 1.0, abs_errors_unscaled, np.nan)

# One row per (sample, parameter) pair; columns 0 and 1 of X are reported as
# cavity_frequency and kappa
rows_unscaled = []
n_samples_to_show = min(N_SAMPLES_TO_SHOW, len(X_test_unscaled))
for i in range(n_samples_to_show):
    cav_freq, kappa = X_test_unscaled[i, 0], X_test_unscaled[i, 1]
    for j in range(n_params):
        rows_unscaled.append({
            "sample_idx": i,
            "cavity_frequency": cav_freq,
            "kappa": kappa,
            "param": headers[j],
            "exists_true": float(y_exists_test_cur[i, j]),
            "exists_pred_prob": float(y_exists_pred_prob[i, j]),
            "exists_pred_mask": float(y_exists_pred_mask[i, j]),
            "ref_unscaled":  float(y_value_test_unscaled[i, j]),
            "pred_unscaled": float(y_value_pred_unscaled[i, j]),
            "abs_error_unscaled": float(abs_errors_unscaled_masked[i, j]),
            "sq_error_unscaled":  float(sq_errors_unscaled_masked[i, j]),
        })
df_unscaled = pd.DataFrame(rows_unscaled)

# Write the unscaled predictions to disk
out_csv_unscaled = Path(f"predictions_and_errors_unscaled_{y_encoding_format_name}.csv")
df_unscaled.to_csv(out_csv_unscaled, index=False, float_format="%.6g")
print(f"\nSaved CSV -> {out_csv_unscaled.resolve()}\n")

# Per-sample table (unscaled units)
for i in range(n_samples_to_show):
    sub = df_unscaled[df_unscaled["sample_idx"] == i].copy()
    sub = sub[[
        "param",
        "exists_true",
        "exists_pred_prob",
        "exists_pred_mask",
        "ref_unscaled",
        "pred_unscaled",
        "abs_error_unscaled",
        "sq_error_unscaled"
    ]]
    header_line = (
        f"— Sample {i} (Unscaled) — "
        f"X: cavity_frequency={X_test_unscaled[i,0]:.6g}, kappa={X_test_unscaled[i,1]:.6g}"
    )
    print(header_line)
    print(sub.to_string(index=False))
    print()

# Summary statistics over every defined parameter of every test sample
print("Global unscaled error stats (defined parameters only):")
print("  min abs_error:", float(np.nanmin(abs_errors_unscaled_masked)))
print("  median abs_error:", float(np.nanmedian(abs_errors_unscaled_masked)))
print("  max abs_error:", float(np.nanmax(abs_errors_unscaled_masked)))
print("\nHere onehot/linear encoding and the MLP which maps categorical data to 1s and 0s is probably throwing off the global average. These will be rounded in the future and will probably always round to the right number to reconstruct the correct category-- but for now it might throw off the overall average error. In the future we might want to just have it consider the non-categorical data when finding an overall average and reporting that number.\n")



Saved CSV -> /home/olivias/ML_qubit_design/model_predict_cavity_claw_RouteMeander_eigenmode/predictions_and_errors_unscaled_one_hot.csv

— Sample 0 (Unscaled) — X: cavity_frequency=5.24699e+09, kappa=168252
                                                          param  exists_true  exists_pred_prob  exists_pred_mask  ref_unscaled  pred_unscaled  abs_error_unscaled  sq_error_unscaled
   design_options.claw_opts.connection_pads.readout.claw_length          1.0          0.999959               1.0      0.000275   2.832581e-04        8.258074e-06       6.819579e-11
design_options.claw_opts.connection_pads.readout.ground_spacing          1.0          0.999962               1.0      0.000004   4.676063e-06        5.760632e-07       3.318488e-13
                                 design_options.claw_opts.pos_x          1.0          0.999958               1.0     -0.001500  -1.501037e-03        1.037287e-06       1.075963e-12
                          design_options.claw_opts.cross_length     