# Model Predictions (coupler_NCap_cap_matrix)

## Configuration

In [1]:
# The hyperparameters are set in the parameter file (the `parameters` module imported below).
# It's recommended to read that file first: it is worth a look, and it is where you change settings.

from parameters import *

## Library

In [2]:
# Quiet TensorFlow's console logging. The two environment variables are set once,
# above the TensorFlow import, so they are already in place when TensorFlow loads.
# Comment the next two assignments out if you want to see TensorFlow's warnings.
import os
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Standard library
import gc
from pathlib import Path

# Third-party
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model

## Dataset

### Load

In [3]:
# Load the train/val/test arrays saved by the previous notebook (or downloaded from the drive).
# File names follow: <DATA_DIR>/npy/<x|y>_<split>_<encoding>_encoding[_augmented].npy
# DATA_DIR, DATA_AUGMENTATION and ENCODING_TYPE come from the parameters import.


def _load_split(prefix, split, encoding_name):
    """Load one saved array.

    Args:
        prefix: 'x' for inputs or 'y' for targets.
        split: 'train', 'val' or 'test'.
        encoding_name: encoding part of the file name, e.g. 'one_hot' or 'linear'.

    Returns:
        The array stored in the matching .npy file. The '_augmented' file is
        used when DATA_AUGMENTATION is truthy, the plain one otherwise.
    """
    suffix = '_augmented' if DATA_AUGMENTATION else ''
    path = '{}/npy/{}_{}_{}_encoding{}.npy'.format(DATA_DIR, prefix, split, encoding_name, suffix)
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects;
    # only point this at files you produced yourself or otherwise trust.
    return np.load(path, allow_pickle=True)


if 'Try Both' not in ENCODING_TYPE:
    # Fix: `encoding` used to be assigned only on the augmented path, so the
    # non-augmented path (and later cells that read `encoding`) hit a NameError.
    encoding = ENCODING_TYPE.replace(' ', '_')
    X_train = _load_split('x', 'train', encoding)
    X_val = _load_split('x', 'val', encoding)
    X_test = _load_split('x', 'test', encoding)
    y_train = _load_split('y', 'train', encoding)
    y_val = _load_split('y', 'val', encoding)
    y_test = _load_split('y', 'test', encoding)

else:
    # 'Try Both': keep the one-hot and linear datasets side by side under suffixed names.
    X_train_one_hot_encoding = _load_split('x', 'train', 'one_hot')
    X_val_one_hot_encoding = _load_split('x', 'val', 'one_hot')
    X_test_one_hot_encoding = _load_split('x', 'test', 'one_hot')
    y_train_one_hot_encoding = _load_split('y', 'train', 'one_hot')
    y_val_one_hot_encoding = _load_split('y', 'val', 'one_hot')
    y_test_one_hot_encoding = _load_split('y', 'test', 'one_hot')

    X_train_linear_encoding = _load_split('x', 'train', 'linear')
    X_val_linear_encoding = _load_split('x', 'val', 'linear')
    X_test_linear_encoding = _load_split('x', 'test', 'linear')
    y_train_linear_encoding = _load_split('y', 'train', 'linear')
    y_val_linear_encoding = _load_split('y', 'val', 'linear')
    y_test_linear_encoding = _load_split('y', 'test', 'linear')


### Visualize

In [4]:
# Pick the model checkpoint to evaluate and the test split / encoding it is evaluated on.
y_encoding_format_name = encoding
chosen_path = "model/mlp_6_1000_3800_300_400_4_best_model.keras"
X_test_cur, y_test_cur = np.asarray(X_test), np.asarray(y_test)

# Target column labels: the comma-separated first line of the y-characteristics CSV.
y_headers_csv = 'y_characteristics_{}_encoding.csv'.format(y_encoding_format_name)
with open(y_headers_csv, 'r') as f:
    first_line = f.readline()
headers = first_line.strip().split(',')

In [5]:
# Load the chosen model and predict on the current test set, pinned to the CPU.
# First clear Keras' global state and run a garbage-collection pass.
tf.keras.backend.clear_session()
gc.collect()
# Best-effort reset of the memory statistics for 'GPU:0'; any exception is swallowed
# (the recorded run below logged that no GPU was available).
try:
    tf.config.experimental.reset_memory_stats('GPU:0')
except Exception:
    pass

with tf.device('/CPU:0'):
    chosen_model = load_model(chosen_path, compile=False)  # compile=False: the model is only used for prediction here
    y_pred = chosen_model.predict(X_test_cur, verbose=0)

# Report which model file was used, its layer summary, and the evaluation size.
print(f"\n—— {os.path.basename(chosen_path)} ——")
chosen_model.summary()
print(f"Samples: {len(X_test_cur)} | Targets dim: {y_test_cur.shape[1]}")

E0000 00:00:1764194150.275793   67022 cuda_executor.cc:1309] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1764194150.351747   67022 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...



—— mlp_6_1000_3800_300_400_4_best_model.keras ——


I0000 00:00:1764194150.890994  109556 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Samples: 65 | Targets dim: 4


# Scaled

In [6]:
# Scaled predictions vs. references: per-sample table, CSV export, and global error stats.

# Number of test samples to tabulate, export and print (use a smaller view if you want).
N_SAMPLES_TO_SHOW = 3

# Labels for the six input columns, in the order X_test_cur[:, 0..5] is read below.
feature_names = [
    "top_to_top",
    "top_to_bottom",
    "top_to_ground",
    "bottom_to_bottom",
    "bottom_to_ground",
    "ground_to_ground",
]

n_samples = min(N_SAMPLES_TO_SHOW, len(X_test_cur))
n_params  = y_test_cur.shape[1]

# Element-wise errors over the whole test set (still in scaled units).
residuals  = y_test_cur - y_pred
sq_errors  = residuals ** 2
abs_errors = np.abs(residuals)

# Long-format table: one row per (sample, target parameter) for the first n_samples samples.
rows = []
for i in range(n_samples):
    # Fix: "top_to_top" used to be read but never written into the row, so the
    # scaled CSV was missing a column that the unscaled CSV has.
    features = {name: X_test_cur[i, k] for k, name in enumerate(feature_names)}
    for j in range(n_params):
        rows.append({
            "sample_idx": i,
            **features,
            "param": headers[j],
            "ref":  y_test_cur[i, j],
            "pred": y_pred[i, j],
            "abs_error": abs_errors[i, j],
            "sq_error":  sq_errors[i, j],
        })
df = pd.DataFrame(rows)

# Save the scaled predictions (only the n_samples rows built above).
out_csv = Path(f"predictions_and_errors_{y_encoding_format_name}.csv")
df.to_csv(out_csv, index=False, float_format="%.6g")
print(f"\nSaved CSV -> {out_csv.resolve()}\n")

# Pretty print per-sample (scaled): the inputs on one line, then one row per target.
for i in range(n_samples):
    sub = df[df["sample_idx"] == i].copy()
    sub = sub[["param", "ref", "pred", "abs_error", "sq_error"]]
    header_line = (
        f"— Sample {i} — "
        f"X: top_to_top={X_test_cur[i,0]:.9g}, top_to_bottom={X_test_cur[i,1]:.9g}, top_to_ground={X_test_cur[i,2]:.9g}, bottom_to_bottom={X_test_cur[i,3]:.9g},bottom_to_ground={X_test_cur[i,4]:.9g},ground_to_ground={X_test_cur[i,5]:.9g}"
    )
    print(header_line)
    print(sub.to_string(index=False))
    print()

# Global stats (scaled), computed over every test sample and target, not just the ones shown.
print("Global scaled error stats:")
print("  min abs_error:", float(abs_errors.min()))
print("  median abs_error:", float(np.median(abs_errors)))
print("  max abs_error:", float(abs_errors.max()))
print("\nHere onehot/linear encoding and the MLP which maps categorical data to 1s and 0s is probably throwing off the global average. These will be rounded in the future and will probably always  round to the right number to reconstruct the correct category-- but for now it might throw off  the overall average error. In the future we might want to just have it consider the non-categorical data when finding an overall average and reporting that number.\n")


Saved CSV -> /home/olivias/ML_qubit_design/model_predict_coupler_NCap_cap_matrix/predictions_and_errors_one_hot.csv

— Sample 0 — X: top_to_top=0.00894376485, top_to_bottom=0.00566720538, top_to_ground=0.0434120775, bottom_to_bottom=0.00692052511,bottom_to_ground=0.0087706938,ground_to_ground=0.0110262114
                       param  ref     pred  abs_error  sq_error
      design_options.cap_gap  0.0 0.468750   0.468750  0.219727
    design_options.cap_width  0.2 0.255371   0.055371  0.003066
design_options.finger_length  0.0 0.353760   0.353760  0.125146
 design_options.finger_count  0.0 0.146240   0.146240  0.021386

— Sample 1 — X: top_to_top=0.0920458865, top_to_bottom=0.0642235232, top_to_ground=0.164009221, bottom_to_bottom=0.111459797,bottom_to_ground=0.142843431,ground_to_ground=0.146250336
                       param      ref     pred  abs_error  sq_error
      design_options.cap_gap 1.000000 0.600586   0.399414  0.159532
    design_options.cap_width 0.600000 0.482666   0.1

# Unscaled

In [7]:
# Unscaled predictions vs. references: undo the per-column scalers, then build the
# same per-sample table, CSV export, and global error stats as the scaled section.

# Load X feature names for the X scalers (one name per line, in X column order).
with open('X_names', 'r') as f:
    X_index_names = f.read().splitlines()


def _unscale_columns(scaled, scalers):
    """Return a copy of `scaled` with column j passed through scalers[j].inverse_transform.

    Each scaler is applied to its whole column in one call instead of one call
    per element. The result keeps the dtype of `scaled`, as the element-wise
    assignment did before.
    NOTE(review): assumes each scaler maps samples independently (true of
    scikit-learn's per-feature scalers) — confirm for any custom scaler.
    """
    unscaled = np.asarray(scaled).copy()
    if unscaled.shape[0] == 0:
        # Nothing to transform; scalers may reject an empty input.
        return unscaled
    for j, scaler in enumerate(scalers):
        unscaled[:, j] = scaler.inverse_transform(unscaled[:, j].reshape(-1, 1))[:, 0]
    return unscaled


# Load every scaler once. Previously each one was re-read from disk for every
# single array element (samples x columns loads).
# NOTE(review): joblib.load unpickles the file; only load scaler files you trust.
X_scalers = [
    joblib.load(f'scalers/scaler_X_{X_index_names[j]}.save')
    for j in range(X_test_cur.shape[1])
]
y_scalers = [
    joblib.load(f'scalers/scaler_y_{headers[j]}_{y_encoding_format_name}_encoding.save')
    for j in range(y_test_cur.shape[1])
]

# Unscale X, then y (refs and preds share the same per-target scalers).
X_test_unscaled = _unscale_columns(X_test_cur, X_scalers)
y_test_unscaled = _unscale_columns(y_test_cur, y_scalers)
y_pred_unscaled = _unscale_columns(y_pred, y_scalers)

# Errors (unscaled)
sq_errors_unscaled  = (y_test_unscaled - y_pred_unscaled) ** 2
abs_errors_unscaled = np.abs(y_test_unscaled - y_pred_unscaled)

# Number of samples to tabulate/print and number of target columns, taken from
# this cell's own arrays. N_SAMPLES_TO_SHOW is the constant set in the scaled section.
n_samples_unscaled = min(N_SAMPLES_TO_SHOW, len(X_test_unscaled))
n_params_unscaled = y_test_unscaled.shape[1]

# Build dataframe (unscaled): one row per (sample, target parameter).
rows_unscaled = []
for i in range(n_samples_unscaled):
    top_to_top, top_to_bottom, top_to_ground, bottom_to_bottom, bottom_to_ground, ground_to_ground = (
        X_test_unscaled[i, k] for k in range(6)
    )
    for j in range(n_params_unscaled):
        rows_unscaled.append({
            "sample_idx": i,
            "top_to_top": top_to_top,
            "top_to_bottom": top_to_bottom,
            "top_to_ground": top_to_ground,
            "bottom_to_bottom": bottom_to_bottom,
            "bottom_to_ground": bottom_to_ground,
            "ground_to_ground": ground_to_ground,
            "param": headers[j],
            "ref_unscaled":  y_test_unscaled[i, j],
            "pred_unscaled": y_pred_unscaled[i, j],
            "abs_error_unscaled": abs_errors_unscaled[i, j],
            "sq_error_unscaled":  sq_errors_unscaled[i, j],
        })
df_unscaled = pd.DataFrame(rows_unscaled)

# Save (unscaled)
out_csv_unscaled = Path(f"predictions_and_errors_unscaled_{y_encoding_format_name}.csv")
df_unscaled.to_csv(out_csv_unscaled, index=False, float_format="%.6g")
print(f"\nSaved CSV -> {out_csv_unscaled.resolve()}\n")

# Pretty print per-sample (unscaled)
for i in range(n_samples_unscaled):
    sub = df_unscaled[df_unscaled["sample_idx"] == i].copy()
    sub = sub[["param", "ref_unscaled", "pred_unscaled", "abs_error_unscaled", "sq_error_unscaled"]]
    header_line = (
        f"— Sample {i} (Unscaled) — "
        f"X: top_to_top={X_test_unscaled[i,0]:.9g}, top_to_bottom={X_test_unscaled[i,1]:.9g}, top_to_ground={X_test_unscaled[i,2]:.9g}, bottom_to_bottom={X_test_unscaled[i,3]:.9g},bottom_to_ground={X_test_unscaled[i,4]:.9g},ground_to_ground={X_test_unscaled[i,5]:.9g}"
    )
    print(header_line)
    print(sub.to_string(index=False))
    print()

# Global stats (unscaled), computed over every test sample and target.
print("Global unscaled error stats:")
print("  min abs_error:", float(abs_errors_unscaled.min()))
print("  median abs_error:", float(np.median(abs_errors_unscaled)))
print("  max abs_error:", float(abs_errors_unscaled.max()))
print("\nHere onehot/linear encoding and the MLP which maps categorical data to 1s and 0s is probably throwing off the global average. These will be rounded in the future and will probably always  round to the right number to reconstruct the correct category-- but for now it might throw off  the overall average error. In the future we might want to just have it consider the non-categorical data when finding an overall average and reporting that number.\n")



Saved CSV -> /home/olivias/ML_qubit_design/model_predict_coupler_NCap_cap_matrix/predictions_and_errors_unscaled_one_hot.csv

— Sample 0 (Unscaled) — X: top_to_top=14.91395, top_to_bottom=0.6691, top_to_ground=14.05235, bottom_to_bottom=13.62255,bottom_to_ground=12.78564,ground_to_ground=58.0137
                       param  ref_unscaled  pred_unscaled  abs_error_unscaled  sq_error_unscaled
      design_options.cap_gap      0.000002       0.000003        9.398369e-07       8.832934e-13
    design_options.cap_width      0.000007       0.000007        5.505806e-07       3.031390e-13
design_options.finger_length      0.000016       0.000027        1.062407e-05       1.128708e-10
 design_options.finger_count      1.000000       2.316406        1.316406e+00       1.732925e+00

— Sample 1 (Unscaled) — X: top_to_top=21.64499, top_to_bottom=3.83797, top_to_ground=17.53586, bottom_to_bottom=27.51404,bottom_to_ground=23.29018,ground_to_ground=73.35986
                       param  ref_unscaled 