# Inference code for trained nanoGPT

See details in GPT-QAOA paper: https://arxiv.org/pdf/2504.16350

In [9]:
!pip install Levenshtein networkx nltk qiskit


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [10]:
!pip cache purge

[0mFiles removed: 0 (0 bytes)


In [12]:
!ls -la ~/.cache

total 28
drwxr-xr-x  7 jovyan users 4096 Jul  7 12:57 .
drwxrwxrwx 19 jovyan users 4096 Jul 11 14:16 ..
drwxr-xr-x  2 jovyan users 4096 Jul  7 02:31 fontconfig
drwxr-xr-x  3 jovyan users 4096 Jul  7 02:19 jedi
drwxr-xr-x  2 jovyan users 4096 Jul  7 02:31 matplotlib
drwxr-xr-x  4 jovyan users 4096 Jul  7 12:59 pip
drwxr-xr-x  3 jovyan users 4096 Jul  7 02:33 wandb


In [13]:
!du -sm ~/.cache/*

1	/home/jovyan/.cache/fontconfig
378	/home/jovyan/.cache/jedi
1	/home/jovyan/.cache/matplotlib
3	/home/jovyan/.cache/pip
1	/home/jovyan/.cache/wandb


In [1]:
from contextlib import nullcontext
import importlib
import pathlib
import pickle
import random
import os
import time
from typing import Dict, List, Tuple

import Levenshtein
import networkx
from nltk.translate.bleu_score import sentence_bleu
import numpy as np
import qiskit
from qiskit import qpy
from qiskit import QuantumCircuit
import torch
import torch.nn.functional as F
import tqdm

import sys
sys.path.append("../")

# import model_qaoa
import model_qaoa_cached

# importlib.reload(model_qaoa)
importlib.reload(model_qaoa_cached)
# from model_qaoa import GPT, GPTConfig
from model_qaoa_cached import GPT, GPTConfig

In [2]:
import torch

# Must be called early, before model creation or torch.compile
torch.set_float32_matmul_precision('high')

# Auxiliary functions

In [3]:
def load_graphs(file: pathlib.Path) -> List[networkx.classes.graph.Graph]:
    with open(file, "rb") as f:
        graphs = pickle.load(f)
    return graphs


def load_circuits(file: pathlib.Path) -> List[qiskit.circuit.quantumcircuit.QuantumCircuit]:
    with open(file, "rb") as f:
        circuits = qpy.load(f)
    return circuits


def graph_to_str_v1(graph: networkx.classes.graph.Graph) -> str:
    g_nodes_str = " ".join([f"{graph.nodes[u]['mu']:.2f}" for u in graph.nodes]) # 2-decimal precision
    g_edges_str = ", ".join([f"({u}, {v}), {graph.edges[u, v]['weight']:.2f}" for u, v in graph.edges]) # 2-decimal precision
    bos_token = "<bos>"
    end_of_graph_token = "<end_of_graph>"
    nodes_weight_start = "<node_weights_start>"
    nodes_weight_end = "<node_weights_end>"
    return " ".join([
        bos_token,
        "<format_v1>",
        nodes_weight_start,
        g_nodes_str,
        nodes_weight_end,
        g_edges_str,
        end_of_graph_token
    ])


def graph_to_str_v2(graph: networkx.classes.graph.Graph) -> str:
    g_nodes_str = " ".join([f"{graph.nodes[u]['mu']:.3f}" for u in graph.nodes]) # 3-decimal precision
    g_edges_str = ", ".join([f"({u}, {v}), {graph.edges[u, v]['weight']:.3f}" for u, v in graph.edges]) # 3-decimal precision
    bos_token = "<bos>"
    end_of_graph_token = "<end_of_graph>"
    nodes_weight_start = "<node_weights_start>"
    nodes_weight_end = "<node_weights_end>"
    return " ".join([
        bos_token,
        "<format_v2>",
        nodes_weight_start,
        g_nodes_str,
        nodes_weight_end,
        g_edges_str,
        end_of_graph_token
    ])


def graph_to_tokens_old_format(graph: networkx.classes.graph.Graph) -> List[str]:
    """
        Compound tokens like '(0, 1)' and '[0 1]'.
        And 2-decimal precision for float numbers.
        Graphs only with edges weights.
    """
    bos_token = "<bos>"
    end_of_graph_token = "<end_of_graph>"
    graph_tokens = []
    graph_tokens.append(bos_token)
    for u, v in graph.edges:
        graph_tokens.append(f"({u},{v})")
        graph_tokens.append(f"{graph.edges[u, v]['weight']:.2f}") # 2-decimal precision
    graph_tokens.append(end_of_graph_token)    
    return graph_tokens


def graph_to_tokens_v1(graph: networkx.classes.graph.Graph) -> List[str]:
    """
        Compound tokens like '(0, 1)' and '[0 1]'.
        And 2-decimal precision for float numbers.
        Graphs with nodes and edges weights.
    """
    bos_token = "<bos>"
    end_of_graph_token = "<end_of_graph>"
    nodes_weight_start = "<node_weights_start>"
    nodes_weight_end = "<node_weights_end>"
    graph_tokens = []
    graph_tokens.append(bos_token)
    graph_tokens.append("<format_v1>")

    # nodes
    graph_tokens.append(nodes_weight_start)
    for u in graph.nodes:
        if "return_" in graph.nodes[u]:
            graph_tokens.append(f"{graph.nodes[u]['return_']:.2f}") # 2-decimal precision
        elif "mu" in graph.nodes[u]:
            graph_tokens.append(f"{graph.nodes[u]['mu']:.2f}") # 2-decimal precision
        else:
            raise Exception(f"Cannot find return value using key 'mu' and 'return_'.")
    
    graph_tokens.append(nodes_weight_end)
    
    # edges
    for u, v in graph.edges:
        graph_tokens.append(f"({u},{v})")
        graph_tokens.append(f"{graph.edges[u, v]['weight']:.2f}") # 2-decimal precision
    graph_tokens.append(end_of_graph_token)    
    return graph_tokens


def graph_to_tokens_v2(graph: networkx.classes.graph.Graph) -> List[str]:
    """
        Compositional tokens like '(', '0', '1', ')'.
        And 3-decimal precision for float numbers.
        Graphs with nodes and edges weights.
    """
    bos_token = "<bos>"
    end_of_graph_token = "<end_of_graph>"
    nodes_weight_start = "<node_weights_start>"
    nodes_weight_end = "<node_weights_end>"
    graph_tokens = []
    graph_tokens.append(bos_token)
    graph_tokens.append("<format_v2>")
    
    # nodes
    graph_tokens.append(nodes_weight_start)
    for u in graph.nodes:
        graph_tokens.append(f"{graph.nodes[u]['mu']:.3f}") # 3-decimal precision
    graph_tokens.append(nodes_weight_end)
    
    # edges
    for u, v in graph.edges:
        graph_tokens.append("(")
        graph_tokens.append(f"{u}")
        graph_tokens.append(f"{v}")
        graph_tokens.append(")")
        graph_tokens.append(f"{graph.edges[u, v]['weight']:.3f}") # 3-decimal precision
    graph_tokens.append(end_of_graph_token)
    return graph_tokens


def circuit_to_str_v1(circuit: qiskit.circuit.quantumcircuit.QuantumCircuit) -> str:
    op_names_exclude = {"barrier", "h", "initialize", "measure"}

    circuit_str = ""
    for idx, instruction in enumerate(circuit.data):
        op_name = instruction.operation.name
        if op_name in op_names_exclude: # skip unnecessary operations
            continue
        params = instruction.operation.params
        params_str = "".join(map(lambda x: f"{x:.2f}", params))
        qubit_labels = [circuit.qubits.index(qubit) for qubit in instruction.qubits]
        circuit_str += f"<new_layer_p>, {op_name}, {qubit_labels}" + (f", {params_str}" if params_str else "") + " "
    
    return circuit_str


def circuit_to_str_v2(circuit: qiskit.circuit.quantumcircuit.QuantumCircuit) -> str:
    op_names_exclude = {"barrier", "h", "initialize", "measure"}

    circuit_str = ""
    for idx, instruction in enumerate(circuit.data):
        op_name = instruction.operation.name
        if op_name in op_names_exclude: # skip unnecessary operations
            continue
        params = instruction.operation.params
        params_str = "".join(map(lambda x: f"{x:.2f}", params)) # 2-decimal precision
        qubit_labels = [circuit.qubits.index(qubit) for qubit in instruction.qubits]
        qubit_labels_str = "(" + ", ".join(map(str, qubit_labels)) + ")"
        circuit_str += f"<new_layer_p>, {op_name}, {qubit_labels_str}" + (f", {params_str}" if params_str else "") + " "
    
    return circuit_str


def circuit_to_tokens_old_format(circuit: QuantumCircuit) -> List[str]:
    return circuit_to_tokens_v1(circuit)


def circuit_to_tokens_v1(circuit: qiskit.circuit.quantumcircuit.QuantumCircuit) -> List[str]:
    """
        Compound tokens like '(0, 1)' and '[0 1]'.
        And 2-decimal precision for float numbers.
    """
    op_names_exclude = {"barrier", "h", "initialize", "measure"}
    end_of_circuit_token = "<end_of_circuit>"

    circuit_tokens = []
    for idx, instruction in enumerate(circuit.data):
        op_name = instruction.operation.name
        if op_name in op_names_exclude: # skip unnecessary operations
            continue
        params = instruction.operation.params
        qubit_labels = [circuit.qubits.index(qubit) for qubit in instruction.qubits]
        layer = []
        layer.append("<new_layer_p>")
        layer.append(f"{op_name}")
        layer.append(f"{qubit_labels}") # compound token for qubit labels
        if params:
            params_with_spaces = [f"{params[i % 2]:.2f}" if i % 2 == 0 else " " for i in range(len(params) * 2 - 1)] # 2-decimal precision
            layer.extend(params_with_spaces)
        circuit_tokens.extend(layer)
    circuit_tokens.append(end_of_circuit_token)
    return circuit_tokens


def circuit_to_tokens_v2(circuit: qiskit.circuit.quantumcircuit.QuantumCircuit) -> List[str]:
    """
        Compositional tokens like '(', '0', '1', ')'.
        And 2-decimal precision for float numbers.
    """
    op_names_exclude = {"barrier", "h", "initialize", "measure"}
    end_of_circuit_token = "<end_of_circuit>"

    circuit_tokens = []
    for idx, instruction in enumerate(circuit.data):
        op_name = instruction.operation.name
        if op_name in op_names_exclude: # skip unnecessary operations
            continue
        params = instruction.operation.params
        qubit_labels = [circuit.qubits.index(qubit) for qubit in instruction.qubits]
        layer = []
        layer.append("<new_layer_p>")
        layer.append(f"{op_name}")

        # compositional tokens for qubit labels
        layer.append("(")
        for l in qubit_labels:
            layer.append(f"{l}")
        layer.append(")")
        
        if params:
            params_with_spaces = [f"{params[i % 2]:.2f}" if i % 2 == 0 else " " for i in range(len(params) * 2 - 1)] # 2-decimal precision
            layer.extend(params_with_spaces)
        circuit_tokens.extend(layer)
    circuit_tokens.append(end_of_circuit_token)
    return circuit_tokens


def generate_long_circuit(model, graph_tokens, stoi, itos, max_total_tokens=6000, temperature=1.0, top_k=10):
    model.eval()
    pad_id = model.config.pad_token_id
    stop_id = model.config.stop_token_id
    block_size = model.config.block_size
    device = next(model.parameters()).device

    # === Encode input graph ===
    input_ids = [stoi.get(tok, stoi["<unk>"]) for tok in graph_tokens]
    generated_ids = input_ids[:]

    while len(generated_ids) < max_total_tokens:
        # Prepare input window: last block_size tokens
        idx_cond = torch.tensor([generated_ids[-block_size:]], dtype=torch.long).to(device)
        if (idx_cond == pad_id).all():
            print("Context is only padding. Stopping early.")
            break

        with torch.no_grad():
            logits, _ = model(idx_cond)
            logits = logits[:, -1, :] / temperature
            # logits[:, pad_id] = -float('Inf')  # prevent sampling <pad>

            # if top_k is not None:
            #     v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
            #     logits[logits < v[:, [-1]]] = -float('Inf')

            if top_k is not None:
                top_k = min(top_k, logits.size(-1))
                v, _ = torch.topk(logits, top_k)

                # Apply top-k mask, but fallback if all logits would be -inf
                threshold = v[:, [-1]]  # shape: (B, 1)
                logits_masked = logits.clone()
                logits_masked[logits < threshold] = -float("Inf")

                if torch.isinf(logits_masked).all():
                    # fallback: don't mask at all
                    print("All logits would be masked. Falling back to unfiltered logits.")
                else:
                    logits = logits_masked

            if torch.isnan(logits).any() or torch.isinf(logits).any():
                print("NaN or Inf detected in logits. Dumping logits:")
                print(logits)
                raise RuntimeError("NaNs or Infs in logits")

            probs = F.softmax(logits, dim=-1)
            if torch.isnan(probs).any() or torch.isinf(probs).any():
                raise RuntimeError("Invalid probabilities: contains NaN or Inf after softmax.")
            next_id = torch.multinomial(probs, num_samples=1).item()

        generated_ids.append(next_id)

        # Stop if <end_of_circuit> is generated
        if stop_id is not None and next_id == stop_id:
            break

    # === Decode circuit tokens (excluding graph) ===
    generated_tokens = [itos[i] for i in generated_ids]
    for stop_token in ("<end_of_circuit>", "<pad>"):
        if stop_token in generated_tokens:
            generated_tokens = generated_tokens[:generated_tokens.index(stop_token)]
    generated_circuit_tokens = generated_tokens[len(input_ids):]
    return generated_circuit_tokens


def generate_long_circuit_compiled(
    model,
    config,
    graph_tokens,
    stoi,
    itos,
    max_total_tokens=6000,
    temperature=1.0,
    top_k=10
):
    pad_id = config.pad_token_id
    stop_id = config.stop_token_id
    block_size = config.block_size
    device = next(model.parameters()).device

    print(f"device: {device}")

    # Encode input graph
    input_ids = [stoi.get(tok, stoi["<unk>"]) for tok in graph_tokens]
    generated_ids = input_ids[:]

    while len(generated_ids) < max_total_tokens:
        # Take last `block_size` tokens as context
        idx_cond = torch.tensor([generated_ids[-block_size:]], dtype=torch.long, device=device)

        # Optional: stop early if all pad tokens (unlikely with real data)
        if torch.all(idx_cond == pad_id):
            break

        logits, _ = model(idx_cond)
        logits = logits[:, -1, :]  # only last token
        logits /= temperature

        if top_k is not None:
            k = min(top_k, logits.size(-1))
            v, _ = torch.topk(logits, k)
            threshold = v[:, [-1]]
            logits_masked = logits.clone()
            logits_masked[logits < threshold] = -float("Inf")

            # Avoid fallback logic for performance; assume model is healthy
            logits = logits_masked

        probs = F.softmax(logits, dim=-1)
        next_id = torch.multinomial(probs, num_samples=1)[0, 0].item()
        generated_ids.append(next_id)

        # Stop if end token generated
        if stop_id is not None and next_id == stop_id:
            break

    # Decode tokens, skip graph
    generated_tokens = [itos[i] for i in generated_ids]
    for stop_token in ("<end_of_circuit>", "<pad>"):
        if stop_token in generated_tokens:
            generated_tokens = generated_tokens[:generated_tokens.index(stop_token)]

    return generated_tokens[len(input_ids):]


def generate_long_circuit_compiled_cached(
    model,
    config,
    graph_tokens,
    stoi,
    itos,
    max_total_tokens=6000,
    temperature=1.0,
    top_k=10
):
    pad_id = config.pad_token_id
    stop_id = config.stop_token_id
    block_size = config.block_size
    device = next(model.parameters()).device

    print(f"device: {device}")

    # Encode input graph
    input_ids = [stoi.get(tok, stoi["<unk>"]) for tok in graph_tokens]
    generated_ids = input_ids[:]

    # Track state across tokens
    past_key_values = None
    input_tensor = torch.tensor([input_ids], dtype=torch.long, device=device)

    # Prime the cache with the full input graph
    with torch.no_grad():
        logits, _, past_key_values = model(input_tensor, use_cache=True)

    while len(generated_ids) < max_total_tokens:
        idx_cond = torch.tensor([[generated_ids[-1]]], dtype=torch.long, device=device)

        with torch.no_grad():
            logits, _, past_key_values = model(
                idx_cond, past_key_values=past_key_values, use_cache=True
            )
            logits = logits[:, -1, :] / temperature

            if top_k is not None:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                logits[logits < v[:, [-1]]] = -float('Inf')

            probs = F.softmax(logits, dim=-1)
            next_id = torch.multinomial(probs, num_samples=1)[0, 0].item()

        generated_ids.append(next_id)

        if stop_id is not None and next_id == stop_id:
            break

    # === Decode output ===
    generated_tokens = [itos[i] for i in generated_ids]
    for stop_token in ("<end_of_circuit>", "<pad>"):
        if stop_token in generated_tokens:
            generated_tokens = generated_tokens[:generated_tokens.index(stop_token)]

    return generated_tokens[len(input_ids):]


# make sure:
# - Ensure all tensors are created on CPU.
# - Ensure all tensors are created on CPU.
# - Make sure model.eval() is called after transferring the model to CPU.
def generate_long_circuit_cpu(
    model,
    config,
    graph_tokens,
    stoi,
    itos,
    max_total_tokens=6000,
    temperature=1.0,
    top_k=10
):
    pad_id = config.pad_token_id
    stop_id = config.stop_token_id
    block_size = config.block_size
    device = torch.device("cpu")

    model = model.to(device)
    model.eval()

    print(f"device: {device}")

    # Encode input graph
    input_ids = [stoi.get(tok, stoi["<unk>"]) for tok in graph_tokens]
    generated_ids = input_ids[:]

    # Track state across tokens
    past_key_values = None
    input_tensor = torch.tensor([input_ids], dtype=torch.long, device=device)

    # Prime the cache with the full input graph
    with torch.no_grad():
        logits, _, past_key_values = model(input_tensor, use_cache=True)

    while len(generated_ids) < max_total_tokens:
        idx_cond = torch.tensor([[generated_ids[-1]]], dtype=torch.long, device=device)

        with torch.no_grad():
            logits, _, past_key_values = model(
                idx_cond, past_key_values=past_key_values, use_cache=True
            )
            logits = logits[:, -1, :] / temperature

            if top_k is not None:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                logits[logits < v[:, [-1]]] = -float('Inf')

            probs = F.softmax(logits, dim=-1)
            next_id = torch.multinomial(probs, num_samples=1)[0, 0].item()

        generated_ids.append(next_id)

        if stop_id is not None and next_id == stop_id:
            break

    # === Decode output ===
    generated_tokens = [itos[i] for i in generated_ids]
    for stop_token in ("<end_of_circuit>", "<pad>"):
        if stop_token in generated_tokens:
            generated_tokens = generated_tokens[:generated_tokens.index(stop_token)]

    return generated_tokens[len(input_ids):]


def warmup_model(model, input_len: int, device: str = "cuda"):
    """Run a warm-up pass with fixed input length to compile the graph once."""
    dummy_input = torch.randint(0, model.config.vocab_size, (1, input_len), device=device)
    with torch.inference_mode():
        _ = model(dummy_input, use_cache=True)


def generate_batch(graphs_batch, model, gptconf, stoi, itos, max_total_tokens, device):
    if device != "cuda":
        print("Device should be equal to 'cuda'")
        return None, None

    print("Warm-up model")
    start_t = time.time()
    warmup_model(model, input_len=len(graph_to_tokens(graphs_batch[0])), device=device)
    end_t = time.time()
    print(f"Elapsed time for warm-up: {end_t - start_t} secs")

    times = []
    results = []
    for graph in tqdm.tqdm(graphs_batch):
        graph_tokens = graph_to_tokens(graph)
        start_t = time.time()
        generated_circuit_tokens = generate_long_circuit_compiled_cached(
            model,
            gptconf,
            graph_tokens,
            stoi,
            itos,
            max_total_tokens=max_total_tokens,
            temperature=0.5,
            top_k=None
        )
        end_t = time.time()
        gen_t = end_t - start_t
        times.append(gen_t)
        results.append(generated_circuit_tokens)
        print(f"Generation time: {gen_t} secs, generated QAOA Circuit length: {len(generated_circuit_tokens)}")

    return results, times


def char_accuracy(pred, target):
    min_len = min(len(pred), len(target))
    correct = sum(p == t for p, t in zip(pred[:min_len], target[:min_len]))
    return correct / min_len


def token_accuracy(pred_ids, target_ids):
    min_len = min(len(pred_ids), len(target_ids))
    correct = sum(p == t for p, t in zip(pred_ids[:min_len], target_ids[:min_len]))
    return correct / min_len


def levenshtein_distance(pred, target):
    distance = Levenshtein.distance(pred, target)
    norm_distance = distance / max(len(pred), len(target))
    return distance, norm_distance


def bleu_score(references, candidate):
    bleu = sentence_bleu(references, candidate)
    return bleu


def save_seq_tokens_to_file(tokens: List[str], filename: str) -> None:
    with open(filename, "wb") as file:
        pickle.dump(tokens, file)


def load_seq_tokens_from_file(filename: str) -> List[str]:
    with open(filename, "rb") as file:
        loaded_list = pickle.load(file)
    return loaded_list

# Main part

### Load graphs and circuits

In [4]:
!ls -la ../data/graph_and_circuits

total 88
drwxr-xr-x 7 jovyan users  4096 Jul 11 02:57 .
drwxr-xr-x 5 jovyan users  4096 May 29 07:10 ..
drwxr-xr-x 2 jovyan users  4096 Jun 28 01:02 .ipynb_checkpoints
drwxr-xr-x 3 jovyan users  4096 Jul 10 17:05 11_eleventh_batch_training
drwxr-xr-x 2 jovyan users  4096 Jul 10 17:05 13_thirtheenth_batch_training
-rw-r--r-- 1 jovyan users 39837 Jul 11 02:57 graphs_07_10_2025_10_38pm.pkl
drwxr-xr-x 3 jovyan users  4096 Jul  8 01:10 graphs_diff_sizes
-rw-r--r-- 1 jovyan users 19891 Jun 29 18:37 random_graphs_for_testing.pkl
drwxr-xr-x 3 jovyan users  4096 Jul  8 00:55 third_batch


In [5]:
# dirname = "../data/graph_and_circuits/third_batch"
dirname = "../data/graph_and_circuits/11_eleventh_batch_training"

In [6]:
dirobj = pathlib.Path(dirname)

graphs_batch_files = sorted(list(dirobj.rglob(f"*pkl")))
circuits_batch_files = sorted(list(dirobj.rglob(f"*qpy")))

graphs_batch_file = graphs_batch_files[0]
circuits_batch_file = circuits_batch_files[0]

print(graphs_batch_file)
print(circuits_batch_file)

../data/graph_and_circuits/11_eleventh_batch_training/graphs11_batch1.pkl
../data/graph_and_circuits/11_eleventh_batch_training/circuits11_batch1.qpy


In [7]:
graphs_batch = load_graphs(graphs_batch_file)

type(graphs_batch), len(graphs_batch), type(graphs_batch[0])

(list, 250, networkx.classes.graph.Graph)

In [8]:
circuits_batch = load_circuits(circuits_batch_file)

type(circuits_batch), len(circuits_batch), type(circuits_batch[0])

(list, 250, qiskit.circuit.quantumcircuit.QuantumCircuit)

In [11]:
gate_counts = circuits_batch[0].count_ops()
print(gate_counts)
sum(gate_counts.values())

OrderedDict([('rzz', 525), ('rxx', 525), ('ryy', 525), ('rz', 75), ('measure', 15), ('initialize', 1), ('barrier', 1)])


1667

### Load random 15-nodes graphs for testing

In [5]:
!ls -la "../data/graph_and_circuits/"

total 88
drwxr-xr-x 7 jovyan users  4096 Jul 11 02:57 .
drwxr-xr-x 5 jovyan users  4096 May 29 07:10 ..
drwxr-xr-x 2 jovyan users  4096 Jun 28 01:02 .ipynb_checkpoints
drwxr-xr-x 3 jovyan users  4096 Jul 10 17:05 11_eleventh_batch_training
drwxr-xr-x 2 jovyan users  4096 Jul 10 17:05 13_thirtheenth_batch_training
-rw-r--r-- 1 jovyan users 39837 Jul 11 02:57 graphs_07_10_2025_10_38pm.pkl
drwxr-xr-x 3 jovyan users  4096 Jul  8 01:10 graphs_diff_sizes
-rw-r--r-- 1 jovyan users 19891 Jun 29 18:37 random_graphs_for_testing.pkl
drwxr-xr-x 3 jovyan users  4096 Jul  8 00:55 third_batch


In [6]:
graphs_batch_file = "../data/graph_and_circuits/random_graphs_for_testing.pkl"
graphs_batch = load_graphs(graphs_batch_file)
print(len(graphs_batch), type(graphs_batch), type(graphs_batch[0]))

5 <class 'list'> <class 'networkx.classes.graph.Graph'>


In [7]:
for graph in graphs_batch:
    print(len(graph.nodes))

15
15
15
15
15


### Load random 15-nodes graphs for testing (10 July 10:58pm)


In [8]:
!ls -la "../data/graph_and_circuits/"

total 88
drwxr-xr-x 7 jovyan users  4096 Jul 11 02:57 .
drwxr-xr-x 5 jovyan users  4096 May 29 07:10 ..
drwxr-xr-x 2 jovyan users  4096 Jun 28 01:02 .ipynb_checkpoints
drwxr-xr-x 3 jovyan users  4096 Jul 10 17:05 11_eleventh_batch_training
drwxr-xr-x 2 jovyan users  4096 Jul 10 17:05 13_thirtheenth_batch_training
-rw-r--r-- 1 jovyan users 39837 Jul 11 02:57 graphs_07_10_2025_10_38pm.pkl
drwxr-xr-x 3 jovyan users  4096 Jul  8 01:10 graphs_diff_sizes
-rw-r--r-- 1 jovyan users 19891 Jun 29 18:37 random_graphs_for_testing.pkl
drwxr-xr-x 3 jovyan users  4096 Jul  8 00:55 third_batch


In [9]:
graphs_batch_file = "../data/graph_and_circuits/graphs_07_10_2025_10_38pm.pkl"
graphs_batch = load_graphs(graphs_batch_file)
print(len(graphs_batch), type(graphs_batch), type(graphs_batch[0]))

10 <class 'list'> <class 'networkx.classes.graph.Graph'>


In [10]:
for graph in graphs_batch:
    print(len(graph.nodes))

15
15
15
15
15
15
15
15
15
15


### Load graphs to generation time estimation

In [7]:
!ls -la "../data/graph_and_circuits/graphs_diff_sizes/"

total 864
drwxr-xr-x 2 jovyan users  4096 Jun 28 01:03 .
drwxr-xr-x 5 jovyan users  4096 Jun 29 18:37 ..
-rw-r--r-- 1 jovyan users  9181 Jun 28 01:03 random_graph_5_10.pkl
-rw-r--r-- 1 jovyan users 10991 Jun 28 01:03 random_graph_5_11.pkl
-rw-r--r-- 1 jovyan users 12966 Jun 28 01:03 random_graph_5_12.pkl
-rw-r--r-- 1 jovyan users 15109 Jun 28 01:03 random_graph_5_13.pkl
-rw-r--r-- 1 jovyan users 17417 Jun 28 01:03 random_graph_5_14.pkl
-rw-r--r-- 1 jovyan users 19890 Jun 28 01:03 random_graph_5_15.pkl
-rw-r--r-- 1 jovyan users 22528 Jun 28 01:03 random_graph_5_16.pkl
-rw-r--r-- 1 jovyan users 25331 Jun 28 01:03 random_graph_5_17.pkl
-rw-r--r-- 1 jovyan users 28299 Jun 28 01:03 random_graph_5_18.pkl
-rw-r--r-- 1 jovyan users 31432 Jun 28 01:03 random_graph_5_19.pkl
-rw-r--r-- 1 jovyan users 34730 Jun 28 01:03 random_graph_5_20.pkl
-rw-r--r-- 1 jovyan users 38190 Jun 28 01:03 random_graph_5_21.pkl
-rw-r--r-- 1 jovyan users 41818 Jun 28 01:03 random_graph_5_22.pkl
-rw-r--r-- 1 jovyan user

In [11]:
graphs_sizes = [5, 10, 15, 20, 25, 30]
graphs_batch_files = [f"../data/graph_and_circuits/graphs_diff_sizes/random_graph_5_{n}.pkl" for n in graphs_sizes]
display(graphs_batch_files)

graphs_batches = [load_graphs(graphs_batch_file) for graphs_batch_file in graphs_batch_files]
graphs_batch = graphs_batches[0]
len(graphs_batches), len(graphs_batch), type(graphs_batch), type(graphs_batch[0]), len(graphs_batch[0].nodes)

['../data/graph_and_circuits/graphs_diff_sizes/random_graph_5_5.pkl',
 '../data/graph_and_circuits/graphs_diff_sizes/random_graph_5_10.pkl',
 '../data/graph_and_circuits/graphs_diff_sizes/random_graph_5_15.pkl',
 '../data/graph_and_circuits/graphs_diff_sizes/random_graph_5_20.pkl',
 '../data/graph_and_circuits/graphs_diff_sizes/random_graph_5_25.pkl',
 '../data/graph_and_circuits/graphs_diff_sizes/random_graph_5_30.pkl']

(6, 5, list, networkx.classes.graph.Graph, 5)

### Sample from a trained model

#### Parameters

In [8]:
# !ls -la ../../nanoGPT_qaoa/out-graph-qaoa-100m/
!ls -la ../../nanoGPT_qaoa/out-graph-qaoa-50m/
# !ls -la ../../nanoGPT_qaoa/out-graph-qaoa-20m/

total 1668588
drwxr-xr-x  3 jovyan users      4096 Jul 11 14:20 .
drwxr-xr-x 10 jovyan users      4096 Jul 11 12:25 ..
drwxr-xr-x  2 jovyan users      4096 Jul  8 21:46 .ipynb_checkpoints
-rw-r--r--  1 jovyan users 538138348 Jul 11 14:25 ckpt.pt
-rw-r--r--  1 jovyan users 646641388 Jul 10 16:19 ckpt_50m_20k_with_mu_and_format_v2.pt
-rw-r--r--  1 jovyan users 523097836 Jul  9 19:46 ckpt_50m_20k_without_mu.pt
-rw-r--r--  1 jovyan users    721254 Jul 11 14:20 meta_20k_old_format_and_without_mu.pkl


In [9]:
init_from = "resume"
# out_dir = "../checkpoints"
# out_dir = "../../nanoGPT_qaoa/out-graph-qaoa-100m/"
out_dir = "../../nanoGPT_qaoa/out-graph-qaoa-50m/"
# out_dir = "../../nanoGPT_qaoa/out-graph-qaoa-20m/"
start = "\n" # or "<|endoftext|>" or etc. Can also specify a file, use as: "FILE:prompt.txt"
seed = 1337

# mps
# device = "mps" # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc.
# # dtype = "bfloat16" if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else "float16" # 'float32' or 'bfloat16' or 'float16'
# dtype = "float32" # for mac only
# compile = False # use PyTorch 2.0 to compile the model to be faster

# cpu
# device = "cpu"    # run everything on CPU
# dtype = "float32" # recommended for CPU (no bfloat16 or float16 support)
# compile = False   # don't compile on CPU (PyTorch 2.0 compile gives little to no speedup or might even slow things down)

# cuda
device = "cuda" # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc.
dtype = "float32" # for mac only
compile = True # use PyTorch 2.0 to compile the model to be faster

#### Prepare torch and device

In [10]:
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
device_type = "cuda" if "cuda" in device else "cpu" # for later use in torch.autocast
ptdtype = {"float32": torch.float32, "bfloat16": torch.bfloat16, "float16": torch.float16}[dtype]
ctx = nullcontext() if device_type == "cpu" else torch.amp.autocast(device_type=device_type, dtype=ptdtype)

#### Load meta info

In [11]:
!ls -la ../../nanoGPT_qaoa/data/graph_qaoa/meta*.pkl

-rw-r--r-- 1 jovyan users 769754 Jul 10 22:05 ../../nanoGPT_qaoa/data/graph_qaoa/meta.pkl
-rw-r--r-- 1 jovyan users 653602 Jul 11 15:02 ../../nanoGPT_qaoa/data/graph_qaoa/meta_100m.pkl
-rw-r--r-- 1 jovyan users 721254 Jul 11 14:25 ../../nanoGPT_qaoa/data/graph_qaoa/meta_20k_old_format_and_without_mu.pkl


In [12]:
# meta_path = "../data/train/meta.pkl"
# meta_path = "../../nanoGPT_qaoa/data/graph_qaoa/meta_100m.pkl" # old 100M model
# meta_path = "../../nanoGPT_qaoa/data/graph_qaoa/meta.pkl" # new model
meta_path = "../../nanoGPT_qaoa/data/graph_qaoa/meta_20k_old_format_and_without_mu.pkl" # old model trained on 20k without nodes weights

print(f"Loading meta from {meta_path}...")
with open(meta_path, "rb") as f:
    meta = pickle.load(f)

vocab_size = meta["vocab_size"]
stoi, itos = meta["stoi"], meta["itos"]
pad_token = meta.get("pad_token", "<|pad|>")
pad_token_id = stoi.get(pad_token, 0)
encode = lambda s: [stoi.get(c, pad_token_id) for c in s]
decode = lambda l: "".join([itos.get(i, "") for i in l if i in itos])

print(f"{vocab_size=}")

Loading meta from ../../nanoGPT_qaoa/data/graph_qaoa/meta_20k_old_format_and_without_mu.pkl...
vocab_size=34937


#### Load model

In [13]:
# ckpt_path = os.path.join(out_dir, "ckpt.pt") # new model
ckpt_path = os.path.join(out_dir, "ckpt_50m_20k_without_mu.pt") # old model trained on 20k without nodes weights
checkpoint = torch.load(ckpt_path, map_location=device)
gptconf = GPTConfig(**checkpoint["model_args"])
gptconf.stop_token_id = meta["stop_token_id"]
model = GPT(gptconf)
state_dict = checkpoint["model"]
unwanted_prefix = "_orig_mod."
for k,v in list(state_dict.items()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
model.load_state_dict(state_dict)

number of parameters: 43.06M


<All keys matched successfully>

In [14]:
type(model)

model_qaoa_cached.GPT

In [15]:
if device == "cuda":
    # for cuda
    model = model.to(torch.device("cuda"))
    model = model.to(dtype=torch.bfloat16) # optional, for speed
    if compile:
        model = torch.compile(model) # requires PyTorch 2.0 (optional)
        model.eval()
        print("model was compiled successfully")
else:
    # for cpu and mps
    print("model on CPU")
    model.eval()
    model.to(device)

model was compiled successfully


In [16]:
type(model)

torch._dynamo.eval_frame.OptimizedModule

#### Prepare prompt

Use any graph from loaded batch as a test sample:

In [20]:
len(graphs_batch)

10

In [21]:
tmp_g = graphs_batch[0]
for u in tmp_g.nodes:
    print(tmp_g.nodes[u])

{'return_': np.float64(0.9735609521124655)}
{'return_': np.float64(6.500412732811528)}
{'return_': np.float64(1.448465898264445)}
{'return_': np.float64(0.7047703914889547)}
{'return_': np.float64(8.789848381367007)}
{'return_': np.float64(1.7476096289867482)}
{'return_': np.float64(5.854471537792451)}
{'return_': np.float64(1.3033649118656943)}
{'return_': np.float64(9.727996690806933)}
{'return_': np.float64(6.903007524592617)}
{'return_': np.float64(6.61860447340752)}
{'return_': np.float64(6.746837841142661)}
{'return_': np.float64(2.3683161548347753)}
{'return_': np.float64(0.8159069379167927)}
{'return_': np.float64(9.831672007484308)}


In [20]:
sample_idx = 0
graph = graphs_batch[sample_idx]
# graph_tokens = graph_to_tokens_v1(graph) # for the new model
graph_tokens = graph_to_tokens_old_format(graph) # for the old model

# circuit = circuits_batch[sample_idx]        # comment if use random graphs
# circuit_tokens = circuit_to_tokens_v1(circuit) # comment if use random graphs

print(len(graph_tokens))
print(graph_tokens)

212
['<bos>', '(0,1)', '0.20', '(0,2)', '0.59', '(0,3)', '0.68', '(0,4)', '0.74', '(0,5)', '1.00', '(0,6)', '0.38', '(0,7)', '0.89', '(0,8)', '0.45', '(0,9)', '0.01', '(0,10)', '0.88', '(0,11)', '0.39', '(0,12)', '0.03', '(0,13)', '0.44', '(0,14)', '0.34', '(1,2)', '0.74', '(1,3)', '0.74', '(1,4)', '0.04', '(1,5)', '0.81', '(1,6)', '0.51', '(1,7)', '0.20', '(1,8)', '0.14', '(1,9)', '0.65', '(1,10)', '0.40', '(1,11)', '0.58', '(1,12)', '0.21', '(1,13)', '0.67', '(1,14)', '0.43', '(2,3)', '0.40', '(2,4)', '0.92', '(2,5)', '0.40', '(2,6)', '0.11', '(2,7)', '0.90', '(2,8)', '0.03', '(2,9)', '0.06', '(2,10)', '0.63', '(2,11)', '0.77', '(2,12)', '0.84', '(2,13)', '0.53', '(2,14)', '0.09', '(3,4)', '1.00', '(3,5)', '0.68', '(3,6)', '0.54', '(3,7)', '0.30', '(3,8)', '0.88', '(3,9)', '0.86', '(3,10)', '0.32', '(3,11)', '0.81', '(3,12)', '0.94', '(3,13)', '0.56', '(3,14)', '0.01', '(4,5)', '0.70', '(4,6)', '0.49', '(4,7)', '0.53', '(4,8)', '0.34', '(4,9)', '0.63', '(4,10)', '0.92', '(4,11)', '0.

In [None]:
# graph_to_str_v1(graph)

In [24]:
# len(circuit_to_str_v1(circuit)), circuit_to_str_v1(circuit)[:1000]

(55810,
 '<new_layer_p>, rz, [0], -8.89 <new_layer_p>, rz, [1], -8.82 <new_layer_p>, rzz, [0, 1], 1.27 <new_layer_p>, rz, [2], -8.88 <new_layer_p>, rzz, [0, 2], 1.27 <new_layer_p>, rzz, [1, 2], 1.27 <new_layer_p>, rz, [3], -8.82 <new_layer_p>, rzz, [0, 3], 1.27 <new_layer_p>, rzz, [1, 3], 1.27 <new_layer_p>, rzz, [2, 3], 1.27 <new_layer_p>, rz, [4], -8.85 <new_layer_p>, rzz, [0, 4], 1.27 <new_layer_p>, rzz, [1, 4], 1.27 <new_layer_p>, rzz, [2, 4], 1.27 <new_layer_p>, rzz, [3, 4], 1.27 <new_layer_p>, rz, [5], -8.84 <new_layer_p>, rzz, [0, 5], 1.27 <new_layer_p>, rzz, [1, 5], 1.27 <new_layer_p>, rzz, [2, 5], 1.27 <new_layer_p>, rzz, [3, 5], 1.27 <new_layer_p>, rzz, [4, 5], 1.27 <new_layer_p>, rz, [6], -8.87 <new_layer_p>, rzz, [0, 6], 1.27 <new_layer_p>, rzz, [1, 6], 1.27 <new_layer_p>, rzz, [2, 6], 1.27 <new_layer_p>, rzz, [3, 6], 1.27 <new_layer_p>, rzz, [4, 6], 1.27 <new_layer_p>, rzz, [5, 6], 1.27 <new_layer_p>, rz, [7], -8.86 <new_layer_p>, rzz, [0, 7], 1.27 <new_layer_p>, rzz, [1, 

### Run generation (batch)

In [21]:
# For 15-nodes graph the number of tokens in qiskit circuit - 6600
# Therefore, for 5 nodes shoud be approx. (# of gates for 5 nodes / # of gates for 15 nodes) * 6600 and add extra 5%
qokit_number_of_gates = {
    5: 200,   
    10: 500,
    15: 1000,
    20: 1750,
    25: 2600,
    30: 3500,
}

max_total_tokens_dict = {}
for k, v in qokit_number_of_gates.items():
    max_total_tokens_dict[k] = int((v / qokit_number_of_gates[15]) * 6600 * 1.05)

max_total_tokens_dict

{5: 1386, 10: 3465, 15: 6930, 20: 12127, 25: 18018, 30: 24255}

In [23]:
results = {}
for graphs_batch in tqdm.tqdm(graphs_batches):
    n = len(graphs_batch[0].nodes)
    print(f"graphs with {n} nodes are processing")

    max_total_tokens = max_total_tokens_dict[n]    
    print(f"{max_total_tokens=}")

    generated_circuits, times = generate_batch(graphs_batch, model, gptconf, stoi, itos, max_total_tokens, device)
    results[n] = {
        "circuits": generated_circuits,
        "times": times,
    }


  0%|          | 0/6 [00:00<?, ?it/s]

graphs with 5 nodes are processing
max_total_tokens=1386
Warm-up model
Elapsed time for warm-up: 0.004258155822753906 secs



  0%|          | 0/5 [00:00<?, ?it/s][A

device: cuda:0



 20%|██        | 1/5 [00:05<00:22,  5.72s/it][A

Generation time: 5.71644926071167 secs, generated QAOA Circuit length: 1364
device: cuda:0



 40%|████      | 2/5 [00:11<00:17,  5.67s/it][A

Generation time: 5.643535852432251 secs, generated QAOA Circuit length: 1364
device: cuda:0



 60%|██████    | 3/5 [00:17<00:11,  5.67s/it][A

Generation time: 5.670839786529541 secs, generated QAOA Circuit length: 1364
device: cuda:0



 80%|████████  | 4/5 [00:22<00:05,  5.64s/it][A

Generation time: 5.588042259216309 secs, generated QAOA Circuit length: 1364
device: cuda:0



100%|██████████| 5/5 [00:28<00:00,  5.68s/it][A
 17%|█▋        | 1/6 [00:28<02:22, 28.42s/it]

Generation time: 5.781782150268555 secs, generated QAOA Circuit length: 1364
graphs with 10 nodes are processing
max_total_tokens=3465
Warm-up model
Elapsed time for warm-up: 0.004490375518798828 secs



  0%|          | 0/5 [00:00<?, ?it/s][A

device: cuda:0



 20%|██        | 1/5 [00:14<00:56, 14.12s/it][A

Generation time: 14.121472120285034 secs, generated QAOA Circuit length: 3373
device: cuda:0



 40%|████      | 2/5 [00:28<00:41, 14.00s/it][A

Generation time: 13.908310413360596 secs, generated QAOA Circuit length: 3373
device: cuda:0



 60%|██████    | 3/5 [00:42<00:28, 14.02s/it][A

Generation time: 14.043062925338745 secs, generated QAOA Circuit length: 3373
device: cuda:0



 80%|████████  | 4/5 [00:56<00:13, 13.99s/it][A

Generation time: 13.946916103363037 secs, generated QAOA Circuit length: 3373
device: cuda:0



100%|██████████| 5/5 [01:10<00:00, 14.01s/it][A
 33%|███▎      | 2/6 [01:38<03:31, 52.92s/it]

Generation time: 14.03970742225647 secs, generated QAOA Circuit length: 3373
graphs with 15 nodes are processing
max_total_tokens=6930
Warm-up model
Elapsed time for warm-up: 0.0044710636138916016 secs



  0%|          | 0/5 [00:00<?, ?it/s][A

device: cuda:0



 20%|██        | 1/5 [00:27<01:51, 27.76s/it][A

Generation time: 27.761804342269897 secs, generated QAOA Circuit length: 6718
device: cuda:0



 40%|████      | 2/5 [00:55<01:23, 27.84s/it][A

Generation time: 27.889658212661743 secs, generated QAOA Circuit length: 6718
device: cuda:0



 60%|██████    | 3/5 [01:23<00:55, 27.93s/it][A

Generation time: 28.038382053375244 secs, generated QAOA Circuit length: 6718
device: cuda:0



 80%|████████  | 4/5 [01:51<00:27, 27.86s/it][A

Generation time: 27.761962413787842 secs, generated QAOA Circuit length: 6718
device: cuda:0



100%|██████████| 5/5 [02:19<00:00, 27.86s/it][A
 50%|█████     | 3/6 [03:57<04:37, 92.37s/it]

Generation time: 27.836496829986572 secs, generated QAOA Circuit length: 6718
graphs with 20 nodes are processing
max_total_tokens=12127
Warm-up model
Elapsed time for warm-up: 0.004651784896850586 secs



  0%|          | 0/5 [00:00<?, ?it/s][A

device: cuda:0



 20%|██        | 1/5 [00:49<03:17, 49.30s/it][A

Generation time: 49.301159143447876 secs, generated QAOA Circuit length: 11745
device: cuda:0



 40%|████      | 2/5 [01:38<02:27, 49.23s/it][A

Generation time: 49.181010007858276 secs, generated QAOA Circuit length: 11745
device: cuda:0



 60%|██████    | 3/5 [02:27<01:38, 49.19s/it][A

Generation time: 49.12739109992981 secs, generated QAOA Circuit length: 11745
device: cuda:0



 80%|████████  | 4/5 [03:16<00:49, 49.01s/it][A

Generation time: 48.73175859451294 secs, generated QAOA Circuit length: 11745
device: cuda:0



100%|██████████| 5/5 [04:05<00:00, 49.04s/it][A
 67%|██████▋   | 4/6 [08:02<05:05, 152.70s/it]

Generation time: 48.83674144744873 secs, generated QAOA Circuit length: 11745
graphs with 25 nodes are processing
max_total_tokens=18018
Warm-up model
Elapsed time for warm-up: 0.004756927490234375 secs



  0%|          | 0/5 [00:00<?, ?it/s][A

device: cuda:0



 20%|██        | 1/5 [01:12<04:50, 72.73s/it][A

Generation time: 72.73330521583557 secs, generated QAOA Circuit length: 17416
device: cuda:0



 40%|████      | 2/5 [02:25<03:38, 72.81s/it][A

Generation time: 72.85814237594604 secs, generated QAOA Circuit length: 17416
device: cuda:0



 60%|██████    | 3/5 [03:38<02:25, 72.88s/it][A

Generation time: 72.95708799362183 secs, generated QAOA Circuit length: 17416
device: cuda:0



 80%|████████  | 4/5 [04:51<01:12, 72.99s/it][A

Generation time: 73.15130972862244 secs, generated QAOA Circuit length: 17416
device: cuda:0



100%|██████████| 5/5 [06:04<00:00, 72.89s/it][A
 83%|████████▎ | 5/6 [14:07<03:49, 229.07s/it]

Generation time: 72.75522494316101 secs, generated QAOA Circuit length: 17416
graphs with 30 nodes are processing
max_total_tokens=24255
Warm-up model
Elapsed time for warm-up: 0.005433082580566406 secs



  0%|          | 0/5 [00:00<?, ?it/s][A

device: cuda:0



 20%|██        | 1/5 [01:42<06:50, 102.52s/it][A

Generation time: 102.51838946342468 secs, generated QAOA Circuit length: 23383
device: cuda:0



 40%|████      | 2/5 [03:25<05:07, 102.51s/it][A

Generation time: 102.49922108650208 secs, generated QAOA Circuit length: 23383
device: cuda:0



 60%|██████    | 3/5 [05:07<03:25, 102.63s/it][A

Generation time: 102.76785612106323 secs, generated QAOA Circuit length: 23383
device: cuda:0



 80%|████████  | 4/5 [06:50<01:42, 102.81s/it][A

Generation time: 103.07900595664978 secs, generated QAOA Circuit length: 23383
device: cuda:0



100%|██████████| 5/5 [08:34<00:00, 102.84s/it][A
100%|██████████| 6/6 [22:41<00:00, 226.94s/it]

Generation time: 103.30233693122864 secs, generated QAOA Circuit length: 23383





In [27]:
# save times
graphs_sizes = [5, 10, 15, 20, 25, 30]
for n in graphs_sizes:
    print(results[n]["times"])

[5.71644926071167, 5.643535852432251, 5.670839786529541, 5.588042259216309, 5.781782150268555]
[14.121472120285034, 13.908310413360596, 14.043062925338745, 13.946916103363037, 14.03970742225647]
[27.761804342269897, 27.889658212661743, 28.038382053375244, 27.761962413787842, 27.836496829986572]
[49.301159143447876, 49.181010007858276, 49.12739109992981, 48.73175859451294, 48.83674144744873]
[72.73330521583557, 72.85814237594604, 72.95708799362183, 73.15130972862244, 72.75522494316101]
[102.51838946342468, 102.49922108650208, 102.76785612106323, 103.07900595664978, 103.30233693122864]


In [31]:
import pandas as pd

data = {
    "Node Size": [5, 10, 15, 20, 25, 30],
    "Mean Time": [np.mean(results[n]["times"]) for n in graphs_sizes],
    "Standard Deviation": [np.std(results[n]["times"]) for n in graphs_sizes]
}

df = pd.DataFrame(data)
df.to_csv("qpt_generation_time.csv", index=False)
df

Unnamed: 0,Node Size,Mean Time,Standard Deviation
0,5,5.68013,0.065663
1,10,14.011894,0.075768
2,15,27.857661,0.102449
3,20,49.035612,0.215386
4,25,72.891014,0.152651
5,30,102.833362,0.314778


In [34]:
# save generated cirtuits

# def save_seq_tokens_to_file(tokens: List[str], filename: str) -> None:
#     with open(filename, "wb") as file:
#         pickle.dump(tokens, file)

# # generated sequence
# save_seq_tokens_to_file(generated_circuit_tokens, filename_gen)
# generated_circuit_tokens_loaded = load_seq_tokens_from_file(filename_gen)
# any([e1 == e2 for e1, e2 in zip(generated_circuit_tokens, generated_circuit_tokens_loaded)])

graphs_sizes = [5, 10, 15, 20, 25, 30]
for n in graphs_sizes:
    generated_circuits = results[n]["circuits"]
    filename_gen = f"generated_circuit_tokens_nodes_{n:d}_amount_{len(generated_circuits):d}.pkl"
    save_seq_tokens_to_file(generated_circuits, filename_gen)
    generated_circuit_loaded = load_seq_tokens_from_file(filename_gen)
    for c_gen, c_gen_loaded in zip(generated_circuits, generated_circuit_loaded):
        if not any([e1 == e2 for e1, e2 in zip(c_gen, c_gen_loaded)]):
            print(f"Something was wrong during saving")
            break


In [41]:
filename = "generated_circuit_tokens_nodes_15_amount_5.pkl"
tmp = load_seq_tokens_from_file(filename)
len(tmp), len(tmp[1]), tmp[0][:10]

(5,
 6718,
 ['<new_layer_p>',
  'rz',
  '[0]',
  '-9.82',
  '<new_layer_p>',
  'rz',
  '[1]',
  '-9.84',
  '<new_layer_p>',
  'rz'])

#### Run generation (single input)

In [19]:
device

'cuda'

In [26]:
start_t = time.time()
if device == "cuda":
    print("cuda")

    # GPU
    # generated_circuit_tokens = generate_long_circuit_compiled(
    #     model,
    #     gptconf,
    #     graph_tokens,
    #     stoi,
    #     itos,
    #     max_total_tokens=7000,
    #     temperature=0.5,
    #     top_k=None
    # )

    # GPU - cached
    generated_circuit_tokens = generate_long_circuit_compiled_cached(
        model,
        gptconf,
        graph_tokens,
        stoi,
        itos,
        max_total_tokens=7000, # 15 nodes graph and format v1
        # max_total_tokens=12000, # 15 nodes graph and format v2
        temperature=0.5,
        top_k=None
    )

else:
    print("cpu")

    # CPU
    # generated_circuit_tokens = generate_long_circuit(
    #     model,
    #     graph_tokens,
    #     stoi,
    #     itos,
    #     max_total_tokens=7000,
    #     temperature=0.5,
    #     top_k=None
    # )

    # CPU - cached
    generated_circuit_tokens = generate_long_circuit_cpu(
        model,
        gptconf,
        graph_tokens,
        stoi,
        itos,
        max_total_tokens=7000, # 15 nodes graph and format v1
        # max_total_tokens=12000, # 15 nodes graph and format v2
        temperature=0.5,
        top_k=None
    )
    
end_t = time.time()

print(f"Generation time: {end_t - start_t} secs")

print(f"Generated QAOA Circuit (length: {len(generated_circuit_tokens)}):")
print(" ".join(generated_circuit_tokens))

cuda
device: cuda:0
Generation time: 40.35027718544006 secs
Generated QAOA Circuit (length: 6788):
<new_layer_p> rz [13] -9.29 <new_layer_p> rz [14] 207.99 <new_layer_p> rzz [11, 12] 0.25 (11,14) 0.70 (11,14) 0.61 (4,7) 0.82 <end_of_graph> <new_layer_p> rzz [4, 5] 0.33 (4,7) 0.70 <end_of_graph> <new_layer_p> rzz [12, 13] 1.77 <new_layer_p> rzz [1, 3] 13.58 <new_layer_p> rzz [2, 6] 173.56 <new_layer_p> rxx [5, 12] 0.92 <new_layer_p> rzz [0, 2] 15.05 <new_layer_p> rzz [0, 13] 209.79 <new_layer_p> rzz [0, 11] 0.14 (12,14) 0.80 <end_of_graph> <new_layer_p> rzz [9, 10] 191.81 <new_layer_p> rzz [4, 10] 0.90 (4,7) 0.14 (11,13) 0.32 (7,10) 0.32 (13,14) 0.65 (4,7) 0.47 (4,7) 0.34 (4,5) 0.08 (4,7) 0.61 (4,7) 0.25 (11,14) 0.25 (1,13) 0.97 (13,14) 0.40 (10,14) 0.72 (13,14) 0.65 (10,14) 0.55 (13,14) 0.93 <new_layer_p> rzz [5, 14] rxx [2, 13] 0.82 <end_of_graph> <new_layer_p> ryy [4, 5] 0.22 (13,14) 0.65 (3,8) 0.69 <end_of_graph> <new_layer_p> rzz [5, 10] 0.65 (7,12) <new_layer_p> rzz [4, 8] 0.79 <e

In [23]:
end_of_graph = 0
brackets = 0
for tok in generated_circuit_tokens:
    if tok == "<end_of_graph>":
        end_of_graph += 1
    elif tok[0] == "(":
        brackets += 1

end_of_graph, brackets

(0, 0)

In [27]:
import copy

generated_circuit_tokens_cached = copy.deepcopy(generated_circuit_tokens)
len(generated_circuit_tokens_cached)

6788

In [28]:
generated_circuit_tokens_cached[:20]

['<new_layer_p>',
 'rz',
 '[13]',
 '-9.29',
 '<new_layer_p>',
 'rz',
 '[14]',
 '207.99',
 '<new_layer_p>',
 'rzz',
 '[11, 12]',
 '0.25',
 '(11,14)',
 '0.70',
 '(11,14)',
 '0.61',
 '(4,7)',
 '0.82',
 '<end_of_graph>',
 '<new_layer_p>']

In [25]:
import copy

generated_circuit_tokens_not_cached = copy.deepcopy(generated_circuit_tokens)
len(generated_circuit_tokens_not_cached)

6788

In [29]:
generated_circuit_tokens_not_cached[:20]

['<new_layer_p>',
 'rz',
 '[0]',
 '-9.51',
 '<new_layer_p>',
 'rz',
 '[1]',
 '-9.45',
 '<new_layer_p>',
 'rz',
 '[2]',
 '-9.54',
 '<new_layer_p>',
 'rz',
 '[3]',
 '-9.51',
 '<new_layer_p>',
 'rz',
 '[4]',
 '-9.54']

In [31]:
limit_tok = min(len(generated_circuit_tokens_not_cached), len(generated_circuit_tokens_cached))

generated_circuit_str_not_cached = " ".join(generated_circuit_tokens_not_cached)
generated_circuit_str_cached = " ".join(generated_circuit_tokens_cached)

print("Char accuracy:", char_accuracy(generated_circuit_str_not_cached, generated_circuit_str_cached))
print("Token accuracy:", token_accuracy(generated_circuit_str_cached[:limit_tok], generated_circuit_str_not_cached[:limit_tok]))

distance, norm_distance = levenshtein_distance(generated_circuit_str_not_cached, generated_circuit_str_cached)
print(f"Levenshtein distance: {distance}, Normalized distance: {norm_distance}")

# # Tokenize the strings
references = [circuit_tokens[:limit_tok]] # list of references
candidate = generated_circuit_tokens[:limit_tok]
print(f"BLEU score: {bleu_score(references, candidate)}")

Char accuracy: 0.061121782558586246
Token accuracy: 0.04286977018267531
Levenshtein distance: 12539, Normalized distance: 0.23857452718901023


In [48]:
limit_str = 44000
limit_tok = 5500

generated_circuit_str = " ".join(generated_circuit_tokens)
generated_circuit_str = generated_circuit_str[:limit_str]

print(f"Sequences lenghts (str): {len(generated_circuit_str)}")
print(f"Sequences lenghts (tok): {len(generated_circuit_tokens)}")

Sequences lenghts (str): 44000
Sequences lenghts (tok): 6788


#### Generated result evaluation

In [28]:
limit_str = 44000
limit_tok = 5500
generated_circuit_str = " ".join(generated_circuit_tokens)
ground_truth_circuit_str = " ".join(circuit_tokens)

print(f"Sequences lenghts (str): {len(generated_circuit_str), len(ground_truth_circuit_str)}")
print(f"Sequences lenghts (tok): {len(generated_circuit_tokens), len(circuit_tokens)}")

generated_circuit_str = generated_circuit_str[:limit_str]
ground_truth_circuit_str = ground_truth_circuit_str[:limit_str]

len(generated_circuit_str), len(ground_truth_circuit_str)

Sequences lenghts (str): (46297, 50876)
Sequences lenghts (tok): (6770, 6601)


(44000, 44000)

In [29]:
print("Char accuracy:", char_accuracy(generated_circuit_str, ground_truth_circuit_str))
print("Token accuracy:", token_accuracy(generated_circuit_tokens[:limit_tok], circuit_tokens[:limit_tok]))

distance, norm_distance = levenshtein_distance(generated_circuit_str, ground_truth_circuit_str)
print(f"Levenshtein distance: {distance}, Normalized distance: {norm_distance}")

# Tokenize the strings
references = [circuit_tokens[:limit_tok]] # list of references
candidate = generated_circuit_tokens[:limit_tok]
print(f"BLEU score: {bleu_score(references, candidate)}")

Char accuracy: 0.05402272727272727
Token accuracy: 0.1410909090909091
Levenshtein distance: 16774, Normalized distance: 0.38122727272727275
BLEU score: 0.027566233560436897


#### Save generated and groud trouth sequences

In [70]:
# len(generated_circuit_tokens), len(circuit_tokens)

In [49]:
filename_gen = f"generated_circuit_tokens_{sample_idx:d}.pkl"
# filename_gt = f"circuit_tokens_{sample_idx:3d}.pkl"

filename_gen#, filename_gt

'generated_circuit_tokens_2.pkl'

In [50]:
# generated sequence
save_seq_tokens_to_file(generated_circuit_tokens, filename_gen)
generated_circuit_tokens_loaded = load_seq_tokens_from_file(filename_gen)
any([e1 == e2 for e1, e2 in zip(generated_circuit_tokens, generated_circuit_tokens_loaded)])

True

In [72]:
# # ground truth sequence
# save_seq_tokens_to_file(circuit_tokens, filename_gt)
# circuit_tokens_loaded = load_seq_tokens_from_file(filename_gt)
# any([e1 == e2 for e1, e2 in zip(circuit_tokens, circuit_tokens_loaded)])

## Free memory

In [48]:
import gc
gc.collect()

12175