In [1]:
import os, sys

print("CWD:", os.getcwd())
print("Files here:", os.listdir())

CWD: /Users/dylanthomas/PycharmProjects/phys139-project
Files here: ['best_model.h5', 'LICENSE', 'best_model.keras', 'plotting.py', 'checkpoints', 'checkpoints_backup', 'MWPM PyMatching.py', '__pycache__', 'model backups', 'README.md', '.gitignore', 'circuit_model.py', 'Train', 'db', 'GenerateDatasets.ipynb', '.git', '.vscode', 'source-model', 'database_io.py', 'simple_model.ipynb', 'data_example.ipynb', '.idea', 'resources.txt']


In [4]:
import sqlite3
import numpy as np
from circuit_model import SurfaceCode, convert_simple

base_error = 4.8e-4

def GenerateCycleSets(k_min,
                      k_max,
                      num_samples,
                      distance=3,
                      pqx=base_error, pqy=base_error, pqz=base_error,
                      pax=base_error, pay=base_error, paz=base_error,
                      pm=0.0014,
                      db_dir="db/"):
    """
    Generates AND SAVES test datasets for SurfaceCode with n_max = 2^(k/2) cycles.
    Each dataset is saved as a separate SQLite database:
        surf_test_2^(k/2).db
    """

    # loop over desired powers
    for k in range(k_min, k_max + 1):
        n_max = int(2 ** (k/2))
        print(f"\n=== Generating test dataset for n_max = {n_max} cycles ===")

        # create circuit model instance
        surf = SurfaceCode(
            seed=0,
            distance=distance,
            pqx=pqx, pqy=pqy, pqz=pqz,
            pax=pax, pay=pay, paz=paz,
            pm=pm
        )

        # prepare database filename
        cycle_dir = os.path.join(db_dir, "variable_cycles")
        os.makedirs(cycle_dir, exist_ok=True)

        fname = f"surf_test_{n_max}.db"
        db_path = os.path.join(cycle_dir, fname)


        print(f"Saving dataset to {db_path}")

        # create SQLite db
        conn = sqlite3.connect(db_path)
        c = conn.cursor()

        # wipe old tables
        c.execute("DROP TABLE IF EXISTS data")
        c.execute("DROP TABLE IF EXISTS info")
        conn.commit()

        # create info table
        c.execute("""
            CREATE TABLE info (error_model_gitv, distance, pqx, pqy, pqz,
                               pax, pay, paz, pm)
        """)

        # git version always unknown in this context
        entries = [(0, distance, pqx, pqy, pqz, pax, pay, paz, pm)]
        c.executemany("INSERT INTO info VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", entries)

        # create TEST table format:
        # (seed, syndromes, events, fstabs, err_signal, parities)
        c.execute("""
            CREATE TABLE data (
                seed INTEGER PRIMARY KEY,
                syndromes BLOB,
                events BLOB,
                fstabs BLOB,
                err_signal BLOB,
                parities BLOB
            )
        """)

        # generate test data
        rows = []
        for i in range(num_samples):
            seed = 2 * 10**8 + i  # consistent with testing mode
            run = surf.make_run(seed=seed, n_steps=n_max, condensed=True)

            # unpack run
            seed_val, synd, events, fstabs, errsig, pars = run

            # convert arrays to binary blobs for storage
            rows.append((
                int(seed_val),
                synd.tobytes(),
                events.tobytes(),
                fstabs.tobytes(),
                errsig.tobytes(),
                pars.tobytes()
            ))

            if (i + 1) % 500 == 0:
                print(f"  Generated {i+1}/{num_samples} runs...")

        # insert into db
        c.executemany(
            "INSERT OR REPLACE INTO data VALUES (?, ?, ?, ?, ?, ?)",
            rows
        )

        conn.commit()
        conn.close()

        print(f"✔ Completed dataset for n_max = {n_max} cycles.")


In [None]:
def Generate_PyRatio_Sets(n_max = 300,
                      num_samples=2000,
                      distance=3,
                      pqx=base_error, pqz=base_error,
                      pax=base_error, paz=base_error,
                      pm=0.0014,
                      db_dir="db/"):
    """
    Generates AND SAVES test datasets for SurfaceCode with n_max cycles.
    Each dataset is saved as a separate SQLite database:
        surf_test_Pyratios.db
    """

    ratio_values = [0.0, 0.5, 1.0, 1.5, 2.0]

    for ratio in ratio_values:
        print(f"\n=== Generating test dataset for py = {ratio} * px,pz ===")

        # define per-ratio error parameters
        pqy_ratio = ratio * pqx
        pay_ratio = ratio * pax
        # create circuit model instance
        surf = SurfaceCode(
            seed=0,
            distance=distance,
            pqx=pqx, pqy=pqy_ratio, pqz=pqz,
            pax=pax, pay=pay_ratio, paz=paz,
            pm=pm
        )

        # prepare database filename
        cycle_dir = os.path.join(db_dir, "variable_ratio")
        os.makedirs(cycle_dir, exist_ok=True)

        fname = f"surf_test_py{ratio}.db"
        db_path = os.path.join(cycle_dir, fname)


        print(f"Saving dataset to {db_path}")

        # create SQLite db
        conn = sqlite3.connect(db_path)
        c = conn.cursor()

        # wipe old tables
        c.execute("DROP TABLE IF EXISTS data")
        c.execute("DROP TABLE IF EXISTS info")
        conn.commit()

        # create info table
        c.execute("""
            CREATE TABLE info (error_model_gitv, distance, pqx, pqy, pqz,
                               pax, pay, paz, pm)
        """)

        # git version always unknown in this context
        entries = [(0, distance, pqx, pqy, pqz, pax, pay, paz, pm)]
        c.executemany("INSERT INTO info VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", entries)

        # create TEST table format:
        # (seed, syndromes, events, fstabs, err_signal, parities)
        c.execute("""
            CREATE TABLE data (
                seed INTEGER PRIMARY KEY,
                syndromes BLOB,
                events BLOB,
                fstabs BLOB,
                err_signal BLOB,
                parities BLOB
            )
        """)

        # generate test data
        rows = []
        for i in range(num_samples):
            seed = 2 * 10**8 + i  # consistent with testing mode
            run = surf.make_run(seed=seed, n_steps=n_max, condensed=True)

            # unpack run
            seed_val, synd, events, fstabs, errsig, pars = run

            # convert arrays to binary blobs for storage
            rows.append((
                int(seed_val),
                synd.tobytes(),
                events.tobytes(),
                fstabs.tobytes(),
                errsig.tobytes(),
                pars.tobytes()
            ))

            if (i + 1) % 500 == 0:
                print(f"  Generated {i+1}/{num_samples} runs...")

        # insert into db
        c.executemany(
            "INSERT OR REPLACE INTO data VALUES (?, ?, ?, ?, ?, ?)",
            rows
        )

        conn.commit()
        conn.close()

        print(f"✔ Completed dataset for n_max = {n_max} cycles.")

In [5]:
# try:
#     conn.close()
# except NameError:
#     pass
#
#
# GenerateCycleSets(k_min = 2, k_max = 18,
#                       num_samples=int(1e4),
#                       distance=3)


=== Generating test dataset for n_max = 2 cycles ===
Saving dataset to db/variable_cycles/surf_test_2.db
  Generated 500/10000 runs...
  Generated 1000/10000 runs...
  Generated 1500/10000 runs...
  Generated 2000/10000 runs...
  Generated 2500/10000 runs...
  Generated 3000/10000 runs...
  Generated 3500/10000 runs...
  Generated 4000/10000 runs...
  Generated 4500/10000 runs...
  Generated 5000/10000 runs...
  Generated 5500/10000 runs...
  Generated 6000/10000 runs...
  Generated 6500/10000 runs...
  Generated 7000/10000 runs...
  Generated 7500/10000 runs...
  Generated 8000/10000 runs...
  Generated 8500/10000 runs...
  Generated 9000/10000 runs...
  Generated 9500/10000 runs...
  Generated 10000/10000 runs...
✔ Completed dataset for n_max = 2 cycles.

=== Generating test dataset for n_max = 2 cycles ===
Saving dataset to db/variable_cycles/surf_test_2.db
  Generated 500/10000 runs...
  Generated 1000/10000 runs...
  Generated 1500/10000 runs...
  Generated 2000/10000 runs...
  G

In [None]:
try:
    conn.close()
except NameError:
    pass


Generate_PyRatio_Sets(n_max = 100,
                      num_samples=int(1e4),
                      distance=3)