In [1]:
import postprocessing as pp
import numpy as np
import time
import math

In [2]:
def waste_ratio(old_data, new_data):
    """Return the fraction of elements lost going from old_data to new_data.

    Args:
        old_data: Sized collection holding the data before processing.
        new_data: Sized collection holding the data after processing.

    Returns:
        1 minus the ratio len(new_data) / len(old_data).

    Raises:
        ZeroDivisionError: If old_data is empty.
    """
    kept_fraction = len(new_data) / len(old_data)
    return 1 - kept_fraction

def throughput(time, new_data):
    """Return the output rate in millions of elements per unit of time.

    Args:
        time: Elapsed duration of the measured operation. Inside this function
            the name refers to this argument, not to the imported time module.
        new_data: Sized collection produced during that duration.

    Returns:
        len(new_data) / time / 1e6. The original comment labels this "Mb/s";
        NOTE(review): that holds only if each element is one bit and time is
        in seconds - confirm against the callers.

    Raises:
        ZeroDivisionError: If time is zero.
    """
    elements_per_time_unit = len(new_data) / time
    return elements_per_time_unit / 1e6  # Mb/s

def read_file(filepath):
    """Load the leading run of digit characters from a text file.

    The file is read as UTF-8 and stripped of surrounding whitespace. Reading
    stops at the first character for which str.isdigit() is false; everything
    after that character is ignored.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A one-dimensional np.uint8 array holding one entry per leading digit
        character (empty if the stripped text does not start with a digit).
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        text = handle.read().strip()

    # Measure how long the leading run of digit characters is
    prefix_len = 0
    while prefix_len < len(text) and text[prefix_len].isdigit():
        prefix_len += 1

    # Convert only that prefix, one character per array element
    return np.array([int(ch) for ch in text[:prefix_len]], dtype=np.uint8)
# Block size handed to pp.apply_parity_extractor via its blocksize argument
parity_blocksize = 4

In [3]:
# Load the datasets.
# dataset_tags holds the name of each dataset (one tag per type of data); the
# tags are used as dictionary keys throughout to keep clutter to a minimum.
dataset_tags = ["IBM_Brisbane", "SimMethod1", "SimMethod2", "SimMethod3", "SimMethod4"]

# Source file of each dataset, listed in the same order as dataset_tags
datapaths = [
    "../Data/Feb5_brisbane_30bits_1024shots_30chunks_2mods.txt",
    "../Data/Feb11_simulator_method1_100000qubits_1024shots_30chunkSize_3mods.txt",
    "../Data/Feb11_simulator_method2_100000qubits_1024shots_30chunkSize_3mods.txt",
    "../../SC-Quantathon-2024/renamed-datasets/concatenation_sherbrooke_1k.txt",
    "../../SC-Quantathon-2024/renamed-datasets/iteration_sherbrooke_1k.txt",
]

# tag -> array of digits read from the matching file
datasets = {}
for tag, path in zip(dataset_tags, datapaths):
    datasets[tag] = read_file(path)


In [4]:
# Baseline: Shannon entropy of each raw dataset, keyed by dataset tag.
# A copy is passed so the stored dataset is never handed to pp directly.
before_entropy = {
    tag: pp.shannon_entropy(datasets[tag].copy())
    for tag in dataset_tags
}

In [5]:
# Apply every extractor to every dataset, recording throughput and waste ratio

# Unused draft of an explicit Toeplitz matrix construction (left commented out):
# block_size = 
# toeplitz_size = dataset_size
# first_row = np.random.randint(0, 2, toeplitz_size)
# first_column = np.random.randint(0, 2, toeplitz_size)
# toeplitz_matrix = pp.create_toeplitz(first_row, first_column)

# Extractor outputs, keyed by dataset tag
neumann_datasets = {}
toeplitz_datasets = {}
FFT_datasets = {}
parity_datasets = {}

# One entry per extractor: (output dict, extractor function, extra keyword args).
# The position of an entry is its index into timed_throughput and waste_ratios.
extractors = [
    (neumann_datasets, pp.apply_von_neumann_extractor, {}),
    (toeplitz_datasets, pp.apply_toeplitz_transformation, {}),
    (FFT_datasets, pp.apply_fft_toeplitz, {}),
    (parity_datasets, pp.apply_parity_extractor, {"blocksize": parity_blocksize}),
]

# Per-extractor sums over all datasets; turned into averages after the loop
timed_throughput = [0] * len(extractors)
waste_ratios = [0] * len(extractors)

for tag in dataset_tags:
    for i, (outputs, extract, extra_kwargs) in enumerate(extractors):
        # Hand the extractor its own copy of the input, made before the clock
        # starts so that only the extractor call itself is timed.
        raw = datasets[tag].copy()

        # perf_counter is a high-resolution clock meant for measuring short
        # intervals; time.time() can report a zero interval for fast calls,
        # which would make throughput() divide by zero.
        t0 = time.perf_counter()
        outputs[tag] = extract(raw, **extra_kwargs)
        t1 = time.perf_counter()

        # throughput() and waste_ratio() only take len() of their arguments,
        # so no further copies are needed here.
        timed_throughput[i] += throughput(t1 - t0, outputs[tag])
        waste_ratios[i] += waste_ratio(datasets[tag], outputs[tag])

# Average the accumulated sums across all datasets
for i in range(len(extractors)):
    timed_throughput[i] /= len(dataset_tags)
    waste_ratios[i] /= len(dataset_tags)
    





In [6]:
# Shannon entropy of every extractor's output, keyed by dataset tag
neumann_entropy = {}
toeplitz_entropy = {}
FFT_entropy = {}
parity_entropy = {}

# (extractor outputs, entropy dict to fill), in the order they are measured
entropy_jobs = (
    (neumann_datasets, neumann_entropy),
    (toeplitz_datasets, toeplitz_entropy),
    (FFT_datasets, FFT_entropy),
    (parity_datasets, parity_entropy),
)

for tag in dataset_tags:
    for transformed, entropies in entropy_jobs:
        # Measure a copy so the stored output is never handed to pp directly
        entropies[tag] = pp.shannon_entropy(transformed[tag].copy())

In [8]:
# Display results
def entropy_as_log(f):
    """Express how close f is to 1 on a base-10 logarithmic scale.

    Args:
        f: Numeric value to convert.

    Returns:
        -log10(1 - f), or 0 when 1 - f is below 1e-50 (which also covers
        zero and negative differences, where the logarithm is undefined).

    NOTE(review): with this guard an input of f >= 1 yields 0, the same value
    that f == 0 produces - confirm that this is the intended display.
    """
    saturated = 1.0 - f < 1e-50
    return 0 if saturated else -math.log10(1 - f)
    
# Header line: method column, one column per dataset, then the two summary columns
header_cells = ["Method\t\t"] + [f"{tag}\t\t" for tag in dataset_tags]
print("".join(header_cells) + "\tWasteRatio\t\t\tThroughput")

# One spec per table row:
# (row label, entropies by tag, waste-ratio cell, throughput cell).
# The control row is the untouched data, so its waste ratio is fixed at zero
# and it has no throughput.
table_rows = [
    ("Control\t\t", before_entropy, "\t0.0000000\t\t", "\tN/A\t\t"),
    ("Von Neumann\t", neumann_entropy,
     f"\t{waste_ratios[0]:.7f}\t\t", f"\t{timed_throughput[0]:.7f}\t\t"),
    ("Toeplitz\t", toeplitz_entropy,
     f"\t{waste_ratios[1]:.7f}\t\t", f"\t{timed_throughput[1]:.7f}\t\t"),
    ("FFT   \t\t", FFT_entropy,
     f"\t{waste_ratios[2]:.7f}\t\t", f"\t{timed_throughput[2]:.7f}\t\t"),
    ("Parity\t\t", parity_entropy,
     f"\t{waste_ratios[3]:.7f}\t\t", f"\t{timed_throughput[3]:.7f}\t\t"),
]

for row_label, row_entropies, waste_cell, rate_cell in table_rows:
    entropy_cells = "".join(
        f"{entropy_as_log(row_entropies[tag]):.7f}\t\t" for tag in dataset_tags
    )
    # Two newlines: one ends the row, one leaves a blank line after it
    print(row_label + entropy_cells + waste_cell + rate_cell, end="\n\n")


Method		IBM_Brisbane		SimMethod1		SimMethod2		SimMethod3		SimMethod4			WasteRatio			Throughput
Control		7.5603954		5.4336382		5.1647533		2.4765024		2.5567935			0.0000000			N/A		

Von Neumann	7.9158515		4.4332409		4.3591345		2.1766440		2.0723484			0.7537132			2.1688472		

Toeplitz	3.9944849		6.6452014		6.2140017		4.8423501		3.5358973			0.0417280			2.2902953		

FFT   		3.9709439		4.9684018		4.6057797		2.1972750		2.8867740			0.0417280			2.0816037		

Parity		5.9125778		5.0292495		7.7336155		2.4268146		3.7335970			0.7500000			28.6013027		

