# Postprocessing Comparison on Toy Datasets
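This file runs our postprocessing methods on toy datasets and reports, for each method, the entropy of its output, the waste ratio, and the throughput. Entropy is displayed on a logarithmic scale as `-log10(1 - H)`, so larger values mean the entropy `H` is closer to 1. The methods are defined in `postprocessing.py`.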

In [1]:
import postprocessing as pp
import numpy as np
import time
import math

In [2]:
def waste_ratio(old_data, new_data):
    """Return the portion of the input data destroyed during processing.

    The result is ``1 - len(new_data) / len(old_data)``: 0 means nothing was
    discarded and 1 means everything was. An empty ``old_data`` raises
    ``ZeroDivisionError``.
    """
    retained_fraction = len(new_data) / len(old_data)
    return 1 - retained_fraction

def throughput(time, new_data):
    """Return the processing rate in millions of output elements per second.

    For methods that destroy input data, the rate is based on the amount of
    output data (``len(new_data)``), not the amount of input.

    ``time`` is the elapsed time in seconds; a value of 0 raises
    ``ZeroDivisionError``.
    """
    elements_per_second = len(new_data) / time
    return elements_per_second / 1e6  # Mb/s

def read_file(filepath):
    """Read a text file of digit characters into a ``uint8`` NumPy array.

    Each line is stripped of surrounding whitespace. Blank lines and lines
    whose first character is not a digit (e.g. headers or comments) are
    skipped. From every remaining line, each ASCII digit becomes one array
    element; any other character on the line (spaces, commas, ...) is ignored
    instead of making the final array conversion fail.

    Args:
        filepath: Path of the UTF-8 text file to read.

    Returns:
        A one-dimensional ``np.uint8`` array with one entry per digit. If the
        file contains no digits, a warning is printed and the array is empty.
    """
    digits = []
    with open(filepath, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or not line[0].isdigit():
                continue
            # Store ints rather than one-character strings so the array
            # conversion does not depend on string-to-integer casting.
            digits.extend(int(ch) for ch in line if ch in "0123456789")
    if not digits:
        print(f"Invalid file: {filepath}")
    return np.array(digits, dtype=np.uint8)
# Block size passed as `blocksize` to pp.apply_parity_extractor when the parity method is applied.
parity_blocksize = 4

In [3]:
# Build the datasets under comparison.
# dataset_tags names each dataset (one tag per type of data); the datasets
# dictionary maps tag -> data so later cells can simply loop over the tags.
dataset_size = 10_000_000
dataset_tags = ["classical_low_data", "classical_high_data", "quantum_data"]

# Each dataset is generated by hand; the order below must match dataset_tags.
classical_low_data = pp.generate_low_entropy_array(dataset_size)
classical_high_data = pp.generate_high_entropy_array(dataset_size)
quantum_data = pp.generate_qrng_data(dataset_size)

datasets = dict(
    zip(dataset_tags, (classical_low_data, classical_high_data, quantum_data))
)

In [4]:
# Entropy of every dataset before any postprocessing is applied.
# A copy is handed to pp.shannon_entropy so the stored dataset is never touched.
before_entropy = {
    tag: pp.shannon_entropy(datasets[tag].copy()) for tag in dataset_tags
}

In [5]:
# Apply every postprocessing method to every dataset, keeping the outputs and
# accumulating throughput and waste ratio per method.
neumann_datasets = {}
toeplitz_datasets = {}
FFT_datasets = {}
parity_datasets = {}

# One row per method, in the index order used by timed_throughput and
# waste_ratios: (output dictionary, method to call, extra keyword arguments).
extractors = [
    (neumann_datasets, pp.apply_von_neumann_extractor, {}),
    (toeplitz_datasets, pp.apply_toeplitz_transformation, {}),
    (FFT_datasets, pp.apply_fft_toeplitz, {}),
    (parity_datasets, pp.apply_parity_extractor, {"blocksize": parity_blocksize}),
]

# Sums over datasets here; converted to averages after the loop.
timed_throughput = [0] * len(extractors)
waste_ratios = [0] * len(extractors)

for tag in dataset_tags:
    source = datasets[tag]
    for i, (outputs, extract, kwargs) in enumerate(extractors):
        # Each method gets its own copy so it can never alter the stored
        # dataset. The copy is made before the clock starts so its cost is
        # not counted against the method's throughput.
        work = source.copy()
        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring short intervals; time.time() is neither guaranteed.
        t0 = time.perf_counter()
        outputs[tag] = extract(work, **kwargs)
        t1 = time.perf_counter()
        # throughput and waste_ratio only take len() of their arguments, so
        # the arrays are passed directly instead of being copied again.
        timed_throughput[i] += throughput(t1 - t0, outputs[tag])
        waste_ratios[i] += waste_ratio(source, outputs[tag])

# Use the average across all datasets for throughput and waste ratio.
for i in range(len(extractors)):
    timed_throughput[i] /= len(dataset_tags)
    waste_ratios[i] /= len(dataset_tags)
    





In [6]:
# Entropy of each method's output, stored per dataset tag.
neumann_entropy, toeplitz_entropy, FFT_entropy, parity_entropy = {}, {}, {}, {}

# (results dictionary, transformed datasets) pairs, in the order they are measured.
entropy_targets = (
    (neumann_entropy, neumann_datasets),
    (toeplitz_entropy, toeplitz_datasets),
    (FFT_entropy, FFT_datasets),
    (parity_entropy, parity_datasets),
)

for tag in dataset_tags:
    for results, transformed in entropy_targets:
        # Measure a copy so the stored output is left as it is.
        results[tag] = pp.shannon_entropy(transformed[tag].copy())

In [7]:
# Display results
def entropy_as_log(f):
    """Return ``-log10(1 - f)``, a logarithmic measure of how close ``f`` is to 1.

    For example 0.9 maps to about 1 and 0.99 to about 2, which makes values
    that differ only far behind the decimal point easy to compare.

    Args:
        f: The value to rescale (an entropy in the table below).

    Returns:
        ``-log10(1 - f)``, or ``math.inf`` when ``f`` is exactly 1, where the
        logarithm would otherwise be undefined and raise.

    Raises:
        ValueError: If ``f`` is greater than 1.
    """
    deficit = 1 - f
    if deficit == 0:
        # log10(0) raises ValueError; the limit of -log10(x) as x -> 0+ is +inf.
        return math.inf
    return -math.log10(deficit)

def _print_row(label, entropies, waste_text, throughput_text):
    """Print one tab-separated table row followed by a blank line.

    label already carries its trailing tabs; entropies maps each dataset tag
    to the entropy shown in that dataset's column.
    """
    cells = "".join(
        f"{entropy_as_log(entropies[tag]):.7f}\t\t" for tag in dataset_tags
    )
    print(f"{label}{cells}\t{waste_text}\t\t\t{throughput_text}\t\t", end="\n\n")


print("Method\t\tP=0.9\t\t\tP=0.5\t\t\tSimulator\t\t\tWaste Ratio\t\t\tThroughput(Mb/s)")

# The unprocessed data wastes nothing and has no throughput to report.
_print_row("Control\t\t", before_entropy, "0.0000000", "N/A")

# Rows are listed in the same index order as waste_ratios and timed_throughput.
method_rows = (
    ("Von Neumann\t", neumann_entropy),
    ("Toeplitz\t", toeplitz_entropy),
    ("FFT   \t\t", FFT_entropy),
    ("Parity\t\t", parity_entropy),
)
for idx, (label, entropies) in enumerate(method_rows):
    _print_row(
        label,
        entropies,
        f"{waste_ratios[idx]:.7f}",
        f"{timed_throughput[idx]:.7f}",
    )


Method		P=0.9			P=0.5			Simulator			Waste Ratio			Throughput
Control		0.2748098		7.2644544		7.0783868			0.0000000			N/A		

Von Neumann	7.2721752		7.4678074		6.7949298			0.8033378			1.0390285		

Toeplitz	6.5858425		6.8346587		7.0389555			0.0000000			2.5262835		

FFT   		6.7125255		6.6207089		7.0035760			0.0000000			2.1957290		

Parity		0.9049188		6.9154407		9.5397957			0.7500000			23.3160697		

