In [1]:
import postprocessing as pp
import numpy as np
import time
import math

In [2]:
def waste_ratio(old_data, new_data):
    """Return the fraction of input elements discarded by a transformation.

    Computed as ``1 - len(new_data) / len(old_data)``.

    Args:
        old_data: Sized collection holding the data before the transformation.
        new_data: Sized collection holding the data after the transformation.

    Returns:
        float: 0.0 when the output is as long as the input, approaching 1.0 as
        more of the input is dropped. Negative if ``new_data`` is longer than
        ``old_data``. An empty ``old_data`` yields 0.0 (nothing could be wasted)
        instead of raising ZeroDivisionError.
    """
    original_length = len(old_data)
    if original_length == 0:
        # Guard the division: an empty input has nothing to waste.
        return 0.0
    return 1 - len(new_data) / original_length

def throughput(time, new_data):
    """Return the output rate of a transformation, scaled by 1e-6.

    Computed as ``len(new_data) / time / 1e6``. The original inline comment
    labels the unit as Mb/s, which presumably assumes one element per bit and
    ``time`` in seconds -- confirm against the callers.

    Args:
        time: Elapsed time of the transformation. NOTE: this parameter shadows
            the ``time`` module inside the function body; the name is kept so
            existing callers are unaffected.
        new_data: Sized collection produced by the transformation.

    Returns:
        float: Elements per unit time divided by 1e6. When ``time`` is zero or
        negative (the clock did not advance measurably) ``float("inf")`` is
        returned instead of raising ZeroDivisionError or yielding a negative rate.
    """
    if time <= 0:
        # A timer with coarse resolution can report 0 for a very fast call.
        return float("inf")
    return len(new_data) / time / 1e6  # Mb/s

def read_file(filepath):
    """Read a text file of digit characters into a NumPy ``uint8`` array.

    Every non-whitespace character is converted with ``int``. All whitespace
    is ignored -- not only leading/trailing whitespace -- so a file that is
    split across several lines or groups is parsed instead of failing on the
    embedded newline or space.

    Args:
        filepath: Path of the UTF-8 text file to read.

    Returns:
        numpy.ndarray: 1-D ``uint8`` array with one entry per digit character,
        in file order. An empty (or whitespace-only) file gives an empty array.

    Raises:
        ValueError: If a non-whitespace character cannot be parsed by ``int``.
        OSError: If the file cannot be opened.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        raw_text = f.read()

    # str.split() with no arguments drops every run of whitespace, so joining
    # the pieces leaves only the digit characters. The file is already closed
    # here; parsing does not need the handle.
    digits = "".join(raw_text.split())
    return np.array([int(c) for c in digits], dtype=np.uint8)

In [10]:
# Load the datasets.
# dataset_tags names each dataset (a tag describes the type of data it holds);
# datapaths[i] is the file that belongs to dataset_tags[i].
dataset_tags = ["IBM_Brisbane", "IBM_Brisbane", "IBM_Brisbane"]
datapaths = ["ibm_brisbane_30_1024_30_2.txt", "ibm_brisbane_30_1024_30_2.txt", "ibm_brisbane_30_1024_30_2.txt"]

# Keyed by tag to minimize clutter. Repeated tags share a single key, so the
# file read last for a given tag is the one that stays in the dict.
datasets = {tag: read_file(path) for tag, path in zip(dataset_tags, datapaths)}


In [11]:
# Baseline: entropy of each raw dataset (via pp.shannon_entropy), measured
# before any extractor is applied. Keyed by dataset tag.
before_entropy = {tag: pp.shannon_entropy(datasets[tag]) for tag in dataset_tags}

In [12]:
# Apply every extractor to every dataset, recording throughput and waste ratio.

# Explicit Toeplitz-matrix construction, currently unused (kept for reference):
# block_size = 
# toeplitz_size = dataset_size
# first_row = np.random.randint(0, 2, toeplitz_size)
# first_column = np.random.randint(0, 2, toeplitz_size)
# toeplitz_matrix = pp.create_toeplitz(first_row, first_column)

# Extractor outputs, keyed by dataset tag.
neumann_datasets = {}
toeplitz_datasets = {}
FFT_datasets = {}
parity_datasets = {}

# (extractor function, dict receiving its output). The position in this list
# is the index used in timed_throughput / waste_ratios:
#   0 = Von Neumann, 1 = Toeplitz, 2 = FFT Toeplitz, 3 = parity.
extractors = [
    (pp.apply_von_neumann_extractor, neumann_datasets),
    (pp.apply_toeplitz_transformation, toeplitz_datasets),
    (pp.apply_fft_toeplitz, FFT_datasets),
    (pp.apply_parity_extractor, parity_datasets),
]

# Per-method sums while looping; turned into averages over all datasets below.
timed_throughput = [0] * len(extractors)
waste_ratios = [0] * len(extractors)

for tag in dataset_tags:
    for idx, (extract, results) in enumerate(extractors):
        # perf_counter() is monotonic and high-resolution, unlike time.time(),
        # which can jump and may report 0 elapsed for a fast call.
        t0 = time.perf_counter()
        results[tag] = extract(datasets[tag])
        elapsed = time.perf_counter() - t0
        timed_throughput[idx] += throughput(elapsed, results[tag])
        waste_ratios[idx] += waste_ratio(datasets[tag], results[tag])

# Average the accumulated sums across datasets.
for i in range(len(extractors)):
    timed_throughput[i] /= len(dataset_tags)
    waste_ratios[i] /= len(dataset_tags)
    





In [13]:
# Entropy (via pp.shannon_entropy) of each extractor's output.
# One result dict per method, keyed by dataset tag.
neumann_entropy, toeplitz_entropy, FFT_entropy, parity_entropy = {}, {}, {}, {}

# (extractor output dict, entropy dict to fill), in the order the methods
# are evaluated for each tag.
entropy_targets = (
    (neumann_datasets, neumann_entropy),
    (toeplitz_datasets, toeplitz_entropy),
    (FFT_datasets, FFT_entropy),
    (parity_datasets, parity_entropy),
)

for tag in dataset_tags:
    for extracted, entropies in entropy_targets:
        entropies[tag] = pp.shannon_entropy(extracted[tag])

In [16]:
# Display results
def entropy_as_log(f):
    """Return ``-log10(1 - f)``, a log-scale view of how close ``f`` is to 1.

    For example ``f = 0.9`` gives about 1 and ``f = 0.99`` gives about 2, so
    values very near 1 can be told apart.

    Args:
        f: Value to rescale; presumably an entropy normalized to [0, 1] --
            confirm against pp.shannon_entropy.

    Returns:
        float: ``-log10(1 - f)`` for ``f < 1``. ``math.inf`` when ``f >= 1``,
        where the logarithm is undefined and ``math.log10`` would raise
        ValueError.
    """
    deficit = 1 - f
    if deficit <= 0:
        # log10 of zero or a negative number is a math domain error.
        return math.inf
    return -math.log10(deficit)
def _print_row(label, entropy_by_tag, waste_text, throughput_text):
    """Print one table row followed by a blank line.

    The row is: label, the log-scaled entropy of every dataset tag, then the
    already-formatted waste-ratio and throughput cells.
    """
    print(label, end="")
    for tag in dataset_tags:
        print(f"{entropy_as_log(entropy_by_tag[tag]):.7f}\t\t", end="")
    print(f"\t{waste_text}\t\t", end="")
    print(f"\t{throughput_text}\t\t", end="")
    print('\n')


# Header: one column per dataset tag, then the two aggregate columns.
print("Method\t\t" + "".join(f"{tag}\t\t" for tag in dataset_tags) + "\tWasteRatio\t\t\tThroughput")

# Untransformed data: nothing is discarded and no throughput was measured.
_print_row("Control\t\t", before_entropy, "0.0000000", "N/A")

# One row per extractor; the row position matches the index used in
# waste_ratios / timed_throughput.
method_rows = [
    ("Von Neumann\t", neumann_entropy),
    ("Toeplitz\t", toeplitz_entropy),
    ("FFT   \t\t", FFT_entropy),
    ("Parity\t\t", parity_entropy),
]
for idx, (label, entropies) in enumerate(method_rows):
    _print_row(label, entropies, f"{waste_ratios[idx]:.7f}", f"{timed_throughput[idx]:.7f}")


Method		IBM_Brisbane		IBM_Brisbane		IBM_Brisbane			WasteRatio			Throughput
Control		7.5603954		7.5603954		7.5603954			0.0000000			N/A		

Von Neumann	7.9158515		7.9158515		7.9158515			0.7490560			1.3583762		

Toeplitz	4.3002129		4.3002129		4.3002129			0.0000000			1.8301500		

FFT   		4.4558668		4.4558668		4.4558668			0.0000000			1.7742240		

Parity		5.9125778		5.9125778		5.9125778			0.7500000			24.4624244		

