# Importing Packages

In [1]:
import math
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# import my tANS function
from Functions import Coder, Utils

In [2]:
# Half-open (start, stop) pair of layer indices; the loading cell feeds it to range(),
# so weight files start .. stop-1 are read.
range_ = (0,51)

# Importing the data

In [3]:
# Load one weight array per layer index in range_ from trace/mobilenet_v2/
data = [
    np.load(f"trace/mobilenet_v2/weight_{layer_idx}.npy")
    for layer_idx in range(*range_)
]

# Quick look at layer 0: the distinct values it contains and its flattened shape
np.unique(data[0]), (data[0].size,)

(array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
         26,  27,  28,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
         40,  43,  44,  45,  46,  47,  48,  51,  52,  55,  56,  58,  59,
         61,  62,  64,  66,  67,  70,  82,  86,  94, 105, 112, 119, 127,
        128, 129, 153, 162, 173, 180, 181, 183, 187, 188, 189, 191, 192,
        193, 196, 197, 198, 199, 200, 204, 208, 209, 210, 211, 212, 213,
        214, 215, 216, 217, 219, 220, 221, 222, 223, 224, 225, 226, 227,
        228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
        241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
        254, 255], dtype=uint8),
 (864,))

# Compressing the data


1. Convert each data point to a *(symbol, offset)* pair
2. Compress the values into the value stream
3. Put the offsets in the offset stream

### Get frequencies

In [4]:
# Calculate frequency of each uint8 value
def calculate_frequency(array):
    """Count how often each of the 256 possible uint8 values occurs in ``array``.

    Parameters
    ----------
    array : numpy.ndarray
        Array of dtype ``uint8``. Any shape is accepted; it is flattened before
        counting so every element contributes exactly once.

    Returns
    -------
    numpy.ndarray
        Integer array of length 256 where entry ``v`` is the number of elements
        equal to ``v``. Values that never occur have a count of 0.

    Raises
    ------
    ValueError
        If ``array`` is not of dtype ``uint8``.
    """
    # Ensure the input array is of type uint8
    if array.dtype != np.uint8:
        raise ValueError("Input array must be of type uint8")

    # np.bincount tallies in C instead of a Python-level loop. Flattening first matters:
    # iterating a multi-dimensional array yields rows, and ``freq[row] += 1`` does not
    # accumulate repeated indices, which would silently under-count.
    # minlength=256 guarantees a slot for every possible byte value, even absent ones.
    counts = np.bincount(array.ravel(), minlength=256)

    # Keep the same integer dtype the zero-initialised accumulator used to have.
    return counts.astype(int, copy=False)

In [5]:
# Raw per-layer histograms: one 256-entry count array per loaded weight array
raw_freqs = [calculate_frequency(d.flatten()) for d in data]

# rescale the frequencies to a power of 2
# (2**10 is passed as the target; presumably the rescaled counts sum to it --
#  the check below prints the actual sum for layer 0)
freqs = [Utils.rescale_list_to_power_of_2(freq, 2**10) for freq in raw_freqs]

# Sanity check on layer 0: total of the rescaled counts and number of symbols.
# Two separate statements, so the cell no longer evaluates to a (None, None) tuple.
print(sum(freqs[0]))
print(len(freqs[0]))

1024
256


(None, None)

In [6]:
nbits = 8 # each uncompressed symbol is a uint8 value, i.e. 8 bits

# One entry per processed layer; each entry is itself a list of measurements.
all_run_times = []
all_build_times = []
all_comp_ratios = []
all_bps = []

for i in range(len(freqs)):
    print(f"Compressing Layer {i}")

    # Only one measurement is appended per layer below, so the "Average" lines
    # printed for a layer are the mean of a single value.
    run_times = []
    build_times = []
    comp_ratios = []
    bp_sym = []

    # --- build the coder for this layer -------------------------------------
    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # elapsed intervals; time.time() is wall-clock and can jump.
    time_start = time.perf_counter()

    # Arguments: total of the layer's frequencies, the symbol indices 0..len-1,
    # and the frequencies themselves (exact Coder semantics live in Functions -- TODO confirm).
    c = Coder.Coder(sum(freqs[i]), list(range(len(freqs[i]))), freqs[i], fast = False)

    time_end = time.perf_counter()
    build_time_taken = time_end - time_start

    msg = list(data[i].flatten())

    # --- encode + decode round trip -----------------------------------------
    time_start = time.perf_counter()
    out, comp_bits = c.encode_decode(msg)
    time_end = time.perf_counter()
    run_time_taken = time_end - time_start

    # Round-trip check. NOTE(review): assumes `out` is a plain list; if it were an
    # ndarray this comparison would be element-wise -- confirm against Coder.
    if out != msg:
        print("Error in encoding and decoding")
        # Stops the whole sweep; the all_* lists then cover only the layers before this one.
        break

    run_times.append(run_time_taken)
    build_times.append(build_time_taken)
    comp_ratios.append(len(msg) * nbits / comp_bits)  # raw bits / compressed bits
    bp_sym.append(comp_bits / len(msg))               # compressed bits per symbol

    # print average stats
    print("\tAverage run time taken: %f seconds" % np.mean(run_times))
    print("\tAverage build time taken: %f seconds" % np.mean(build_times))
    print("\tAverage compression ratio: %f" % np.mean(comp_ratios))
    print("\tAverage bits per symbol: %f" % np.mean(bp_sym))

    # add stats to all lists
    all_run_times.append(run_times)
    all_build_times.append(build_times)
    all_comp_ratios.append(comp_ratios)
    all_bps.append(bp_sym)

Compressing Layer 0
	Average run time taken: 0.201652 seconds
	Average build time taken: 0.380554 seconds
	Average compression ratio: 1.372790
	Average bits per symbol: 5.827546
Compressing Layer 1
	Average run time taken: 0.072772 seconds
	Average build time taken: 0.371672 seconds
	Average compression ratio: 1.475032
	Average bits per symbol: 5.423611
Compressing Layer 2
	Average run time taken: 0.127743 seconds
	Average build time taken: 0.324795 seconds
	Average compression ratio: 1.210760
	Average bits per symbol: 6.607422
Compressing Layer 3
	Average run time taken: 0.432860 seconds
	Average build time taken: 0.365896 seconds
	Average compression ratio: 1.201056
	Average bits per symbol: 6.660807
Compressing Layer 4
	Average run time taken: 0.265079 seconds
	Average build time taken: 0.375103 seconds
	Average compression ratio: 1.229674
	Average bits per symbol: 6.505787
Compressing Layer 5
	Average run time taken: 0.665390 seconds
	Average build time taken: 0.379838 seconds
	Ave

In [7]:
# display stats in a dataframe
# display stats in a dataframe
# Rows are derived from the measurement lists themselves rather than from len(freqs),
# so the table still builds (with fewer rows) if the compression loop stopped early.
# Each all_* entry is a list of measurements for one layer; np.mean collapses it to a scalar.
stats_1 = pd.DataFrame({"Layer": list(range(len(all_run_times))),
                      "Run Time": [np.mean(layer_times) for layer_times in all_run_times],
                      "Build Time": [np.mean(layer_times) for layer_times in all_build_times],
                      "Compression Ratio": [np.mean(layer_ratios) for layer_ratios in all_comp_ratios],
                      "Bits per Symbol": [np.mean(layer_bps) for layer_bps in all_bps]})

In [8]:
# Render the per-layer statistics table as the cell output
stats_1

Unnamed: 0,Layer,Run Time,Build Time,Compression Ratio,Bits per Symbol
0,0,0.201652,0.380554,1.37279,5.827546
1,1,0.072772,0.371672,1.475032,5.423611
2,2,0.127743,0.324795,1.21076,6.607422
3,3,0.43286,0.365896,1.201056,6.660807
4,4,0.265079,0.375103,1.229674,6.505787
5,5,0.66539,0.379838,1.124725,7.112847
6,6,1.021387,0.315802,1.165255,6.865451
7,7,0.389977,0.373455,1.187085,6.739198
8,8,1.038126,0.367356,1.190698,6.71875
9,9,1.049426,0.371443,1.198388,6.675637


In [9]:
# Average compression ratio over the layers in stats_1.
# This is an unweighted mean of the per-layer ratios: every layer counts equally,
# regardless of how many weights it holds.
avg_comp_ratio = np.mean(stats_1["Compression Ratio"])
print("Average Compression Ratio:", avg_comp_ratio)

Average Compression Ratio: 1.2518844134416625
