# Importing Packages

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math

# import my tANS function
from Functions import Coder, Utils, CompTensor

In [2]:
range_ = (1,7)

# Importing The Symbol Table

In [4]:
s_tabs = [ pd.read_csv(f"trace/mobilenet_v2/input_{i}_flat.apack", sep = " ", header = None) for i in range(range_[0],range_[1])]

for s_tab in s_tabs:            
    s_tab.columns = ["vmin","OL","abits","obits","vcnt"]

# s_tabs[0].vmin, bisect.bisect_right(s_tabs[0].vmin,47) - 1
s_tabs[0]

Unnamed: 0,vmin,OL,abits,obits,vcnt
0,0,0,136178,0,23358
1,1,5,273098,305040,61008
2,32,4,273917,246472,61618
3,48,4,350503,356392,89098
4,64,4,415726,471844,117961
5,80,4,461519,566536,141634
6,96,4,489597,630684,157671
7,112,4,492689,639620,159905
8,128,4,472310,591748,147937
9,144,4,433024,504964,126241


# Importing the data

In [4]:
# importing the data
data = [np.load(f"trace/mobilenet_v2/input_{i}.npy") for i in range(range_[0],range_[1])]
#data_flat = data.flatten()
np.unique(data[0]), data[0].shape

(array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
         26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
         39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
         52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
         78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
         91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
        104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
        117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
        130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
        143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
        156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
        169, 170, 171, 172, 173, 174, 175, 176, 177

# Compressing the data


1. Convert each data point to a *(symbol, offset)* pair
2. Compress Values to value stream
3. Put offset in offset stream

### *(Symbol, Offset)* Pair

In [6]:
# converting each data point to a symbol, offset pair

comp_tensors = []
for i, dat in enumerate(data):
    print(f"Processing data {i}")
    comp_tensors.append([CompTensor.CompTensor(d,s_tabs[i]) for d in dat])

# Access points of the first CompTensor of the first sublist
print(comp_tensors[0][0].points[0])

Processing data 0
Processing data 1
Processing data 2
Processing data 3
Processing data 4
Processing data 5
Point: 128, Symbol: 8, Offset: 0


### Prep Symbols for Compression

In [7]:
# Getting freqs, must be a power of 2
freqs = []

for s_tab in s_tabs:
    # Get frequencies from the symbol table
    freq = list(s_tab.vcnt)

    # rescale so the sum of freq is 2**10, this ensures the Coder works effieciently
    # before I was just rescaling to the most accurate power of 2, but the coder would time out
    # building the object
    # Note: the rescale_list_to_power_of_2 function ensures that the sum of the list is a power of 2
    #       and also that no element is zero (bumps up the smallest elements to 1)
    freq = Utils.rescale_list_to_power_of_2(freq, 2**10)
    
    # append to freqs
    freqs.append(freq)

    print("Sum:", sum(freq))
    
print(freqs[0])

Sum: 1024
Sum: 1024
Sum: 1024
Sum: 1024
Sum: 1024
Sum: 1024
[17, 46, 46, 67, 89, 108, 120, 120, 112, 95, 74, 53, 35, 21, 11, 10]


### OFfsets

In [8]:
def int_to_binary_list(value, nbits):
    if value >= 2**nbits or value < 0:
        raise ValueError(f"Value {value} cannot be represented in {nbits} bits.")
    
    binary_list = [int(bit) for bit in bin(value)[2:].zfill(nbits)]
    return binary_list

In [9]:
# make offset bitstream for one tensor

offset_stream = []

for i in range(len(comp_tensors)):
    offset_stream.append([])
    for j in range(len(comp_tensors[i])):
        offset_stream[i].append([])
        for k in range(len(comp_tensors[i][j].points)):
            offset_stream[i][j].extend(int_to_binary_list(comp_tensors[i][j].points[k].off, comp_tensors[i][j].points[k].OL))

In [10]:
# get shape of the offset stream
print(len(offset_stream), len(offset_stream[0]), len(offset_stream[0][0]), len(offset_stream[0][0])/8)

6 9 601242 75155.25


In [11]:
import time
nbits = 8 # takes 4 bits to represent each symbol

all_run_times = []
all_build_times = []
all_comp_ratios = []
all_bps = []

for i in range(len(freqs)):
    print(f"Compressing Layer {i}")

    run_times = []
    build_times = []
    comp_ratios = []
    bp_sym = []

    for j in range(len(comp_tensors[i])):
        print(f"\tCompressing tensor {j}")
        
        # compressing the symbols
        time_start = time.time()
        
        c = Coder.Coder(sum(freqs[i]), [i for i in range(len(freqs[i]))], freqs[i], fast = False)
        
        time_end = time.time()
        build_time_taken = time_end - time_start

        msg = [p.symbol for p in comp_tensors[i][j].points]

        time_start = time.time()
        out, comp_bits = c.encode_decode(msg)
        time_end = time.time()
        run_time_taken = time_end - time_start
        
        # factoring in the offset bits  
        total_comp_bits = comp_bits + len(offset_stream[i][j])

        if out != msg:
            print("Error in encoding and decoding")
            break
        
        run_times.append(run_time_taken)
        build_times.append(build_time_taken)
        comp_ratios.append(len(msg) * nbits / total_comp_bits)
        bp_sym.append(total_comp_bits / len(msg))
        
    # print average stats
    print("\tAverage run time taken: %f seconds" % np.mean(run_times))
    print("\tAverage build time taken: %f seconds" % np.mean(build_times))
    print("\tAverage compression ratio: %f" % np.mean(comp_ratios))
    print("\tAverage bits per symbol: %f" % np.mean(bp_sym))
    
    # add stats to all lists
    all_run_times.append(run_times)
    all_build_times.append(build_times)
    all_comp_ratios.append(comp_ratios)
    all_bps.append(bp_sym)

Compressing Layer 0
	Compressing tensor 0
	Compressing tensor 1
	Compressing tensor 2
	Compressing tensor 3
	Compressing tensor 4
	Compressing tensor 5
	Compressing tensor 6
	Compressing tensor 7
	Compressing tensor 8
	Average run time taken: 118.523167 seconds
	Average build time taken: 0.074647 seconds
	Average compression ratio: 1.038416
	Average bits per symbol: 7.704040
Compressing Layer 1
	Compressing tensor 0
	Compressing tensor 1
	Compressing tensor 2
	Compressing tensor 3
	Compressing tensor 4
	Compressing tensor 5
	Compressing tensor 6
	Compressing tensor 7
	Compressing tensor 8
	Average run time taken: 101.235105 seconds
	Average build time taken: 0.014001 seconds
	Average compression ratio: 1.038416
	Average bits per symbol: 7.704040
Compressing Layer 2
	Compressing tensor 0
	Compressing tensor 1
	Compressing tensor 2
	Compressing tensor 3
	Compressing tensor 4
	Compressing tensor 5
	Compressing tensor 6
	Compressing tensor 7
	Compressing tensor 8
	Average run time taken: 2

In [12]:
# display stats in a dataframe
import pandas as pd

freqs = freqs

stats_1 = pd.DataFrame({"Layer": [i for i in range(len(freqs))],
                      "Run Time": [np.mean(all_run_times[i]) for i in range(len(freqs))],
                      "Build Time": [np.mean(all_build_times[i]) for i in range(len(freqs))],
                      "Compression Ratio": [np.mean(all_comp_ratios[i]) for i in range(len(freqs))],
                      "Bits per Symbol": [np.mean(all_bps[i]) for i in range(len(freqs))]})

In [13]:
stats_1

Unnamed: 0,Layer,Run Time,Build Time,Compression Ratio,Bits per Symbol
0,0,118.523167,0.074647,1.038416,7.70404
1,1,101.235105,0.014001,1.038416,7.70404
2,2,2.039025,0.013399,2.772173,2.886605
3,3,0.00542,0.012967,2.071002,3.86441
4,4,0.005318,0.011892,1.366112,5.856667
5,5,0.005362,0.012687,1.366112,5.856667


In [14]:
# get average compressino ration
print("Average Compression Ratio:", np.mean(stats_1["Compression Ratio"]))

Average Compression Ratio: 1.6087051983992662
