### Import

In [None]:
import altair as alt

%run implementations.ipynb
%run utilities.ipynb

### Load Data

In [None]:
# Load the npm package data set. The CSV has no header row, so the columns get
# the integer labels 0, 1, 2, ...; column 1 is used as the size further down.
# NOTE(review): this cell and the next one both assign `sizes`; whichever is
# executed last determines the data set that the analysis below runs on.
sizes = pd.read_csv('npmpackages/npm_no_scope_full_stats_nonzero_downloads.csv', header=None)
# Show the first rows as the cell output.
sizes.head()

In [None]:
# Load the Unsplash "lite" data set. The CSV has no header row, so the columns
# get the integer labels 0, 1, 2, ...; column 1 is used as the size further down.
# NOTE(review): this overwrites `sizes` if the npm cell above was run first.
sizes = pd.read_csv('unsplash/lite/unsplash_lite.csv', header=None)
# Show the first rows as the cell output.
sizes.head()

### Mutual Information & MaxL Analysis

In [None]:
# Column 1 of the loaded table is treated as the per-row size.
size_list = sizes[1].tolist()

# Tally how many rows report each distinct size.
size_counts = defaultdict(int)
for size in size_list:
    size_counts[size] = size_counts[size] + 1

# Distinct sizes in ascending order (iterating the mapping yields its keys).
uniq_sizes = sorted(size_counts)

In [None]:
# Values of the parameter c that every c-dependent method is evaluated for.
c_list = [1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08, 1.09, 1.1]

# One (method name, c, MI, MaxL) tuple per evaluated configuration.
MI_and_MaxL = []


def _pad_rows(frame, pad):
    """Return the rows of *frame* as tuples with column 1 replaced by ``pad(size)``.

    Columns 0 and 2 are carried over unchanged and the row order is kept.
    *pad* is called once per row with the original value of column 1.
    """
    return [(row[0], pad(row[1]), row[2]) for row in frame.itertuples(index=False)]


# The smallest size does not depend on c, so it is computed once up front
# instead of once per loop iteration.
min_size = sizes[1].min()

for c in c_list:
    # MI_per_request
    # NOTE(review): MaxL is passed through log2 here and for P-ALPaCA, while the
    # methods evaluated via calc_MI_and_MaxL_fixed store MaxL as returned --
    # presumably that helper already reports a log2 value; confirm.
    MI, MaxL, itr = BlahutArimoto(sizes, c, max_itr=10000000, eps=1e-2)
    MI_and_MaxL.append(("MI_per_request", c, MI, math.log2(MaxL)))
    print(str(c) + '\t' + "MI_per_request")

    # P-ALPaCA
    MI, MaxL = p_alpaca(sizes, c)
    MI_and_MaxL.append(("P-ALPaCA", c, MI, math.log2(MaxL)))
    print(str(c) + '\t' + "P-ALPaCA")

    # MI_per_object
    partition, dyn_solution = opt_partition_MI(sizes, c)

    # Map every distinct size to the ceiling of the partition block it falls in.
    # Assumes partition[i][1] is the largest size of block i, that blocks are in
    # ascending order, and that every block contains at least one observed size
    # (the walk advances by at most one block per size) -- TODO confirm against
    # opt_partition_MI.
    size_map = {}
    current_block = 0
    current_ceiling = partition[current_block][1]

    for size in uniq_sizes:
        if size > current_ceiling:
            current_block += 1
            current_ceiling = partition[current_block][1]
        size_map[size] = current_ceiling

    dyn_list = _pad_rows(sizes, size_map.__getitem__)
    dyn_sizes = pd.DataFrame(dyn_list)

    MI, MaxL = calc_MI_and_MaxL_fixed(dyn_sizes)
    MI_and_MaxL.append(("MI_per_object", c, MI, MaxL))
    print(str(c) + '\t' + "MI_per_object")

    # Pure_MaxL
    # pureMaxL is indexed by original size below, i.e. it is used as a
    # size -> padded-size mapping.
    size_map = pureMaxL(sizes, c)

    dyn_list = _pad_rows(sizes, size_map.__getitem__)
    dyn_sizes = pd.DataFrame(dyn_list)

    MI, MaxL = calc_MI_and_MaxL_fixed(dyn_sizes)
    MI_and_MaxL.append(("Pure_MaxL", c, MI, MaxL))
    print(str(c) + '\t' + "Pure_MaxL")

    # D-ALPaCA
    # Bin width derived from the smallest size: the truncated c-fold of
    # min_size minus min_size itself.
    bin_size = int(c*min_size) - min_size

    d_alpaca_list = _pad_rows(sizes, lambda size: getDALPaCA(size, bin_size))
    d_alpaca_sizes = pd.DataFrame(d_alpaca_list)

    MI, MaxL = calc_MI_and_MaxL_fixed(d_alpaca_sizes)
    MI_and_MaxL.append(("D-ALPaCA", c, MI, MaxL))
    print(str(c) + '\t' + "D-ALPaCA")

# Padme
# Padme takes no c parameter; instead the largest observed padded/original
# ratio is recorded and stored in the 'c' position of the result tuple.
padme_list = []

max_overhead = 0.

for row in sizes.itertuples(index=False):
    padme_size = getPadme(row[1])
    padme_list.append((row[0], padme_size, row[2]))

    overhead = padme_size / row[1]
    # max() keeps the current value unless the new one is strictly greater.
    max_overhead = max(max_overhead, overhead)

padme_sizes = pd.DataFrame(padme_list)

MI, MaxL = calc_MI_and_MaxL_fixed(padme_sizes)
MI_and_MaxL.append(("Padme", max_overhead, MI, MaxL))
print(str(max_overhead) + '\t' + "Padme")

In [None]:
# Turn the collected (method, c, MI, MaxL) tuples into one results table.
# For the single "Padme" row the 'c' column holds the measured maximum
# overhead rather than a value taken from c_list.
df_MI_and_MaxL = pd.DataFrame(MI_and_MaxL, columns =['Method', 'c', 'MI', 'MaxL'])

In [None]:
# One line per method: MI on the x axis against MaxL on the y axis, with the
# method distinguished by both colour and dash pattern. Axis ranges are fixed.
alt.Chart(df_MI_and_MaxL).mark_line().encode(
    x=alt.X('MI', scale=alt.Scale(domain=(5.0, 13))),
    y=alt.Y('MaxL', scale=alt.Scale(domain=(5.5, 14))),
    color='Method',
    strokeDash='Method',
)

In [None]:
# Results table without the D-ALPaCA rows.
df_MI_and_MaxL_temp = df_MI_and_MaxL.loc[
    df_MI_and_MaxL['Method'].ne('D-ALPaCA'),
    ['Method', 'c', 'MI', 'MaxL'],
]

# One line per remaining method: MI as a function of c, with fixed axis ranges.
alt.Chart(df_MI_and_MaxL_temp).mark_line().encode(
    x=alt.X('c', scale=alt.Scale(domain=(1.01, 1.09))),
    y=alt.Y('MI', scale=alt.Scale(domain=(5.0, 8.5))),
    color='Method',
    strokeDash='Method',
)

In [None]:
# Persist the results table.
# NOTE(review): the file name is hard-coded to the nodejs data set, while the
# read-back cell further down names the unsplash file -- make sure the name
# matches the data set that was actually loaded into `sizes`.
df_MI_and_MaxL.to_csv('evaluation/MI-and-MaxL-nodejs_1-01_to_1-10.csv')

__Print__ $\LaTeX{}$

In [None]:
# Reload previously saved results so the LaTeX output below can be produced
# without recomputing them; this replaces any in-memory df_MI_and_MaxL.
# NOTE(review): the save cell calls to_csv without index=False, so the file
# presumably carries an extra unnamed index column that reappears here; the
# code below selects columns by name, so it would not be affected -- confirm.
df_MI_and_MaxL = pd.read_csv('evaluation/MI-and-MaxL-unsplash_1-01_to_1-10.csv')
# Show the first rows as the cell output.
df_MI_and_MaxL.head()

In [None]:
# Order in which the c-dependent methods are emitted as \addplot series.
# "Padme" is not listed because it is printed separately, before that loop.
methods = ["D-ALPaCA", "Pure_MaxL", "P-ALPaCA", "MI_per_object", "MI_per_request"]
# Option string that is appended verbatim to "\addplot" for each method: a fill
# style followed by the settings that enable explicit y error bars.
legend = {
          "MI_per_request": "[style={fill=white},error bars/.cd, y dir=both, y explicit]", 
          "MI_per_object": "[black!100!white,fill=black!25!white,error bars/.cd, y dir=both, y explicit]", 
          "P-ALPaCA": "[black!100!white,fill=black!50!white,error bars/.cd, y dir=both, y explicit]", 
          "Pure_MaxL": "[black!100!white,fill=black!75!white,error bars/.cd, y dir=both, y explicit]", 
          "Padme": "[style={fill=white,postaction={pattern=north east lines}},error bars/.cd, y dir=both, y explicit]", 
          "D-ALPaCA": "[black!100!white,fill=black!100!white,error bars/.cd, y dir=both, y explicit]"
         }

In [None]:
# Padme series: its rows are printed in exactly one c slot and every other slot
# gets a placeholder coordinate at -0.01. To draw the Padme bar elsewhere (an
# earlier commented-out variant used 1.09), change padme_slot.
padme_slot = "1.03"
c_slots = ("1.01", "1.02", "1.03", "1.04", "1.05",
           "1.06", "1.07", "1.08", "1.09", "1.1")

df_method_temp = df_MI_and_MaxL.loc[df_MI_and_MaxL['Method'] == "Padme", ['c', 'MI', 'MaxL']]
print("\\addplot" + legend["Padme"])
print("coordinates {%")
for slot in c_slots:
    if slot != padme_slot:
        print("(" + slot + ", -0.01)")
        continue
    for row in df_method_temp.itertuples():
        # The error bar spans from MI up to MaxL.
        error = row.MaxL - row.MI
        print(f"({slot}, {row.MI!s}) += (0,{error!s})")
print("};")

# All c-dependent methods: one \addplot per method with one coordinate per row,
# positioned at the row's c value, again with MaxL - MI as the error bar.
for method in methods:
    is_method = df_MI_and_MaxL['Method'] == method
    df_method_temp = df_MI_and_MaxL.loc[is_method, ['c', 'MI', 'MaxL']]
    print("\\addplot" + legend[method])
    print("coordinates {%")
    for row in df_method_temp.itertuples():
        error = row.MaxL - row.MI
        print(f"({row.c!s}, {row.MI!s}) += (0,{error!s})")
    print("};")