### Import

In [None]:
import altair as alt

%run implementations.ipynb
%run utilities.ipynb

### Load Data

In [None]:
# Load the npm package statistics. The file has no header row, so columns are
# addressed by integer position; column 2 is summed into `total_downloads`.
# NOTE: the following cell assigns the same two names from the Unsplash data,
# so whichever of the two cells ran last determines what the rest of the
# notebook evaluates.
sizes = pd.read_csv('npmpackages/npm_no_scope_full_stats_nonzero_downloads.csv', header=None)
total_downloads = sizes[2].sum()

In [None]:
# Load the Unsplash Lite statistics (header-less CSV, integer column labels).
# NOTE: this overwrites `sizes` and `total_downloads` from the npm cell above
# if both cells are executed.
sizes = pd.read_csv('unsplash/lite/unsplash_lite.csv', header=None)
# Column 2 is used as the per-row weight throughout the notebook; its sum is
# the normalisation constant.
total_downloads = sizes[2].sum()

### Average Overhead

In [None]:
# Column 1 of `sizes` holds the object sizes used by every method below.
size_list = sizes[1].tolist()

# Number of rows observed for each distinct size.
size_counts = defaultdict(int)
for observed_size in size_list:
    size_counts[observed_size] = size_counts[observed_size] + 1

# Distinct sizes in ascending order (iterating the dict yields its keys).
uniq_sizes = sorted(size_counts)

In [None]:
# Values of the parameter c to sweep: 1.01, 1.02, ..., 1.10.
# Dividing exact integers yields the same floats as the decimal literals.
c_list = [hundredths / 100 for hundredths in range(101, 111)]

In [None]:
# Accumulates one (method name, c, average overhead) tuple per evaluated
# configuration; every method cell below appends to it, and it is turned into
# a DataFrame afterwards. Re-run this cell to reset the results.
avg_overhead_list = []

PRP

In [None]:
# PRP: for each c, run BlahutArimoto_init and compute the expected padding
# overhead (padded size / original size) under the returned distributions.
for c in c_list:
    # Only i_max, cndl_idxs, p_ji, p_i and s are read below; the remaining
    # return values are unpacked but not used in this cell.
    (i_max, cndl_idxs, p_ji, p_i, s,
     orig_p_j_dict, size_to_i, this_MI, max_L) = BlahutArimoto_init(
        sizes, c, max_itr=10000000, eps=5e-3
    )

    max_overhead = 0.
    avg_overhead = 0.

    for src in range(len(uniq_sizes)):
        # Candidates of `src` are the indices src..i_max[src]; their entries
        # sit contiguously in p_ji starting at cndl_idxs[src].
        # (presumably p_ji is P(pad to j | size i) and p_i is P(size i) -- TODO confirm)
        first_slot = cndl_idxs[src]

        for dst in range(src, i_max[src] + 1):
            prob = p_ji[first_slot + (dst - src)]
            overhead = s[dst] / s[src]

            # The maximum only considers candidates that can actually occur.
            if prob > 0. and overhead > max_overhead:
                max_overhead = overhead

            avg_overhead += overhead * prob * p_i[src]

    avg_overhead_list.append(("PRP", c, avg_overhead))
    print(avg_overhead)

P-ALPaCA

In [None]:
# P-ALPaCA: for each c, take the distributions returned by
# p_alpaca_return_all and compute the expected padding overhead
# (padded size / original size).
for c in c_list:
    # Only i_max, cndl_idxs, p_ji, p_i and s are read below; the remaining
    # return values are unpacked but not used in this cell.
    (i_max, cndl_idxs, p_ji, p_i, s,
     orig_p_j_dict, size_to_i, this_MI, max_L) = p_alpaca_return_all(sizes, c)

    max_overhead = 0.
    avg_overhead = 0.

    for src in range(len(uniq_sizes)):
        # Candidates of `src` are the indices src..i_max[src]; their entries
        # sit contiguously in p_ji starting at cndl_idxs[src].
        # (presumably p_ji is P(pad to j | size i) and p_i is P(size i) -- TODO confirm)
        first_slot = cndl_idxs[src]

        for dst in range(src, i_max[src] + 1):
            prob = p_ji[first_slot + (dst - src)]
            overhead = s[dst] / s[src]

            # The maximum only considers candidates that can actually occur.
            if prob > 0. and overhead > max_overhead:
                max_overhead = overhead

            avg_overhead += overhead * prob * p_i[src]

    avg_overhead_list.append(("P-ALPaCA", c, avg_overhead))
    print(avg_overhead)

POP

In [None]:
# POP: for each c, pad every size up to the ceiling of its partition block and
# compute the download-weighted average overhead.
for c in c_list:
    partition, dyn_solution = opt_partition_MI(sizes, c)

    # Walk the ascending unique sizes and the partition blocks together;
    # element [1] of a block is used as that block's ceiling (padded size).
    # The walk advances by at most one block per size.
    size_map = {}
    current_block = 0
    current_ceiling = partition[current_block][1]
    for size in uniq_sizes:
        if size > current_ceiling:
            current_block += 1
            current_ceiling = partition[current_block][1]
        size_map[size] = current_ceiling

    max_overhead = 0.
    avg_overhead = 0.

    for row in sizes.itertuples(index=False):
        # Column 1 is the object size, column 2 its download count.
        orig_size, downloads = row[1], row[2]

        overhead = size_map[orig_size] / orig_size
        max_overhead = max(max_overhead, overhead)

        # Weight each row by its share of all downloads.
        avg_overhead += overhead * (downloads / total_downloads)

    avg_overhead_list.append(("POP", c, avg_overhead))
    print(avg_overhead)

PwoD

In [None]:
# PwoD: for each c, look up every size's padded size in the mapping returned
# by pureMaxL and compute the download-weighted average overhead.
for c in c_list:
    size_map = pureMaxL(sizes, c)

    max_overhead = 0.
    avg_overhead = 0.

    for row in sizes.itertuples(index=False):
        # Column 1 is the object size, column 2 its download count.
        orig_size, downloads = row[1], row[2]

        overhead = size_map[orig_size] / orig_size
        max_overhead = max(max_overhead, overhead)

        # Weight each row by its share of all downloads.
        avg_overhead += overhead * (downloads / total_downloads)

    avg_overhead_list.append(("PwoD", c, avg_overhead))
    print(avg_overhead)

D-ALPaCA

In [None]:
# D-ALPaCA: for each c, derive a bin size from the smallest object and compute
# the download-weighted average overhead of the sizes returned by getDALPaCA.
for c in c_list:
    min_size = sizes[1].min()
    # Bin size = the extra bytes the smallest object may gain under factor c
    # (truncated to an integer before subtracting).
    bin_size = int(c * min_size) - min_size

    # NOTE: never populated in this cell; kept so notebook state is unchanged.
    d_alpaca_list = []

    max_overhead = 0.
    avg_overhead = 0.

    for row in sizes.itertuples(index=False):
        # Column 1 is the object size, column 2 its download count.
        orig_size, downloads = row[1], row[2]

        overhead = getDALPaCA(orig_size, bin_size) / orig_size
        max_overhead = max(max_overhead, overhead)

        # Weight each row by its share of all downloads.
        avg_overhead += overhead * (downloads / total_downloads)

    avg_overhead_list.append(("D-ALPaCA", c, avg_overhead))
    print(avg_overhead)

Padme

In [None]:
# Padme takes no parameter c, so it is evaluated once over the whole dataset.
max_overhead = 0.
avg_overhead = 0.

for row in sizes.itertuples(index=False):
    # Column 1 is the object size, column 2 its download count.
    orig_size, downloads = row[1], row[2]

    overhead = getPadme(orig_size) / orig_size
    max_overhead = max(max_overhead, overhead)

    # Weight each row by its share of all downloads.
    avg_overhead += overhead * (downloads / total_downloads)

# The middle slot (the 'c' column of the results table) holds the observed
# maximum overhead here, not a swept parameter.
# NOTE(review): presumably so Padme appears as a single point on the same
# x-axis as the other methods -- confirm.
avg_overhead_list.append(("Padme", max_overhead, avg_overhead))
print(avg_overhead)

In [None]:
# One row per (method, c) result collected in avg_overhead_list.
# For the "Padme" row the 'c' column holds the measured maximum overhead.
df_avg_overhead = pd.DataFrame(
    avg_overhead_list,
    columns=['Method', 'c', 'Avg Overhead'],
)

In [None]:
# Line chart of average overhead against c, one line per method.
# NOTE(review): the x domain ends at 1.09 although c_list goes up to 1.1 --
# confirm the last sweep point is meant to fall outside the axis range.
x_channel = alt.X('c', scale=alt.Scale(domain=(1.01, 1.09)))
y_channel = alt.Y('Avg Overhead', scale=alt.Scale(domain=(1.00, 1.05)))

# Methods are told apart by both colour and dash pattern. The chart is the
# cell's final expression, so the notebook renders it.
alt.Chart(df_avg_overhead).mark_line().encode(
    x_channel,
    y_channel,
    color='Method',
    strokeDash='Method',
)

In [None]:
# Persist the results table. The file name is hard-coded for the Unsplash
# dataset; adjust it when `sizes` was loaded from the npm data instead.
# to_csv writes the DataFrame index as an extra leading column by default.
df_avg_overhead.to_csv('evaluation/avg_overhead-unsplash_1-01_to_1-10.csv')

__Print__ $\LaTeX{}$

In [None]:
# Reload a previously exported results table so the LaTeX export below can run
# without recomputing the overheads. The export loop only selects the
# 'Method', 'c' and 'Avg Overhead' columns by name, so any additional column
# in the CSV is ignored.
df_avg_overhead = pd.read_csv('evaluation/avg_overhead-nodejs_1-01_to_1-10.csv')
# Preview the frame that was just loaded. The original previewed
# `df_runtimes`, which is not assigned anywhere in the visible notebook and
# would raise NameError.
df_avg_overhead.head()

In [None]:
# pgfplots \addplot option string for each method, in plotting order.
# Padme is drawn as marks only; an alternative line-with-marks style would be
# "[line width=1pt, solid, black, mark=*]".
_method_styles = [
    ("PRP", "[line width=1pt, densely dotted, curvecolor]"),
    ("PwoD", "[line width=1pt, solid, curvecolor]"),
    ("POP", "[line width=1pt, dash pattern=on 1pt off 3pt on 3pt off 3pt, curvecolor]"),
    ("P-ALPaCA", "[line width=1pt, densely dotted, black]"),
    ("Padme", "[scatter, only marks, black, mark=*]"),
    ("D-ALPaCA", "[line width=1pt, dash pattern=on 1pt off 3pt on 3pt off 3pt, black]"),
]

# Ordered method names and the name -> style lookup, both derived from the
# single list above so they cannot drift apart.
methods = [name for name, _style in _method_styles]
legend = dict(_method_styles)

In [None]:
# Emit one pgfplots "\addplot ... table {...};" block per method, with
# tab-separated (c, average overhead) rows, for pasting into the LaTeX source.
for method in methods:
    is_method = df_avg_overhead['Method'] == method
    method_rows = df_avg_overhead.loc[is_method, ['c', 'Avg Overhead']]

    print("\\addplot " + legend[method])
    print("table {%")
    # Without the index, each tuple is (c, Avg Overhead); print() applies
    # str() to both values and joins them with a tab.
    for row in method_rows.itertuples(index=False):
        print(row[0], row[1], sep='\t')
    print("};")