In [1]:
from IPython.display import display
import pandas as pd 
import numpy as np
import itertools
import math

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


In [2]:
df = pd.read_csv("collision_summary.csv")
# Remove useless experiments! 
df = df[df['C_size'] > df['log2(nbytes)']]

# Get all the possible values of C
C_sizes = df["C_size"].unique()
print(f"C_sizes are {C_sizes}")
print(f"Column Names are {df.columns.values.tolist()}")

C_sizes are [16 18 20 22 24 26]
Column Names are ['C_size', 'A_size', 'log2(nbytes)', 'difficulty', '#points', '#distinguished_points', 'log2(#distinguished_points)', '#collisions', 'log2(#collisions)', '#updates', 'time(sec)']


In [3]:
# Ignore experiments where we have more memory than the problem size
df = df[df['C_size'] > df['log2(nbytes)']]
# Get all C_sizes 
C_sizes = df["C_size"].unique()


In [4]:
print("avg stats for a normal run...\n"
     "scale: sqrt(n^3/w)\n"
     "----------------------\n")

for c in C_sizes:
    print(f"C_size = {c}, scale = 2^{c*3/2}/sqrt(w)")
    df_C = df[df["C_size"] == c]
    
    difficulties = df_C["difficulty"].unique()
    rams = df_C["log2(nbytes)"].unique()
    
    for d in difficulties:
        df_C_d = df_C[df_C['difficulty'] == d]

        for m in rams:
            df_C_d_m = df_C_d[df_C_d['log2(nbytes)'] == m]
            
            # Calculate the mean of the desired column (e.g., 'Column1')
            ndist_avg = df_C_d_m["log2(#distinguished_points)"].mean()
            npoints_avg = df_C_d_m["#points"].mean()
            mean_collisions = df_C_d_m["log2(#collisions)"].mean()
            
            # in the papaer they have divided by sqrt(n^3/w)
            scale_factor = (c*3 - m)/2
            
            ndist_scaled = 2**(ndist_avg - scale_factor)
            npoints =  2**(npoints_avg - scale_factor)
            ncollisions_scaled = 2**(mean_collisions - scale_factor)
            
            ndist_std_scaled = df_C_d_m["#distinguished_points"].std() / scale_factor
            npoints_std_scaled = df_C_d_m["#points"].std() / scale_factor
            ncollisions_std_scaled = df_C_d_m["#collisions"].std() / scale_factor

            # skip entries that don't have values
            if not (math.isnan(ndist_avg) or math.isnan(mean_collisions)):
                print(f"#distinguihsed_scaled = {ndist_scaled:.03f}  σ(ndist) = {ndist_std_scaled:.2f}, #collisions_scaled = {ncollisions_scaled:.03f},  σ(col) = {ncollisions_std_scaled:.2f}, diff = {d}, mem=2^{m:.03f}, nruns = {df_C_d_m.shape[0]}")
                # print(f"#collisions = 2^{(mean_collisions/scale_factor):.03f}, #distinguihsed_pts = 2^{(mean_value/scale_factor):.03f} pts, diff = {d}, mem=2^{m:.03f}, nruns = {df_C_d_m.shape[0]}")
        print("-"*40)
    print("="*40 + "\n")

avg stats for a normal run...
scale: sqrt(n^3/w)
----------------------

C_size = 16, scale = 2^24.0/sqrt(w)
#distinguihsed_scaled = 0.686  σ(ndist) = 22643.57, #collisions_scaled = 0.062,  σ(col) = 2058.62, diff = 6, mem=2^12.322, nruns = 319
----------------------------------------
#distinguihsed_scaled = 1.011  σ(ndist) = 23165.64, #collisions_scaled = 0.099,  σ(col) = 2281.69, diff = 5, mem=2^13.322, nruns = 320
#distinguihsed_scaled = 0.833  σ(ndist) = 15581.71, #collisions_scaled = 0.142,  σ(col) = 2670.15, diff = 5, mem=2^14.322, nruns = 320
----------------------------------------
#distinguihsed_scaled = 1.139  σ(ndist) = 16494.70, #collisions_scaled = 0.202,  σ(col) = 2981.33, diff = 4, mem=2^15.322, nruns = 320
----------------------------------------

C_size = 18, scale = 2^27.0/sqrt(w)
#distinguihsed_scaled = 0.688  σ(ndist) = 144775.12, #collisions_scaled = 0.036,  σ(col) = 7669.46, diff = 6, mem=2^13.322, nruns = 446
#distinguihsed_scaled = 0.700  σ(ndist) = 84633.24, #co