In [178]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob

In [179]:
# Load every saved MSE array into one long-format DataFrame (one row per run).
# Files are addressed as
#   results/<sketch>_1000_<k>_<delta>_<dist>_1_<tau>_results.npy
dist_values = ["laplace", "point_mass", "cauchy", "gamma", "normal"]
delta_values = [0.01, 0.05, 0.1, 0.2]
sketch_values = ["sparse_sign", "clarkson_woodruff", "uniform_sparse", "normal", "uniform_dense", "proposal1", "proposal2"]
tau = [1.35]
k_values = [20, 40, 80, 160]

rows = []
for dist in dist_values:
    for delta in delta_values:
        for sketch in sketch_values:
            for k in k_values:
                prefix = f"results/{sketch}_1000_{k}_{delta}_{dist}_1_"
                if sketch in ('proposal1', 'proposal2'):
                    # The proposals are discovered on disk (any tau), so they
                    # are globbed once per configuration rather than once per
                    # entry of `tau`, which would duplicate rows.
                    # sorted() makes the row order reproducible; glob order is arbitrary.
                    paths = sorted(glob.glob(f"{prefix}*results.npy"))
                    if not paths:
                        # Fail loudly, like np.load does in the other branch,
                        # instead of silently leaving a hole in the tables.
                        raise FileNotFoundError(f"no result files match {prefix}*results.npy")
                    for path in paths:
                        # tau is the second-to-last '_'-separated field of the file name.
                        file_tau = float(path.split('_')[-2])
                        rows.extend([sketch, k, delta, dist, file_tau, mse] for mse in np.load(path))
                else:
                    for t in tau:
                        res = np.load(f"{prefix}{t}_results.npy")
                        rows.extend([sketch, k, delta, dist, t, mse] for mse in res)

df = pd.DataFrame(rows, columns=['sketch', 'k', 'delta', 'dist', 't', 'mse'])
print(df)

            sketch    k  delta     dist     t       mse
0      sparse_sign   20   0.01  laplace  1.35  0.000510
1      sparse_sign   20   0.01  laplace  1.35  0.000246
2      sparse_sign   20   0.01  laplace  1.35  0.000244
3      sparse_sign   20   0.01  laplace  1.35  0.000356
4      sparse_sign   20   0.01  laplace  1.35  0.000294
...            ...  ...    ...      ...   ...       ...
71995    proposal2  160   0.20   normal  1.18  0.001104
71996    proposal2  160   0.20   normal  1.18  0.001463
71997    proposal2  160   0.20   normal  1.18  0.000660
71998    proposal2  160   0.20   normal  1.18  0.001133
71999    proposal2  160   0.20   normal  1.18  0.000930

[72000 rows x 6 columns]


In [180]:
# Split each proposal into two display labels: the run with t == 1.35 keeps
# the plain sketch name, any other t becomes "<sketch>(adaptive)".
# All other sketches keep their name unchanged.
def get_param(row):
    """Return the display label for one row, read from row['sketch'] and row['t']."""
    name = row['sketch']
    if name not in ('proposal1', 'proposal2'):
        # 't' is not consulted for the non-proposal sketches.
        return name
    return name if row['t'] == 1.35 else name + "(adaptive)"
# Guard so the cell can be re-run: once 't' has been dropped the labels are
# already final, and calling get_param again would raise KeyError on row['t'].
if 't' in df.columns:
    df['sketch'] = df.apply(get_param, axis=1)
    df = df.drop(columns=['t'])

In [181]:
from scipy.stats import median_abs_deviation
# Robust spread of a sample: 1.4826 * MAD estimates the standard deviation for
# normally distributed data; dividing by sqrt(n) gives a standard-error-style
# quantity.  It is NOT the half-width of a 95% confidence interval.
def robust_error(arr):
    """Return ``1.4826 * MAD(arr) / sqrt(len(arr))``."""
    return 1.4826 * median_abs_deviation(arr) / np.sqrt(len(arr))


# table with row and column, other values are picked from params
def check_table(df, column, row='sketch', *, precision=4, **params):
    """Build a table of "median ± robust_error" strings for the 'mse' column.

    Parameters
    ----------
    df : DataFrame with columns 'k', 'delta', 'dist', 'mse', plus `row`.
    column : name of the column whose values become the table's columns.
    row : name of the column whose values become the table's index.
    precision : number of decimals used for both numbers in each cell.
    **params : value to hold fixed for every one of 'k', 'delta', 'dist' that
        is neither `column` nor `row`.  Values supplied for `column`/`row`
        themselves are accepted and ignored.

    Returns
    -------
    DataFrame indexed by `row`, with one column per distinct `column` value.

    Raises
    ------
    KeyError : a parameter that must be fixed is missing from `params`.
    ValueError : no rows of `df` match the requested parameter values.
    """
    names = ['k', 'delta', 'dist']
    fixed = [p for p in names if p not in (column, row)]
    missing = [p for p in fixed if p not in params]
    if missing:
        raise KeyError(f"check_table needs a value for {missing} when column={column!r}")

    subset = df
    for p in fixed:
        subset = subset[subset[p] == params[p]]
    if subset.empty:
        raise ValueError(f"no rows match {({p: params[p] for p in fixed})}")

    # Aggregate MSE and error per (row, column) cell
    agg_results = subset.groupby([row, column]).agg(
        mse_median=('mse', 'median'),
        mse_error=('mse', robust_error)
    ).reset_index()

    # Create formatted results as MSE ± Error
    template = f"{{:.{precision}f}} ± {{:.{precision}f}}"
    agg_results['mse_formatted'] = [
        template.format(med, err)
        for med, err in zip(agg_results['mse_median'], agg_results['mse_error'])
    ]

    # Each (row, column) pair is unique after the groupby, so 'first' just
    # passes the formatted string through.
    pivot_table = agg_results.pivot_table(
        index=row, columns=column, values='mse_formatted', aggfunc='first'
    )
    return pivot_table

# Let's see how things turn out when the sketch dimension grows

In [182]:
# MSE vs. sketch dimension k: dist='normal', delta=0.1.
# k is the column axis, so check_table applies no k filter and no k= value is needed.
# NOTE(review): in the recorded output the sparse_sign row is identical for every k —
# confirm the experiment actually varies k for that sketch.
pivot_table = check_table(df, 'k', delta=0.1, dist='normal')
pivot_table

k,20,40,80,160
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,0.2306 ± 0.0297,0.0060 ± 0.0002,0.0020 ± 0.0001,0.0010 ± 0.0000
normal,0.2654 ± 0.0346,0.0055 ± 0.0002,0.0021 ± 0.0001,0.0009 ± 0.0000
proposal1,0.5420 ± 0.0765,0.0052 ± 0.0002,0.0017 ± 0.0001,0.0008 ± 0.0000
proposal1(adaptive),0.5600 ± 0.0793,0.0053 ± 0.0002,0.0016 ± 0.0001,0.0008 ± 0.0000
proposal2,9.8166 ± 1.4468,0.0052 ± 0.0002,0.0020 ± 0.0001,0.0009 ± 0.0000
proposal2(adaptive),3.0389 ± 0.4462,0.0058 ± 0.0003,0.0019 ± 0.0001,0.0009 ± 0.0000
sparse_sign,0.0004 ± 0.0000,0.0004 ± 0.0000,0.0004 ± 0.0000,0.0004 ± 0.0000
uniform_dense,0.2729 ± 0.0338,0.0049 ± 0.0002,0.0019 ± 0.0001,0.0010 ± 0.0000
uniform_sparse,0.5048 ± 0.0704,0.0061 ± 0.0003,0.0020 ± 0.0001,0.0010 ± 0.0000


In [183]:
# MSE vs. sketch dimension k: dist='laplace', delta=0.1.
# k is the column axis, so check_table applies no k filter and no k= value is needed.
pivot_table = check_table(df, 'k', delta=0.1, dist='laplace')
pivot_table

k,20,40,80,160
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,0.2038 ± 0.0250,0.0070 ± 0.0003,0.0022 ± 0.0001,0.0010 ± 0.0000
normal,0.2174 ± 0.0257,0.0059 ± 0.0003,0.0022 ± 0.0001,0.0010 ± 0.0000
proposal1,0.6493 ± 0.0920,0.0054 ± 0.0002,0.0017 ± 0.0001,0.0008 ± 0.0000
proposal1(adaptive),0.6207 ± 0.0871,0.0054 ± 0.0003,0.0017 ± 0.0001,0.0008 ± 0.0000
proposal2,16.9261 ± 2.5014,0.0053 ± 0.0002,0.0021 ± 0.0001,0.0009 ± 0.0000
proposal2(adaptive),0.4935 ± 0.0687,0.0057 ± 0.0002,0.0020 ± 0.0001,0.0009 ± 0.0000
sparse_sign,0.0005 ± 0.0000,0.0005 ± 0.0000,0.0005 ± 0.0000,0.0005 ± 0.0000
uniform_dense,0.3406 ± 0.0416,0.0062 ± 0.0003,0.0022 ± 0.0001,0.0011 ± 0.0000
uniform_sparse,0.5408 ± 0.0754,0.0066 ± 0.0003,0.0022 ± 0.0001,0.0010 ± 0.0000


In [184]:
# MSE vs. sketch dimension k: dist='cauchy', delta=0.1.
# k is the column axis, so check_table applies no k filter and no k= value is needed.
pivot_table = check_table(df, 'k', delta=0.1, dist='cauchy')
pivot_table

k,20,40,80,160
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,5.8899 ± 0.8172,0.1048 ± 0.0127,0.0314 ± 0.0033,0.0143 ± 0.0016
normal,8.3542 ± 1.1629,0.1067 ± 0.0123,0.0377 ± 0.0042,0.0151 ± 0.0015
proposal1,1.0500 ± 0.1498,0.0100 ± 0.0006,0.0030 ± 0.0002,0.0015 ± 0.0001
proposal1(adaptive),0.9964 ± 0.1418,0.0105 ± 0.0007,0.0038 ± 0.0003,0.0016 ± 0.0001
proposal2,68.4596 ± 10.1427,0.0077 ± 0.0004,0.0028 ± 0.0001,0.0014 ± 0.0001
proposal2(adaptive),1.2627 ± 0.1788,0.0105 ± 0.0007,0.0035 ± 0.0002,0.0018 ± 0.0001
sparse_sign,0.0071 ± 0.0008,0.0071 ± 0.0008,0.0071 ± 0.0008,0.0071 ± 0.0008
uniform_dense,8.2911 ± 1.1523,0.0994 ± 0.0110,0.0291 ± 0.0033,0.0155 ± 0.0019
uniform_sparse,1.2056 ± 0.1701,0.0118 ± 0.0009,0.0041 ± 0.0003,0.0030 ± 0.0003


In [185]:
# MSE vs. sketch dimension k: dist='point_mass', delta=0.1.
# k is the column axis, so check_table applies no k filter and no k= value is needed.
pivot_table = check_table(df, 'k', delta=0.1, dist='point_mass')
pivot_table

k,20,40,80,160
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,3087.4627 ± 395.9681,62.4082 ± 2.8958,22.9365 ± 0.9959,10.7333 ± 0.4785
normal,3647.9810 ± 470.0464,59.9009 ± 2.6236,22.8845 ± 1.0887,10.5725 ± 0.3318
proposal1,2418.2769 ± 358.4407,2.9827 ± 0.4232,0.0248 ± 0.0021,0.0078 ± 0.0004
proposal1(adaptive),2734.6393 ± 402.8865,16.5652 ± 1.8400,0.3196 ± 0.0242,0.0160 ± 0.0012
proposal2,8686.2092 ± 1242.3819,0.0144 ± 0.0012,0.0046 ± 0.0003,0.5803 ± 0.0858
proposal2(adaptive),2501.9426 ± 370.3778,30.7145 ± 2.2962,5.1501 ± 0.4921,1.9962 ± 0.1414
sparse_sign,5.1331 ± 0.2194,5.1331 ± 0.2194,5.1331 ± 0.2194,5.1331 ± 0.2194
uniform_dense,3667.4109 ± 436.7993,70.1011 ± 3.4894,23.9689 ± 0.9293,10.2201 ± 0.3713
uniform_sparse,3146.5431 ± 434.8561,58.3723 ± 4.3089,22.4685 ± 0.8791,10.8385 ± 0.3776


In [186]:
# MSE vs. sketch dimension k: dist='cauchy', delta=0.2.
# k is the column axis, so check_table applies no k filter and no k= value is needed.
pivot_table = check_table(df, 'k', delta=0.2, dist='cauchy')
pivot_table

k,20,40,80,160
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,19.8683 ± 2.6832,0.2356 ± 0.0260,0.0808 ± 0.0086,0.0374 ± 0.0037
normal,20.2898 ± 2.9103,0.2628 ± 0.0300,0.0754 ± 0.0084,0.0348 ± 0.0040
proposal1,4.6310 ± 0.6745,0.0162 ± 0.0011,0.0049 ± 0.0004,0.0027 ± 0.0002
proposal1(adaptive),3.7920 ± 0.5516,0.0182 ± 0.0016,0.0061 ± 0.0004,0.0023 ± 0.0001
proposal2,115.1846 ± 17.0211,0.0093 ± 0.0005,0.0044 ± 0.0003,0.0021 ± 0.0001
proposal2(adaptive),6.4684 ± 0.9507,0.0176 ± 0.0017,0.0065 ± 0.0005,0.0026 ± 0.0002
sparse_sign,0.0197 ± 0.0022,0.0197 ± 0.0022,0.0197 ± 0.0022,0.0197 ± 0.0022
uniform_dense,17.8025 ± 2.4045,0.2512 ± 0.0293,0.0883 ± 0.0098,0.0400 ± 0.0043
uniform_sparse,2.0186 ± 0.2896,0.0192 ± 0.0017,0.0083 ± 0.0008,0.0086 ± 0.0010


In [187]:
# MSE vs. sketch dimension k: dist='point_mass', delta=0.2.
# k is the column axis, so check_table applies no k filter and no k= value is needed.
pivot_table = check_table(df, 'k', delta=0.2, dist='point_mass')
pivot_table

k,20,40,80,160
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,6840.0080 ± 831.1572,131.6612 ± 5.7751,45.2981 ± 1.9036,20.0187 ± 0.7674
normal,5853.3337 ± 677.9919,124.2310 ± 6.5136,43.2440 ± 1.5610,19.6280 ± 0.5652
proposal1,5831.2679 ± 699.1851,41.6679 ± 4.4995,0.4113 ± 0.0483,0.0302 ± 0.0029
proposal1(adaptive),5658.2494 ± 691.4565,77.9503 ± 5.5722,3.8857 ± 0.2935,0.3528 ± 0.0297
proposal2,37034.1420 ± 5325.2169,72.8869 ± 6.6497,9.1048 ± 1.0412,2.2721 ± 0.2041
proposal2(adaptive),3056.7012 ± 336.7009,102.1862 ± 6.4041,23.8674 ± 1.2516,6.1221 ± 0.2966
sparse_sign,10.2283 ± 0.4236,10.2283 ± 0.4236,10.2283 ± 0.4236,10.2283 ± 0.4236
uniform_dense,6603.3868 ± 808.0948,131.9935 ± 6.0187,45.6055 ± 1.8991,20.6390 ± 0.6635
uniform_sparse,5777.5679 ± 674.7145,130.0940 ± 7.8005,45.7971 ± 1.8319,20.7960 ± 0.7903


# Let's see how things turn out as the contamination level grows

In [188]:
# MSE vs. delta: dist='normal', k=40.
# delta is the column axis, so check_table applies no delta filter and no delta= value is needed.
pivot_table = check_table(df, 'delta', k=40, dist='normal')
pivot_table

delta,0.01,0.05,0.10,0.20
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,0.0060 ± 0.0002,0.0060 ± 0.0002,0.0060 ± 0.0002,0.0060 ± 0.0002
normal,0.0055 ± 0.0002,0.0055 ± 0.0002,0.0055 ± 0.0002,0.0055 ± 0.0002
proposal1,0.0052 ± 0.0002,0.0052 ± 0.0002,0.0052 ± 0.0002,0.0052 ± 0.0002
proposal1(adaptive),0.0053 ± 0.0002,0.0053 ± 0.0002,0.0053 ± 0.0002,0.0053 ± 0.0002
proposal2,0.0052 ± 0.0002,0.0052 ± 0.0002,0.0052 ± 0.0002,0.0052 ± 0.0002
proposal2(adaptive),0.0058 ± 0.0003,0.0058 ± 0.0003,0.0058 ± 0.0003,0.0058 ± 0.0003
sparse_sign,0.0004 ± 0.0000,0.0004 ± 0.0000,0.0004 ± 0.0000,0.0004 ± 0.0000
uniform_dense,0.0049 ± 0.0002,0.0049 ± 0.0002,0.0049 ± 0.0002,0.0049 ± 0.0002
uniform_sparse,0.0061 ± 0.0003,0.0061 ± 0.0003,0.0061 ± 0.0003,0.0061 ± 0.0003


In [189]:
# MSE vs. delta: dist='cauchy', k=40.
# delta is the column axis, so check_table applies no delta filter and no delta= value is needed.
pivot_table = check_table(df, 'delta', k=40, dist='cauchy')
pivot_table

delta,0.01,0.05,0.10,0.20
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,0.0083 ± 0.0006,0.0353 ± 0.0034,0.1048 ± 0.0127,0.2356 ± 0.0260
normal,0.0073 ± 0.0003,0.0340 ± 0.0035,0.1067 ± 0.0123,0.2628 ± 0.0300
proposal1,0.0057 ± 0.0003,0.0075 ± 0.0005,0.0100 ± 0.0006,0.0162 ± 0.0011
proposal1(adaptive),0.0057 ± 0.0003,0.0071 ± 0.0004,0.0105 ± 0.0007,0.0182 ± 0.0016
proposal2,0.0056 ± 0.0002,0.0062 ± 0.0003,0.0077 ± 0.0004,0.0093 ± 0.0005
proposal2(adaptive),0.0059 ± 0.0002,0.0073 ± 0.0004,0.0105 ± 0.0007,0.0176 ± 0.0017
sparse_sign,0.0005 ± 0.0000,0.0023 ± 0.0003,0.0071 ± 0.0008,0.0197 ± 0.0022
uniform_dense,0.0071 ± 0.0005,0.0320 ± 0.0034,0.0994 ± 0.0110,0.2512 ± 0.0293
uniform_sparse,0.0061 ± 0.0003,0.0073 ± 0.0005,0.0118 ± 0.0009,0.0192 ± 0.0017


In [190]:
# MSE vs. delta: dist='point_mass', k=40.
# delta is the column axis, so check_table applies no delta filter and no delta= value is needed.
pivot_table = check_table(df, 'delta', k=40, dist='point_mass')
pivot_table

delta,0.01,0.05,0.10,0.20
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,6.7773 ± 0.4414,34.0144 ± 1.5655,62.4082 ± 2.8958,131.6612 ± 5.7751
normal,5.1479 ± 0.3138,32.3338 ± 1.4702,59.9009 ± 2.6236,124.2310 ± 6.5136
proposal1,0.0135 ± 0.0007,0.1230 ± 0.0156,2.9827 ± 0.4232,41.6679 ± 4.4995
proposal1(adaptive),0.0082 ± 0.0005,0.9594 ± 0.1409,16.5652 ± 1.8400,77.9503 ± 5.5722
proposal2,0.0076 ± 0.0004,0.0105 ± 0.0008,0.0144 ± 0.0012,72.8869 ± 6.6497
proposal2(adaptive),0.0059 ± 0.0003,0.0094 ± 0.0008,30.7145 ± 2.2962,102.1862 ± 6.4041
sparse_sign,0.4603 ± 0.0225,2.4913 ± 0.0774,5.1331 ± 0.2194,10.2283 ± 0.4236
uniform_dense,5.9490 ± 0.3537,33.4385 ± 1.3439,70.1011 ± 3.4894,131.9935 ± 6.0187
uniform_sparse,0.0077 ± 0.0005,31.4147 ± 2.3166,58.3723 ± 4.3089,130.0940 ± 7.8005


In [191]:
# MSE vs. delta: dist='normal', k=160.
# delta is the column axis, so check_table applies no delta filter and no delta= value is needed.
pivot_table = check_table(df, 'delta', k=160, dist='normal')
pivot_table

delta,0.01,0.05,0.10,0.20
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,0.0010 ± 0.0000,0.0010 ± 0.0000,0.0010 ± 0.0000,0.0010 ± 0.0000
normal,0.0009 ± 0.0000,0.0009 ± 0.0000,0.0009 ± 0.0000,0.0009 ± 0.0000
proposal1,0.0008 ± 0.0000,0.0008 ± 0.0000,0.0008 ± 0.0000,0.0008 ± 0.0000
proposal1(adaptive),0.0008 ± 0.0000,0.0008 ± 0.0000,0.0008 ± 0.0000,0.0008 ± 0.0000
proposal2,0.0009 ± 0.0000,0.0009 ± 0.0000,0.0009 ± 0.0000,0.0009 ± 0.0000
proposal2(adaptive),0.0009 ± 0.0000,0.0009 ± 0.0000,0.0009 ± 0.0000,0.0009 ± 0.0000
sparse_sign,0.0004 ± 0.0000,0.0004 ± 0.0000,0.0004 ± 0.0000,0.0004 ± 0.0000
uniform_dense,0.0010 ± 0.0000,0.0010 ± 0.0000,0.0010 ± 0.0000,0.0010 ± 0.0000
uniform_sparse,0.0010 ± 0.0000,0.0010 ± 0.0000,0.0010 ± 0.0000,0.0010 ± 0.0000


In [192]:
# MSE vs. delta: dist='cauchy', k=160.
# delta is the column axis, so check_table applies no delta filter and no delta= value is needed.
pivot_table = check_table(df, 'delta', k=160, dist='cauchy')
pivot_table

delta,0.01,0.05,0.10,0.20
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,0.0013 ± 0.0001,0.0050 ± 0.0005,0.0143 ± 0.0016,0.0374 ± 0.0037
normal,0.0012 ± 0.0001,0.0052 ± 0.0005,0.0151 ± 0.0015,0.0348 ± 0.0040
proposal1,0.0009 ± 0.0000,0.0011 ± 0.0001,0.0015 ± 0.0001,0.0027 ± 0.0002
proposal1(adaptive),0.0009 ± 0.0000,0.0012 ± 0.0000,0.0016 ± 0.0001,0.0023 ± 0.0001
proposal2,0.0010 ± 0.0000,0.0011 ± 0.0001,0.0014 ± 0.0001,0.0021 ± 0.0001
proposal2(adaptive),0.0009 ± 0.0000,0.0012 ± 0.0001,0.0018 ± 0.0001,0.0026 ± 0.0002
sparse_sign,0.0005 ± 0.0000,0.0023 ± 0.0003,0.0071 ± 0.0008,0.0197 ± 0.0022
uniform_dense,0.0013 ± 0.0001,0.0055 ± 0.0006,0.0155 ± 0.0019,0.0400 ± 0.0043
uniform_sparse,0.0011 ± 0.0000,0.0020 ± 0.0001,0.0030 ± 0.0003,0.0086 ± 0.0010


In [193]:
# MSE vs. delta: dist='point_mass', k=160.
# delta is the column axis, so check_table applies no delta filter and no delta= value is needed.
pivot_table = check_table(df, 'delta', k=160, dist='point_mass')
pivot_table

delta,0.01,0.05,0.10,0.20
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,0.9828 ± 0.0380,4.8845 ± 0.2323,10.7333 ± 0.4785,20.0187 ± 0.7674
normal,0.9512 ± 0.0419,5.4136 ± 0.1868,10.5725 ± 0.3318,19.6280 ± 0.5652
proposal1,0.0055 ± 0.0002,0.0072 ± 0.0003,0.0078 ± 0.0004,0.0302 ± 0.0029
proposal1(adaptive),0.0012 ± 0.0000,0.0026 ± 0.0001,0.0160 ± 0.0012,0.3528 ± 0.0297
proposal2,0.0014 ± 0.0001,0.0021 ± 0.0001,0.5803 ± 0.0858,2.2721 ± 0.2041
proposal2(adaptive),0.0011 ± 0.0000,0.4687 ± 0.0693,1.9962 ± 0.1414,6.1221 ± 0.2966
sparse_sign,0.4603 ± 0.0225,2.4913 ± 0.0774,5.1331 ± 0.2194,10.2283 ± 0.4236
uniform_dense,0.9715 ± 0.0354,5.4505 ± 0.2278,10.2201 ± 0.3713,20.6390 ± 0.6635
uniform_sparse,0.8186 ± 0.0713,5.2103 ± 0.2363,10.8385 ± 0.3776,20.7960 ± 0.7903


# What about distributions?

In [194]:
# MSE per distribution: k=40, delta=0.1.
# dist is the column axis, so check_table applies no dist filter and no dist= value is needed.
pivot_table = check_table(df, 'dist', k=40, delta=0.1)
pivot_table

dist,cauchy,gamma,laplace,normal,point_mass
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
clarkson_woodruff,0.1048 ± 0.0127,0.0073 ± 0.0003,0.0070 ± 0.0003,0.0060 ± 0.0002,62.4082 ± 2.8958
normal,0.1067 ± 0.0123,0.0063 ± 0.0002,0.0059 ± 0.0003,0.0055 ± 0.0002,59.9009 ± 2.6236
proposal1,0.0100 ± 0.0006,0.0053 ± 0.0003,0.0054 ± 0.0002,0.0052 ± 0.0002,2.9827 ± 0.4232
proposal1(adaptive),0.0105 ± 0.0007,0.0054 ± 0.0003,0.0054 ± 0.0003,0.0053 ± 0.0002,16.5652 ± 1.8400
proposal2,0.0077 ± 0.0004,0.0059 ± 0.0003,0.0053 ± 0.0002,0.0052 ± 0.0002,0.0144 ± 0.0012
proposal2(adaptive),0.0105 ± 0.0007,0.0059 ± 0.0002,0.0057 ± 0.0002,0.0058 ± 0.0003,30.7145 ± 2.2962
sparse_sign,0.0071 ± 0.0008,0.0005 ± 0.0000,0.0005 ± 0.0000,0.0004 ± 0.0000,5.1331 ± 0.2194
uniform_dense,0.0994 ± 0.0110,0.0064 ± 0.0003,0.0062 ± 0.0003,0.0049 ± 0.0002,70.1011 ± 3.4894
uniform_sparse,0.0118 ± 0.0009,0.0069 ± 0.0003,0.0066 ± 0.0003,0.0061 ± 0.0003,58.3723 ± 4.3089


In [195]:
# MSE per distribution: k=40, delta=0.2.
# dist is the column axis, so check_table applies no dist filter and no dist= value is needed.
pivot_table = check_table(df, 'dist', k=40, delta=0.2)
pivot_table

dist,cauchy,gamma,laplace,normal,point_mass
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
clarkson_woodruff,0.2356 ± 0.0260,0.0073 ± 0.0003,0.0074 ± 0.0004,0.0060 ± 0.0002,131.6612 ± 5.7751
normal,0.2628 ± 0.0300,0.0065 ± 0.0003,0.0066 ± 0.0003,0.0055 ± 0.0002,124.2310 ± 6.5136
proposal1,0.0162 ± 0.0011,0.0054 ± 0.0002,0.0064 ± 0.0003,0.0052 ± 0.0002,41.6679 ± 4.4995
proposal1(adaptive),0.0182 ± 0.0016,0.0061 ± 0.0003,0.0067 ± 0.0003,0.0053 ± 0.0002,77.9503 ± 5.5722
proposal2,0.0093 ± 0.0005,0.0052 ± 0.0003,0.0058 ± 0.0003,0.0052 ± 0.0002,72.8869 ± 6.6497
proposal2(adaptive),0.0176 ± 0.0017,0.0062 ± 0.0003,0.0060 ± 0.0003,0.0058 ± 0.0003,102.1862 ± 6.4041
sparse_sign,0.0197 ± 0.0022,0.0005 ± 0.0000,0.0005 ± 0.0000,0.0004 ± 0.0000,10.2283 ± 0.4236
uniform_dense,0.2512 ± 0.0293,0.0071 ± 0.0003,0.0070 ± 0.0003,0.0049 ± 0.0002,131.9935 ± 6.0187
uniform_sparse,0.0192 ± 0.0017,0.0075 ± 0.0003,0.0073 ± 0.0004,0.0061 ± 0.0003,130.0940 ± 7.8005


In [196]:
# MSE per distribution: k=160, delta=0.1.
# dist is the column axis, so check_table applies no dist filter and no dist= value is needed.
pivot_table = check_table(df, 'dist', k=160, delta=0.1)
pivot_table

dist,cauchy,gamma,laplace,normal,point_mass
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
clarkson_woodruff,0.0143 ± 0.0016,0.0010 ± 0.0000,0.0010 ± 0.0000,0.0010 ± 0.0000,10.7333 ± 0.4785
normal,0.0151 ± 0.0015,0.0010 ± 0.0000,0.0010 ± 0.0000,0.0009 ± 0.0000,10.5725 ± 0.3318
proposal1,0.0015 ± 0.0001,0.0009 ± 0.0000,0.0008 ± 0.0000,0.0008 ± 0.0000,0.0078 ± 0.0004
proposal1(adaptive),0.0016 ± 0.0001,0.0008 ± 0.0000,0.0008 ± 0.0000,0.0008 ± 0.0000,0.0160 ± 0.0012
proposal2,0.0014 ± 0.0001,0.0010 ± 0.0000,0.0009 ± 0.0000,0.0009 ± 0.0000,0.5803 ± 0.0858
proposal2(adaptive),0.0018 ± 0.0001,0.0010 ± 0.0000,0.0009 ± 0.0000,0.0009 ± 0.0000,1.9962 ± 0.1414
sparse_sign,0.0071 ± 0.0008,0.0005 ± 0.0000,0.0005 ± 0.0000,0.0004 ± 0.0000,5.1331 ± 0.2194
uniform_dense,0.0155 ± 0.0019,0.0011 ± 0.0000,0.0011 ± 0.0000,0.0010 ± 0.0000,10.2201 ± 0.3713
uniform_sparse,0.0030 ± 0.0003,0.0011 ± 0.0000,0.0010 ± 0.0000,0.0010 ± 0.0000,10.8385 ± 0.3776


In [197]:
# MSE per distribution: k=160, delta=0.2.
# dist is the column axis, so check_table applies no dist filter and no dist= value is needed.
pivot_table = check_table(df, 'dist', k=160, delta=0.2)
pivot_table

dist,cauchy,gamma,laplace,normal,point_mass
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
clarkson_woodruff,0.0374 ± 0.0037,0.0011 ± 0.0000,0.0011 ± 0.0000,0.0010 ± 0.0000,20.0187 ± 0.7674
normal,0.0348 ± 0.0040,0.0011 ± 0.0000,0.0011 ± 0.0000,0.0009 ± 0.0000,19.6280 ± 0.5652
proposal1,0.0027 ± 0.0002,0.0008 ± 0.0000,0.0008 ± 0.0000,0.0008 ± 0.0000,0.0302 ± 0.0029
proposal1(adaptive),0.0023 ± 0.0001,0.0008 ± 0.0000,0.0008 ± 0.0000,0.0008 ± 0.0000,0.3528 ± 0.0297
proposal2,0.0021 ± 0.0001,0.0010 ± 0.0000,0.0010 ± 0.0000,0.0009 ± 0.0000,2.2721 ± 0.2041
proposal2(adaptive),0.0026 ± 0.0002,0.0010 ± 0.0000,0.0009 ± 0.0000,0.0009 ± 0.0000,6.1221 ± 0.2966
sparse_sign,0.0197 ± 0.0022,0.0005 ± 0.0000,0.0005 ± 0.0000,0.0004 ± 0.0000,10.2283 ± 0.4236
uniform_dense,0.0400 ± 0.0043,0.0011 ± 0.0000,0.0011 ± 0.0000,0.0010 ± 0.0000,20.6390 ± 0.6635
uniform_sparse,0.0086 ± 0.0010,0.0012 ± 0.0000,0.0012 ± 0.0000,0.0010 ± 0.0000,20.7960 ± 0.7903


# Check Time

In [198]:
# Load every saved timing array into one long-format DataFrame (one row per run).
# Files are addressed as
#   results/<sketch>_1000_<k>_<delta>_<dist>_1_<tau>_times.npy
# NOTE(review): this rebinds `df`, replacing the MSE frame built earlier in the notebook.
dist_values = ["laplace", "point_mass", "cauchy", "gamma", "normal"]
delta_values = [0.01, 0.05, 0.1, 0.2]
sketch_values = ["sparse_sign", "clarkson_woodruff", "uniform_sparse", "normal", "uniform_dense", "proposal1", "proposal2"]
tau = [1.35]
k_values = [20, 40, 80, 160]

rows = []
for dist in dist_values:
    for delta in delta_values:
        for sketch in sketch_values:
            for k in k_values:
                prefix = f"results/{sketch}_1000_{k}_{delta}_{dist}_1_"
                if sketch in ('proposal1', 'proposal2'):
                    # The proposals are discovered on disk (any tau), so they
                    # are globbed once per configuration rather than once per
                    # entry of `tau`, which would duplicate rows.
                    # sorted() makes the row order reproducible; glob order is arbitrary.
                    paths = sorted(glob.glob(f"{prefix}*times.npy"))
                    if not paths:
                        # Fail loudly, like np.load does in the other branch,
                        # instead of silently leaving a hole in the tables.
                        raise FileNotFoundError(f"no timing files match {prefix}*times.npy")
                    for path in paths:
                        # tau is the second-to-last '_'-separated field of the file name.
                        file_tau = float(path.split('_')[-2])
                        rows.extend([sketch, k, delta, dist, file_tau, elapsed] for elapsed in np.load(path))
                else:
                    for t in tau:
                        res = np.load(f"{prefix}{t}_times.npy")
                        rows.extend([sketch, k, delta, dist, t, elapsed] for elapsed in res)

# The value column keeps the name 'mse' because check_table aggregates df['mse'];
# here it holds the values read from the *_times.npy files.
# NOTE(review): the sketch labels are not split by t for this frame, so the
# proposal1/proposal2 rows of the timing table pool every tau found on disk —
# confirm that is intended.
df = pd.DataFrame(rows, columns=['sketch', 'k', 'delta', 'dist', 't', 'mse'])
print(df)


            sketch    k  delta     dist     t       mse
0      sparse_sign   20   0.01  laplace  1.35  0.000321
1      sparse_sign   20   0.01  laplace  1.35  0.000159
2      sparse_sign   20   0.01  laplace  1.35  0.000145
3      sparse_sign   20   0.01  laplace  1.35  0.000139
4      sparse_sign   20   0.01  laplace  1.35  0.000139
...            ...  ...    ...      ...   ...       ...
71995    proposal2  160   0.20   normal  1.35  0.000293
71996    proposal2  160   0.20   normal  1.35  0.000298
71997    proposal2  160   0.20   normal  1.35  0.000307
71998    proposal2  160   0.20   normal  1.35  0.000315
71999    proposal2  160   0.20   normal  1.35  0.000293

[72000 rows x 6 columns]


In [199]:
from scipy.stats import median_abs_deviation
# This cell redefines robust_error / check_table (shadowing the definitions
# used for the MSE tables) so that timings are printed with 6 decimals.
#
# Robust spread of a sample: 1.4826 * MAD estimates the standard deviation for
# normally distributed data; dividing by sqrt(n) gives a standard-error-style
# quantity.  It is NOT the half-width of a 95% confidence interval.
def robust_error(arr):
    """Return ``1.4826 * MAD(arr) / sqrt(len(arr))``."""
    return 1.4826 * median_abs_deviation(arr) / np.sqrt(len(arr))


# table with row and column, other values are picked from params
def check_table(df, column, row='sketch', *, precision=6, **params):
    """Build a table of "median ± robust_error" strings for the 'mse' column.

    Parameters
    ----------
    df : DataFrame with columns 'k', 'delta', 'dist', 'mse', plus `row`.
    column : name of the column whose values become the table's columns.
    row : name of the column whose values become the table's index.
    precision : number of decimals used for both numbers in each cell.
    **params : value to hold fixed for every one of 'k', 'delta', 'dist' that
        is neither `column` nor `row`.  Values supplied for `column`/`row`
        themselves are accepted and ignored.

    Returns
    -------
    DataFrame indexed by `row`, with one column per distinct `column` value.

    Raises
    ------
    KeyError : a parameter that must be fixed is missing from `params`.
    ValueError : no rows of `df` match the requested parameter values.
    """
    names = ['k', 'delta', 'dist']
    fixed = [p for p in names if p not in (column, row)]
    missing = [p for p in fixed if p not in params]
    if missing:
        raise KeyError(f"check_table needs a value for {missing} when column={column!r}")

    subset = df
    for p in fixed:
        subset = subset[subset[p] == params[p]]
    if subset.empty:
        raise ValueError(f"no rows match {({p: params[p] for p in fixed})}")

    # Aggregate the value column and its error per (row, column) cell
    agg_results = subset.groupby([row, column]).agg(
        mse_median=('mse', 'median'),
        mse_error=('mse', robust_error)
    ).reset_index()

    # Create formatted results as median ± error
    template = f"{{:.{precision}f}} ± {{:.{precision}f}}"
    agg_results['mse_formatted'] = [
        template.format(med, err)
        for med, err in zip(agg_results['mse_median'], agg_results['mse_error'])
    ]

    # Each (row, column) pair is unique after the groupby, so 'first' just
    # passes the formatted string through.
    pivot_table = agg_results.pivot_table(
        index=row, columns=column, values='mse_formatted', aggfunc='first'
    )
    return pivot_table

In [200]:
# Timing vs. sketch dimension k: dist='point_mass', delta=0.2.
# `df` here is the frame built from the *_times.npy files, so the cells show times, not MSE.
# k is the column axis, so check_table applies no k filter and no k= value is needed.
pivot_table = check_table(df, 'k', delta=0.2, dist='point_mass')
pivot_table

k,20,40,80,160
sketch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
clarkson_woodruff,0.000054 ± 0.000002,0.000065 ± 0.000001,0.000076 ± 0.000001,0.000185 ± 0.000003
normal,0.000124 ± 0.000005,0.000176 ± 0.000009,0.000146 ± 0.000003,0.000304 ± 0.000015
proposal1,0.000404 ± 0.000021,0.000478 ± 0.000025,0.000268 ± 0.000004,0.000590 ± 0.000033
proposal2,0.000285 ± 0.000009,0.000499 ± 0.000026,0.000547 ± 0.000030,0.000615 ± 0.000033
sparse_sign,0.000143 ± 0.000002,0.000134 ± 0.000000,0.000327 ± 0.000022,0.000255 ± 0.000017
uniform_dense,0.000128 ± 0.000006,0.000132 ± 0.000004,0.000212 ± 0.000011,0.000309 ± 0.000016
uniform_sparse,0.000044 ± 0.000001,0.000124 ± 0.000003,0.000146 ± 0.000002,0.000083 ± 0.000001
