In [2]:
import sys
import os
import glob

# Add the parent directory to sys.path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from ml_attack import LWEDataset
from ml_attack.utils import get_percentage_true_b, get_train_default_params

import numpy as np
from collections import defaultdict
from tqdm import tqdm

from plotly import graph_objects as go


# Multiple attack

Attack multiple Hamming weights across multiple checkpoints.

In [13]:
# Glob pattern for the checkpoint files to attack; it is expanded with glob.glob below.
filenames = "./../data/dual/data_n_128_k_1_s_binary_2c7a7_*.pkl"


def extract_filenumber(f):
    """Return the number that follows the last underscore of the file name, as a float.

    The directory part and the extension are dropped first, so
    ".../data_n_128_k_1_s_binary_2c7a7_5.pkl" gives 5.0.
    """
    stem, _extension = os.path.splitext(os.path.basename(f))
    suffix = stem.rsplit('_', 1)[-1]
    return float(suffix)

# Expand the glob pattern into concrete paths (skipped when `filenames` is already a list).
if isinstance(filenames, str):
    filenames = sorted(glob.glob(filenames))

# Fail the cell with an exception instead of calling exit(1): in a notebook exit()
# ends the kernel session, and an empty list would crash below on filenames[-1] anyway.
if not filenames:
    raise FileNotFoundError("No output files found.")

# Keep every 4th checkpoint (1, 5, 9, ..., 45) and always include the last one.
numbers = range(1, 48, 4)
filenames = sorted(filenames, key=extract_filenumber)
selected = [f for f in filenames if extract_filenumber(f) in numbers] + [filenames[-1]]
# dict.fromkeys removes the possible duplicate of the last file like set() did, but
# keeps the checkpoint order, so later cells iterate and plot the files in sequence.
filenames = list(dict.fromkeys(selected))
print(f"Found {len(filenames)} output files.")

Found 7 output files.


In [3]:
def _measure_true_b(dataset):
    """Call initialize_secret() and approximate_b(), then return get_percentage_true_b's value."""
    dataset.initialize_secret()
    dataset.approximate_b()
    return get_percentage_true_b(dataset, verbose=False)


# Maps each checkpoint file to {hamming weight: mean of get_percentage_true_b over num_rep runs}.
percentages_b = defaultdict()
num_rep = 4  # Number of repetitions for each hamming weight

for filename in filenames:
    dataset = LWEDataset.load_reduced(filename)
    dataset.params.update({'verbose': False, 'seed': None, 'secret_type': 'binary'})

    # Every 8th hamming weight from 1 up to (but excluding) n // 2.
    choosen_hw = range(1, dataset.params['n'] // 2, 8)
    hw_results = {}

    for hw in tqdm(choosen_hw, desc=f"Processing {filename}"):
        dataset.params['hw'] = hw
        # A fresh secret is initialised for every repetition before measuring.
        hw_results[hw] = np.mean([_measure_true_b(dataset) for _ in range(num_rep)])

    percentages_b[filename] = hw_results

Processing ./../reduced_data/dual/data_n_128_k_1_s_binary_cdec2_1.pkl: 100%|██████████| 8/8 [00:25<00:00,  3.23s/it]
Processing ./../reduced_data/dual/data_n_128_k_1_s_binary_cdec2_5.pkl: 100%|██████████| 8/8 [00:26<00:00,  3.26s/it]
Processing ./../reduced_data/dual/data_n_128_k_1_s_binary_cdec2_9.pkl: 100%|██████████| 8/8 [00:26<00:00,  3.27s/it]
Processing ./../reduced_data/dual/data_n_128_k_1_s_binary_cdec2_13.pkl: 100%|██████████| 8/8 [00:26<00:00,  3.31s/it]
Processing ./../reduced_data/dual/data_n_128_k_1_s_binary_cdec2_17.pkl: 100%|██████████| 8/8 [00:27<00:00,  3.40s/it]
Processing ./../reduced_data/dual/data_n_128_k_1_s_binary_cdec2_21.pkl: 100%|██████████| 8/8 [00:25<00:00,  3.25s/it]
Processing ./../reduced_data/dual/data_n_128_k_1_s_binary_cdec2_25.pkl: 100%|██████████| 8/8 [00:27<00:00,  3.39s/it]
Processing ./../reduced_data/dual/data_n_128_k_1_s_binary_cdec2_29.pkl: 100%|██████████| 8/8 [00:27<00:00,  3.49s/it]
Processing ./../reduced_data/dual/data_n_128_k_1_s_binary_c

In [4]:
# One curve per checkpoint file: mean percentage of true b against hamming weight.
fig = go.Figure()

for filename in filenames:
  per_hw = percentages_b[filename]
  hw = list(per_hw)
  values = [per_hw[weight] for weight in hw]
  trace = go.Scatter(
    x=hw,
    y=values,
    mode='lines+markers',
    name=os.path.basename(filename),
  )
  fig.add_trace(trace)

layout_options = dict(
  xaxis_title='Hamming Weight',
  yaxis_title='Percentage of True b',
  template='plotly_white',
  bargap=0.2,
  margin=dict(l=20, r=20, t=10, b=20),
  width=550,
  height=400,
  showlegend=False,
)
fig.update_layout(**layout_options)

fig.show()

In [None]:
# One curve per hamming weight: mean percentage of true b against checkpoint number.
fig = go.Figure()

# Sort by checkpoint number so the x values are increasing; `filenames` may be an
# unordered collection, which would otherwise make the lines zig-zag.
ordered_files = sorted(filenames, key=extract_filenumber)
# Reuse extract_filenumber instead of re-parsing with int(), which raises on
# suffixes such as "1.0" (e.g. data_..._1.0.pkl).
file_numbers = [extract_filenumber(f) for f in ordered_files]
# Take the hamming weights from the stored results rather than from the
# `choosen_hw` loop variable left behind by the attack cell.
hw_list = sorted(set().union(*(percentages_b[f] for f in ordered_files)))

for hw in hw_list:
  percentages = [percentages_b[filename][hw] for filename in ordered_files]
  fig.add_trace(go.Scatter(
    x=file_numbers,
    y=percentages,
    mode='markers+lines',
    name=f'HW {hw}'
  ))

fig.update_layout(
  xaxis_title='File Number',
  yaxis_title='Percentage of True b',
  title='Percentage of True b per File for Each Hamming Weight',
  legend_title='Hamming Weight',
  template='plotly_white',
  bargap=0.2,
  margin=dict(l=20, r=20, t=10, b=20),
  width=550,
  height=400,
  showlegend=False
)

fig.show()


## Multiple training

In [None]:
# Glob pattern for the checkpoint files to train on; it is expanded with glob.glob below.
filenames = "./../data/salsa/data_n_128_k_1_s_binary_2c7a7_*.pkl"

def extract_filenumber(f):
    """Return the number that follows the last underscore of the file name.

    The directory part and the extension are dropped first. Integer suffixes
    ("..._12.pkl") are returned as int, exactly as before. Decimal suffixes such
    as "..._1.0.pkl" no longer raise: integral values come back as int (1) and
    fractional ones as float (2.5).

    Raises:
        ValueError: if the suffix is not a number at all.
    """
    suffix = os.path.splitext(os.path.basename(f))[0].split('_')[-1]
    try:
        return int(suffix)
    except ValueError:
        # Not a plain integer literal: accept decimal notation too.
        value = float(suffix)
        return int(value) if value.is_integer() else value


# Expand the glob pattern into concrete paths (skipped when `filenames` is already a list).
if isinstance(filenames, str):
    filenames = sorted(glob.glob(filenames))
    if not filenames:
        # Fail the cell with an exception instead of calling exit(1): in a notebook
        # exit() ends the kernel session rather than just stopping this cell.
        raise FileNotFoundError("No output files found.")

# Process checkpoints in increasing file-number order.
filenames = sorted(filenames, key=extract_filenumber)
print(f"Found {len(filenames)} output files.")

Found 35 output files.


In [4]:
secret_type = 'binary'
n = 128
# Hamming weights that have not been recovered yet; shrinks as checkpoints succeed.
choosen_hw = list(range(1, n // 2))

train_args = get_train_default_params()
train_args.update({
    'epsilon': 1.1,
    'use_ransac': True,
    'train_percentages': [0.1]
})

# hw -> number of the first checkpoint file on which training succeeded (-1: never).
success_dict = {hw: -1 for hw in choosen_hw}

for filename in filenames:
    if not choosen_hw:
        # Every hamming weight is already solved: no need to load further checkpoints.
        break

    # Load the dataset
    dataset = LWEDataset.load_reduced(filename)
    dataset.params['secret_type'] = secret_type
    dataset.params['verbose'] = False
    # Reuse the shared helper instead of re-parsing with int(), which raises on
    # suffixes such as "1.0" (e.g. data_..._1.0.pkl).
    file_number = int(extract_filenumber(filename))

    success_hw = []

    for hw in tqdm(choosen_hw, desc=f"Processing {filename}"):
        dataset.params['hw'] = hw
        dataset.initialize_secret()
        dataset.params.update(train_args)
        found, _ = dataset.train()
        if found:
            success_dict[hw] = file_number
            success_hw.append(hw)
        else:
            # Stop at the first failure; the remaining weights are retried on the next checkpoint.
            break

    # The loop stops at the first failure, so the solved weights are exactly the
    # leading entries of choosen_hw.
    choosen_hw = choosen_hw[len(success_hw):]

success_dict

Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_1.pkl:   0%|          | 0/63 [00:00<?, ?it/s]Extension for Scikit-learn* enabled (https://github.com/uxlfoundation/scikit-learn-intelex)


✔️ Patched scikit-learn (once).
Using RANSAC with residual threshold: 1859.089494887215
Using RANSAC with min_samples: 135
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_1.pkl:   2%|▏         | 1/63 [02:56<3:02:03, 176.18s/it]

RANSAC inliers: 1252 / 1348
Using RANSAC with residual threshold: 1859.089494887215
Using RANSAC with min_samples: 140
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_1.pkl:   3%|▎         | 2/63 [05:39<2:51:31, 168.71s/it]

RANSAC inliers: 1282 / 1348
Using RANSAC with residual threshold: 1859.089494887215
Using RANSAC with min_samples: 146
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_1.pkl:   5%|▍         | 3/63 [08:16<2:43:25, 163.42s/it]

RANSAC inliers: 1139 / 1348
Using RANSAC with residual threshold: 1859.089494887215
Using RANSAC with min_samples: 152
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_1.pkl:   6%|▋         | 4/63 [10:40<2:32:59, 155.58s/it]

RANSAC inliers: 1151 / 1348
Using RANSAC with residual threshold: 1859.089494887215
Using RANSAC with min_samples: 158
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_1.pkl:   6%|▋         | 4/63 [12:56<3:10:52, 194.11s/it]


RANSAC inliers: 1093 / 1348


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_2.pkl:   0%|          | 0/59 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1654.1667842149413
Using RANSAC with min_samples: 154
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_2.pkl:   0%|          | 0/59 [02:21<?, ?it/s]


RANSAC inliers: 1056 / 1388


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_3.pkl:   0%|          | 0/59 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1760.881810343897
Using RANSAC with min_samples: 152
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_3.pkl:   0%|          | 0/59 [02:23<?, ?it/s]


RANSAC inliers: 1108 / 1416


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_4.pkl:   0%|          | 0/59 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1805.7290079078864
Using RANSAC with min_samples: 151
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_4.pkl:   0%|          | 0/59 [02:15<?, ?it/s]


RANSAC inliers: 1201 / 1433


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_5.pkl:   0%|          | 0/59 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1790.516894642438
Using RANSAC with min_samples: 150
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_5.pkl:   0%|          | 0/59 [02:12<?, ?it/s]


RANSAC inliers: 1140 / 1449


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_6.pkl:   0%|          | 0/59 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1787.779139043747
Using RANSAC with min_samples: 149
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_6.pkl:   0%|          | 0/59 [02:08<?, ?it/s]


RANSAC inliers: 1256 / 1464


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_7.pkl:   0%|          | 0/59 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1765.7357531635362
Using RANSAC with min_samples: 148
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_7.pkl:   0%|          | 0/59 [02:23<?, ?it/s]


RANSAC inliers: 1213 / 1476


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_8.pkl:   0%|          | 0/59 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1722.2926580578574
Using RANSAC with min_samples: 147
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_8.pkl:   0%|          | 0/59 [02:20<?, ?it/s]


RANSAC inliers: 1207 / 1487


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_9.pkl:   0%|          | 0/59 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1727.9661455016992
Using RANSAC with min_samples: 146
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_9.pkl:   2%|▏         | 1/59 [02:08<2:04:40, 128.98s/it]

RANSAC inliers: 1203 / 1496
Using RANSAC with residual threshold: 1727.9661455016992
Using RANSAC with min_samples: 148
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_9.pkl:   2%|▏         | 1/59 [04:17<4:08:34, 257.14s/it]


RANSAC inliers: 1221 / 1496


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_10.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1761.6853436411393
Using RANSAC with min_samples: 148
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_10.pkl:   0%|          | 0/58 [02:10<?, ?it/s]


RANSAC inliers: 1243 / 1505


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_11.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1667.9797660643248
Using RANSAC with min_samples: 147
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_11.pkl:   0%|          | 0/58 [02:07<?, ?it/s]

RANSAC inliers: 1299 / 1514



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_12.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1694.5253760271635
Using RANSAC with min_samples: 146
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_12.pkl:   0%|          | 0/58 [02:14<?, ?it/s]


RANSAC inliers: 1233 / 1520


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_13.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1699.1643534396549
Using RANSAC with min_samples: 145
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_13.pkl:   0%|          | 0/58 [02:18<?, ?it/s]

RANSAC inliers: 1291 / 1526



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_14.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1676.6920558051202
Using RANSAC with min_samples: 145
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_14.pkl:   0%|          | 0/58 [02:22<?, ?it/s]


RANSAC inliers: 1246 / 1530


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_15.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1636.4360665788322
Using RANSAC with min_samples: 144
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_15.pkl:   0%|          | 0/58 [02:22<?, ?it/s]


RANSAC inliers: 1241 / 1533


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_16.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1636.9990073301817
Using RANSAC with min_samples: 143
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_16.pkl:   0%|          | 0/58 [02:32<?, ?it/s]


RANSAC inliers: 1215 / 1534


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_17.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1590.2377180786525
Using RANSAC with min_samples: 143
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_17.pkl:   0%|          | 0/58 [02:23<?, ?it/s]


RANSAC inliers: 1203 / 1535


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_18.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1584.5332909093452
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_18.pkl:   0%|          | 0/58 [02:20<?, ?it/s]

RANSAC inliers: 1190 / 1535



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_19.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1588.367558218185
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_19.pkl:   0%|          | 0/58 [02:26<?, ?it/s]


RANSAC inliers: 1256 / 1535


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_20.pkl:   0%|          | 0/58 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1584.5332909093452
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_20.pkl:   2%|▏         | 1/58 [02:16<2:09:48, 136.63s/it]

RANSAC inliers: 1245 / 1536
Using RANSAC with residual threshold: 1584.5332909093452
Using RANSAC with min_samples: 143
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_20.pkl:   3%|▎         | 2/58 [04:43<2:13:07, 142.63s/it]

RANSAC inliers: 1247 / 1536
Using RANSAC with residual threshold: 1584.5332909093452
Using RANSAC with min_samples: 144
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_20.pkl:   3%|▎         | 2/58 [07:04<3:17:52, 212.01s/it]

RANSAC inliers: 1160 / 1536



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_21.pkl:   0%|          | 0/56 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1563.4339608694702
Using RANSAC with min_samples: 143
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_21.pkl:   0%|          | 0/56 [02:31<?, ?it/s]


RANSAC inliers: 1260 / 1536


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_22.pkl:   0%|          | 0/56 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1563.4339608694702
Using RANSAC with min_samples: 143
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_22.pkl:   0%|          | 0/56 [03:02<?, ?it/s]

RANSAC inliers: 1253 / 1536



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_23.pkl:   0%|          | 0/56 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1578.7206054270653
Using RANSAC with min_samples: 143
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_23.pkl:   0%|          | 0/56 [02:22<?, ?it/s]


RANSAC inliers: 1197 / 1536


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_24.pkl:   0%|          | 0/56 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1551.244419812687
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_24.pkl:   0%|          | 0/56 [02:41<?, ?it/s]

RANSAC inliers: 1173 / 1536



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_25.pkl:   0%|          | 0/56 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1551.244419812687
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_25.pkl:   0%|          | 0/56 [02:34<?, ?it/s]

RANSAC inliers: 1217 / 1536



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_26.pkl:   0%|          | 0/56 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_26.pkl:   0%|          | 0/56 [02:30<?, ?it/s]

RANSAC inliers: 1195 / 1536



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_27.pkl:   0%|          | 0/56 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_27.pkl:   0%|          | 0/56 [02:28<?, ?it/s]


RANSAC inliers: 1215 / 1536


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_28.pkl:   0%|          | 0/56 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_28.pkl:   0%|          | 0/56 [03:03<?, ?it/s]

RANSAC inliers: 1212 / 1536



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_29.pkl:   0%|          | 0/56 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 141
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_29.pkl:   2%|▏         | 1/56 [03:11<2:55:06, 191.02s/it]

RANSAC inliers: 1256 / 1536
Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_29.pkl:   2%|▏         | 1/56 [05:39<5:10:57, 339.23s/it]


RANSAC inliers: 1222 / 1536


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_30.pkl:   0%|          | 0/55 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_30.pkl:   0%|          | 0/55 [02:39<?, ?it/s]

RANSAC inliers: 1183 / 1536



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_31.pkl:   0%|          | 0/55 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_31.pkl:   0%|          | 0/55 [02:35<?, ?it/s]

RANSAC inliers: 1165 / 1536



Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_32.pkl:   0%|          | 0/55 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_32.pkl:   0%|          | 0/55 [01:58<?, ?it/s]


RANSAC inliers: 1209 / 1536


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_33.pkl:   0%|          | 0/55 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_33.pkl:   0%|          | 0/55 [01:44<?, ?it/s]


RANSAC inliers: 1241 / 1536


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_34.pkl:   0%|          | 0/55 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 141
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_34.pkl:   0%|          | 0/55 [02:33<?, ?it/s]


RANSAC inliers: 1215 / 1536


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_35.pkl:   0%|          | 0/55 [00:00<?, ?it/s]

Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 141
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_35.pkl:   2%|▏         | 1/55 [02:50<2:33:13, 170.24s/it]

RANSAC inliers: 1217 / 1536
Using RANSAC with residual threshold: 1537.8782461560475
Using RANSAC with min_samples: 142
Using RANSAC with optimal max_trials: 100


Processing ./../reduced_data/salsa/data_n_128_k_1_s_binary_cdec2_35.pkl:   2%|▏         | 1/55 [05:13<4:42:08, 313.49s/it]

RANSAC inliers: 1198 / 1536





{1: 1,
 2: 1,
 3: 1,
 4: 1,
 5: 9,
 6: 20,
 7: 20,
 8: 29,
 9: 35,
 10: -1,
 11: -1,
 12: -1,
 13: -1,
 14: -1,
 15: -1,
 16: -1,
 17: -1,
 18: -1,
 19: -1,
 20: -1,
 21: -1,
 22: -1,
 23: -1,
 24: -1,
 25: -1,
 26: -1,
 27: -1,
 28: -1,
 29: -1,
 30: -1,
 31: -1,
 32: -1,
 33: -1,
 34: -1,
 35: -1,
 36: -1,
 37: -1,
 38: -1,
 39: -1,
 40: -1,
 41: -1,
 42: -1,
 43: -1,
 44: -1,
 45: -1,
 46: -1,
 47: -1,
 48: -1,
 49: -1,
 50: -1,
 51: -1,
 52: -1,
 53: -1,
 54: -1,
 55: -1,
 56: -1,
 57: -1,
 58: -1,
 59: -1,
 60: -1,
 61: -1,
 62: -1,
 63: -1}

In [3]:
from itertools import product

# Define the grid of hyperparameters to search
param_grid = get_train_default_params()
param_grid.update({
  'epsilon': [1.1, 1.0, 1.05, 1.15, 1.2, 1.3, 1.35, 1.4, 2],
  'use_ransac': False,
})

# Wrap scalar values in one-element lists so product() can iterate over every entry.
# Built as a new dict instead of reassigning entries while iterating the original.
# NOTE(review): a default whose value is itself a list is treated as a search axis and
# is passed to train() element by element - confirm this is intended for such keys.
param_grid = {k: (v if isinstance(v, list) else [v]) for k, v in param_grid.items()}

# Generate all combinations of hyperparameters
param_names = list(param_grid.keys())
param_combinations = list(product(*param_grid.values()))

results = []
secret_type = 'binary'
n = 128
choosen_hw = list(range(1, n // 2))

for params in param_combinations:
  grid_train_args = dict(zip(param_names, params))
  # hw -> number of the first checkpoint file on which training succeeded (-1: never).
  grid_success_dict = {hw: -1 for hw in choosen_hw}
  remaining_hw = choosen_hw.copy()
  for filename in filenames:
    dataset = LWEDataset.load_reduced(filename)
    dataset.params['secret_type'] = secret_type
    dataset.params['verbose'] = False
    # Reuse the shared helper instead of re-parsing with int(), which raises on
    # suffixes such as "1.0" (e.g. data_..._1.0.pkl).
    file_number = int(extract_filenumber(filename))
    success_hw = []
    for hw in remaining_hw:
      dataset.params['hw'] = hw
      dataset.initialize_secret()
      dataset.params.update(grid_train_args)
      found, _ = dataset.train()
      if found:
        grid_success_dict[hw] = file_number
        success_hw.append(hw)
      else:
        # Stop at the first failure; the remaining weights are retried on the next checkpoint.
        break
    # The loop stops at the first failure, so the solved weights are exactly the
    # leading entries of remaining_hw.
    remaining_hw = remaining_hw[len(success_hw):]
    if not remaining_hw:
      break
  # Save results for this hyperparameter combination
  results.append({'params': grid_train_args, 'success_dict': grid_success_dict})
  print(f"Trained with params {grid_train_args}, success_dict: {grid_success_dict}")

Trained with params {'train_percentages': 1.0, 'epsilon': 1.1, 'max_iter': 20000, 'alpha': 0.0001, 'warm_start': False, 'fit_intercept': True, 'tol': 1e-05, 'use_ransac': False, 'residual_factor': 1.5, 'min_samples': None, 'max_trials': None}, success_dict: {1: 1, 2: 1, 3: 7, 4: 7, 5: 8, 6: 11, 7: 12, 8: 15, 9: 15, 10: 20, 11: 20, 12: 20, 13: 24, 14: 26, 15: 26, 16: 26, 17: 26, 18: 26, 19: 27, 20: 30, 21: 33, 22: 34, 23: 35, 24: 35, 25: 35, 26: -1, 27: -1, 28: -1, 29: -1, 30: -1, 31: -1, 32: -1, 33: -1, 34: -1, 35: -1, 36: -1, 37: -1, 38: -1, 39: -1, 40: -1, 41: -1, 42: -1, 43: -1, 44: -1, 45: -1, 46: -1, 47: -1, 48: -1, 49: -1, 50: -1, 51: -1, 52: -1, 53: -1, 54: -1, 55: -1, 56: -1, 57: -1, 58: -1, 59: -1, 60: -1, 61: -1, 62: -1, 63: -1}
Trained with params {'train_percentages': 1.0, 'epsilon': 1.0, 'max_iter': 20000, 'alpha': 0.0001, 'warm_start': False, 'fit_intercept': True, 'tol': 1e-05, 'use_ransac': False, 'residual_factor': 1.5, 'min_samples': None, 'max_trials': None}, success

In [4]:
from collections import defaultdict

# Bucket the result indices by outcome: runs with an identical success_dict share a key.
grouped = defaultdict(list)
for i, res in enumerate(results):
  outcome = tuple(sorted(res['success_dict'].items()))
  grouped[outcome].append(i)

# For every bucket, the list of parameter dicts that produced that outcome.
group_params = [
  [results[idx]['params'] for idx in indices]
  for indices in grouped.values()
]

# Find which features differ between groups
def get_differing_keys(param_groups):
  """Return the set of parameter names that take more than one value.

  Args:
    param_groups: list of groups, each group being a list of parameter dicts.

  Returns:
    The keys whose value is not the same in every parameter dict of every
    group. A key missing from a dict counts as the value None.

  Values are compared across all dicts rather than as per-group tuples:
  tuples of different lengths always compare unequal, which used to flag
  constant parameters as soon as two groups had different sizes. Values are
  compared with != instead of being hashed, so list-valued parameters work.
  """
  all_params = [params for group in param_groups for params in group]

  all_keys = set()
  for params in all_params:
    all_keys.update(params.keys())

  differing_keys = set()
  for key in all_keys:
    values = [params.get(key, None) for params in all_params]
    if any(value != values[0] for value in values[1:]):
      differing_keys.add(key)
  return differing_keys

differing_keys = get_differing_keys(group_params)

# List the runs that ended with the same outcome, showing only the parameters that vary.
for indices in grouped.values():
  if len(indices) <= 1:
    continue
  print("Grouped together:", indices)
  for idx in indices:
    params = results[idx]['params']
    print({k: v for k, v in params.items() if k in differing_keys})

# One curve per distinct outcome, drawn from the first run of each group.
fig = go.Figure()
for indices in grouped.values():
  res = results[indices[0]]

  # Drop the hamming weights that were never recovered (value -1).
  solved = [(hw, number) for hw, number in res['success_dict'].items() if number != -1]
  x = [hw for hw, _ in solved]
  y = [number for _, number in solved]

  label = ', '.join(f'{k}={v}' for k, v in res['params'].items() if k in differing_keys)
  fig.add_trace(go.Scatter(x=x, y=y, mode='lines+markers', name=label))

# NOTE(review): y holds the checkpoint file numbers stored in success_dict; the
# 'Hours' label presumably assumes one checkpoint per hour - confirm.
layout_options = dict(
  xaxis_title='Hamming Weight',
  yaxis_title='Hours',
  width=800,
  height=400,
  autosize=True,
  margin=dict(l=40, r=20, t=40, b=40),
)
fig.update_layout(**layout_options)
fig.show()



# Mean STD

In [4]:
# Show the parameters stored in the dual checkpoint with suffix 1.0; as the last
# expression of the cell, the returned value is rendered as the cell output.
LWEDataset.load_params_from_file("./../data/dual/data_n_128_k_1_s_binary_2c7a7_1.0.pkl")

{'n': 128,
 'q': 3329,
 'k': 1,
 'secret_type': 'binary',
 'eta': 3,
 'gaussian_std': 2,
 'hw': -1,
 'error_type': 'cbd',
 'num_gen': 4,
 'add_noise': True,
 'mod_q': True,
 'seed': 0,
 'approximation_std': 3,
 'approximation_threshold': 0.01,
 'save_to': '../data/dual',
 'float_type': 'd',
 'matrix_config': 'dual',
 'reduction_std': 2,
 'reduction_factor': 0.875,
 'reduction_resampling': True,
 'min_samples': 0,
 'num_matrices': 63,
 'algos': ['flatter', 'BKZ2.0'],
 'lookback': 4,
 'bkz_block_sizes': '40:40:1',
 'bkz_deltas': [0.96, 0.99],
 'flatter_alphas': [0.04, 0.025],
 'penalty': 4,
 'verbose': True,
 'checkpoint_filename': './best_reduction',
 'reload_checkpoint': False,
 'warmup_steps': 10,
 'flatter_alpha': 0.001,
 'bkz_delta': 0.99,
 'use_polish': True}

In [17]:
# One glob pattern per curve to draw; the n=128 patterns are kept as disabled alternatives.
file_sets = [
  # "./../data/dual/data_n_128_k_1_s_binary_2c7a7_*.pkl",
  # "./../data/salsa/data_n_128_k_1_s_binary_2c7a7_*.pkl",
  "./../data/dual/data_n_150_k_1_s_binary_70e6e_*.pkl",
  "./../data/salsa/data_n_150_k_1_s_binary_70e6e_*.pkl"
]

# Per file set: mean of std_B, std of std_B and reduction time in hours for each checkpoint.
mean_stds, std_stds, file_nums = [], [], []

for file_set in file_sets:
  means, stds, nums = [], [], []
  for f in sorted(glob.glob(file_set), key=extract_filenumber):
    ds = LWEDataset.load_reduced(f)
    _, _, std_B = ds.get_b_distribution()
    means.append(np.mean(std_B))
    stds.append(np.std(std_B))
    nums.append(ds.reduction_time / 3600)  # reduction_time / 3600, used as the hour axis
  mean_stds.append(means)
  std_stds.append(stds)
  file_nums.append(nums)

# Truncate every series to the length of the shortest one so all curves have the same number of points.
min_len = min(map(len, mean_stds))
mean_stds = [series[:min_len] for series in mean_stds]
std_stds = [series[:min_len] for series in std_stds]
file_nums = [series[:min_len] for series in file_nums]

# Mean of std_B over time for each file set, with a shaded band of +/- one standard deviation.
fig = go.Figure()

labels = ['dual', 'salsa']
colors = ['blue', 'red']

for i, label in enumerate(labels):
  hours = file_nums[i]
  center = np.array(mean_stds[i])
  spread = np.array(std_stds[i])

  mean_line = go.Scatter(
    x=hours,
    y=mean_stds[i],
    mode='lines+markers',
    name=f'{label} mean',
    line=dict(color=colors[i]),
  )
  fig.add_trace(mean_line)

  # Closed polygon for the band: upper edge left-to-right, then lower edge right-to-left.
  upper_edge = (center + spread).tolist()
  lower_edge = (center - spread)[::-1].tolist()
  band_fill = 'rgba(0, 0, 255, 0.15)' if i == 0 else 'rgba(255, 0, 0, 0.15)'
  band = go.Scatter(
    x=hours + hours[::-1],
    y=upper_edge + lower_edge,
    fill='toself',
    fillcolor=band_fill,
    line=dict(color='rgba(255,255,255,0)'),
    hoverinfo="skip",
    showlegend=False,
  )
  fig.add_trace(band)

layout_options = dict(
  xaxis_title='Hour',
  yaxis_title='Standard Deviation of B',
  template='plotly_white',
  width=700,
  height=450,
  bargap=0.2,
  margin=dict(l=20, r=20, t=10, b=20),
  showlegend=False,
)
fig.update_layout(**layout_options)

fig.show()

fig.write_image("salsa_dual_std_b.pdf")