# Distributions

Check the approximation of b given A and the approx distribution of s and e:

In [2]:
import sys
import os

# Add the parent directory to sys.path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from ml_attack import get_b_distribution, get_vector_distribution
from kyber import MLWE
from ml_attack import transform_matrix_lwe, transform_vector_lwe, Reduction
from ml_attack.utils import get_default_params

import numpy as np
from scipy.stats import norm

import matplotlib.pyplot as plt

import plotly.graph_objs as go
import plotly.offline as pyo


  from .siever_params import SieverParams  # noqa


In [51]:
def check_distribution(params, save_to_file=False):
    mlwe = MLWE(params)
    input_bytes = mlwe.get_random_bytes()
    A = mlwe.generate_A(input_bytes)
    A_lwe = transform_matrix_lwe(A.to_list())[0] # take a single A vector
    A_lwe %= params['q']  # Ensure A_lwe is in the range [0, q)
    A_lwe[A_lwe > params['q'] // 2] -= params['q']  # Adjust values to be in the range [-q/2, q/2]

    num_samples = 50000
    
    b_values = np.zeros(num_samples)
    secrets = np.zeros((num_samples, params['n']))
    errors = np.zeros(num_samples)

    for i in range(num_samples):
        input_bytes = mlwe.get_random_bytes()
        secret = mlwe.generate_secret(input_bytes)
        secret_lwe = transform_vector_lwe(secret.to_list())
        secret_lwe[secret_lwe > params['q'] // 2] -= params['q']
        secrets[i] = secret_lwe

        error = mlwe.generate_error(input_bytes)
        error_lwe = transform_vector_lwe(error.to_list())[0:1]
        error_lwe[error_lwe > params['q'] // 2] -= params['q']
        errors[i] = error_lwe.item()

        B_lwe = A_lwe @ secret_lwe + error_lwe
        b_values[i] = B_lwe.item()

    # Compute expected and variances
    mean_s, _, std_s = get_vector_distribution(params, params['secret_type'], params.get("hw", -1))
    mean_e, _, std_e = get_vector_distribution(params, params['error_type'])

    mean_b, _, std_b = get_b_distribution(params, A_lwe)

    # Compute observed mean and stds
    observed_s = np.mean(secrets)
    observed_e = np.mean(errors)
    observed_b = np.mean(b_values)

    observed_s_std = np.std(secrets)
    observed_e_std = np.std(errors)
    observed_b_std = np.std(b_values)

    print(f"Secret expected distribution: {mean_s}, {std_s}")
    print(f"Secret observed distribution: {observed_s}, {observed_s_std}")

    print(f"Error expected distribution: {mean_e}, {std_e}")
    print(f"Error observed distribution: {observed_e}, {observed_e_std}")

    print(f"B expected distribution: {mean_b}, {std_b}")
    print(f"B observed distribution: {observed_b}, {observed_b_std}")

    # Generate x values for the normal distribution
    x = np.linspace(mean_b - 4 * std_b, mean_b + 4 * std_b, 1000)

    # Compute the PDF of the normal distribution
    pdf = norm.pdf(x, loc=mean_b.item(), scale=std_b.item())
    # Create traces for plotly
    hist = go.Histogram(
        x=b_values,
        nbinsx=30,
        histnorm='probability density',
        opacity=0.6,
        name='Observed B values',
        marker=dict(color="limegreen"),
        showlegend=False
    )
    normal_curve = go.Scatter(
        x=x,
        y=pdf,
        mode='lines',
        name='Normal Distribution',
        line=dict(color='blue'),
        showlegend=False
    )

    # Add vertical line for mean (no legend, no annotation)
    mean_line = go.Scatter(
        x=[mean_b, mean_b],
        y=[0, max(pdf)*1.0],
        mode='lines',
        line=dict(color='green', dash="dash", width=1),
        showlegend=False
    )

    # Add range line for std (-1 to +1 std), thinner line, no legend, no markers
    std_y = max(pdf)*0.9
    std_x0 = mean_b - std_b
    std_x1 = mean_b + std_b

    std_range = go.Scatter(
        x=[std_x0, std_x1],
        y=[std_y, std_y],
        mode='lines',
        line=dict(color='green', width=1),
        showlegend=False
    )

    # Add vertical end lines for std range (like |---|)
    std_end_left = go.Scatter(
        x=[std_x0, std_x0],
        y=[std_y-1e-6, std_y+1e-6],
        mode='lines',
        line=dict(color='green', width=1),
        showlegend=False
    )
    std_end_right = go.Scatter(
        x=[std_x1, std_x1],
        y=[std_y-1e-6, std_y+1e-6],
        mode='lines',
        line=dict(color='green', width=1),
        showlegend=False
    )

    layout = go.Layout(
        xaxis=dict(title='B', range=[-40e3, 40e3]),
        yaxis=dict(title='Density', range=[-5e-6, 80e-6]),
        bargap=0.2,
        margin=dict(l=20, r=20, t=10, b=20),
        width=550,
        height=400,
        showlegend=False
    )

    fig = go.Figure(
        data=[hist, normal_curve, mean_line, std_range, std_end_left, std_end_right],
        layout=layout
    )
    pyo.iplot(fig)

    if save_to_file:
        # Save as PDF using kaleido
        fig.write_image(f'b_{params["secret_type"]}_distr.pdf', format='pdf')

    # Example: compute z-scores for b_values
    b_mean = np.mean(b_values, keepdims=True)
    b_dev = b_values - b_mean
    z_scores = b_dev / observed_b_std
    mean_z = np.mean(z_scores)
    std_z = np.std(z_scores)
    print(f"Z-score mean: {mean_z}, std: {std_z}")

params = get_default_params()
params.update({
    'n': 100,
    'q': 3329,
    'k': 1,
    'secret_type': 'binary',
    'error_type': 'cbd',
    'seed': 0,
})

check_distribution(params, save_to_file=True)

Secret expected distribution: 0.5, 0.5
Secret observed distribution: 0.4800056, 0.4996000640198519
Error expected distribution: 0.0, 1.0
Error observed distribution: -0.0046, 1.0031444761349184
B expected distribution: -5034.0, 5308.4778891882
B observed distribution: -5258.2824, 5203.954049805805


Z-score mean: 1.0942358130705542e-17, std: 1.0


In [3]:
def check_reduced_distribution(params, save_to_file=False):
    mlwe = MLWE(params)
    input_bytes = mlwe.get_random_bytes()
    A = mlwe.generate_A(input_bytes)
    A_lwe = transform_matrix_lwe(A.to_list()) # take a single A vector
    A_lwe %= params['q']  # Ensure A_lwe is in the range [0, q)
    A_lwe[A_lwe > params['q'] // 2] -= params['q']  # Adjust values to be in the range [-q/2, q/2]

    # Apply reduction
    reduction = Reduction(params)
    R = reduction.reduce(A_lwe)
    RA = np.tensordot(R, A_lwe, axes=1) % params['q']
    RA[RA > params['q'] // 2] -= params['q']

    num_samples = 50000
    
    b_values = np.zeros(num_samples)

    for i in range(num_samples):
        input_bytes = mlwe.get_random_bytes()
        
        secret = mlwe.generate_secret(input_bytes)
        secret_lwe = transform_vector_lwe(secret.to_list())
        secret_lwe[secret_lwe > params['q'] // 2] -= params['q']
        
        error = mlwe.generate_error(input_bytes)
        error_lwe = transform_vector_lwe(error.to_list())
        error_lwe[error_lwe > params['q'] // 2] -= params['q']
        
        B_lwe = A_lwe @ secret_lwe + error_lwe

        RB_lwe = np.tensordot(R, B_lwe, axes=1) % params['q']
        RB_lwe[RB_lwe > params['q'] // 2] -= params['q']

        b_values[i] = RB_lwe[0].item()

    # Compute expected and variances
    mean_b_array, _, std_b_array = get_b_distribution(params, RA, R)
    mean_b = mean_b_array[0]
    std_b = std_b_array[0]

    # Compute observed mean and stds
    observed_b = np.mean(b_values)
    observed_b_std = np.std(b_values)

    print(f"B expected distribution: {mean_b}, {std_b}")
    print(f"B observed distribution: {observed_b}, {observed_b_std}")
    
    # Generate x values for the normal distribution
    x = np.linspace(mean_b - 4 * std_b, mean_b + 4 * std_b, 1000)

    # Compute the PDF of the normal distribution
    pdf = norm.pdf(x, loc=mean_b.item(), scale=std_b.item())

    # Create traces for plotly
    hist = go.Histogram(
        x=b_values,
        nbinsx=30,
        histnorm='probability density',
        opacity=0.6,
        name='Observed B values',
        marker=dict(color="orange"),
        showlegend=False
    )
    normal_curve = go.Scatter(
        x=x,
        y=pdf,
        mode='lines',
        name='Normal Distribution',
        line=dict(color='blue'),
        showlegend=False
    )

    # Add vertical line for mean (no legend, no annotation)
    mean_line = go.Scatter(
        x=[mean_b, mean_b],
        y=[0, max(pdf)*1.0],
        mode='lines',
        line=dict(color='darkgoldenrod', dash="dash", width=1),
        showlegend=False
    )

    # Add range line for std (-1 to +1 std), thinner line, no legend, no markers
    std_y = max(pdf)*0.9
    std_x0 = mean_b - std_b
    std_x1 = mean_b + std_b

    std_range = go.Scatter(
        x=[std_x0, std_x1],
        y=[std_y, std_y],
        mode='lines',
        line=dict(color='darkgoldenrod', width=1),
        showlegend=False
    )

    # Add vertical end lines for std range (like |---|)
    std_end_left = go.Scatter(
        x=[std_x0, std_x0],
        y=[std_y-0.01*std_y, std_y+0.01*std_y],
        mode='lines',
        line=dict(color='darkgoldenrod', width=1),
        showlegend=False
    )
    std_end_right = go.Scatter(
        x=[std_x1, std_x1],
        y=[std_y-0.01*std_y, std_y+0.01*std_y],
        mode='lines',
        line=dict(color='darkgoldenrod', width=1),
        showlegend=False
    )

    layout = go.Layout(
        xaxis=dict(title='R@B', range=[-2.5e3, 2.5e3]),
        yaxis=dict(title='Density', range=[-50e-6, 800e-6]),
        bargap=0.2,
        margin=dict(l=20, r=20, t=10, b=20),
        width=550,
        height=400,
        showlegend=False
    )

    fig = go.Figure(
        data=[hist, normal_curve, mean_line, std_range, std_end_left, std_end_right],
        layout=layout
    )
    pyo.iplot(fig)

    if save_to_file:
        # Save as PDF using kaleido
        fig.write_image(f'b_{params['secret_type']}_distr_red.pdf', format='pdf')

params = get_default_params()
params.update({
    'n': 100,
    'q': 3329,
    'k': 1,
    'secret_type': 'ternary',
    'error_type': 'cbd',

    'seed': 0,
    
    'penalty': 1,
    'verbose': False,
})

check_reduced_distribution(params, save_to_file=True)

B expected distribution: 0.0, 654.4715425440589
B observed distribution: 3.09278, 641.6854101597384


In [1]:
import sys
import os

# Add the parent directory to sys.path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from ml_attack import calculate_min_trials, prob_all_seen
import numpy as np

# Example usage
num_gen = 4
n = 128
N = 0.875
m = int(N * n)
total_items = n * num_gen

threshold = 0.99
min_trials = calculate_min_trials(total_items, m, target_prob=threshold, max_k=total_items)
print(f"Minimum trials for {threshold} probability: {min_trials}")
print(f"Probability of seeing all {n*num_gen} candidates with {m} samples and {min_trials} guesses: {prob_all_seen(n * num_gen, m, min_trials):.4f}")

  from .siever_params import SieverParams  # noqa


Minimum trials for 0.99 probability: 44
Probability of seeing all 512 candidates with 112 samples and 44 guesses: 0.9902


In [None]:
import matplotlib.pyplot as plt
from ipywidgets import interact, IntSlider, FloatSlider

# Plot
def plot_probabilities(n=100, N=0.875):
    if not (0 < N <= 1):
        print("N must be in (0, 1)")
        return
    
    num_gen = 4
    m = int(N * n)

    k_max = 100
    ks = np.arange(1, k_max + 1)
    probs = np.array([prob_all_seen(n * num_gen, m, k) for k in range(1, k_max + 1)])

    # set k_max based on the threshold
    threshold = 0.999
    k_max = np.argmax(probs >= threshold) + 1
    ks = ks[:k_max]
    probs = probs[:k_max]

    plt.figure(figsize=(10, 6))
    plt.plot(ks, probs, label=f'n={n}, m={m} ({N:.3f}×n)')
    plt.xlabel('Number of repetitions (k)')
    plt.ylabel('Probability all items seen')
    plt.ylim(0, 1.05)
    plt.grid(True)
    plt.title(f'Probability of Seeing All {n} Items\nDraw Size m = int({N:.3f} × n) = {m}')
    plt.axhline(0.99, color='red', linestyle='--', label='0.99 Threshold')
    plt.legend()
    plt.show()

# Widgets: vary n and N, keep m = int(N * n)
interact(
    plot_probabilities,
    n=IntSlider(min=10, max=512, step=10, value=100, description='n (set size)'),
    N=FloatSlider(min=0.05, max=1., step=0.025, value=0.875, description='N (m/n)')
)

interactive(children=(IntSlider(value=100, description='n (set size)', max=512, min=10, step=10), FloatSlider(…

<function __main__.plot_probabilities(n=100, N=0.875)>

In [10]:
def compare_B_distributions(params, num_samples=30000, save_to_file=False):
  mlwe = MLWE(params)
  input_bytes = mlwe.get_random_bytes()
  A = mlwe.generate_A(input_bytes)
  A_lwe = transform_matrix_lwe(A.to_list())
  A_lwe %= params['q']
  A_lwe[A_lwe > params['q'] // 2] -= params['q']

  # Reduction
  reduction = Reduction(params)
  R = reduction.reduce(A_lwe)
  RA = np.tensordot(R, A_lwe, axes=1) % params['q']
  RA[RA > params['q'] // 2] -= params['q']

  b_values = np.zeros(num_samples)
  rb_values = np.zeros(num_samples)

  for i in range(num_samples):
    input_bytes = mlwe.get_random_bytes()
    secret = mlwe.generate_secret(input_bytes)
    secret_lwe = transform_vector_lwe(secret.to_list())
    secret_lwe[secret_lwe > params['q'] // 2] -= params['q']

    error = mlwe.generate_error(input_bytes)
    error_lwe = transform_vector_lwe(error.to_list())
    error_lwe[error_lwe > params['q'] // 2] -= params['q']

    B_lwe = A_lwe @ secret_lwe + error_lwe
    b_values[i] = B_lwe[0].item() if B_lwe.shape else B_lwe.item()

    RB_lwe = np.tensordot(R, B_lwe, axes=1) % params['q']
    RB_lwe[RB_lwe > params['q'] // 2] -= params['q']
    rb_values[i] = RB_lwe[0].item() if RB_lwe.shape else RB_lwe.item()

  # Means and stds
  mean_b, _, std_b = get_b_distribution(params, A_lwe)
  mean_rb_array, _, std_rb_array = get_b_distribution(params, RA, R)
  mean_rb = mean_rb_array[0]
  std_rb = std_rb_array[0]
  
  # Plot
  x_b = np.linspace(mean_b - 4 * std_b, mean_b + 4 * std_b, 1000)
  pdf_b = norm.pdf(x_b, loc=mean_b, scale=std_b)
  
  x_rb = np.linspace(mean_rb - 4 * std_rb, mean_rb + 4 * std_rb, 1000)
  pdf_rb = norm.pdf(x_rb, loc=mean_rb.item(), scale=std_rb.item())

  hist_b = go.Histogram(
    x=b_values, nbinsx=30, histnorm='probability density',
    opacity=0.5, name='B (no reduction)', marker=dict(color='orange')
  )
  curve_b = go.Scatter(
    x=x_b, y=pdf_b, mode='lines', name='Normal (no reduction)', line=dict(color='orange', dash='dash')
  )
  hist_rb = go.Histogram(
    x=rb_values, nbinsx=30, histnorm='probability density',
    opacity=0.5, name='B (with reduction)', marker=dict(color='blue')
  )
  curve_rb = go.Scatter(
    x=x_rb, y=pdf_rb, mode='lines', name='Normal (with reduction)', line=dict(color='blue', dash='dash')
  )

  layout = go.Layout(
    xaxis=dict(title='Value'),
    yaxis=dict(title='Density'),
    bargap=0.2,
    margin=dict(l=20, r=20, t=10, b=20),
    width=900,
    height=500,
    title='B Distribution: With and Without Reduction'
  )

  fig = go.Figure(data=[hist_b, curve_b, hist_rb, curve_rb], layout=layout)
  fig.update_layout(barmode='overlay')
  pyo.iplot(fig)

  if save_to_file:
    fig.write_image('b_compare_distribution.pdf', format='pdf')

params = get_default_params()
params.update({
    'n': 32,
    'q': 3329,
    'k': 1,
    'secret_type': 'binary',
    'error_type': 'cbd',
    'eta': 2,
    
    'penalty': 1,
    'verbose': False,
})

compare_B_distributions(params)

In [2]:
import numpy as np
from scipy.special import erf
from scipy.special import erfinv

def compute_lifting_error_rates(sigmas, q):
    """
    Computes the probability of correct lifting and error rates for each coordinate
    of Rb ~ N(mu, sigma^2), given modulus q.

    Args:
        sigmas: np.array of shape (k,) - standard deviations of each coordinate of Rb
        q: modulus (integer)

    Returns:
        lift_correct_probs: np.array of shape (k,) - probability of correct lifting
        lift_error_probs: np.array of shape (k,) - probability of incorrect lifting
    """
    factor = q / (2 * np.sqrt(2) * sigmas)
    lift_correct_probs = erf(factor)
    lift_error_probs = 1 - lift_correct_probs
    return lift_correct_probs, lift_error_probs

def sigma_from_lifting_prob(prob, q):
    """
    Inverse of lifting probability formula: given prob and q, solve for sigma.

    Args:
        prob: probability of correct lifting (float or np.array)
        q: modulus (integer)

    Returns:
        sigma: estimated standard deviation(s)
    """
    # Avoid division by zero or invalid values
    prob = np.clip(prob, 1e-12, 1 - 1e-12)
    factor = np.sqrt(2) * erfinv(prob)
    sigma = q / (2 * factor)
    return sigma



In [3]:
# Example: suppose you have a 4-dimensional Rb
sigmas = np.array([150, 200, 300, 800, 1156.2790])  # standard deviations from empirical Rb samples
q = 3329  # typical Kyber modulus

correct_probs, error_probs = compute_lifting_error_rates(sigmas, q)

for i, (sigma, p_err) in enumerate(zip(sigmas, error_probs)):
    print(f"Coord {i}: σ = {sigma:.1f}, P(lifting error) ≈ {p_err:.4f}")

# Example: compute sigma from a given lifting probability
lifting_prob = 0.99  # desired probability of correct lifting
sigma_estimate = sigma_from_lifting_prob(lifting_prob, q)
print(f"Estimated sigma for P(lifting error) = {1 - lifting_prob:.4f}: {sigma_estimate:.1f}")

Coord 0: σ = 150.0, P(lifting error) ≈ 0.0000
Coord 1: σ = 200.0, P(lifting error) ≈ 0.0000
Coord 2: σ = 300.0, P(lifting error) ≈ 0.0000
Coord 3: σ = 800.0, P(lifting error) ≈ 0.0375
Coord 4: σ = 1156.3, P(lifting error) ≈ 0.1500
Estimated sigma for P(lifting error) = 0.0100: 646.2


In [1]:
import sys
import os

# Add the parent directory to sys.path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from ml_attack.dataset import LWEDataset
from ml_attack.utils import get_train_default_params

dataset = LWEDataset.load_reduced("./../data/data_n_100_k_1_s_binary_e813c.pkl")


In [12]:
mu_b, _, std_b = dataset.get_b_distribution()

# Compute lifting error rates for each coordinate using std_b and q
lift_correct_probs, lift_error_probs = compute_lifting_error_rates(std_b.flatten(), q)

for i, (sigma, p_err) in enumerate(zip(std_b.flatten(), lift_error_probs)):
    print(f"Coord {i}: σ = {sigma:.1f}, P(lifting error) ≈ {p_err:.4f}")

Coord 0: σ = 1029.0, P(lifting error) ≈ 0.1057
Coord 1: σ = 1072.8, P(lifting error) ≈ 0.1208
Coord 2: σ = 1102.4, P(lifting error) ≈ 0.1311
Coord 3: σ = 1066.2, P(lifting error) ≈ 0.1185
Coord 4: σ = 1159.0, P(lifting error) ≈ 0.1509
Coord 5: σ = 1159.0, P(lifting error) ≈ 0.1509
Coord 6: σ = 1076.9, P(lifting error) ≈ 0.1222
Coord 7: σ = 1072.8, P(lifting error) ≈ 0.1208
Coord 8: σ = 1194.2, P(lifting error) ≈ 0.1634
Coord 9: σ = 1102.4, P(lifting error) ≈ 0.1311
Coord 10: σ = 1066.2, P(lifting error) ≈ 0.1185
Coord 11: σ = 1102.4, P(lifting error) ≈ 0.1311
Coord 12: σ = 1171.7, P(lifting error) ≈ 0.1554
Coord 13: σ = 1098.8, P(lifting error) ≈ 0.1298
Coord 14: σ = 1098.8, P(lifting error) ≈ 0.1298
Coord 15: σ = 1157.4, P(lifting error) ≈ 0.1504
Coord 16: σ = 1075.1, P(lifting error) ≈ 0.1216
Coord 17: σ = 1066.2, P(lifting error) ≈ 0.1185
Coord 18: σ = 1098.8, P(lifting error) ≈ 0.1298
Coord 19: σ = 1055.9, P(lifting error) ≈ 0.1149
Coord 20: σ = 1147.1, P(lifting error) ≈ 0.1468
Co

In [14]:
prob = 0.8
sigma = sigma_from_lifting_prob(prob, q)
print(f"Estimated sigma for P(lifting error) = {1 - prob:.4f}: {sigma:.1f}")

Estimated sigma for P(lifting error) = 0.2000: 1298.8


In [13]:
dataset.params.update(get_train_default_params())
_,_ = dataset.train()

Mean overall std_B: 1134.6864016826096
True B is the best candidate: 7604 / 8976 (84.71%)
Expected true B is best candidate: 85.56%
