In [2]:
import numpy as np
from scipy.optimize import curve_fit

In [3]:
array = np.random.normal(size=64)
array[0:5]

array([ 1.17840015,  0.39621674, -0.4852947 , -1.46519372,  1.15404058])

In [4]:
def softmax(x):
    """Compute softmax values for each set of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

# Apply softmax to the array
softmax_array = softmax(array)
softmax_array[0]

0.03993663083095821

In [13]:
def average_probability_of_gamma_edit(length, gamma, num_trials=100):
    """Calculate the average probability of the first entry being around 25% after gamma edit over several trials."""
    total_probability = 0
    for _ in range(num_trials):
        array = np.random.normal(size=length)
        array[0] += gamma
        softmax_probs = softmax(array)
        total_probability += softmax_probs[0]
    return total_probability / num_trials

# Calculate the average probability over many trials using a randomly chosen gamma and a given array length
average_prob = average_probability_of_gamma_edit(length=64, gamma=1)
average_prob

0.03508006823940956

In [12]:
def find_optimal_gamma_for_average_probability(length, target_avg_prob, initial_guess=1.0, tolerance=0.001, max_iter=1000, num_trials=100):
    """Find the optimal gamma value to achieve the target average probability over many trials."""
    gamma = initial_guess
    for _ in range(max_iter):
        avg_prob = average_probability_of_gamma_edit(length, gamma, num_trials=num_trials)
        error = target_avg_prob - avg_prob
        if abs(error) < tolerance:
            return gamma
        gamma += error * 10  # Scaling factor to speed up convergence
    return gamma

ideal_gamma = find_optimal_gamma_for_average_probability(length=64, target_avg_prob=0.25)
ideal_gamma

3.309549187743648

In [15]:
lengths = np.array([64, 256, 1024, 8192])
probs = np.array([0.01, 0.05, 0.1, 0.2, 0.5])

# Find the optimal gamma for each combination of length and probability
gamma_values = []
length_prob_pairs = []
for length in lengths:
    for prob in probs:
        optimal_gamma = find_optimal_gamma_for_average_probability(length, prob, num_trials=100)  # Reduced trials for speed
        gamma_values.append(optimal_gamma)
        length_prob_pairs.append((length, prob))

# Fitting a function that takes both d (length) and p (probability) as variables
# We can try a function of the form: gamma = a * log(d) + b * p + c
def fit_function(d_p, a, b, c):
    d, p = d_p
    return a * np.log(d) + b * p + c

# Perform the curve fitting with reduced data
params, _ = curve_fit(fit_function, np.array(length_prob_pairs).T, gamma_values, maxfev=10000)

# Extract the parameters
a, b, c = params
a, b, c

(0.9860260050705083, 8.94147912144401, -3.467570669583252)

In [18]:
from sklearn.metrics import r2_score

# Test lengths and probabilities not included in the fitting process
test_lengths = np.array([32, 128, 512, 2048, 4092])
test_probs = np.array([0.001, 0.005, 0.02, 0.025, 0.33])

# Calculate the expected gamma values using the fitted function
expected_gammas = fit_function([test_lengths, test_probs], a, b, c)

# Calculate the actual gamma values through the optimization process
actual_gammas = []
for length, prob in zip(test_lengths, test_probs):
    gamma = find_optimal_gamma_for_average_probability(length, prob, num_trials=100)  # Reduced trials for speed
    actual_gammas.append(gamma)

# Calculate R^2 value to assess the accuracy of the fitted model
r2 = r2_score(actual_gammas, expected_gammas)
r2, expected_gammas, actual_gammas

(0.8492487482115556,
 array([-0.04132346,  1.36136474,  2.86240922,  4.27403891,  7.6836878 ]),
 [-2.6768594239402015,
  -0.2640693071947143,
  2.324398212806692,
  4.0489832314707375,
  7.95073072254116])