In [None]:
import numpy as np
from scipy.stats import entropy
import json
import collections
import itertools

import powerlaw
from jointpdf.jointpdf import JointProbabilityMatrix
from jointpdf.jointpdf import FullNestedArrayOfProbabilities

from probability_distributions import JointProbabilityMatrixExtended
import probability_distributions
from probability_distributions import ProbabilityArray
from simulate import find_mean_std_mse
import nudge

import information_theory
from information_theory import calculate_mutual_information

In [None]:
def generate_distribution(shape, method, arguments=None):
    """
    Generate a joint probability distribution with the given shape.

    Parameters:
    ----------
    shape: iterable of integers
        The shape of the array that holds the distribution
    method: a string
        One of 'random_biased', 'random_dirichlet' or 'fixed_entropy'
    arguments: a dict, optional
        Extra settings for the method. Only read for 'fixed_entropy',
        which looks up the key 'entropy_size'.

    Returns: the generated distribution
    -------
    For 'random_biased' a numpy array of uniform random numbers normalised
    to sum to one; for the other methods whatever the corresponding helper
    in probability_distributions returns.

    Raises: ValueError when the method is not one of the three above

    """
    if method == 'fixed_entropy':
        target_entropy = arguments['entropy_size']
        return probability_distributions.generate_probability_distribution_with_certain_entropy(
            shape, target_entropy
        )
    if method == 'random_dirichlet':
        return probability_distributions.compute_joint_uniform_random(shape)
    if method == 'random_biased':
        raw_weights = np.random.random(shape)
        return raw_weights / np.sum(raw_weights)
    raise ValueError('provide a valid method')
        
def calculate_amount_and_size_nudges(total_nudge_size, number_of_states, threshold=10):
    """
    Calculate the nudge size and the number of nudges that need to be performed 
    to nudge a variable with the total nudge size. Assuming the distribution is
    not too peaked, in other words, not too many states should have a probability
    that is `threshold` times smaller than uniform.
    
    Parameters:
    ----------
    total_nudge_size: a number
        How much the variable needs to be nudged
    number_of_states: a number
        The total number of states of the joint distribution
    threshold: a float 
        Indicating how much smaller than uniform the value of the number
        at the 95-99 percentile of points is. Defaults to 10 
        
    Returns: local_nudge, number_of_nudges
    -------
    local_nudge: a number 
        The size of the local nudge to be performed on the joint distribution
    number_of_nudges: integer
        How often the nudge needs to be performed
    
    """
    # Fraction of the uniform probability that a low-probability state is
    # assumed to keep; this caps a single local nudge.
    assumed_min_size = 1.0/threshold
    max_local_nudge = min(total_nudge_size, assumed_min_size/number_of_states)
    # Round up so that no individual nudge exceeds the cap.
    number_of_nudges = int(np.ceil(total_nudge_size/max_local_nudge))
    local_nudge = total_nudge_size/float(number_of_nudges) 
    return local_nudge, number_of_nudges

def percentage_max_entropy(shape, percentage):
    """ 
    Return the given fraction of the maximum entropy (in bits) of a
    distribution with the given shape.
    
    Parameters:
    ----------
    shape: iterable
        The number of states along every axis of the distribution
    percentage: float
        The fraction of the maximum entropy to return, e.g. 0.5 for half
    
    Returns: a number
        log2 of the total number of states, multiplied by percentage
    
    """
    # np.prod instead of the bare `reduce`, which is only a builtin in Python 2.
    return np.log2(np.prod(shape)) * percentage
        

## EXPERIMENT 1:

How do mutual information and nudge impact relate for one input variable and one output variable?
The joint distribution of the input and output variables is generated randomly (biased).


In [None]:
def effect_of_nudge_1d(distribution, nudge_size):
    """
    Nudge the input variable of a two-variable joint distribution and
    measure how far the input and the output marginal move.
    
    Parameters:
    ----------
    distribution: a numpy array
        It should represent the joint probability distribution of 1 input
        (the first axis) and 1 output variable (the second axis).
    nudge_size: a number
        Passed on unchanged to nudge.nudge
    
    Returns: kl_variable, kl_function
    -------
    kl_variable: a number
        scipy.stats.entropy of the old against the nudged input marginal
    kl_function: a number
        scipy.stats.entropy of the old against the new output marginal
    """
    joint_before = ProbabilityArray(distribution)
    input_marginal_before = joint_before.marginalize({0})
    output_marginal_before = joint_before.marginalize({1})
    conditional_before, _marginal_labels, conditional_labels = (
        joint_before.find_conditional({1}, {0})
    )

    # Only the nudged marginal is needed; the second return value is unused.
    input_marginal_after, _nudged_states = nudge.nudge(
        input_marginal_before, nudge_size
    )

    # Rebuild the joint from the nudged input and the unchanged conditional.
    joint_after = ProbabilityArray(probability_distributions.compute_joint(
        input_marginal_after, conditional_before, conditional_labels
    ))
    output_marginal_after = joint_after.marginalize({1})

    kl_variable = entropy(input_marginal_before, input_marginal_after)
    kl_function = entropy(output_marginal_before, output_marginal_after)
    return kl_variable, kl_function

# Smoke test: one 10-state variable plus one appended variable with a
# target mutual information of 0.5, nudged by 0.01.
pdf = JointProbabilityMatrix(1, 10, 'random')
pdf.append_variables_with_target_mi(1, 0.5)
distribution = pdf.joint_probabilities.joint_probabilities
effect_of_nudge_1d(distribution=distribution, nudge_size=0.01)

In [None]:
# See whether and how mutual information and the response to the nudge co-depend
NUMBER_OF_STATES, NUDGE_SIZE = 6, 0.01
mutual_information_sizes = np.arange(0.05, 1, 0.05)
sample_size = 1
effect_nudge_given_mi = {}
for mutual_information_size in mutual_information_sizes:
    print("the mutual information size is {}".format(mutual_information_size))
    nudge_effects = []
    for sample in range(sample_size):
        pdf = JointProbabilityMatrix(1, NUMBER_OF_STATES, 'random')
        pdf.append_variables_with_target_mi(1, mutual_information_size)
        distribution = pdf.joint_probabilities.joint_probabilities
        # Index 1 is the KL-divergence of the output variable; use the
        # NUDGE_SIZE constant rather than repeating the literal.
        nudge_effects.append(effect_of_nudge_1d(distribution, NUDGE_SIZE)[1])
        
    effect_nudge_given_mi[mutual_information_size] = nudge_effects
    #with open("back_up2.json", 'w') as f:
    #    json.dump(effect_nudge_given_mi, f)
    
#print(effect_nudge_given_mi)

In [None]:
import plotting

with open("data_1_random_input_1_output_diff_MI.json", 'r') as f:
    first = json.load(f)

# JSON object keys are always strings; turn them back into float MI values.
effect_nudge_given_mi = {}
    
for k, v in first.items():
    effect_nudge_given_mi[float(k)] = v
    
average_effect_nudge_dict = {k:np.mean(v) for k,v in effect_nudge_given_mi.items()}
standard_deviation_effect_nudge_dict = {k:np.std(v) for k,v in effect_nudge_given_mi.items()}

BATCH_SIZE = 30
batches_mean_squared_error = {}
for mi, effect_nudge_list in effect_nudge_given_mi.items():
    batched_estimates = []
    # Floor division: range() needs an integer on Python 3. A trailing
    # partial batch is dropped, as before.
    for i in range(len(effect_nudge_list)//BATCH_SIZE):
        batched_estimates.append(
            np.mean(effect_nudge_list[i*BATCH_SIZE:(i+1)*BATCH_SIZE])
        )
    # Spread of the batch means around their own mean.
    batches_mean_squared_error[mi] = np.std(batched_estimates)

# Sort every summary by MI so that the three value lists line up.
batch_std_effect_nudge_ord_dict = collections.OrderedDict(
    sorted(batches_mean_squared_error.items(), key= lambda x: x[0])
)    
average_effect_nudge_ord_dict = collections.OrderedDict(
    sorted(average_effect_nudge_dict.items(), key= lambda x: x[0])
)
std_effect_nudge_ord_dict = collections.OrderedDict(
    sorted(standard_deviation_effect_nudge_dict.items(), key= lambda x: x[0])
)

# Materialise the dict views so the plotting code receives plain lists on
# both Python 2 and Python 3.
mi_values = list(average_effect_nudge_ord_dict.keys())
mean_effect_nudge = list(average_effect_nudge_ord_dict.values())
std_effect_nudge = list(std_effect_nudge_ord_dict.values())
batch_std_effect_nudge = list(batch_std_effect_nudge_ord_dict.values())

xlabel = "mutual information"
ylabel = "effect of the nudge"
title = "Effect of a nudge on input variable on output variable for certain MI"
plotting.plot_mean_and_confidence(
    mi_values, mean_effect_nudge, std_effect_nudge,
    "std", xlabel, ylabel, title
)
plotting.plot_mean_and_confidence(
    mi_values, mean_effect_nudge, batch_std_effect_nudge,
    "MSE", xlabel, ylabel, title
)

# Same statistics through the shared helper, with the same batch size.
mi_values1, mean_effect_nudge1, std_effect_nudge1, batch_std_effect_nudge1 = (
    find_mean_std_mse(effect_nudge_given_mi, batch_size=BATCH_SIZE)
)

#print(np.allclose(mi_values1, mi_values))
#print(np.allclose(mean_effect_nudge1, mean_effect_nudge))
#print(np.allclose(std_effect_nudge1, std_effect_nudge))
#print(np.allclose(batch_std_effect_nudge1, batch_std_effect_nudge))


## Experiment 2

Research the relation between the impact of a local nudge and the number of input variables. 

### Experiment 2A

See what the distance is between 2 randomly generated functions

In [None]:
number_of_states, number_of_variables = 5, 4
shape = (number_of_states,) * (number_of_variables + 1)
total_nudge_size = 0.01
total_number_of_states = number_of_states**number_of_variables
max_local_nudge, number_of_nudges = calculate_amount_and_size_nudges(
    total_nudge_size, total_number_of_states
)

# Collect the L1 distance between the nudged variable's marginal before and
# after the local nudges, for 50 freshly generated distributions.
total_nudge_sizes = []
for i in range(50):
    print(i)
    # Alternative generator without an entropy constraint:
    # ProbabilityArray(generate_distribution(shape, 'random_dirichlet'))
    raw_distribution = generate_distribution(
        shape, 'fixed_entropy',
        {"entropy_size": percentage_max_entropy(shape, 0.5)}
    )
    distribution = ProbabilityArray(raw_distribution)

    # The last axis is the output; every other axis is an input variable.
    function_labels, label_nudged_variable = {number_of_variables}, 0
    all_labels = set(range(len(distribution.probability_distribution.shape)))
    input_variable_labels = all_labels - function_labels
    input_distribution = distribution.marginalize(input_variable_labels)
    marginal_nudged_old = ProbabilityArray(input_distribution).marginalize(
        {label_nudged_variable}
    )

    new_input_distribution = nudge.nudge_distribution_local_non_causal(
        input_distribution, label_nudged_variable, max_local_nudge, number_of_nudges
    )
    marginal_nudged_new = ProbabilityArray(new_input_distribution).marginalize(
        {label_nudged_variable}
    )
    total_nudge_sizes.append(
        np.sum(np.absolute(marginal_nudged_old - marginal_nudged_new))
    )

print(np.mean(total_nudge_sizes))

### Experiment 2B 

The actual experiment

In [None]:
import copy
    
def calculate_nudge_impact(number_of_variables, number_of_states, total_nudge_size):
    """ 
    Generate a random ('random_dirichlet') joint distribution of the input
    variables and one output variable, apply a local non-causal nudge to
    the first input variable and return its impact on the output.
    
    Parameters:
    ----------
    number_of_variables: integer
        The number of input variables; the output is one extra, last axis
    number_of_states: integer
        The number of states of every variable
    total_nudge_size: number
        The total nudge, split into local nudges by
        calculate_amount_and_size_nudges
    
    Returns: the value returned by nudge.impact_nudge_causal_output
    
    """
    input_state_count = number_of_states**number_of_variables
    local_nudge_size, nudge_count = calculate_amount_and_size_nudges(
        total_nudge_size, input_state_count
    )

    joint_shape = (number_of_states,) * (number_of_variables + 1)
    distribution = ProbabilityArray(
        generate_distribution(joint_shape, 'random_dirichlet')
    )

    # The last axis is the output; every other axis is an input variable.
    function_labels = {number_of_variables}
    label_nudged_variable = 0
    all_labels = set(range(len(distribution.probability_distribution.shape)))
    input_distribution = distribution.marginalize(all_labels - function_labels)

    nudged_input_distribution = nudge.nudge_distribution_local_non_causal(
        input_distribution, label_nudged_variable, local_nudge_size, nudge_count
    )
    return nudge.impact_nudge_causal_output(
        distribution, function_labels, nudged_input_distribution
    )

# Single run with one input variable as a quick sanity check.
number_of_variables, NUMBER_OF_STATES, TOTAL_NUDGE_SIZE = 1, 5, 0.005
nudge_impact = calculate_nudge_impact(
    number_of_variables=number_of_variables,
    number_of_states=NUMBER_OF_STATES,
    total_nudge_size=TOTAL_NUDGE_SIZE,
)
print(nudge_impact)

Check the average "distance" (KL-divergence) between randomly generated distributions (the probability masses of the states are drawn from a Dirichlet distribution). The distance decreases because,
as the number of states of a distribution grows, the number of states whose probability is close to uniform increases.

In [None]:
number_of_states, number_of_distributions = 5, 20
difference_distributions = []
for number_of_variables in range(1, 7, 1):
    # Output marginals of independently generated joint distributions.
    marginal_outputs = []
    shape = tuple([number_of_states]*(number_of_variables+1))
    for i in range(number_of_distributions):
        distribution = ProbabilityArray(generate_distribution(shape, 'random_dirichlet'))
        function_label, label_nudged_variable = number_of_variables, 0
        marginal_outputs.append(distribution.marginalize(set([function_label])))

    # Compare disjoint pairs (0,1), (2,3), ... so that every distribution is
    # used exactly once and the KL samples do not share a distribution.
    kl_divergences = []
    for i in range(number_of_distributions//2):
        kl_divergences.append(entropy(
            marginal_outputs[2*i].flatten(), marginal_outputs[2*i+1].flatten()
        ))

    mean_kl_divergence = np.mean(kl_divergences)
    difference_distributions.append(mean_kl_divergence)
    print(mean_kl_divergence)

Experiment:
The impact of a nudge on an input variable (with no causal impact on the other input variables)
on the output variable, for different numbers of input variables.


In [None]:
MAX_NUMBER_OF_VARIABLES, NUMBER_OF_STATES, TOTAL_NUDGE_SIZE = 7, 5, 0.01
NUMBER_OF_SAMPLES = 20
impact_nudge_dict = {}

# For every number of input variables, sample NUMBER_OF_SAMPLES nudge impacts.
for number_of_variables in range(1, MAX_NUMBER_OF_VARIABLES):
    print(number_of_variables)
    impact_nudges = []
    for i in range(NUMBER_OF_SAMPLES):
        print("sample number {}".format(i))
        sampled_impact = calculate_nudge_impact(
            number_of_variables, NUMBER_OF_STATES, TOTAL_NUDGE_SIZE
        )
        impact_nudges.append(sampled_impact)

    impact_nudge_dict[number_of_variables] = impact_nudges
    #with open("back_up_number_variables_output.json", 'w') as f:
    #    json.dump(impact_nudge_dict, f)

#print(impact_nudge_dict) 


In [None]:
variable_range, mean_impact_nudge, std_impact_nudge, batches_std = find_mean_std_mse(
    impact_nudge_dict, 10
)

xlabel = "number of input variables"
ylabel = "impact of the nudge"
title = "Impact of a nudge on 1 input variable on output variable for different amount of input variables"

# Raw impact, once with the plain std and once with the std of the batched means.
for spread, spread_label in (
    (std_impact_nudge, "std"),
    (batches_std, "std of batched means"),
):
    plotting.plot_mean_and_confidence(
        variable_range, mean_impact_nudge, spread,
        spread_label, xlabel, ylabel, title
    )

# Impact divided by the mean KL-divergence stored in difference_distributions.
normalised_mean_impact = np.array(mean_impact_nudge) / np.array(difference_distributions)
plotting.plot_mean_and_confidence(
    variable_range, normalised_mean_impact, batches_std,
    "std of batched means", xlabel, "normalised impact of the nudge", "normalised values"
)

# Compare a power-law fit of the mean impacts against two alternatives.
fit = powerlaw.Fit(mean_impact_nudge)
for alternative in ("exponential", "lognormal"):
    print(fit.distribution_compare("power_law", alternative))

Experiment:
The relation between nudge impact and the mutual information between the output variable and
the nudged input variable.


In [None]:
NUMBER_OF_VARIABLES, NUMBER_OF_STATES, TOTAL_NUDGE_SIZE = 2, 5, 0.01
NUMBER_OF_SAMPLES = 3
NUDGES_PER_SAMPLE = 5

# Build the shape from this cell's own constants instead of relying on a
# `shape` left behind by an earlier cell, whose number of axes need not
# match NUMBER_OF_VARIABLES.
shape = tuple([NUMBER_OF_STATES]*(NUMBER_OF_VARIABLES+1))

local_nudge, number_of_nudges = calculate_amount_and_size_nudges(
    TOTAL_NUDGE_SIZE, NUMBER_OF_STATES**NUMBER_OF_VARIABLES
)
impact_nudges_and_mi = []
for i in range(NUMBER_OF_SAMPLES):
    if i%20==0 and i != 0:
        print("sample number {}".format(i))
    
    # Generate the joint distribution; the last axis is the output variable.
    distribution = ProbabilityArray(generate_distribution(shape, 'random_dirichlet'))
    function_label, label_nudged_variable = NUMBER_OF_VARIABLES, 0
    function_labels = set([function_label])
    input_variable_labels = set(range(len(distribution.probability_distribution.shape))) - function_labels
    
    # Mutual information between the output and the nudged input variable.
    mutual_information = calculate_mutual_information(distribution, 
                                                      set([function_label]),
                                                      set([label_nudged_variable]))
    
    # Average the nudge impact over several nudges of the same distribution.
    input_distribution = distribution.marginalize(input_variable_labels)
    nudge_impacts = []
    for _ in range(NUDGES_PER_SAMPLE):
        new_input_distribution = nudge.nudge_distribution_local_non_causal(
            input_distribution, label_nudged_variable, local_nudge, number_of_nudges
        )
        nudge_impacts.append(nudge.impact_nudge_causal_output(
            distribution, function_labels, new_input_distribution
        ))
    nudge_impact = np.mean(nudge_impacts)
    impact_nudges_and_mi.append((nudge_impact, mutual_information))


In [None]:
import matplotlib.pyplot as plt

# Every entry of impact_nudges_and_mi is a (nudge impact, mutual information) pair.
impact_nudges = [item[0] for item in impact_nudges_and_mi] 
mutual_information_sizes = [item[1] for item in impact_nudges_and_mi]
plt.plot(mutual_information_sizes, impact_nudges, 'o')
# Label the axes so that the figure can be read on its own.
plt.xlabel("mutual information between output and nudged input")
plt.ylabel("impact of the nudge")
plt.show()
    
    

Experiment:
Change the output so as to minimize the nudge impact, and see what happens to the mutual information between
the nudged variable and the output distribution.

In [None]:

def get_nudge_impact(distribution, output_label, nudge_label, number_of_nudges, local_nudge_size):
    """
    Apply local non-causal nudges to one input variable and return the
    impact on the output variable.

    Parameters:
    ----------
    distribution: a ProbabilityArray
        The joint distribution of the input variables and the output
    output_label: an integer
        The axis of the output variable; all other axes count as inputs
    nudge_label: an integer
        The axis of the input variable that is nudged
    number_of_nudges: an integer
    local_nudge_size: a number

    Returns: the value returned by nudge.impact_nudge_causal_output
    """
    all_labels = set(range(len(distribution.probability_distribution.shape)))
    input_distribution = distribution.marginalize(all_labels - {output_label})

    nudged_input_distribution = nudge.nudge_distribution_local_non_causal(
        input_distribution, nudge_label, local_nudge_size, number_of_nudges
    )
    return nudge.impact_nudge_causal_output(
        distribution, {output_label}, nudged_input_distribution
    )

def minimize_nudge_greedy(initial_distribution, output_label, number_of_trials, 
                          evaluations_per_trial, mutation_size, number_of_mutations,
                          total_nudge_size, nudge_label):
    """
    Greedily mutate the distribution to minimize the nudge impact: a
    proposed mutation is only kept when its mean nudge impact is lower
    than the best one found so far.
    
    Parameters:
    ----------
    initial_distribution: a numpy array
        Representing a discrete probability distribution
    output_label: an integer
        The axis of the output variable
    number_of_trials: an integer
        How many mutated distributions are proposed
    evaluations_per_trial: an integer
        Over how many evaluations the nudge impact of a distribution is
        averaged
    mutation_size: a (small) number
        Passed on to nudge.mutate_distribution_with_fixed_marginals
    number_of_mutations: a number
        Converted to int and passed on to
        nudge.mutate_distribution_with_fixed_marginals
    total_nudge_size: a (small) number
        The total nudge, split up by calculate_amount_and_size_nudges
    nudge_label: an integer
        The axis of the input variable that is nudged
    
    Returns: distribution, final_impact, initial_impact, final_impact
    -------
    The best distribution found, its mean nudge impact, the mean nudge
    impact of the initial distribution and (again) the final mean nudge
    impact. The final impact is returned twice; the 4-tuple layout is kept
    for callers that index into it.
    
    """
    # np.prod instead of the bare `reduce`, which is only a builtin in Python 2.
    # NOTE(review): this counts all axes, including the output axis, while
    # calculate_nudge_impact only counts the input states - confirm intended.
    total_number_of_states = int(np.prod(initial_distribution.shape))
    local_nudge_size, number_of_nudges = calculate_amount_and_size_nudges(
        total_nudge_size, total_number_of_states
    )

    def mean_nudge_impact(candidate):
        """Mean nudge impact of candidate over evaluations_per_trial evaluations."""
        return np.mean([
            get_nudge_impact(
                ProbabilityArray(candidate), output_label, nudge_label,
                number_of_nudges, local_nudge_size
            )
            for _ in range(evaluations_per_trial)
        ])

    distribution = initial_distribution
    initial_nudge_impact = mean_nudge_impact(initial_distribution)
    prev_nudge_impact = initial_nudge_impact

    for _ in range(number_of_trials):
        proposed_distribution = nudge.mutate_distribution_with_fixed_marginals(
            distribution, output_label, int(number_of_mutations), mutation_size
        )
        proposed_nudge_impact = mean_nudge_impact(proposed_distribution)
        # Greedy acceptance: keep the proposal only when it strictly improves.
        if proposed_nudge_impact < prev_nudge_impact:
            prev_nudge_impact = proposed_nudge_impact
            distribution = proposed_distribution

    return distribution, prev_nudge_impact, initial_nudge_impact, prev_nudge_impact

NUMBER_OF_VARIABLES, NUMBER_OF_STATES = 3, 4 
pdf = JointProbabilityMatrix(NUMBER_OF_VARIABLES+1, NUMBER_OF_STATES, 'random')
initial_distribution = pdf.joint_probabilities.joint_probabilities
output_label = NUMBER_OF_VARIABLES

number_of_trials = 50
evaluations_per_trial = 10
# Derive both sizes from the constants above so that they stay in sync when
# the number of variables or states changes.
number_of_input_states = NUMBER_OF_STATES**NUMBER_OF_VARIABLES
mutation_size = 0.2 / float(number_of_input_states)
number_of_mutations = int(0.1 * number_of_input_states)
a=minimize_nudge_greedy(initial_distribution, output_label, number_of_trials, 
                      evaluations_per_trial, mutation_size, number_of_mutations, 0.01, 0)

# a[1] is the mean nudge impact of the best distribution found.
print(a[1])

In [None]:
NUMBER_OF_VARIABLES, NUMBER_OF_STATES = 4, 5 
output_label = NUMBER_OF_VARIABLES
number_of_trials = 50
evaluations_per_trial = 10
# Derive both sizes from the constants above instead of repeating 5**4.
number_of_input_states = NUMBER_OF_STATES**NUMBER_OF_VARIABLES
mutation_size = 0.2 / float(number_of_input_states)
number_of_mutations = int(0.1 * number_of_input_states)
number_of_samples = 20

mi_before = []
mi_after = []

impact_before = []
impact_after = []

for count in range(number_of_samples):
    if count%2==0:
        print(count)
        
    shape = [NUMBER_OF_STATES] * (NUMBER_OF_VARIABLES+1)
    initial_distribution = generate_distribution(shape, 'random_dirichlet')
    a=minimize_nudge_greedy(initial_distribution, output_label, number_of_trials, 
                          evaluations_per_trial, mutation_size, number_of_mutations, 0.01, 0)

    # a = (best distribution, final impact, initial impact, final impact)
    impact_before.append(a[2])
    impact_after.append(a[3])
    mi_before.append(calculate_mutual_information(
        ProbabilityArray(initial_distribution), set([0]), set([NUMBER_OF_VARIABLES])
    ))
    mi_after.append(calculate_mutual_information(
        ProbabilityArray(a[0]), set([0]), set([NUMBER_OF_VARIABLES])
    ))
    
print("mean impact before {}".format(np.mean(impact_before)))
# This line reports impact_after, so it is labelled "after".
print("mean impact after {}".format(np.mean(impact_after)))
    
print("mean mi before {}".format(np.mean(mi_before)))
print("mean mi after {}".format(np.mean(mi_after)))

In [None]:
# Per-sample mutual information, before and after the greedy minimisation.
for mi_list in (mi_before, mi_after):
    print(mi_list)

In [None]:
# Generate a distribution at half of the maximum entropy and print the
# entropy that was actually reached.
shape = (5,) * 5
entropy_size = percentage_max_entropy(shape, 0.5)
dist = generate_distribution(
    shape, method='fixed_entropy', arguments={"entropy_size": entropy_size}
)
# NOTE(review): entropy_size is computed with log2 while entropy() is called
# without a base here - confirm that the two values are in the same unit.
print(entropy(dist.flatten()))