In [None]:
import numpy as np
from scipy.stats import entropy
import json
import collections
import itertools

import powerlaw
from jointpdf.jointpdf import JointProbabilityMatrix
from jointpdf.jointpdf import FullNestedArrayOfProbabilities

from probability_distributions import JointProbabilityMatrixExtended
import probability_distributions
from probability_distributions import ProbabilityArray
from simulate import find_mean_std_mse
import nudge

In [None]:
def nudge_distribution_local_non_causal_assume_independence(joint, nudge_label, nudge_size):
    """
    Nudge the marginal of one variable and rebuild the joint assuming that
    variable is independent of all the other variables after the nudge.

    Parameters:
    ----------
    joint: a numpy array
        Representing a discrete probability distribution
    nudge_label: an integer
        The axis of joint that holds the variable to nudge
    nudge_size: a (small) number
        Passed on unchanged to nudge.nudge

    Returns:
    -------
    The result of probability_distributions.compute_joint_from_independent_marginals
    for the marginal of the other variables and the nudged marginal
    (presumably the new joint as a numpy array -- TODO confirm against
    that helper).

    """
    nudge_variable_labels = set([nudge_label])
    other_variables_labels = set(range(len(joint.shape))) - nudge_variable_labels
    probability_array = ProbabilityArray(joint)
    # marginalize is handed a set of labels, matching every other call to it
    # in this notebook (the original passed the bare integer here).
    marginal_nudge_variable = probability_array.marginalize(nudge_variable_labels)
    marginal_other_variables = probability_array.marginalize(other_variables_labels)
    # nudge.nudge also returns the states it nudged; they are not needed here.
    marginal_variable_nudged, _nudged_states = nudge.nudge(marginal_nudge_variable, nudge_size)
    # The original passed sorted(list(label_nudged_variable)), an undefined
    # name. The sorted labels of the nudged variable are used instead.
    # NOTE(review): assumes the third argument is the position(s) of the
    # second marginal inside the joint -- confirm against the helper.
    return probability_distributions.compute_joint_from_independent_marginals(
        marginal_other_variables, marginal_variable_nudged, sorted(nudge_variable_labels)
    )

In [None]:
# Scratch cell: build a random jointpdf object and a small hand-written
# nested probability array to experiment with.
pdf = JointProbabilityMatrix(2, 5, 'random')
#pdf.append_variables_with_target_mi(1, 0.5)
#pdf.append_synergistic_variables()

import numpy as np

# Hand-written joint of shape (2, 2, 1, 2); its eight entries sum to 1.
probability_array_tryout = FullNestedArrayOfProbabilities(
    np.array([
        [[[0.2, 0.1]], [[0.05, 0.05]]],
        [[[0.3, 0.05]], [[0.15, 0.1]]],
    ])
)

In [None]:
# Nudge the marginal of variable 0, push the nudge through the unchanged
# conditional of variable 1 given variable 0, and report how far both
# marginals moved (KL-divergence old -> new).
NUDGE_SIZE = 0.01
pdf = JointProbabilityMatrix(1, 6, 'random')
#print(pdf.joint_probabilities.joint_probabilities)
pdf.append_variables_with_target_mi(1, 0.1)
#print(pdf.mutual_information([0], [1]))

joint_old = pdf.joint_probabilities.joint_probabilities
probability_array_old = ProbabilityArray(joint_old)
marginal_variable_old = probability_array_old.marginalize(set([0]))
marginal_function_old = probability_array_old.marginalize(set([1]))
# presumably the conditional of variable 1 given variable 0 -- TODO confirm
# the argument order of find_conditional
conditional_joint_old, marginal_labels_old, conditional_labels_old = (
    probability_array_old.find_conditional(set([1]), set([0]))
)
marginal_variable_nudged, nudged_states = nudge.nudge(marginal_variable_old, NUDGE_SIZE)
joint_new = ProbabilityArray(probability_distributions.compute_joint(
    marginal_variable_nudged, conditional_joint_old, conditional_labels_old
))

# Marginal of the function variable after the nudge
marginal_function_new = joint_new.marginalize(set([1]))

# scipy.stats.entropy(pk, qk) returns the KL-divergence of pk from qk
kl_variable = entropy(marginal_variable_old, marginal_variable_nudged)
kl_function = entropy(marginal_function_old, marginal_function_new)
# The first message used to say "function distribution" although it reports
# the nudged (input) variable.
print("KL-divergence old and new variable distribution: {}".format(kl_variable))
print("KL-divergence old and new function distribution: {}".format(kl_function))

In [None]:
# NOTE: this method does not work as intended -- the returned joint does not
# sum to 1 in general!
def update_joint_independent_marginals(joint, label_marginal, marginal,
                                       update_states, other_marginal):
    """Update a joint distribution only for certain states of one variable,
    assuming the marginals are independent for those states.

    Every entry whose state along axis label_marginal is in update_states is
    overwritten with
    other_marginal[state of the other variables] * marginal[state of the variable].
    All other entries are copied from joint unchanged, which is why the
    result is not normalised in general.

    Parameters:
    ----------
    joint: numpy array
    label_marginal: integer
        The axis of joint that belongs to marginal
    marginal: a numpy array
        The (1-d) marginal of the variable at axis label_marginal
    update_states: an iterable of integers
        The states of that variable for which the joint is rewritten
    other_marginal: a numpy array
        The marginal of all remaining variables, indexed in the axis order
        they have in joint

    Returns: The updated joint distribution (a new array; joint itself is
    not modified)

    """
    updated_joint = np.copy(joint)
    states_input_variables = [range(states) for states in joint.shape]
    states_input_variables[label_marginal] = list(update_states)

    for state in itertools.product(*states_input_variables):
        state_other_input_variables = tuple([index for count, index in enumerate(state)
                                             if count != label_marginal])
        # Use the marginal passed in as argument; the original read the
        # notebook-level variable marginal_variable_nudged here, so the
        # parameter was silently ignored.
        updated_joint[state] = (other_marginal[state_other_input_variables] *
                                marginal[state[label_marginal]])

    return updated_joint



In [None]:
import numpy as np

# Scratch experiment: move a subset of axes to the end of a 10-dimensional
# array so that a small array can be broadcast against the trailing axes.
total_variables = 10
tryout = np.arange(2 ** total_variables).reshape((2,) * total_variables)
#print(tryout)
a = [1, 3, 4, 5, 9]
c = np.array([[1, 10], [100, 1000]])
# The axes listed in `a` become the last len(a) axes; c (2x2) then broadcasts
# over the final two of them.
b = np.moveaxis(tryout, a, range(total_variables - len(a), total_variables))
b = b * c
#tryout = np.moveaxis(b, range(total_variables-len(a), total_variables, 1), a)
print(tryout)


#tryout = b*c
#np.moveaxis(tryout, range(total_variables-len(a), total_variables, 1), a)


In [None]:
def nudge_distribution_local_non_causal(joint, nudge_label, nudge_size, number_of_nudges):
    """
    Nudge the variable with label nudge_label while keeping the marginal of
    the other variables constant.

    Two distinct states of the nudged variable are drawn at random. For each
    selected state of the other variables, probability mass is moved from
    the "minus" state to the "plus" state, so the total mass at that state
    of the other variables is unchanged.

    Parameters:
    ----------
    joint: a numpy array
        Representing a discrete probability distribution
    nudge_label: an integer
        The axis of joint that holds the variable to nudge
    nudge_size: a (small) number
        Upper bound on the mass moved per selected state
    number_of_nudges: an integer
        Passed to nudge.select_random_states (presumably the number of
        states of the other variables that get nudged -- TODO confirm)

    Returns: a numpy array, representing the nudged probability distribution
        (joint itself is not modified)

    Raises: ValueError (from np.random.choice) if the nudged variable has
        fewer than two states

    """
    last_axis = len(joint.shape) - 1
    # Work on a copy with the nudged variable moved to the last axis.
    nudged_joint = np.copy(joint).swapaxes(nudge_label, last_axis)
    nudge_states = nudge.select_random_states(nudged_joint.shape[:-1], number_of_nudges)

    nudge_state_plus, nudge_state_minus = np.random.choice(
        joint.shape[nudge_label], 2, replace=False
    )
    for state in nudge_states:
        # Build the full index without the copy module, which this notebook
        # never imports (the original copy.copy(state) raised NameError).
        # NOTE(review): assumes each state is a sequence of integer indices.
        plus_state = tuple(state) + (nudge_state_plus,)
        minus_state = tuple(state) + (nudge_state_minus,)
        # Never remove more than is there, never push an entry above 1.
        size = min(nudged_joint[minus_state], 1-nudged_joint[plus_state], nudge_size)
        nudged_joint[plus_state] += size
        nudged_joint[minus_state] -= size

    # Swap back so the axes are in the same order as in joint.
    return nudged_joint.swapaxes(nudge_label, last_axis)
    
def impact_nudge_causal_output(distribution, function_indices, new_input_distribution):
    """
    Measure how much a nudge of the input distribution changes the output,
    assuming the output is causally determined by the input.

    The conditional of the output given the input is taken from
    distribution, combined with new_input_distribution into a new joint, and
    the old and new output marginals are compared.

    Parameters:
    ----------
    distribution: a ProbabilityArray object
    function_indices: a set of integers
        The labels of the output (function) variables
    new_input_distribution: a numpy array
        It represents the input distribution after the nudge

    Returns:
    -------
    The value of scipy.stats.entropy(old output marginal, new output
    marginal), i.e. the KL-divergence between the two output marginals

    """
    number_of_variables = len(distribution.probability_distribution.shape)
    variable_indices = set(range(number_of_variables)) - function_indices
    marginal_output_old = distribution.marginalize(function_indices)
    conditional, _marginal_labels, conditional_labels = distribution.find_conditional(
        function_indices, variable_indices
    )
    joint_after_nudge = probability_distributions.compute_joint(
        new_input_distribution, conditional, conditional_labels
    )
    marginal_output_new = ProbabilityArray(joint_after_nudge).marginalize(function_indices)
    return entropy(marginal_output_old, marginal_output_new)

In [None]:
def conditional_distribution(self, selected_indices, conditional_indices):
    """Create the conditional distribution for the selected_indices given
    the conditional_indices.

    Every entry of the joint over selected_indices + conditional_indices is
    divided by the entry of the marginal over conditional_indices that
    belongs to the same conditional state. Where that marginal entry is 0
    the division is performed as-is (numpy semantics, no special handling).

    Parameters:
    ----------
    self: an object offering marginalize_distribution(indices)
    selected_indices: list of integers
    conditional_indices: list of integers

    Returns:
    -------
    A numpy array with the same shape as the joint returned by
    self.marginalize_distribution(selected_indices+conditional_indices)

    """
    joint_distribution = self.marginalize_distribution(selected_indices+conditional_indices)
    marginal_conditional = self.marginalize_distribution(conditional_indices)
    conditional = np.copy(joint_distribution)
    # The original called np.iter, which does not exist in numpy; np.ndindex
    # visits every multi-index of the joint.
    for multi_index in np.ndindex(joint_distribution.shape):
        # NOTE(review): the conditional labels are used directly as
        # positions in the multi-index of the joint -- confirm this matches
        # the axis layout that marginalize_distribution returns.
        conditional_arguments = tuple([multi_index[i] for i in conditional_indices])
        conditional[multi_index] = (
            conditional[multi_index] /
            marginal_conditional[conditional_arguments]
        )

    return conditional
        

In [None]:
import numpy as np
import time

# Micro-benchmark: time 100 evaluations of np.log2 and of np.log on the same
# array of 10**7 random numbers and print the elapsed wall-clock seconds.
a = np.random.random(10**7)
start1 = time.time()
for i in range(100):
    b = np.log2(a)  # result is discarded; only the elapsed time matters

# elapsed seconds for the np.log2 loop
print(time.time()-start1)
    
start2 = time.time()
for i in range(100):
    b = np.log(a)  # same work with the natural logarithm

# elapsed seconds for the np.log loop
print(time.time()-start2)
    
    
    

In [None]:
def decrease_entropy(distribution, state1, state2, max_difference):
    """
    Lower the entropy of the distribution by moving a random amount of
    probability from the less likely of two states to the more likely one.

    The distribution is modified in place.

    Parameters:
    ----------
    distribution: a 1-d numpy array
        Representing a probability distribution
    state1, state2: integers in the range len(distribution)
    max_difference: a number
        Upper bound for the amount of probability that is moved

    Returns: a float
    -------
    By how much the entropy (in bits) was decreased; 0 if either state has
    probability zero

    """
    if distribution[state1] == 0 or distribution[state2] == 0:
        return 0

    # Order the two states so that mass always flows from `smaller` to `larger`.
    if distribution[state1] >= distribution[state2]:
        larger, smaller = state1, state2
    else:
        larger, smaller = state2, state1
    pair = [larger, smaller]

    # sum of p*log2(p) over the two states, i.e. minus their entropy contribution
    plogp_before = np.dot(np.log2(distribution[pair]), distribution[pair])

    shift = np.random.uniform(0, min(max_difference, distribution[smaller]))
    # A (nearly) empty state is emptied completely
    if distribution[smaller] < (10**(-10)):
        shift = distribution[smaller]

    distribution[larger] += shift
    distribution[smaller] -= shift

    if distribution[smaller] != 0:
        plogp_after = np.dot(np.log2(distribution[pair]), distribution[pair])
    else:
        # avoid log2(0): the emptied state contributes nothing
        plogp_after = np.log2(distribution[larger]) * distribution[larger]

    return -(plogp_before - plogp_after)



In [None]:
from scipy.stats import entropy

# Start from a uniform distribution over 5**8 states and repeatedly apply
# decrease_entropy (defined in the previous cell) to random pairs of states
# until the tracked entropy drops below the target. Relies on `time` having
# been imported in an earlier cell.
total_number_of_states = 5**8
a = np.full(total_number_of_states, 1.0/total_number_of_states)
b = np.full(total_number_of_states, 1.0/total_number_of_states)
max_difference = 1.0/total_number_of_states
final_entropy_size = np.log2(total_number_of_states * 0.1)
print("the final_entropy size is: {}".format(final_entropy_size))

#start1 = time.time()

#entropy_size = entropy(a, base=2)
#count = 0
#while entropy_size > final_entropy_size:
#    count += 1
#    state1, state2 = tuple(np.random.choice(total_number_of_states, 2, False))
#    entropy_size -= decrease_entropy(a, state1, state2, max_difference)
#print("the count is {}".format(count))
#print("entropy {}".format(entropy(a, base=2)))

#print(time.time()-start1)


start1 = time.time()

# entropy_size is updated incrementally with the decrease reported by each
# call instead of being recomputed from b.
entropy_size = entropy(b, base=2)
pairs_per_round = total_number_of_states*50
while entropy_size > final_entropy_size:
    print('starting again')
    # Two random positions per pair, drawn in one batch
    random_positions = np.random.choice(total_number_of_states, pairs_per_round*2)
    for i in range(pairs_per_round):
        # Pair i uses entries 2*i and 2*i+1. The original paired entry i with
        # entry i*2, which reused samples and paired position 0 with itself.
        entropy_size -= decrease_entropy(
            b, random_positions[2*i], random_positions[2*i + 1], max_difference
        )
        if entropy_size < final_entropy_size:
            break

print("entropy {}".format(entropy(b, base=2)))
print(time.time()-start1)

In [None]:
import numpy as np

# Scratch check: enumerate over a 1-d numpy array yields (position, element)
# pairs, which are printed one per line.
a = np.array([1,2,3,4])
for i, j in enumerate(a):
    print(i, j)

In [None]:
import numpy as np

# Scratch check of np.delete: print a 2x4 array, then the result of deleting
# index 0 along axis 0 (the first row).
a = np.array([[1,2,3,4], [5,6,7,8]])
print(a)
print(np.delete(a, [0], 0))
