In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from scipy import stats

import probability_distributions
import maximum_nudges
import evolutionary_algorithms as ea
import maximum_nudges_evolutionary as ev_max_nudges

#### Produce a single example of an input distribution and a conditional output distribution

In [4]:
#distribution parameters
input_variables = 4
number_of_states = 5
nudge_size = 0.01

#generate the input distribution with Dirichlet weights
distribution_shape = [number_of_states]*input_variables
# Total number of joint states = product of all axis lengths.
# np.prod is used instead of the bare `reduce`, which is not a builtin on
# Python 3 and is never imported in this notebook; int() keeps the result a
# plain Python integer so it can be used as a list-repeat count.
total_number_of_states = int(np.prod(distribution_shape))
# Draw one flat probability vector with all concentration parameters equal
# to 1, then fold it into one axis per input variable.
input_dist = np.random.dirichlet([1]*total_number_of_states)
input_dist = np.reshape(input_dist, distribution_shape)
# Conditional output: one axis per input variable plus one trailing axis.
cond_shape = [number_of_states] * (input_variables + 1)
# One call to compute_joint_uniform_random per joint input state
# (number_of_states ** input_variables calls), each asked for shape
# (number_of_states,). The stacked results are then folded into cond_shape.
# NOTE(review): the reshape presumes every call returns exactly
# number_of_states values -- confirm against probability_distributions.
cond_output = np.array([
    probability_distributions.compute_joint_uniform_random((number_of_states,))
    for _ in range(number_of_states ** input_variables)
]).reshape(cond_shape)

#### Generate generic distributions by sampling from a Dirichlet distribution

In [5]:
# Configuration for the Dirichlet data set: for every number of variables from
# 1 up to TOTAL_NUMBER_OF_VAR, NUMBER_OF_SAMPLES pairs of (input distribution,
# conditional output distribution) are generated and written to disk.
NUMBER_OF_STATES = 5
TOTAL_NUMBER_OF_VAR = 6
NUMBER_OF_SAMPLES = 1000

PATH = "/home/derkjan/Documents/academics_UVA/master_thesis/code/"
INPUT_FOLDER = "input_distributions/"
COND_OUTPUT_FOLDER = "conditional_output_distributions/"

SPECIFIC_FOLDER_INPUT = "dirichlet"
FOLDER_FORMAT_INPUT = "{}var_{}states"
SPECIFIC_FOLDER_COND_OUTPUT = "dirichlet"
FOLDER_FORMAT_CONDITIONAL = "{}var_{}states"

full_path_input = PATH + INPUT_FOLDER + SPECIFIC_FOLDER_INPUT
full_path_cond_output = PATH + COND_OUTPUT_FOLDER + SPECIFIC_FOLDER_COND_OUTPUT
# Safety switch: the generation loop below only runs (and only writes files)
# when this is set to True.
RUN = False


def sample_dirichlet_distribution(shape):
    """Draw a random probability distribution with the given shape.

    A flat vector is sampled from a Dirichlet distribution whose concentration
    parameters are all 1 and is then reshaped to `shape`.

    Parameters
    ----------
    shape : sequence of int
        Length of every axis of the returned array.

    Returns
    -------
    numpy.ndarray
        Array of shape `shape` whose entries sum to 1.
    """
    # np.prod replaces the bare `reduce`, which is not a builtin on Python 3
    # and is never imported here; int() keeps the list-repeat count a plain int.
    total_number_of_states = int(np.prod(shape))
    flat_dist = np.random.dirichlet([1] * total_number_of_states)
    return np.reshape(flat_dist, shape)


def sample_conditional_output(number_of_states, number_of_variables):
    """Build a conditional output array for `number_of_variables` inputs.

    compute_joint_uniform_random is called once per joint input state
    (number_of_states ** number_of_variables times) with shape
    (number_of_states,); the stacked results are reshaped so that there is one
    axis per input variable plus one trailing axis.

    NOTE(review): the reshape presumes every call returns exactly
    number_of_states values -- confirm against probability_distributions.
    """
    cond_shape = [number_of_states] * (number_of_variables + 1)
    rows = [
        probability_distributions.compute_joint_uniform_random((number_of_states,))
        for _ in range(number_of_states ** number_of_variables)
    ]
    return np.reshape(np.array(rows), cond_shape)


if RUN:
    for number_of_variables in range(1, TOTAL_NUMBER_OF_VAR+1, 1):
        shape = [NUMBER_OF_STATES] * number_of_variables
        print("shape {}".format(shape))

        # Target folders depend only on the number of variables, so they are
        # built once per outer iteration. They are not created here and must
        # already exist, otherwise open() below fails.
        input_folder = (full_path_input + '/' +
                        FOLDER_FORMAT_INPUT.format(number_of_variables, NUMBER_OF_STATES))
        cond_output_folder = (full_path_cond_output + '/' +
                              FOLDER_FORMAT_CONDITIONAL.format(number_of_variables, NUMBER_OF_STATES))

        for sample_number in range(NUMBER_OF_SAMPLES):
            # progress indicator: every fifth sample, skipping the first
            if sample_number%5 == 0 and sample_number != 0:
                print(sample_number)

            #calculate and save the input distribution
            input_dist = sample_dirichlet_distribution(shape)
            file_name = "dist_{}.npy".format(sample_number)
            with open(input_folder + '/' + file_name, 'wb') as f:
                np.save(f, input_dist)

            #calculate and save the conditional output distribution
            cond_output = sample_conditional_output(NUMBER_OF_STATES, number_of_variables)
            file_name = "cond_dist_{}.npy".format(sample_number)
            with open(cond_output_folder + '/' + file_name, 'wb') as f:
                np.save(f, cond_output)

#### Generate distributions with limited entropy

In [None]:
# Configuration for the limited-entropy data set: for every number of
# variables from 1 up to TOTAL_NUMBER_OF_VAR, NUMBER_OF_SAMPLES distributions
# are requested from ea.produce_distribution_with_entropy and written to disk.
NUMBER_OF_STATES = 5
TOTAL_NUMBER_OF_VAR = 6
NUMBER_OF_SAMPLES = 1000
PATH = "/home/derkjan/Documents/academics_UVA/master_thesis/code/input_distributions/"
PERCENTAGE_MAX_ENTROPY = 0.75
SPECIFIC_FOLDER = "entropy_0.75"
FOLDER_FORMAT = "{}var_{}states"
full_path = PATH + SPECIFIC_FOLDER

# Safety switch: the generation loop below only runs (and only writes files)
# when this is set to True.
RUN = False


def joint_entropy(dist, base=2):
    """Return the entropy of `dist` treated as a single joint distribution.

    scipy.stats.entropy reduces along axis 0 only, so passing it an N-d array
    yields an array of per-column entropies rather than one number. Flattening
    first gives a scalar for any input shape and changes nothing for an input
    that is already 1-d.

    Parameters
    ----------
    dist : array_like
        Probabilities of any shape.
    base : float, optional
        Logarithm base (default 2, i.e. bits).

    Returns
    -------
    float
        Entropy of the flattened distribution.
    """
    return stats.entropy(np.ravel(dist), base=base)


if RUN:
    for number_of_variables in range(1, TOTAL_NUMBER_OF_VAR+1, 1):
        shape = [NUMBER_OF_STATES] * number_of_variables
        print("shape {}".format(shape))
        # Entropy (in bits) of the uniform distribution over all joint states,
        # and the fraction of it that the generated distributions should reach.
        max_entropy = np.log2(NUMBER_OF_STATES**number_of_variables)
        goal_entropy = max_entropy * PERCENTAGE_MAX_ENTROPY
        # The target folder depends only on the number of variables. It is not
        # created here and must already exist, otherwise open() below fails.
        folder_name = FOLDER_FORMAT.format(number_of_variables, NUMBER_OF_STATES)
        for sample_number in range(NUMBER_OF_SAMPLES):
            # progress indicator: every fifth sample, skipping the first
            if sample_number%5 == 0 and sample_number != 0:
                print(sample_number)

            dist = ea.produce_distribution_with_entropy(shape, PERCENTAGE_MAX_ENTROPY)
            file_name = "dist_{}.npy".format(sample_number)

            # Sanity check: warn when the achieved entropy deviates more than
            # 1% from the goal. The last value in the message is the relative
            # difference as a fraction (it is not multiplied by 100).
            entropy_size = joint_entropy(dist, base=2)
            if abs(entropy_size-goal_entropy) > 0.01*goal_entropy:
                print("WARNING: entropy size is {} while it should be {}, percentage difference {}".format(
                    entropy_size, goal_entropy, (entropy_size-goal_entropy)/goal_entropy
                ))

            with open(full_path + '/' + folder_name + '/' + file_name, 'wb') as f:
                np.save(f, dist)

    # A save/load round trip of a generated file can be checked by reading it
    # back with np.load and comparing with np.allclose(new_dist, dist).

In [None]:
# Spot check: load one previously saved distribution and print its entropy.
file_path = ('/home/derkjan/Documents/academics_UVA/master_thesis/code/input_distributions/' +
            'entropy_0.75/6var_5states/dist_14.npy')

with open(file_path, 'rb') as f:
    new_dist = np.load(f)

# stats.entropy reduces along axis 0 only; flattening first gives the entropy
# (in bits) of the whole joint distribution as one number regardless of the
# stored array's shape, and is a no-op when the array is already 1-d.
print(stats.entropy(np.ravel(new_dist), base=2))

#print(stats.entropy(dist, base=2))
#print(dist[1000:2000])