In [9]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Description

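Generate a conditional output distribution subject to constraints: random conditional distributions are created and then evolved with `function_generation.FindConditionalOutput`, scoring each candidate against a set of input distributions and a target `goal_distance`.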

In [10]:
import numpy as np
import function_generation
import probability_distributions

In [11]:
# Distribution parameters: number of input variables, states per variable,
# and a nudge size (not referenced again in this cell).
input_variables = 4
number_of_states = 5
nudge_size = 0.01

# Generate a random joint input distribution (Dirichlet weights) and a random
# conditional output distribution (one row per joint input state).
distribution_shape = [number_of_states]*input_variables
# Number of joint input states. Computed with integer exponentiation, the same
# way the comprehension below does, because `reduce` is not a builtin on
# Python 3 and is never imported in this notebook.
total_number_of_states = number_of_states**input_variables
input_dist = np.random.dirichlet([1]*total_number_of_states)
input_dist = np.reshape(input_dist, distribution_shape)

# The conditional distribution has one extra (output) axis of the same size.
cond_shape = [number_of_states]*(input_variables+1)
# One draw per joint input state; each draw must contain `number_of_states`
# values for the reshape to `cond_shape` below to succeed.
cond_output = [
    probability_distributions.compute_joint_uniform_random((number_of_states,))
    for i in range(total_number_of_states)
]
cond_output = np.array(cond_output)
cond_output = np.reshape(cond_output, cond_shape)


In [12]:
# Root directory under which all stored distributions live.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this directory exists. Consider making it configurable.
PATH = "/home/derkjan/Documents/academics_UVA/master_thesis/code/"
# Sub-folders of PATH for input and conditional output distributions.
INPUT_FOLDER = "input_distributions/"
COND_OUTPUT_FOLDER = "conditional_output_distributions/"
# Folder-name templates, filled with (number of variables, number of states).
FOLDER_FORMAT_INPUT = "{}var_{}states/"
FOLDER_FORMAT_CONDITIONAL = "{}var_{}states/"
# File-name templates, filled with the index of the distribution.
FILE_FORMAT_INPUT = "dist_{}.npy"
FILE_FORMAT_COND_OUTPUT = "cond_dist_{}.npy"

# Sub-folders of INPUT_FOLDER, one per kind of input distribution.
# Only ENTROPY_MEDIUM_FOLDER_INPUT is used by the code in this notebook section.
DIRICHLET_FOLDER_INPUT = "dirichlet/"
ENTROPY_LOW_FOLDER_INPUT = "entropy_0.5/"
ENTROPY_MEDIUM_FOLDER_INPUT = "entropy_0.75/"

# Sub-folder for conditional output distributions (unused in this section).
DIRICHLET_FOLDER_COND_OUTPUT = "dirichlet/"


def generate_distributions(path_to_files, file_format, number_of_distributions):
    """Lazily load numbered ``.npy`` files, yielding one array per file.

    Parameters
    ----------
    path_to_files : str
        Directory prefix; it is concatenated directly with the file name, so
        it must already end with a path separator.
    file_format : str
        File-name template with one ``{}`` placeholder for the file index.
    number_of_distributions : int
        Number of files to load; indices ``0 .. number_of_distributions-1``.

    Yields
    ------
    The object returned by ``np.load`` for each file, in index order.
    """
    for i in range(number_of_distributions):
        file_name = path_to_files + file_format.format(i)
        # Load inside the `with` block but yield only after it has exited, so
        # the file handle is closed while the generator is suspended (or if
        # the consumer abandons the generator early).
        with open(file_name, 'rb') as f:
            distribution = np.load(f)
        yield distribution

def get_input_dist_entropy_75(number_of_dists, number_of_var, number_of_states=5):
    """Load stored input distributions from the ENTROPY_MEDIUM_FOLDER_INPUT folder.

    Parameters
    ----------
    number_of_dists : int
        Number of distribution files to load (indices 0 .. number_of_dists-1).
    number_of_var : int
        Number of input variables; selects the sub-folder and sets the number
        of axes of each returned array.
    number_of_states : int, optional
        Number of states per variable; selects the sub-folder and sets the
        length of every axis. Defaults to 5, the value that was previously
        hard-coded.

    Returns
    -------
    list
        One array per loaded file, each reshaped to
        ``[number_of_states]*number_of_var``.
    """
    path_to_input_files = (
        PATH + INPUT_FOLDER + ENTROPY_MEDIUM_FOLDER_INPUT
        + FOLDER_FORMAT_INPUT.format(number_of_var, number_of_states)
    )
    input_shape = [number_of_states]*number_of_var
    # The generator yields exactly `number_of_dists` arrays, so it can be
    # consumed directly instead of calling next() a fixed number of times.
    return [
        np.reshape(input_dist, input_shape)
        for input_dist in generate_distributions(
            path_to_input_files, FILE_FORMAT_INPUT, number_of_dists
        )
    ]


In [13]:
# Load the first 100 stored input distributions over 4 variables from disk;
# every later cell that scores a conditional output reuses this list.
input_dists = get_input_dist_entropy_75(100, 4)

In [16]:
# Wrap the randomly generated `cond_output` array from the earlier cell and
# score it against the loaded input distributions with a goal distance of 0.
individual = function_generation.ConditionalOutput(cond_output)
individual.evaluate(input_dists=input_dists, goal_distance=0)
# Last expression of the cell: displays the score set by evaluate().
individual.score

0.053120974471955441

In [17]:

# Mutate a copy of `cond_output` so the original array is left untouched.
# NOTE(review): the literal 0.01 equals `nudge_size` defined earlier; confirm
# whether that variable was meant to be passed here.
# NOTE(review): `mutated_distribution` is not used again in the visible cells.
mutated_distribution = function_generation.mutate_conditional_distribution_uniform(
    np.copy(cond_output), 0.01
)

In [33]:
# Parameters for the evolutionary search over conditional output distributions.
number_of_generations = 100 
population_size = 20
number_of_children = 10 
generational = True
mutation_size = 0.05
parent_selection_mode = "rank_exponential"
#parent_selection_mode = None
# NOTE(review): only referenced by the commented-out evolve() call in the
# following cell; the active call passes the preloaded `input_dists` instead.
number_of_input_distributions = 200
goal_distance = 0

# These redefine `number_of_states` from the first cell with the same value.
number_of_states = 5
number_of_input_variables = 4 

# Create the initial population of `population_size` individuals.
conditional_outputs = function_generation.create_condional_distributions(
    population_size, number_of_states, number_of_input_variables
)

# Score every individual against the preloaded input distributions
# (`input_dists` comes from an earlier cell).
for conditional_output in conditional_outputs:
    conditional_output.evaluate(goal_distance, input_dists)
# Report the score of the last individual in the ordering returned by
# sort_individuals (presumably the best one; verify the sort direction).
print("initial distance condtional output {}".format(
    function_generation.sort_individuals(conditional_outputs)[-1].score
))

initial distance condtional output 0.0589375907398


In [34]:


# Start the optimization process on the population built in the previous cell.
find_conditional_output = function_generation.FindConditionalOutput(
    conditional_outputs, goal_distance, number_of_generations,
    number_of_children, parent_selection_mode
)
# Scores of the initial population, in the order FindConditionalOutput stores it.
print([i.score for i in find_conditional_output.individuals])
# Alternative call kept for reference: it passes None for the input
# distributions and a count instead (presumably so that evolve() obtains the
# input distributions itself; confirm against function_generation).
#find_conditional_output.evolve(generational, mutation_size, None,
#                               number_of_input_distributions)
# Active call: evolve using the fixed, preloaded `input_dists`.
find_conditional_output.evolve(generational, mutation_size, input_dists, None)

# Report the score of the last individual in sorted order after evolution,
# the same way the initial distance was reported before the search.
print("distance after evolution {}".format(
    function_generation.sort_individuals(find_conditional_output.individuals)[-1].score
))




[0.058029272661808774, 0.054426075106180354, 0.05522592121156835, 0.050876403592097053, 0.054380509512050847, 0.053757843815619127, 0.051976556085402484, 0.054475893683103835, 0.049317011687453595, 0.054726489043890096, 0.055977707798473685, 0.053797011982869111, 0.056211217949664591, 0.058937590739764227, 0.056514080186320211, 0.057467528427759484, 0.053395308733689112, 0.058106821253760703, 0.053724959199888331, 0.054347115144120411]
timestep 0, worst 0.0580292726618, best 0.0543471151441
timestep 1, worst 0.0534910065684, best 0.0592492906305
timestep 2, worst 0.053523995951, best 0.0589779601739
timestep 3, worst 0.0545707818828, best 0.0592784964063
timestep 4, worst 0.0549789980235, best 0.0590489572875
timestep 5, worst 0.0551319889006, best 0.0593164175302
timestep 6, worst 0.0557626688545, best 0.0585201793828
timestep 7, worst 0.0560939189498, best 0.058367169735
timestep 8, worst 0.0556178270761, best 0.0585842721846
timestep 9, worst 0.0560935991957, best 0.0589856792811
ti

In [36]:
find_conditional_output.individuals[-1].cond_output

array([[[[[  5.27549717e-02,   5.96984285e-02,   2.50450767e-02,
             1.93755232e-01,   6.68746291e-01],
          [  5.43970359e-02,   5.88832513e-02,   2.75656314e-01,
             2.28175533e-01,   3.82887865e-01],
          [  1.94235000e-01,   4.43534712e-01,   5.93028144e-02,
             1.38165378e-01,   1.64762096e-01],
          [  8.40696842e-02,   4.43248474e-01,   3.69548888e-01,
             1.03132955e-01,   0.00000000e+00],
          [  3.07861542e-01,   1.23310639e-01,   2.53676454e-01,
             2.59918305e-01,   5.52330605e-02]],

         [[  5.55559606e-02,   2.39305363e-01,   6.37083814e-02,
             4.99744240e-01,   1.41686055e-01],
          [  8.15116567e-02,   0.00000000e+00,   6.56381942e-01,
             8.84831834e-02,   1.73623218e-01],
          [  2.92315372e-01,   2.05657780e-01,   2.23145469e-01,
             5.99089876e-02,   2.18972392e-01],
          [  3.70720804e-02,   7.14871071e-01,   4.00498180e-02,
             1.62844123e-02, 