In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import numpy as np
import function_generation
import probability_distributions

### Define functions for loading input and conditional output distributions

In [None]:
# Root folder of the project; the data folders below are appended to it to
# build absolute paths.
PATH = "/home/derkjan/Documents/academics_UVA/master_thesis/code/"
# Top-level data folders (relative to PATH).
INPUT_FOLDER = "input_distributions/"
COND_OUTPUT_FOLDER = "conditional_output_distributions/"
# Sub-folder name templates, filled with (number of variables, number of states).
FOLDER_FORMAT_INPUT = "{}var_{}states/"
FOLDER_FORMAT_CONDITIONAL = "{}var_{}states/"
# File name templates, filled with the index of the distribution.
FILE_FORMAT_INPUT = "dist_{}.npy"
FILE_FORMAT_COND_OUTPUT = "cond_dist_{}.npy"

# Sub-folders that select a family of input distributions.
# NOTE(review): the names suggest Dirichlet-sampled inputs and inputs with a
# fixed (relative?) entropy of 0.5 / 0.75 -- confirm against the generator code.
DIRICHLET_FOLDER_INPUT = "dirichlet/"
ENTROPY_LOW_FOLDER_INPUT = "entropy_0.5/"
ENTROPY_MEDIUM_FOLDER_INPUT = "entropy_0.75/"

# Sub-folder for conditional output distributions.
# NOTE(review): presumably the Dirichlet-sampled family -- confirm.
DIRICHLET_FOLDER_COND_OUTPUT = "dirichlet/"


def generate_distributions(path_to_files, file_format, number_of_distributions):
    """Lazily load the arrays stored in files numbered 0 .. number_of_distributions - 1.

    Parameters
    ----------
    path_to_files : str
        Folder prefix. It is concatenated directly with the file name, so it
        has to end with a path separator.
    file_format : str
        File name template with one ``{}`` placeholder for the file index.
    number_of_distributions : int
        How many consecutive files to load, starting at index 0.

    Yields
    ------
    The object returned by ``np.load`` for each file, in index order.
    """
    candidate_paths = (
        path_to_files + file_format.format(index)
        for index in range(number_of_distributions)
    )
    for candidate_path in candidate_paths:
        with open(candidate_path, 'rb') as handle:
            distribution = np.load(handle)
            # Yield while the file is still open, exactly one array per file.
            yield distribution

def generate_distributions_smart(path_to_files, file_format, start_dist, end_dist):
    """Lazily load the arrays stored in files numbered start_dist .. end_dist - 1.

    Parameters
    ----------
    path_to_files : str
        Folder prefix. It is concatenated directly with the file name, so it
        has to end with a path separator.
    file_format : str
        File name template with one ``{}`` placeholder for the file index.
    start_dist : int
        Index of the first file to load (inclusive).
    end_dist : int
        Index at which loading stops (exclusive).

    Yields
    ------
    The object returned by ``np.load`` for each file, in index order.
    """
    for dist_index in range(start_dist, end_dist):
        source = path_to_files + file_format.format(dist_index)
        with open(source, 'rb') as stream:
            yield np.load(stream)
            
def get_input_dist_entropy_75(number_of_dists, number_of_var, start, end,
                              number_of_states=5):
    """Load input distributions from the ``entropy_0.75`` input folder.

    The files with indices ``start`` .. ``end - 1`` form the window of
    candidates; the first ``number_of_dists`` of them are loaded and each one
    is reshaped so that it has one axis of length ``number_of_states`` per
    variable.

    Parameters
    ----------
    number_of_dists : int
        Number of distributions to load.
    number_of_var : int
        Number of variables; selects the sub-folder and the number of axes.
    start : int
        Index of the first candidate file (inclusive).
    end : int
        Index at which the candidate window stops (exclusive).
    number_of_states : int, optional
        Number of states per variable; selects the sub-folder and the length
        of every axis. Defaults to 5, the value that used to be hard-coded.

    Returns
    -------
    list of np.ndarray
        ``number_of_dists`` arrays of shape ``[number_of_states] * number_of_var``.

    Raises
    ------
    ValueError
        If more distributions are requested than the window ``[start, end)``
        contains. Previously this surfaced as a bare ``StopIteration`` leaking
        out of ``next()``, which is easy to misread and can silently end an
        enclosing iteration.
    """
    available = len(range(start, end))
    if number_of_dists > available:
        raise ValueError(
            "requested {} distributions but the window [{}, {}) only holds {}"
            .format(number_of_dists, start, end, available)
        )

    path_to_input_files = (
        PATH + INPUT_FOLDER + ENTROPY_MEDIUM_FOLDER_INPUT
        + FOLDER_FORMAT_INPUT.format(number_of_var, number_of_states)
    )
    input_shape = [number_of_states] * number_of_var

    # Only the first `number_of_dists` files of the window are needed, so the
    # generator is asked for exactly those instead of being advanced by hand.
    input_generator = generate_distributions_smart(
        path_to_input_files, FILE_FORMAT_INPUT,
        start, start + number_of_dists
    )
    return [np.reshape(input_dist, input_shape) for input_dist in input_generator]


### Set up folder structure

In [None]:
# Number of states per variable and the largest number of input variables for
# which an output folder is prepared.
NUMBER_OF_STATES = 5
TOTAL_NUMBER_OF_VAR = 6

# Output tree: PATH / COND_OUTPUT_FOLDER / SPECIFIC_FOLDER_COND_OUTPUT / <n>var_<k>states
PATH = "/home/derkjan/Documents/academics_UVA/master_thesis/code/"
COND_OUTPUT_FOLDER = "conditional_output_distributions/"
SPECIFIC_FOLDER_COND_OUTPUT = "distance_optimization/"
FOLDER_FORMAT_CONDITIONAL = "{}var_{}states"
full_path_cond_output = PATH + COND_OUTPUT_FOLDER + SPECIFIC_FOLDER_COND_OUTPUT

# Create the folders under the absolute output path. They used to be created
# relative to the current working directory, while the results are written to
# `full_path_cond_output`, so saving failed whenever the kernel was not started
# in PATH. `makedirs` also creates the missing parent folders and
# `exist_ok=True` makes the cell safe to re-run.
os.makedirs(full_path_cond_output, exist_ok=True)

for number_of_variables in range(1, TOTAL_NUMBER_OF_VAR + 1):
    directory = os.path.join(
        full_path_cond_output,
        FOLDER_FORMAT_CONDITIONAL.format(number_of_variables, NUMBER_OF_STATES)
    )
    os.makedirs(directory, exist_ok=True)


### Do it separately for different numbers of input variables

In [None]:
# Number of conditional output distributions that the sampling loop below
# generates and saves (one file per sample).
NUMBER_OF_SAMPLES = 2

#### 4 input variables

In [None]:
# Settings handed to function_generation.get_cond_output_with_max_distance.
# NOTE(review): "number_of_input_distributions" presumably has to match
# `number_of_input_dists` below -- confirm and tie them together if so.
evolutionary_parameters = {
    "number_of_generations": 5,
    "population_size": 10,
    "number_of_children": 20, 
    "generational": True,
    "mutation_size": 2.5,
    "change_mutation_size": 0.5,
    "parent_selection_mode": "rank_exponential",
    "number_of_input_distributions": 200
}

number_of_vars = 4
number_of_input_dists = 200
# The first file index of each window is drawn uniformly from
# [0, number_of_start_offsets); with a window of 200 files the highest index
# that can be read is therefore 999.
# NOTE(review): presumably 1000 input files exist per folder -- confirm.
number_of_start_offsets = 800

input_shape = number_of_vars * [NUMBER_OF_STATES]
number_of_states_output = NUMBER_OF_STATES
goal_distance = 0.3

# Folder that receives the results of this configuration. It is created here
# so that saving does not depend on the working directory the notebook was
# started in or on the folder-setup cell having been run.
output_directory = os.path.join(
    full_path_cond_output,
    FOLDER_FORMAT_CONDITIONAL.format(number_of_vars, NUMBER_OF_STATES)
)
os.makedirs(output_directory, exist_ok=True)

for sample_number in range(NUMBER_OF_SAMPLES):
    print(sample_number)
    # Pick a random window of consecutive input distributions; its length is
    # derived from `number_of_input_dists` instead of repeating the literal 200.
    start_dist = np.random.choice(number_of_start_offsets)
    end_dist = start_dist + number_of_input_dists
    input_dists = get_input_dist_entropy_75(
        number_of_input_dists, number_of_vars, start_dist, end_dist
    )

    max_cond_output = function_generation.get_cond_output_with_max_distance(
        input_shape, number_of_states_output, goal_distance, 
        evolutionary_parameters, input_dists
    )
    cond_output = max_cond_output.cond_output

    # Save the conditional output distribution as cond_dist_<sample_number>.npy.
    file_name = FILE_FORMAT_COND_OUTPUT.format(sample_number)
    full_path = os.path.join(output_directory, file_name)
    with open(full_path, 'wb') as f:
        np.save(f, cond_output)

