In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import json
import os

import numpy as np
from scipy import stats

import probability_distributions
import maximum_nudges
import evolutionary_algorithms as ea
import maximum_nudges_evolutionary as ev_max_nudges

In [None]:
# Nudge size handed to the evolutionary search further down. The mutation
# sizes in `evolutionary_parameters` are derived from it, and the L1 distance
# between the nudged and the original input is checked against NUDGE_SIZE/2.
NUDGE_SIZE = 0.01

#### Generate the input distributions

In [None]:
# Root folder that holds the stored distribution files. The original location
# stays the default, but it can be overridden with the NUDGE_DATA_PATH
# environment variable so the notebook is not tied to a single machine.
# os.path.join(..., "") guarantees a trailing separator, which the plain
# string concatenation used to build file names relies on.
PATH = os.path.join(
    os.environ.get(
        "NUDGE_DATA_PATH",
        "/home/derkjan/Documents/academics_UVA/master_thesis/code/"
    ),
    ""
)

# Sub-folders (relative to PATH) for the two kinds of stored distributions.
INPUT_FOLDER = "input_distributions/"
COND_OUTPUT_FOLDER = "conditional_output_distributions/"

# Folder name per (number of variables, number of states) combination.
FOLDER_FORMAT_INPUT = "{}var_{}states/"
FOLDER_FORMAT_CONDITIONAL = "{}var_{}states/"

# File name per distribution index.
FILE_FORMAT_INPUT = "dist_{}.npy"
FILE_FORMAT_COND_OUTPUT = "cond_dist_{}.npy"

# One sub-folder per supported input distribution type.
DIRICHLET_FOLDER_INPUT = "dirichlet/"
ENTROPY_LOW_FOLDER_INPUT = "entropy_0.5/"
ENTROPY_MEDIUM_FOLDER_INPUT = "entropy_0.75/"

# Sub-folder for the (only) conditional output distribution type.
DIRICHLET_FOLDER_COND_OUTPUT = "dirichlet/"


def generate_distributions(path_to_files, file_format, number_of_distributions):
    """Lazily load the files numbered 0 up to number_of_distributions-1.

    Parameters:
    ----------
    path_to_files: string that is prepended verbatim to every file name
        (plain concatenation, so it should end with a path separator)
    file_format: string with one format placeholder that receives the index
    number_of_distributions: a number, how many files are read

    Yields: the result of np.load for every file, in index order
    -------
    """
    file_names = (
        path_to_files + file_format.format(index)
        for index in range(number_of_distributions)
    )
    for file_name in file_names:
        with open(file_name, 'rb') as handle:
            yield np.load(handle)
            
def generate_distributions_smart(path_to_files, file_format, start_dist, end_dist):
    """Lazily load the files numbered start_dist up to end_dist-1.

    Parameters:
    ----------
    path_to_files: string that is prepended verbatim to every file name
        (plain concatenation, so it should end with a path separator)
    file_format: string with one format placeholder that receives the index
    start_dist: a number, index of the first file to read
    end_dist: a number, index one past the last file to read

    Yields: the result of np.load for every file, in index order
    -------
    """
    for dist_index in range(start_dist, end_dist):
        numbered_name = file_format.format(dist_index)
        with open(path_to_files + numbered_name, 'rb') as npy_file:
            distribution = np.load(npy_file)
            yield distribution

def generate_input_and_conditional_output(input_type, parameters, cond_output_type="dirichlet"):
    """Yield matching input and conditional output distributions from disk.

    For every number of input variables in
    range(parameters["min_number_inputs"], parameters["max_number_inputs"])
    (the upper bound is exclusive) the files numbered parameters["start"] up
    to, but not including, parameters["end"] are loaded and reshaped.

    Parameters:
    ----------
    input_type: string in set {"dirichlet", "entropy_0.5", "entropy_0.75"}
    parameters: dict with the keys "min_number_inputs", "max_number_inputs",
        "number_of_states", "start" and "end"
    cond_output_type: string, only "dirichlet" is supported

    Yields: a dict with keys:
    -------
    number_of_vars: a number
    number_of_states: a number
    input_dist: nd-array with number_of_vars axes of length number_of_states
    cond_output: nd-array with number_of_vars+1 axes of length number_of_states

    Raises:
    ------
    ValueError: if input_type or cond_output_type is not supported. Since
        this is a generator, the error surfaces on the first iteration.

    """
    # Only dirichlet conditional outputs are stored; reject anything else
    # instead of silently returning dirichlet data under another label.
    if cond_output_type != "dirichlet":
        raise ValueError("supply valid conditional output distribution type")

    if input_type == "dirichlet":
        input_type_folder = DIRICHLET_FOLDER_INPUT
    elif input_type == "entropy_0.75":
        input_type_folder = ENTROPY_MEDIUM_FOLDER_INPUT
    elif input_type == "entropy_0.5":
        input_type_folder = ENTROPY_LOW_FOLDER_INPUT
    else:
        raise ValueError("supply valid input distribution type")

    input_path = PATH + INPUT_FOLDER + input_type_folder
    cond_output_path = PATH + COND_OUTPUT_FOLDER + DIRICHLET_FOLDER_COND_OUTPUT

    min_inputs = parameters["min_number_inputs"]
    max_inputs = parameters["max_number_inputs"]
    number_of_states = parameters["number_of_states"]
    start, end = parameters["start"], parameters["end"]

    for number_of_var in range(min_inputs, max_inputs):
        input_generator = generate_distributions_smart(
            input_path + FOLDER_FORMAT_INPUT.format(number_of_var, number_of_states),
            FILE_FORMAT_INPUT, start, end
        )
        cond_output_generator = generate_distributions_smart(
            cond_output_path
            + FOLDER_FORMAT_CONDITIONAL.format(number_of_var, number_of_states),
            FILE_FORMAT_COND_OUTPUT, start, end
        )
        input_shape = [number_of_states]*number_of_var
        # the conditional output has one extra axis on top of the input axes
        cond_output_shape = [number_of_states]*(number_of_var+1)
        # both generators cover the same index range, so zip pairs file i of
        # the inputs with file i of the conditional outputs
        for input_dist, cond_output in zip(input_generator, cond_output_generator):
            yield {
                "number_of_vars": number_of_var,
                "number_of_states": number_of_states,
                "input_dist": np.reshape(input_dist, input_shape),
                "cond_output": np.reshape(cond_output, cond_output_shape)
            }


In [None]:
# Sanity check: print the entropy of every stored "entropy_0.75" input
# distribution with file index 250..349, for 2 up to 6 input variables.
parameters_distributions = dict(
    max_number_inputs=7,
    min_number_inputs=2,
    number_of_states=5,
    start=250,
    end=350,
    number_of_distributions=100,
)

generator = generate_input_and_conditional_output(
    "entropy_0.75",
    parameters_distributions,
    cond_output_type="dirichlet",
)

for dist_dict in generator:
    # stats.entropy expects a 1-d probability vector, hence the flatten
    print(stats.entropy(dist_dict["input_dist"].flatten()))

### Define evolutionary parameters and parameters for the distributions

In [None]:
# Switch off the TEST flag of the maximum_nudges_evolutionary module.
ev_max_nudges.TEST = False

# Settings handed to ev_max_nudges.find_synergistic_nudge_with_max_impact;
# both mutation sizes are expressed as a fraction of NUDGE_SIZE.
evolutionary_parameters = dict(
    number_of_generations=200,
    population_size=12,
    number_of_children=24,
    generational=True,
    parent_selection_mode="rank_exponential",
    start_mutation_size=NUDGE_SIZE / 10,
    change_mutation_size=NUDGE_SIZE / 100,
    mutations_per_update_step=10,
)

# Selects which stored distributions are used for the run: file indices
# start..end-1 for min_number_inputs..max_number_inputs-1 input variables.
parameters_distributions = dict(
    max_number_inputs=7,
    min_number_inputs=2,
    number_of_states=5,
    start=295,
    end=300,
    number_of_distributions=5,
)


In [None]:

generator = generate_input_and_conditional_output(
    "entropy_0.75", parameters_distributions, cond_output_type="dirichlet"
)

# Set RUN to False to skip the evolutionary search and instead reuse the
# results that an earlier run stored in FILE_NAME.
RUN = True
FILE_NAME = "max_impact_synergistic_nudges_entropy0.75.json"
# Intermediate results are written to disk after every SAVE_EVERY samples.
SAVE_EVERY = 5

if RUN:
    # maps the number of input variables to the list of maximum impacts found
    max_synergistic_impact_dict = {}
    prev_number_of_vars = -1
    for count, dist_dict in enumerate(generator):
        number_of_vars = dist_dict["number_of_vars"]
        input_dist = dist_dict["input_dist"]
        # Keep an untouched copy to measure the nudge against afterwards
        # (whether the search modifies its input is not visible from here).
        old_input = np.copy(input_dist)
        if number_of_vars != input_dist.ndim:
            print("WARNING in sample {} input dist has weird distribution".format(count))
        if number_of_vars != prev_number_of_vars:
            prev_number_of_vars = number_of_vars
            print("the number of vars {}".format(number_of_vars))

        max_synergistic_nudge = ev_max_nudges.find_synergistic_nudge_with_max_impact(
            input_dist, dist_dict["cond_output"], NUDGE_SIZE,
            evolutionary_parameters
        )
        max_impact = max_synergistic_nudge.score
        print("the max nudge impact {}".format(max_impact))

        # Warn when the nudge that was found is less than half the allowed size.
        l1_norm_to_old_distribution = np.sum(np.absolute(
            old_input - max_synergistic_nudge.new_distribution
        ))
        if l1_norm_to_old_distribution < NUDGE_SIZE/2:
            print("WARNING size of nudge {}".format(l1_norm_to_old_distribution))

        max_synergistic_impact_dict.setdefault(number_of_vars, []).append(max_impact)

        # checkpoint so that an interrupted run does not lose everything
        if (count + 1) % SAVE_EVERY == 0:
            with open(FILE_NAME, 'w') as f:
                json.dump(max_synergistic_impact_dict, f, indent=4)

    with open(FILE_NAME, 'w') as f:
        json.dump(max_synergistic_impact_dict, f, indent=4)
else:
    # Without this branch the print below raises a NameError on a fresh
    # kernel. Note that JSON object keys are strings, so the number of
    # variables comes back as str instead of int.
    with open(FILE_NAME, 'r') as f:
        max_synergistic_impact_dict = json.load(f)

print(max_synergistic_impact_dict)
