### Find the maximum impacts for individual, local, synergistic and global nudges

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
from scipy import stats

import probability_distributions
import maximum_nudges
import evolutionary_algorithms as ea
import maximum_nudges_evolutionary as ev_max_nudges

#### First generate a generic input distribution

In [None]:
# Distribution parameters shared by the cells below.
input_variables = 2
number_of_states = 5
nudge_size = 0.01

# Generate both input and conditional output with Dirichlet weights.
distribution_shape = [number_of_states]*input_variables
# np.prod replaces the former bare `reduce(...)` call: `reduce` is only a
# builtin on Python 2 and this notebook never imports it from functools, so
# the cell failed with a NameError on Python 3. int() keeps the result a
# plain Python integer, as the reduce version produced.
total_number_of_states = int(np.prod(distribution_shape))
# One flat Dirichlet sample over all joint states, folded into one axis per
# input variable.
input_dist = np.random.dirichlet([1]*total_number_of_states)
input_dist = np.reshape(input_dist, distribution_shape)
# The conditional output gets one extra axis of length number_of_states.
cond_shape = [number_of_states]*(input_variables+1)
# One call to compute_joint_uniform_random per joint input state.
cond_output = [
    probability_distributions.compute_joint_uniform_random((number_of_states,))
    for i in range(number_of_states**(input_variables))
]
cond_output = np.array(cond_output)
cond_output = np.reshape(cond_output, cond_shape)


#### Load the generated input and conditional output distributions

In [None]:
# Root directory under which the stored distributions live (machine-specific
# absolute path; adjust when running elsewhere).
PATH = "/home/derkjan/Documents/academics_UVA/master_thesis/code/"
# Top-level folders, relative to PATH, for the input and the conditional
# output distributions.
INPUT_FOLDER = "input_distributions/"
COND_OUTPUT_FOLDER = "conditional_output_distributions/"
# Sub-folder name templates, filled with (number of variables, number of states).
FOLDER_FORMAT_INPUT = "{}var_{}states/"
FOLDER_FORMAT_CONDITIONAL = "{}var_{}states/"
# File name templates, filled with the index of the distribution.
FILE_FORMAT_INPUT = "dist_{}.npy"
FILE_FORMAT_COND_OUTPUT = "cond_dist_{}.npy"

# Folders, relative to INPUT_FOLDER, for each supported input distribution type.
DIRICHLET_FOLDER_INPUT = "dirichlet/"
ENTROPY_LOW_FOLDER_INPUT = "entropy_0.5/"
ENTROPY_MEDIUM_FOLDER_INPUT = "entropy_0.75/"

# Folder, relative to COND_OUTPUT_FOLDER, for the conditional output distributions.
DIRICHLET_FOLDER_COND_OUTPUT = "dirichlet/"


def generate_distributions(path_to_files, file_format, number_of_distributions):
    """Lazily load numbered numpy files, one array per iteration.

    Parameters:
    ----------
    path_to_files: string, prefix that is put in front of every file name
    file_format: string, format template filled with the file index
    number_of_distributions: a number, files 0 .. number_of_distributions-1
        are loaded in that order

    Yields: whatever np.load returns for each file

    """
    file_names = (
        path_to_files + file_format.format(index)
        for index in range(number_of_distributions)
    )
    for current_file in file_names:
        # The yield stays inside the with-block so the handle is only
        # released once the consumer asks for the next item.
        with open(current_file, 'rb') as handle:
            yield np.load(handle)
            
def generate_input_and_conditional_output(input_type, parameters, cond_output_type="dirichlet"):
    """Yield stored input distributions paired with conditional outputs.

    For every number of input variables in
    range(min_number_inputs, max_number_inputs) the first
    number_of_distributions files of both kinds are loaded and reshaped so
    that every axis has length number_of_states.

    Parameters:
    ----------
    input_type: string in set {"dirichlet", "entropy_0.5", "entropy_0.75"}
        selects the folder the input distributions are read from
    parameters: dict with the keys "min_number_inputs", "max_number_inputs",
        "number_of_states" and "number_of_distributions"
    cond_output_type: string
        currently not used, the conditional outputs are always read from
        the dirichlet folder

    Yields: a dict with keys:
    -------
    number_of_var: a number
    number_of_states: a number
    input_dist: nd-array with number_of_var axes
    cond_output: nd-array with number_of_var+1 axes

    Raises:
    ------
    ValueError: if input_type is not one of the supported strings

    """
    if input_type not in ("dirichlet", "entropy_0.75", "entropy_0.5"):
        raise ValueError("supply valid input distribution type")

    folder_per_input_type = {
        "dirichlet": DIRICHLET_FOLDER_INPUT,
        "entropy_0.75": ENTROPY_MEDIUM_FOLDER_INPUT,
        "entropy_0.5": ENTROPY_LOW_FOLDER_INPUT,
    }
    input_root = PATH + INPUT_FOLDER + folder_per_input_type[input_type]
    cond_output_root = PATH + COND_OUTPUT_FOLDER + DIRICHLET_FOLDER_COND_OUTPUT

    first_var_count = parameters["min_number_inputs"]
    stop_var_count = parameters["max_number_inputs"]
    states = parameters["number_of_states"]
    # The upper bound is exclusive: max_number_inputs itself is not visited.
    for var_count in range(first_var_count, stop_var_count):
        input_folder = input_root + FOLDER_FORMAT_INPUT.format(var_count, states)
        cond_folder = (
            cond_output_root + FOLDER_FORMAT_CONDITIONAL.format(var_count, states)
        )
        sample_count = parameters["number_of_distributions"]
        stored_inputs = generate_distributions(
            input_folder, FILE_FORMAT_INPUT, sample_count
        )
        stored_cond_outputs = generate_distributions(
            cond_folder, FILE_FORMAT_COND_OUTPUT, sample_count
        )
        shape_of_input = [states]*var_count
        shape_of_cond_output = [states]*(var_count+1)
        for _ in range(sample_count):
            # Input first, conditional output second: the files are read in
            # the same order as they are consumed.
            shaped_input = np.reshape(next(stored_inputs), shape_of_input)
            shaped_cond_output = np.reshape(
                next(stored_cond_outputs), shape_of_cond_output
            )
            yield {
                "number_of_var": var_count,
                "number_of_states": parameters["number_of_states"],
                "input_dist": shaped_input,
                "cond_output": shaped_cond_output
            }
            

In [None]:
# Which stored distributions to read: the generator iterates the number of
# input variables over range(min_number_inputs, max_number_inputs), i.e. 1..5
# here, and loads number_of_distributions files for each.
parameters_distributions = {
    "max_number_inputs": 6,
    "min_number_inputs": 1,
    "number_of_states": 5,
    "number_of_distributions": 100
}

generator = generate_input_and_conditional_output(
    'dirichlet', parameters_distributions, cond_output_type="dirichlet"
)

# Exhaust the generator (this reads every requested file); `a` ends up
# holding the last yielded dict.
for dist_dict in generator:
    a = dist_dict

#### Define the parameters for the evolutionary algorithm to optimize the impact of a local nudge

In [None]:
# Local nudge optimization: global settings read by the cell that builds the
# initial population and runs FindMaximumLocalNudge.
number_of_generations = 400 
population_size = 15
number_of_children = 30 
generational = True 
# NOTE(review): mutation_size is not read by any cell visible in this notebook.
mutation_size = nudge_size/4
parent_selection_mode = "rank_exponential"
# Alternative setting, kept for quick switching:
#parent_selection_mode = None
mutation_size_weights = 0.025
# Both mutation sizes are defined relative to nudge_size.
start_mutation_size = nudge_size/10
change_mutation_size = start_mutation_size/10
# Map every input variable index to its number of states.
nudged_vars_to_states = {
    nudged_var:number_of_states for nudged_var in range(input_variables)
}


In [None]:


# Initial population: population_size individuals produced by
# LocalNudge.create_local_nudge, all with timestamp 0.
local_nudges = [
    ev_max_nudges.LocalNudge.create_local_nudge(
        nudged_vars_to_states, nudge_size, mutation_size_weights,
        start_mutation_size, change_mutation_size, timestamp=0
    )
    for _ in range(population_size)
]

# Evaluate every individual before sorting; the score of the first sorted
# individual is reported as the initial impact.
for individual in local_nudges:
    individual.evaluate(input_dist, cond_output)
initial_impact = ea.sort_individuals(local_nudges)[0].score
print("initial impact local nudge {}".format(initial_impact))

# Run the evolutionary search for number_of_generations generations.
find_max_local_nudge = ev_max_nudges.FindMaximumLocalNudge(
    input_dist, cond_output, nudge_size,
    generational, number_of_children, parent_selection_mode
)
max_local_nudge_individual = find_max_local_nudge.get_max_nudge(
    local_nudges, number_of_generations
)
found_impact = max_local_nudge_individual.score
print("the found max impact for a local nudge {}".format(found_impact))


In [None]:
def find_optimum_local_nudge(input_dist, cond_output, number_of_input_variables,
                             number_of_states, nudge_size, parameters):
    """Search for the local nudge with the largest impact.

    Builds a population of local nudges, evaluates it, runs
    FindMaximumLocalNudge on it and prints the initial and the final impact.

    Parameters:
    ----------
    input_dist: nd-array
    cond_output: nd-array, one axis more than input_dist
    number_of_input_variables: a number, not used inside this function
    number_of_states: a number, not used inside this function
    nudge_size: a number, passed on to the nudge creation and the optimizer
    parameters: dict with the keys "population_size",
        "nudged_vars_to_states", "mutation_size_weights",
        "start_mutation_size", "change_mutation_size", "generational",
        "number_of_children", "parent_selection_mode" and
        "number_of_generations"

    Returns: the score of the individual returned by get_max_nudge

    """
    # Create the complete population first and only evaluate afterwards, so
    # the creation calls happen back to back.
    population = [
        ev_max_nudges.LocalNudge.create_local_nudge(
            parameters["nudged_vars_to_states"], nudge_size,
            parameters["mutation_size_weights"],
            parameters["start_mutation_size"],
            parameters["change_mutation_size"], timestamp=0
        )
        for _ in range(parameters["population_size"])
    ]
    for individual in population:
        individual.evaluate(input_dist, cond_output)

    best_at_start = ea.sort_individuals(population)[0]
    initial_impact = best_at_start.score

    # Hand the evaluated population to the evolutionary optimizer.
    optimizer = ev_max_nudges.FindMaximumLocalNudge(
        input_dist, cond_output, nudge_size,
        parameters["generational"], parameters["number_of_children"],
        parameters["parent_selection_mode"]
    )
    best_individual = optimizer.get_max_nudge(
        population, parameters["number_of_generations"]
    )
    max_impact = best_individual.score
    print("local nudge: initial impact {} max impact {}".format(initial_impact, max_impact))
    return max_impact





In [None]:
# Sanity check of the shapes of the distributions currently in memory.
print(input_dist.shape)
print(cond_output.shape)

# Sets a module-level flag on the evolutionary nudge module.
# NOTE(review): presumably enables extra checks there -- confirm in the module.
ev_max_nudges.TEST = True

# Settings for one run of find_optimum_local_nudge on a 2-variable, 5-state
# system. "mutation_size" is not read by find_optimum_local_nudge.
parameters = {
    "number_of_generations": 1000,
    "population_size": 25,
    "number_of_children": 55,
    "generational": True,
    "mutation_size": nudge_size/4,
    "parent_selection_mode": None,
    "mutation_size_weights": 0.025,
    "start_mutation_size": nudge_size/10,
    "nudged_vars_to_states": {
        nudged_var:5 for nudged_var in range(2)
    }
}
# Added afterwards because it is derived from start_mutation_size.
parameters["change_mutation_size"] = parameters["start_mutation_size"]/7

max_impact = find_optimum_local_nudge(
    input_dist, cond_output, 2, 5, nudge_size=0.01, parameters=parameters
)


In [None]:
# Local nudge optimization over a batch of stored distributions.
number_of_input_variables = 4
number_of_states = 5
nudge_size = 0.01

# Settings for the evolutionary algorithm; the mutation sizes are defined
# relative to nudge_size. "mutation_size" is not read by
# find_optimum_local_nudge.
parameters = {
    "number_of_generations": 300, 
    "population_size": 15,
    "number_of_children": 30, 
    "generational": True,
    "mutation_size": nudge_size/4,
    "parent_selection_mode": "rank_exponential",
    "mutation_size_weights": 0.025,
    "start_mutation_size": nudge_size/10,
    # Placeholder mapping; it is replaced for every distribution in the loop
    # below so that it matches that distribution's number of variables.
    "nudged_vars_to_states": {
        nudged_var:number_of_states for nudged_var in range(number_of_input_variables)
    }
}
# Added afterwards because it is derived from start_mutation_size.
parameters["change_mutation_size"] = parameters["start_mutation_size"]/10

# Which stored distributions to read: the number of input variables runs over
# range(min_number_inputs, max_number_inputs), i.e. 2..5 here.
parameters_distributions = {
    "max_number_inputs": 6,
    "min_number_inputs": 2,
    "number_of_states": 5,
    "number_of_distributions": 10
}

generator = generate_input_and_conditional_output(
    'dirichlet', parameters_distributions, cond_output_type="dirichlet"
)

for dist_dict in generator:
    print(dist_dict["input_dist"].shape)
    print(dist_dict["cond_output"].shape)
    # Nudge every variable of the current distribution.
    parameters["nudged_vars_to_states"] = {
        nudged_var:dist_dict["number_of_states"] for nudged_var in range(dist_dict["number_of_var"])
    }
    # Pass the nudge_size variable instead of repeating the literal 0.01, so
    # the nudge size cannot drift away from the mutation sizes above, which
    # are computed from that same variable.
    max_impact = find_optimum_local_nudge(
        dist_dict["input_dist"], dist_dict["cond_output"], 
        dist_dict["number_of_var"], dist_dict["number_of_states"],
        nudge_size=nudge_size, parameters=parameters
    )



In [None]:
# Scratch cell for inspecting raw generator output.
# NOTE(review): input_generator and cond_output_generator are not defined at
# top level in any cell visible here (only as locals inside
# generate_input_and_conditional_output), so this cell relies on earlier
# interactive state -- confirm or define them before running.
for distribution in input_generator:
    print(stats.entropy(distribution))

# The hard-coded shape has six axes of length 5.
# NOTE(review): presumably 5 input variables plus the output axis -- confirm.
for cond_dist in cond_output_generator:
    print(np.reshape(cond_dist, [5,5,5,5,5, 5]).shape)

#### Find the maximum impact of an individual nudge with the evolutionary algorithm

#### Find the maximum impact of an individual nudge exactly

In [None]:
# Run the exact optimization once per input variable: each variable is moved
# to the last axis in turn and the largest impact over all variables is kept.
# NOTE(review): cond_output is passed unchanged while the input axes are
# swapped, and nudge_size is halved -- confirm both are intended.
new_input_dist = np.copy(input_dist)
last_axis = new_input_dist.ndim - 1
max_impacts = []
for i in range(new_input_dist.ndim):
    # Each swapped view is taken from the untouched copy, so nothing has to
    # be swapped back after the call.
    swapped_input_dist = np.swapaxes(new_input_dist, i, last_axis)
    max_impact = maximum_nudges.find_maximum_local_nudge(
        swapped_input_dist, cond_output, nudge_size/2
    )
    max_impacts.append(max_impact)

best_individual_impact = max(max_impacts)
print("the actual maximum individual nudge {}".format(best_individual_impact))