### Find the maximum impacts for individual, local, synergistic and global nudges

In [2]:
%load_ext autoreload
%autoreload 2

In [13]:
import json

import numpy as np
from scipy import stats

import probability_distributions
import maximum_nudges
import evolutionary_algorithms as ea
import maximum_synergistic_nudge_evolutionary as synergistic_nudge_ev
import maximum_nudges_evolutionary as ev_max_nudges
import nudge_non_causal
import get_data

### Notebook level constants

In [14]:
NUDGE_SIZE = 0.01
PATH_TO_DISTRIBUTIONS = "/home/joboti/azumi_derkjan/master_thesis/code/"

### Define a function to find the maximum individual nudge impact exactly

Due to different definitions of nudge size used, the nudge size needs to be divided by 2.

In [15]:
def find_max_impact_individual_nudge_exactly(input_dist, cond_output, nudge_size, 
                                             without_conditional=True):
    new_input_dist = np.copy(input_dist)
    max_impacts = []
    if not without_conditional:
        for i in range(len(new_input_dist.shape)):
            new_input_dist = np.swapaxes(new_input_dist, i,
                                         len(new_input_dist.shape)-1)
            max_impact = maximum_nudges.find_maximum_local_nudge(
                new_input_dist, cond_output, nudge_size
            )
            max_impacts.append(max_impact)
            new_input_dist = np.swapaxes(new_input_dist, i,
                                         len(new_input_dist.shape)-1)

            return max(max_impacts)
    else:
        for i in range(len(new_input_dist.shape)):
            new_input_dist = np.swapaxes(new_input_dist, i,
                                         len(new_input_dist.shape)-1)
            max_impact = maximum_nudges.find_maximum_local_nudge_without_conditional(
                new_input_dist, cond_output, nudge_size
            )
            max_impacts.append(max_impact)
            new_input_dist = np.swapaxes(new_input_dist, i,
                                         len(new_input_dist.shape)-1)

            return max(max_impacts)
            
# max_impact = find_max_impact_individual_nudge_exactly(input_dist, cond_output, NUDGE_SIZE/2, True)
# print("the actual maximum individual nudge {}".format(max_impact))

### Find the maximum impact of a nudge given its type the input and conditional output and nudge size

The nudge sizes for individual and global are divided by 2 on purpose, since in their definitions its the 
amount subtracted AND added. 

In [20]:
def get_max_nudge_impacts(input_dists, cond_output_dists, nudge_size, nudge_type, 
                          backup_filename, parameters):
    """
    Find the maximum impact of the nudges
    
    Parameters:
    ----------
    input_dists: list of nd-arrays representing probability distributions
    cond_output_dists: list of nd-arrays 
        Representing output distributions (the last axis) conditioned on the input 
        distributions
    nudge_size: positive float
    nudge_type: string
        One of the following: {"individual", "local", "synergistic", "global"}
    filename_to_save: string, 
        Should be a valid path
    parameters: a dict,
        The parameters used to find the maximum nudge
    
    """
    impacts = []
    for input_dist, cond_output in zip(input_dists, cond_output_dists):
        if nudge_type == "individual":
            impact = find_max_impact_individual_nudge_exactly(
                input_dist, cond_output, nudge_size/2.0, True
            )
        elif nudge_type == "local":
            max_local_nudge = ev_max_nudges.find_maximum_local_nudge(
                input_dist, cond_output, nudge_size, 
                local_evolutionary_params, verbose=True
            )
            impact = max_local_nudge.score
            nudge_vectors = [
                weight*nudge.genes 
                for nudge, weight in zip(individual_nudges, max_local_nudge.weights)
            ]
            new_dist = nudge_non_causal.nudge_local(
                input_dist, [0, 1], 0.01, nudge_vectors 
            )
            l1_norm_to_old_distribution = np.sum(np.absolute(input_dist-new_dist))
            if abs(l1_norm_to_old_distribution-NUDGE_SIZE/2) > 10**(-7):
                print("WARNING size of nudge {}".format(l1_norm_to_old_distribution))
        elif nudge_type == "synergistic":
            max_synergistic_nudge = synergistic_nudge_ev.find_synergistic_nudge_with_max_impact(
                input_dist, cond_output, nudge_size, parameters
            )
            impact = max_synergistic_nudge.score
            new_distribution = max_synergistic_nudge.new_distribution
            l1_norm_to_old_distribution = np.sum(np.absolute(
                old_input-max_synergistic_nudge.new_distribution
            ))
            if abs(l1_norm_to_old_distribution-NUDGE_SIZE/2) > 10**(-7):
                print("WARNING size of nudge {}".format(l1_norm_to_old_distribution))
        elif nudge_type == "global":
            _, _, max_global_nudge_impact = maximum_nudges.find_max_control_impact(
                input_dist, cond_output, nudge_size/2.0
            )
            impact = max_global_nudge
        else:
            raise ValueError("provide a valid nudge type")
            
        print("the max nudge impact {}".format(impact))
        impacts.append(impact)
        with open(backup_filename, 'w') as f:
            json.dump(impacts, f, indent=4)
        
    return impacts
    

### Generate the data and run the experiments

#### First for system distributions with limited entropy

In [22]:
PERCENTAGE_MAX_ENTROPY_SIZE = 80
NUMBER_OF_VARS = 3
NUMBER_OF_STATES = 3
FILENAME_FORMAT_INPUT = "input_dist_{}.npy"
FILENAME_FORMAT_OUTPUT = "cond_output_dist_{}.npy"
DISTRIBUTION_NUMBERS = list(range(100, 200, 1))
NUDGE_TYPE = "individual"
PARAMETER_FILE = None

if PARAMETER_FILE is None:
    parameters = None
else:
    with open("parameters/" + PARAMETER_FILE) as f:
        parameters = json.load(f)

backup_filename = "data_experiments/" + "backup_impacts_{}var_{}states_{}entropy_{}_nudge.json".format(
    NUMBER_OF_VARS, NUMBER_OF_STATES, PERCENTAGE_MAX_ENTROPY_SIZE, NUDGE_TYPE 
) 
path_to_limited_entropy_system_dists = (
    PATH_TO_DISTRIBUTIONS + "system_distributions/" 
    + "limited_entropy/"
)

input_dists = get_data.get_system_distributions_limited_entropy(
    path_to_limited_entropy_system_dists, PERCENTAGE_MAX_ENTROPY_SIZE,
    NUMBER_OF_VARS, NUMBER_OF_STATES, FILENAME_FORMAT_INPUT, 
    DISTRIBUTION_NUMBERS
)
output_dists = get_data.get_system_distributions_limited_entropy(
    path_to_limited_entropy_system_dists, PERCENTAGE_MAX_ENTROPY_SIZE,
    NUMBER_OF_VARS, NUMBER_OF_STATES, FILENAME_FORMAT_OUTPUT, 
    DISTRIBUTION_NUMBERS
)

impacts = get_max_nudge_impacts(
    input_dists, output_dists, NUDGE_SIZE, NUDGE_TYPE, 
    backup_filename, parameters
)
print(impacts)
filename_to_save_impacts = "impacts_{}var_{}states_{}entropy_{}_nudge_system.json".format(
    NUMBER_OF_VARS, NUMBER_OF_STATES, PERCENTAGE_MAX_ENTROPY_SIZE, NUDGE_TYPE 
)
with open("data_experiments/" + filename_to_save_impacts, 'w') as f:
    json.dump(impacts, f)
    
    

the max nudge impact 0.00405837885364
the max nudge impact 0.00522247231153
the max nudge impact 0.00454143504345
the max nudge impact 0.00698028031265
the max nudge impact 0.00333925713173
the max nudge impact 0.00510392639182
the max nudge impact 0.0038747382935
the max nudge impact 0.00464276353648
the max nudge impact 0.00517459372791
the max nudge impact 0.00648762320279
the max nudge impact 0.00384299930553
the max nudge impact 0.0034349327467
the max nudge impact 0.0026777034916
the max nudge impact 0.00442127088361
the max nudge impact 0.00375507744756
the max nudge impact 0.00328404140466
the max nudge impact 0.0040231089851
the max nudge impact 0.00513704720349
the max nudge impact 0.00274230501379
the max nudge impact 0.00280720657811
the max nudge impact 0.00363511520061
the max nudge impact 0.00320795261961
the max nudge impact 0.00506070438164
the max nudge impact 0.00216072075642
the max nudge impact 0.00577400046948
the max nudge impact 0.00379987428687
the max nudge im

#### First generate a generic input distribution

In [6]:
#distribution parameters
input_variables = 2
number_of_states = 5

#generate both input and conditional output with Dirichlet weights
distribution_shape = [number_of_states]*input_variables
total_number_of_states = reduce(lambda x,y: x*y, distribution_shape)
input_dist = np.random.dirichlet([1]*total_number_of_states)
input_dist = np.reshape(input_dist, distribution_shape)
cond_shape = [number_of_states]*(input_variables+1)
cond_output = [
    probability_distributions.compute_joint_uniform_random((number_of_states,))
    for i in range(number_of_states**(input_variables))
]
cond_output = np.array(cond_output)
cond_output = np.reshape(cond_output, cond_shape)


#### load the generated input and conditional output distribution

In [7]:
PATH = "/home/joboti/azumi_derkjan/master_thesis/code/"


def generate_distributions(path_to_files, file_format, number_of_distributions):
    for i in range(number_of_distributions):
        file_name = path_to_files + file_format.format(i)
        with open(file_name, 'rb') as f:
            yield np.load(f)
            
def generate_input_and_conditional_output(input_type, parameters, cond_output_type="dirichlet"):
    """
    
    Parameters:
    ----------
    input_type: string in set {"dirichlet", "entropy_0.5", "entropy_0.75"}
    parameters: dict
    cond_output: 
    
    Returns: a dict with keys:
    -------
    number_of_var: a number
    number_of_states: a number
    input_dist: nd-array
    cond_output: nd-array
    
    """
    if input_type == "dirichlet":
        input_dirichlet_path = PATH + INPUT_FOLDER + DIRICHLET_FOLDER_INPUT
    elif input_type == "entropy_0.75":
        input_dirichlet_path = PATH + INPUT_FOLDER + ENTROPY_MEDIUM_FOLDER_INPUT
    elif input_type == "entropy_0.5":
        input_dirichlet_path = PATH + INPUT_FOLDER + ENTROPY_LOW_FOLDER_INPUT
    else:
        raise ValueError("supply valid input distribution type")

    cond_output_dirichlet_path = PATH + COND_OUTPUT_FOLDER + DIRICHLET_FOLDER_COND_OUTPUT

    min_inputs = parameters["min_number_inputs"]
    max_inputs = parameters["max_number_inputs"]
    number_of_states = parameters["number_of_states"]
    for number_of_var in range(min_inputs, max_inputs, 1):
        path_to_input_files = (
            input_dirichlet_path 
            + FOLDER_FORMAT_INPUT.format(number_of_var, number_of_states)
        )
        path_to_cond_output_files = (
            cond_output_dirichlet_path 
            + FOLDER_FORMAT_CONDITIONAL.format(number_of_var, number_of_states)
        )
        input_generator = generate_distributions(
            path_to_input_files, FILE_FORMAT_INPUT, 
            parameters["number_of_distributions"]
        )
        cond_output_generator = generate_distributions(
            path_to_cond_output_files, FILE_FORMAT_COND_OUTPUT, 
            parameters["number_of_distributions"]
        )
        input_shape = [number_of_states]*number_of_var
        cond_output_shape = [number_of_states]*(number_of_var+1)
        for sample in range(parameters["number_of_distributions"]):
            input_dist = next(input_generator)
            input_dist = np.reshape(input_dist, input_shape)
            cond_output = next(cond_output_generator)
            cond_output = np.reshape(cond_output, cond_output_shape)
            yield {
                "number_of_vars": number_of_var,
                "number_of_states": parameters["number_of_states"],
                "input_dist": input_dist,
                "cond_output": cond_output
            }
            