### Introduction

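This notebook estimates the impact of random nudges: for every input distribution it draws random nudges of a given type and size and averages their impact on the conditional output distribution.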

In [None]:
%load_ext autoreload
%autoreload 2

In [1]:
import json
import os

import numpy as np
from scipy import stats

import probability_distributions
import maximum_nudges
import evolutionary_algorithms as ea
import maximum_synergistic_nudge_evolutionary as synergistic_nudge_ev
import maximum_nudges_evolutionary as ev_max_nudges
import nudge
import nudge_non_causal
import get_data

### Notebook level constants

In [2]:
# Size of every nudge; the experiment code compares it with the L1 distance
# between the original and the nudged input distribution.
NUDGE_SIZE = 0.01

# Root directory of the stored distributions. The default is the original
# location; set the PATH_TO_DISTRIBUTIONS environment variable to run the
# notebook on another machine. os.path.join(..., "") guarantees the trailing
# separator that the path concatenations further down rely on.
PATH_TO_DISTRIBUTIONS = os.path.join(
    os.environ.get(
        "PATH_TO_DISTRIBUTIONS", "/home/joboti/azumi_derkjan/master_thesis/code/"
    ),
    ""
)

In [3]:
def calculate_amount_and_size_nudges(total_nudge_size, number_of_states, threshold=10):
    """
    Split a total nudge into a number of equally sized local nudges.

    A single local nudge is capped at 1/(threshold*number_of_states), i.e.
    the uniform probability 1/number_of_states divided by threshold. The
    total nudge is then divided over the smallest number of equal nudges
    that respect this cap.

    Parameters:
    ----------
    total_nudge_size: a positive number
        How much the variable needs to be nudged in total
    number_of_states: a positive number
        The total number of states of the joint distribution
    threshold: a positive float
        How many times smaller than the uniform probability the small states
        of the distribution are assumed to be. Defaults to 10

    Returns: local_nudge, number_of_nudges
    -------
    local_nudge: a number
        The size of the local nudge to be performed on the joint distribution
    number_of_nudges: integer
        How often the nudge needs to be performed

    Raises:
    ------
    ValueError
        If one of the arguments is not positive

    """
    if total_nudge_size <= 0:
        raise ValueError("total_nudge_size should be positive")
    if number_of_states <= 0:
        raise ValueError("number_of_states should be positive")
    if threshold <= 0:
        raise ValueError("threshold should be positive")

    # Fraction of the uniform probability that a state is assumed to have at
    # least. Previously this value was computed but a hard-coded 0.1 was used,
    # so the threshold argument had no effect.
    assumed_min_size = 1.0/threshold
    max_local_nudge = min(total_nudge_size, assumed_min_size/number_of_states)
    number_of_nudges = int(np.ceil(total_nudge_size/max_local_nudge))
    # float() keeps the division exact under Python 2 as well
    local_nudge = total_nudge_size/float(number_of_nudges)
    return local_nudge, number_of_nudges


### Find the impact of random nudges given the nudge type, the input and conditional output distributions, and the nudge size



In [4]:
def get_random_nudge_impacts(input_dists, cond_output_dists, nudge_size, nudge_type,
                             number_of_samples, backup_filename, parameters):
    """
    Estimate the mean impact of random nudges for every input distribution.

    For every pair of input distribution and conditional output distribution
    number_of_samples random nudges of the given type are drawn. The impact
    of each nudge is computed with nudge_non_causal.find_nudge_impact and the
    mean impact per distribution is recorded. After every distribution the
    mean impacts found so far are written to backup_filename, so that an
    interrupted run does not lose the finished distributions.

    Parameters:
    ----------
    input_dists: list of nd-arrays representing probability distributions
    cond_output_dists: list of nd-arrays
        Representing output distributions (the last axis) conditioned on the input
        distributions
    nudge_size: positive float
    nudge_type: string
        One of the following: {"individual", "focused", "local", "synergistic", "global"}
    number_of_samples: int
        The number of random nudges drawn per distribution
    backup_filename: string,
        Should be a valid path, the file is overwritten after every distribution
    parameters: a dict or None,
        The nudge type specific settings. The keys that are read are
        "threshold" for focused nudges, "nudged_vars" for local nudges and
        "max_number_of_mutations" and "mutation_size" for synergistic nudges.
        Not used for individual and global nudges.

    Returns: dist_impacts, missed_weight_dist
    -------
    dist_impacts: list of numbers
        The mean nudge impact for every distribution
    missed_weight_dist: a number
        The sum over the distributions of the mean absolute difference
        between nudge_size and the L1 distance between the nudged and the
        original input distribution

    Raises:
    ------
    ValueError
        If nudge_type is not one of the supported types

    """
    dist_impacts = []
    missed_weight_dist = 0
    for input_dist, cond_output in zip(input_dists, cond_output_dists):
        print("count {}".format(len(dist_impacts)))
        impacts = []
        missed_weights = []
        for _ in range(number_of_samples):
            new_dist = _sample_random_nudge(input_dist, nudge_size, nudge_type, parameters)
            l1_norm_to_old_distribution = np.sum(np.absolute(input_dist-new_dist))
            impacts.append(nudge_non_causal.find_nudge_impact(input_dist, new_dist, cond_output))
            # compare with the requested nudge size, not with a notebook
            # level constant, so the function works for every nudge_size
            missed_weights.append(abs(l1_norm_to_old_distribution-nudge_size))

        mean_impact = np.mean(impacts)
        mean_missed_weight = np.mean(missed_weights)
        print("the nudge impact {}".format(mean_impact))
        if mean_missed_weight > nudge_size/100:
            print("WARNING missed weight equals {} percentage of nudge_size".format(
                (mean_missed_weight/nudge_size) * 100
            ))

        missed_weight_dist += mean_missed_weight
        dist_impacts.append(mean_impact)
        # back up the results of all finished distributions; dumping only the
        # samples of the current distribution would throw away earlier work
        with open(backup_filename, 'w') as f:
            json.dump(dist_impacts, f, indent=4)

    print("total missed weight {}".format(missed_weight_dist))
    return dist_impacts, missed_weight_dist


def _sample_random_nudge(input_dist, nudge_size, nudge_type, parameters):
    """Draw one random nudge of the given type and return the nudged input distribution."""
    if nudge_type == "individual":
        return nudge_non_causal.nudge_individual_without_conditional(input_dist, nudge_size)

    if nudge_type == "focused":
        # NOTE(review): only half of nudge_size is handed to the helper,
        # presumably because a local nudge both removes and adds weight - confirm
        local_nudge_size, number_of_nudges = calculate_amount_and_size_nudges(
            nudge_size/2, input_dist.flatten().shape[0], threshold=parameters["threshold"]
        )
        num_vars = len(input_dist.shape)
        return nudge.nudge_distribution_local_non_causal(
            input_dist, num_vars-1, local_nudge_size, number_of_nudges
        )

    if nudge_type == "local":
        return nudge_non_causal.nudge_local(
            input_dist, nudged_vars=parameters["nudged_vars"], nudge_size=nudge_size,
            without_conditional=True
        )

    if nudge_type == "global":
        return nudge_non_causal.nudge_global(input_dist, nudge_size, without_conditional=True)

    if nudge_type == "synergistic":
        new_dist = np.copy(input_dist)
        reached_nudge_size = False
        for _mutation in range(parameters["max_number_of_mutations"]):
            # the return value is not used, so the mutation is presumably
            # applied to new_dist in place
            nudge_non_causal.synergistic_mutate(
                new_dist, parameters["mutation_size"]
            )
            new_nudge_size = np.sum(abs(new_dist-input_dist))
            adjustment_factor = nudge_size/new_nudge_size
            if adjustment_factor <= 1:
                # the mutations overshot (or exactly hit) the nudge size,
                # scale the total change back to nudge_size and stop
                new_dist = input_dist + (new_dist-input_dist)*adjustment_factor
                reached_nudge_size = True
                break

        if not reached_nudge_size:
            # also covers max_number_of_mutations == 0, where no adjustment
            # factor exists; report the achieved size as a real percentage
            achieved_nudge_size = np.sum(np.absolute(new_dist-input_dist))
            print("WARNING: nudge size only {} percent of intended nudge size".format(
                100.0*achieved_nudge_size/nudge_size
            ))
        return new_dist

    raise ValueError("provide a valid nudge type")


### Generate the data and run the experiments

#### First for system distributions with limited entropy

In [5]:
# Settings that select which stored distributions are used
PERCENTAGE_MAX_ENTROPY_SIZE = 80
NUMBER_OF_VARS = 3
NUMBER_OF_STATES = 3
FILENAME_FORMAT_INPUT = "input_dist_{}.npy"
FILENAME_FORMAT_OUTPUT = "cond_output_dist_{}.npy"
DIST_START = 100
DIST_END = 200
NUDGE_TYPE = "local"
NUMBER_OF_SAMPLES = 100

# Settings that are only read for some nudge types (see parameters below)
MAX_NUMBER_OF_MUTATIONS = 729
MUTATION_SIZE = NUDGE_SIZE/243
THRESHOLD = 50

distribution_numbers = list(range(DIST_START, DIST_END, 1))

# Collect the nudge type specific settings for get_random_nudge_impacts
if NUDGE_TYPE in ("global", "individual"):
    parameters = None
elif NUDGE_TYPE == "focused":
    parameters = {
        "threshold": THRESHOLD
    }
elif NUDGE_TYPE == "local":
    parameters = {
        "nudged_vars": list(range(NUMBER_OF_VARS-1))
    }
elif NUDGE_TYPE == "synergistic":
    parameters = {
        "max_number_of_mutations": MAX_NUMBER_OF_MUTATIONS,
        "mutation_size": MUTATION_SIZE
    }
else:
    # fail here instead of continuing with an undefined parameters variable,
    # or worse, one left over from an earlier run of this cell
    raise ValueError("unknown NUDGE_TYPE {}".format(NUDGE_TYPE))

# All result files share the same suffix that identifies the experiment
experiment_label = "{}var_{}states_{}entropy_{}_nudge_dists{}-{}.json".format(
    NUMBER_OF_VARS, NUMBER_OF_STATES, PERCENTAGE_MAX_ENTROPY_SIZE, NUDGE_TYPE, DIST_START, DIST_END
)
backup_filename = "data_experiments/" + "backup_random_impacts_" + experiment_label
filename_to_save_impacts = "random_impacts_" + experiment_label
filename_to_save_missed_weight = "missed_weight_" + experiment_label

path_to_limited_entropy_system_dists = (
    PATH_TO_DISTRIBUTIONS + "system_distributions/"
    + "limited_entropy/"
)

input_dists = get_data.get_system_distributions_limited_entropy(
    path_to_limited_entropy_system_dists, PERCENTAGE_MAX_ENTROPY_SIZE,
    NUMBER_OF_VARS, NUMBER_OF_STATES, FILENAME_FORMAT_INPUT,
    distribution_numbers
)
output_dists = get_data.get_system_distributions_limited_entropy(
    path_to_limited_entropy_system_dists, PERCENTAGE_MAX_ENTROPY_SIZE,
    NUMBER_OF_VARS, NUMBER_OF_STATES, FILENAME_FORMAT_OUTPUT,
    distribution_numbers
)

impacts, missed_weight = get_random_nudge_impacts(
    input_dists, output_dists, NUDGE_SIZE, NUDGE_TYPE,
    NUMBER_OF_SAMPLES, backup_filename, parameters
)

print(impacts)
with open("data_experiments/" + filename_to_save_impacts, 'w') as f:
    json.dump(impacts, f)

with open("data_experiments/" + filename_to_save_missed_weight, 'w') as f:
    json.dump(missed_weight, f)


count 0
the nudge impact 0.00260950803682
count 1
the nudge impact 0.00288127364742
count 2
the nudge impact 0.00286108902162
count 3
the nudge impact 0.00381372016215
count 4
the nudge impact 0.00264861565886
count 5
the nudge impact 0.00245969101078
count 6
the nudge impact 0.00381660733093
count 7
the nudge impact 0.00320456542814
count 8
the nudge impact 0.00248504479405
count 9
the nudge impact 0.00295525833911
count 10
the nudge impact 0.00342190866513
count 11
the nudge impact 0.00227414121394
count 12
the nudge impact 0.00222348462868
count 13
the nudge impact 0.00424841972471
count 14
the nudge impact 0.00254961660074
count 15
the nudge impact 0.00345126844952
count 16
the nudge impact 0.0027134135428
count 17
the nudge impact 0.00310245677466
count 18
the nudge impact 0.00283900020047
count 19
the nudge impact 0.00223640378659
count 20
the nudge impact 0.0023763120318
count 21
the nudge impact 0.00288934819318
count 22
the nudge impact 0.00357264672562
count 23
the nudge impa

In [None]:
# Sanity check: compare the entropy of the stored joint distributions with the
# entropy they were generated for. stats is imported in the imports cell at
# the top of the notebook.
PERCENTAGE_MAX_ENTROPY_SIZE = 75
input_dists = get_data.get_system_distributions_limited_entropy(
    path_to_limited_entropy_system_dists, PERCENTAGE_MAX_ENTROPY_SIZE,
    NUMBER_OF_VARS, NUMBER_OF_STATES, FILENAME_FORMAT_INPUT,
    distribution_numbers
)

output_dists = get_data.get_system_distributions_limited_entropy(
    path_to_limited_entropy_system_dists, PERCENTAGE_MAX_ENTROPY_SIZE,
    NUMBER_OF_VARS, NUMBER_OF_STATES, FILENAME_FORMAT_OUTPUT,
    distribution_numbers
)

for input_dist, cond_output in zip(input_dists, output_dists):
    joint = probability_distributions.compute_joint(
        input_dist, cond_output, set(range(NUMBER_OF_VARS-1))
    )
    number_of_joint_states = joint.flatten().shape[0]
    # derive the goal from the constant so both cannot get out of sync;
    # log2 of the number of states is the entropy of the uniform distribution
    goal_entropy = (PERCENTAGE_MAX_ENTROPY_SIZE/100.0)*np.log2(number_of_joint_states)
    print("goal entropy {}".format(goal_entropy))
    print(stats.entropy(joint.flatten(), base=2))