### Introduction

This notebook sets out to produce distributions that are randomly sampled from the space of
distributions whose entropy is set to a fixed percentage of the maximum entropy.

In [1]:
import os
import numpy as np
from scipy import stats
import pickle
import probability_distributions
import limited_entropy_distributions as limited_entropy

In [2]:
# Root directory under which all generated distributions are stored.
# The original author's location stays the default, but it can be overridden
# through the LIMITED_ENTROPY_CODE_PATH environment variable so the notebook
# can run on another machine without editing this cell.
# os.path.join(..., "") guarantees the trailing separator that the plain
# string concatenations in the cells below rely on.
PATH = os.path.join(
    os.environ.get(
        "LIMITED_ENTROPY_CODE_PATH",
        "/home/joboti/azumi_derkjan/master_thesis/code/"
    ),
    ""
)

# Target entropy, as a percentage of the maximum entropy of the system.
PERCENTAGE_MAX_ENTROPY = 75

# Folder layout: PATH / TOP_FOLDER / entropy<percentage> / <n>var_<k>states /
TOP_FOLDER = "system_distributions/limited_entropy/"
FOLDER_PERCENTAGE_MAX_ENTROPY = "entropy{}/".format(PERCENTAGE_MAX_ENTROPY)
FOLDER_DISTRIBUTION_SHAPE_FORMAT = "{}var_{}states/"

# File holding one sampled joint distribution; formatted with the sample number.
FILENAME_DISTRIBUTION_FORMAT = "dist_{}_exact.npy"

# Joint distributions are generated for every variable count in
# [MIN_NUMBER_VARIABLES, MAX_NUMBER_VARIABLES]; every variable has
# NUMBER_OF_STATES states.
MIN_NUMBER_VARIABLES = 2
MAX_NUMBER_VARIABLES = 7
NUMBER_OF_STATES = 3

# Files holding the input marginal and the conditional output derived from a
# joint distribution; both are formatted with the sample number.
FILENAME_INPUT_FORMAT = "input_dist_exact_{}.npy"
FILENAME_COND_OUTPUT_FORMAT = "cond_output_dist_exact_{}.npy"

# NOTE(review): NUMBER_OF_SAMPLES is not referenced by any cell visible here;
# the loops below iterate over range(START_NUMBER_SAMPLES, END_NUMBER_SAMPLES).
NUMBER_OF_SAMPLES = 100
START_NUMBER_SAMPLES = 200
END_NUMBER_SAMPLES = 300

# Passed as the third argument of get_dist_percentage_max_entropy_exactly.
# NOTE(review): its exact meaning is defined in limited_entropy_distributions
# -- confirm there.
MAX_CHANGE = 10**(-3)



#### generate the folder structure (if needed)

In [3]:
# Collect every directory the notebook writes to, parents first, and then
# create the ones that do not exist yet.
required_directories = [
    PATH+TOP_FOLDER,
    PATH+TOP_FOLDER+FOLDER_PERCENTAGE_MAX_ENTROPY,
]

# One sub-folder per system size (number of variables).
for number_of_variables in range(MIN_NUMBER_VARIABLES, MAX_NUMBER_VARIABLES+1, 1):
    shape_folder = FOLDER_DISTRIBUTION_SHAPE_FORMAT.format(number_of_variables, NUMBER_OF_STATES)
    required_directories.append(PATH+TOP_FOLDER+FOLDER_PERCENTAGE_MAX_ENTROPY + shape_folder)

for directory in required_directories:
    if os.path.exists(directory):
        continue
    os.makedirs(directory)


In [4]:
import limited_entropy_distributions

# Quick sanity check on a small system: two variables with three states each,
# aiming for 80% of the maximum entropy.
sanity_shape = [3, 3]
sanity_fraction = 0.8

# Entropy (in bits) the sampled distribution is supposed to have.
print(np.log2(9) * sanity_fraction)

dist = limited_entropy_distributions.get_dist_percentage_max_entropy_exactly(
    sanity_shape, sanity_fraction, 10**(-3)
)

# The total probability mass, followed by the entropy actually obtained.
print(np.sum(dist))
flat_dist = dist.flatten()
print(stats.entropy(flat_dist, base=2))

2.53594000115
1.0
2.53515199519


In [5]:

# Sample joint distributions with (approximately) the goal entropy and store
# them on disk, one .npy file per sample and per system size.
# Set RUN to False to skip this (slow) cell when the files already exist.
RUN = True

# A sampled distribution is accepted only when its entropy deviates, in
# relative terms, by less than this much from the goal entropy.
RELATIVE_ENTROPY_TOLERANCE = 0.05

if RUN:
    for number_of_variables in range(MIN_NUMBER_VARIABLES, MAX_NUMBER_VARIABLES+1, 1):
        print("number of variables {}".format(number_of_variables))
        directory = (
            PATH + TOP_FOLDER + FOLDER_PERCENTAGE_MAX_ENTROPY
            + FOLDER_DISTRIBUTION_SHAPE_FORMAT.format(number_of_variables, NUMBER_OF_STATES)
        )
        dist_shape = [NUMBER_OF_STATES]*number_of_variables
        # The maximum entropy is log2(number of joint states); the goal is a
        # fixed percentage of it.
        goal_entropy = np.log2(NUMBER_OF_STATES**number_of_variables) * PERCENTAGE_MAX_ENTROPY/100.0
        print("goal entropy {}".format(goal_entropy))
        for sample_number in range(START_NUMBER_SAMPLES, END_NUMBER_SAMPLES, 1):
            # Rejection sampling: keep drawing until the entropy is close
            # enough to the goal. Uses the `limited_entropy` alias imported in
            # the first cell, like the cell that derives the conditionals.
            while True:
                dist = limited_entropy.get_dist_percentage_max_entropy_exactly(
                    dist_shape, PERCENTAGE_MAX_ENTROPY/100.0, MAX_CHANGE
                )
                dist_entropy = stats.entropy(dist.flatten(), base=2)
                if abs(dist_entropy/goal_entropy - 1) < RELATIVE_ENTROPY_TOLERANCE:
                    break

            print("sample number {} entropy {}".format(sample_number, dist_entropy))

            # save distribution
            file_name = FILENAME_DISTRIBUTION_FORMAT.format(sample_number)
            with open(directory+file_name, 'wb') as f:
                np.save(f, dist)


number of variables 2
goal entropy 2.53594000115
sample number 200 entropy 2.53576708211
sample number 201 entropy 2.53578856179
sample number 202 entropy 2.53496604259
sample number 203 entropy 2.53499024448
sample number 204 entropy 2.53443659891
sample number 205 entropy 2.53587001996
sample number 206 entropy 2.53521510378
sample number 207 entropy 2.53566411375
sample number 208 entropy 2.53588461035
sample number 209 entropy 2.53563066486
sample number 210 entropy 2.4420066301
sample number 211 entropy 2.53593402779
sample number 212 entropy 2.53240430704
sample number 213 entropy 2.5241955563
sample number 214 entropy 2.53545131298
sample number 215 entropy 2.535075607
sample number 216 entropy 2.53340015826
sample number 217 entropy 2.53491647197
sample number 218 entropy 2.53583757743
sample number 219 entropy 2.53358591582
sample number 220 entropy 2.53554174977
sample number 221 entropy 2.4894959878
sample number 222 entropy 2.53558983262
sample number 223 entropy 2.53587990

sample number 203 entropy 5.06859640498
sample number 204 entropy 5.07167192385
sample number 205 entropy 5.07171243283
sample number 206 entropy 5.07166906164
sample number 207 entropy 5.07101416457
sample number 208 entropy 5.06984159394
sample number 209 entropy 5.07067679852
sample number 210 entropy 5.07073604846
sample number 211 entropy 5.07114390481
sample number 212 entropy 5.07181339663
sample number 213 entropy 5.07153899495
sample number 214 entropy 5.07110026836
sample number 215 entropy 5.07013679569
sample number 216 entropy 5.07177264297
sample number 217 entropy 5.07114917181
sample number 218 entropy 5.07137185911
sample number 219 entropy 5.07063822349
sample number 220 entropy 5.06926286161
sample number 221 entropy 5.0713679979
sample number 222 entropy 5.06847095705
sample number 223 entropy 5.06690690261
sample number 224 entropy 5.06805886951
sample number 225 entropy 5.07150467642
sample number 226 entropy 5.07156043211
sample number 227 entropy 5.07117453649
s

sample number 206 entropy 7.60555427996
sample number 207 entropy 7.60771650395
sample number 208 entropy 7.60760331349
sample number 209 entropy 7.60540934685
sample number 210 entropy 7.60776691929
sample number 211 entropy 7.60753096683
sample number 212 entropy 7.60636595223
sample number 213 entropy 7.60763067604
sample number 214 entropy 7.6056340122
sample number 215 entropy 7.60677940863
sample number 216 entropy 7.60723536217
sample number 217 entropy 7.60747686112
sample number 218 entropy 7.60737172577
sample number 219 entropy 7.60646833682
sample number 220 entropy 7.60732188071
sample number 221 entropy 7.6075808773
sample number 222 entropy 7.60646472636
sample number 223 entropy 7.6072771581
sample number 224 entropy 7.60738173754
sample number 225 entropy 7.60703450839
sample number 226 entropy 7.60769889961
sample number 227 entropy 7.6070045849
sample number 228 entropy 7.60781741484
sample number 229 entropy 7.60767951272
sample number 230 entropy 7.60729705896
samp

#### From the joint distribution, take the input distribution (the marginal over the first N-1 axes) and the output (the Nth axis) conditioned on the input

In [6]:
# TEST: after each sample, rebuild the joint from the saved input marginal and
#       conditional output and compare it with the loaded joint.
# RUN:  guards the extraction loop further down in this cell.
TEST = True
RUN = True

# NOTE(review): presumably switches off progress printing inside the
# limited_entropy_distributions module -- confirm there.
limited_entropy.PRINT = False

# Settings handed to limited_entropy.get_dist_with_entropy (through
# produce_conditional_states) when a conditional output distribution has to be
# generated.
evolutionary_params_generate_conditional_output_stats = dict(
    number_of_generations=700,
    population_size=20,
    number_of_children=80,
    generational=False,
    mutation_method="step_wise_after",
    number_of_mutations=3,
    mutation_size=0.01,
    parent_selection_mode="rank_exponential",
    early_stopping_criterium=0.01,
)

def produce_conditional_states(entropy_size, evolution_params, number_of_states=NUMBER_OF_STATES):
    """Endlessly yield distributions whose entropy lies around `entropy_size`.

    Parameters
    ----------
    entropy_size : float
        Centre of the entropy range. Each yielded distribution is requested
        with an entropy drawn uniformly from
        [0.85*entropy_size, 1.15*entropy_size].
    evolution_params : dict
        Forwarded unchanged to limited_entropy.get_dist_with_entropy.
    number_of_states : int, optional
        Number of states of the generated distribution; defaults to
        NUMBER_OF_STATES.

    Yields
    ------
    Whatever limited_entropy.get_dist_with_entropy returns for the requested
    number of states and entropy.
    """
    while True:
        # Jitter the requested entropy by up to +/-15% so the generated
        # distributions do not all share exactly the same entropy.
        # NOTE(review): the jittered value is not clipped to the maximum
        # attainable entropy -- confirm get_dist_with_entropy copes with that.
        new_entropy_size = entropy_size + np.random.uniform(-0.15*entropy_size, 0.15*entropy_size)
        yield limited_entropy.get_dist_with_entropy(
            number_of_states, new_entropy_size,
            evolution_params, verbose=False
        )

# For every stored joint distribution: save the marginal over the input
# variables (all axes but the last) and the distribution of the output
# variable (the last axis) conditioned on the input.
if RUN:
    min_number_of_variables = max(MIN_NUMBER_VARIABLES, 2)
    for number_of_variables in range(min_number_of_variables, MAX_NUMBER_VARIABLES+1, 1):
        print("number of variables {}".format(number_of_variables))
        directory = (
            PATH + TOP_FOLDER + FOLDER_PERCENTAGE_MAX_ENTROPY
            + FOLDER_DISTRIBUTION_SHAPE_FORMAT.format(number_of_variables, NUMBER_OF_STATES)
        )
        dist_shape = [NUMBER_OF_STATES]*number_of_variables
        # The last axis is the output variable, all preceding axes are inputs.
        input_labels = set(range(number_of_variables-1))
        output_label = set([number_of_variables-1])
        for sample_number in range(START_NUMBER_SAMPLES, END_NUMBER_SAMPLES, 1):
            print("sample number {}".format(sample_number))
            # load the distribution
            file_name = FILENAME_DISTRIBUTION_FORMAT.format(sample_number)
            with open(directory+file_name, 'rb') as f:
                joint = np.load(f)

            # produce the marginal
            joint_dist = probability_distributions.ProbabilityArray(joint)
            input_dist = joint_dist.marginalize(input_labels)

            # save the marginal (representing the input distribution)
            file_name = FILENAME_INPUT_FORMAT.format(sample_number)
            with open(directory+file_name, 'wb') as f:
                np.save(f, input_dist)

            flat_input_dist = input_dist.flatten()
            print("number of input states with zero probability {}".format(
                flat_input_dist.shape[0]-np.count_nonzero(flat_input_dist)
            ))

            # produce the conditional output
            # For every input state with non-zero probability, take the slice
            # of the joint over the output axis; stats.entropy normalises it,
            # which gives the entropy of the output conditioned on that state.
            entropies = [stats.entropy(joint[tuple(state)], base=2) for state in np.argwhere(input_dist != 0)]
            average_entropy = np.mean(entropies)
            print(average_entropy)
            # The generator is handed to
            # find_conditional_accounting_for_zero_marginals below.
            # NOTE(review): presumably it supplies the conditional for input
            # states with zero marginal probability -- confirm in
            # probability_distributions.
            generator = produce_conditional_states(
                average_entropy, evolutionary_params_generate_conditional_output_stats
            )
            cond_output , mar_labels, cond_labels = joint_dist.find_conditional_accounting_for_zero_marginals(
                output_label, input_labels, generator
            )

            # save cond_output
            file_name = FILENAME_COND_OUTPUT_FORMAT.format(sample_number)
            with open(directory+file_name, 'wb') as f:
                np.save(f, cond_output)

            if TEST:
                # Consistency check: input marginal and conditional output
                # together must reproduce the original joint distribution.
                computed_joint = probability_distributions.compute_joint(input_dist, cond_output, cond_labels)
                if not np.allclose(computed_joint, joint):
                    raise ValueError(
                        "joint rebuilt from the input marginal and the conditional output "
                        "differs from the stored joint ({} variables, sample {})".format(
                            number_of_variables, sample_number
                        )
                    )
                
            


number of variables 2
sample number 200
number of input states with zero probability 0
1.00005059015
sample number 201
number of input states with zero probability 0
0.981237823469
sample number 202
number of input states with zero probability 0
0.963020676566
sample number 203
number of input states with zero probability 0
1.10271391173
sample number 204
number of input states with zero probability 0
1.06145741041
sample number 205
number of input states with zero probability 0
0.930167473679
sample number 206
number of input states with zero probability 0
1.06200674144
sample number 207
number of input states with zero probability 0
0.957207234523
sample number 208
number of input states with zero probability 0
0.915068297698
sample number 209
number of input states with zero probability 0
1.04139418811
sample number 210
number of input states with zero probability 0
1.04139751083
sample number 211
number of input states with zero probability 0
0.91806897775
sample number 212
number 

sample number 238
number of input states with zero probability 0
0.767359101673
sample number 239
number of input states with zero probability 0
0.930587189123
sample number 240
number of input states with zero probability 0
0.916043523643
sample number 241
number of input states with zero probability 0
0.782153003811
sample number 242
number of input states with zero probability 0
0.739939869868
sample number 243
number of input states with zero probability 0
0.908603781203
sample number 244
number of input states with zero probability 0
0.688617293699
sample number 245
number of input states with zero probability 0
0.725501205128
sample number 246
number of input states with zero probability 0
0.695475846871
sample number 247
number of input states with zero probability 0
1.02725097717
sample number 248
number of input states with zero probability 0
0.730427544602
sample number 249
number of input states with zero probability 0
0.808235301327
sample number 250
number of input states 

number of input states with zero probability 0
0.549375630814
sample number 254
number of input states with zero probability 0
0.570859578795
sample number 255
number of input states with zero probability 0
0.598881150633
sample number 256
number of input states with zero probability 0
0.589227969926
sample number 257
number of input states with zero probability 0
0.579814347618
sample number 258
number of input states with zero probability 0
0.549504179219
sample number 259
number of input states with zero probability 0
0.586421557965
sample number 260
number of input states with zero probability 0
0.513756799546
sample number 261
number of input states with zero probability 0
0.477492532625
sample number 262
number of input states with zero probability 0
0.640068418088
sample number 263
number of input states with zero probability 0
0.437575244361
sample number 264
number of input states with zero probability 0
0.501185427563
sample number 265
number of input states with zero probabi

number of input states with zero probability 0
0.474877787225
sample number 265
number of input states with zero probability 0
0.382445520011
sample number 266
number of input states with zero probability 0
0.450587195603
sample number 267
number of input states with zero probability 0
0.40003790249
sample number 268
number of input states with zero probability 0
0.371837809878
sample number 269
number of input states with zero probability 0
0.399393068116
sample number 270
number of input states with zero probability 0
0.344138565218
sample number 271
number of input states with zero probability 0
0.383027486818
sample number 272
number of input states with zero probability 0
0.456416962961
sample number 273
number of input states with zero probability 0
0.429463370803
sample number 274
number of input states with zero probability 0
0.448733308568
sample number 275
number of input states with zero probability 0
0.481621605748
sample number 276
number of input states with zero probabil

0.352827612707
sample number 272
number of input states with zero probability 0
0.399058224057
sample number 273
number of input states with zero probability 0
0.354438460537
sample number 274
number of input states with zero probability 0
0.356166773541
sample number 275
number of input states with zero probability 0
0.388186502136
sample number 276
number of input states with zero probability 0
0.395583771529
sample number 277
number of input states with zero probability 0
0.316838147383
sample number 278
number of input states with zero probability 0
0.386884197267
sample number 279
number of input states with zero probability 0
0.363018502147
sample number 280
number of input states with zero probability 0
0.388011798628
sample number 281
number of input states with zero probability 0
0.324210049509
sample number 282
number of input states with zero probability 0
0.311267340114
sample number 283
number of input states with zero probability 0
0.359791661612
sample number 284
number 

sample number 280
number of input states with zero probability 0
0.38450466363
sample number 281
number of input states with zero probability 0
0.394015103818
sample number 282
number of input states with zero probability 0
0.39839735995
sample number 283
number of input states with zero probability 0
0.394910361784
sample number 284
number of input states with zero probability 0
0.388590300429
sample number 285
number of input states with zero probability 0
0.385609390723
sample number 286
number of input states with zero probability 0
0.365880072502
sample number 287
number of input states with zero probability 0
0.387885198476
sample number 288
number of input states with zero probability 0
0.395625011444
sample number 289
number of input states with zero probability 0
0.390621737696
sample number 290
number of input states with zero probability 0
0.373612649309
sample number 291
number of input states with zero probability 0
0.382325369212
sample number 292
number of input states w