In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import random
import keras
import time
import numpy as np
import multiprocessing as mp

from keras.optimizers import SGD, Adam
from keras.models import Sequential
from keras.layers import Dense

import utils_quadform as utils
from utils_quadform import bits_to_stab

In [17]:
# Problem instance; the cells below read these names as notebook-level globals.
n_qubits = 2
chi = 2
# Single-qubit target written as a 2x1 column: [cos(pi/8), sin(pi/8)]^T.
H = [[np.cos(np.pi/8)],[np.sin(np.pi/8)]]
# n_qubits copies of H combined by utils.tensor
# (presumably a tensor/Kronecker product -- confirm in utils_quadform).
target = utils.tensor([H]*n_qubits)
# NOTE(review): the visible bits_to_stab calls pass real=True literally and never read this flag.
real = True

In [32]:
# Scratch check: building an array from float literals with dtype=int gives an integer ndarray.
x = np.array([1.0, 2.0], dtype=int)
x

array([1, 2])

In [38]:
# Show the container type of x (the recorded output of this cell is `numpy.ndarray`).
type(x)

numpy.ndarray

In [44]:
# Length of the random test word: (1.5*n_qubits^2 + 1.5*n_qubits) * chi.
# presumably the input length bits_to_stab expects with real=True -- TODO confirm in utils_quadform
m = (1.5*np.power(n_qubits,2) + 1.5*n_qubits)*chi
# m is a float, hence the int() cast; the bits come from the global NumPy RNG (no seed is set in the visible cells).
x = [np.random.randint(2) for dummy in range(int(m))]
print('x =',np.array(x), '\n')
print('stabilizer(x) = ', bits_to_stab(x,n_qubits,chi,real=True))

x = [1 1 1 0 0 1 1 1 0 0 0 0 0 1 1 1 1 0] 

k= 2
D =  [4 4]
vecx [0, 0]
qx 0.0
vecx [0, 1]
qx 4.0
vecx [1, 0]
qx 4.0
vecx [1, 1]
qx 0.0
k= 1
D =  [4]
vecx [0]
qx 0.0
vecx [1]
qx 4.0
stabilizer(x) =  (array([ 0.5+0.000000e+00j, -0.5+6.123234e-17j, -0.5+6.123234e-17j,
        0.5+0.000000e+00j]), array([ 0.70710678+0.00000000e+00j,  0.        +0.00000000e+00j,
        0.        +0.00000000e+00j, -0.70710678+8.65956056e-17j]))


In [51]:
# Hand-picked 18-bit word, written as two 9-bit halves
# (presumably one half per stabilizer state, since chi = 2 -- TODO confirm against bits_to_stab).
x = [0, 0, 1, 1, 0, 0, 0, 0, 0]+[0, 0, 1, 0, 0, 1, 0, 0, 1]
print(len(x))
# `basis` is reused by the projector/score cell that follows.
basis = bits_to_stab(x,n_qubits,chi,real=True)
print(basis)

18
(array([0.70710678+0.j, 0.        +0.j, 0.        +0.j, 0.70710678+0.j]), array([ 0.5+0.000000e+00j,  0.5+0.000000e+00j,  0.5+0.000000e+00j,
       -0.5+6.123234e-17j]))


In [50]:
# Score the hand-built basis: norm of the target after applying the projector built from `basis`.
# NOTE(review): needs `basis` from the bits_to_stab cell, whose execution count is later than
# this cell's -- on a fresh kernel run that cell first.
projector = utils.orthogonal_projector(basis)
score = np.linalg.norm(projector*target) # note that this * is okay as working with matrix objects
print(score)

0.9999999999999999


In [None]:
# Wagner code: neural net
# Monte Carlo tree search: [0 0 1 0 1 *]
# Combinations + Monte Carlo tree search

In [None]:
# sanity check stabilizer_to_state
# allz_check_matrix = np.hstack((np.zeros([n_qubits,n_qubits]),np.eye(n_qubits)))
# allx_check_matrix = np.hstack((np.eye(n_qubits),np.zeros([n_qubits,n_qubits])))
# stabilizer_to_state(allz_check_matrix, np.zeros(n_qubits), n_qubits)

In [10]:
# Daochen: would be easier if could enforce the use of combinations
# MYN = int(2**n)  #Original setting: a Boolean function on n bits, i.e. a 0-1 word of length 2^n

# Length of the 0-1 word the network generates: chi * n_qubits * (2*n_qubits + 1) bits.
MYN = int(chi*n_qubits*(2*n_qubits+1))

# LEARNING_RATE = 0.0001 #Increase this to make convergence faster, decrease if the algorithm gets stuck in local optima too often.
LEARNING_RATE = 0.00001
n_sessions = 1000 #number of new sessions per iteration
# default 93, 94 respectively
percentile = 93 #top 100-X percentile we are learning from
super_percentile = 94 #top 100-X percentile that survives to next iteration

# These are hyperparameters: widths of the three hidden layers.
# Clamped to at least 1 so that a small MYN (< 8) cannot request a zero-width Dense layer.
FIRST_LAYER_NEURONS = max(1, MYN // 2)
SECOND_LAYER_NEURONS = max(1, MYN // 4)
THIRD_LAYER_NEURONS = max(1, MYN // 8)

observation_space = 2*MYN

# Leave this at 2*MYN. The input vector will have size 2*MYN,
# where the first MYN letters encode our partial word (with zeros on
# the positions we haven't considered yet), and the next MYN bits one-hot encode which letter we are considering now.
# So e.g. [0,1,0,0,   0,0,1,0] means we have the partial word 01 and we are considering the third letter now.
# Is there a better way to format the input to make it easier for the neural network to understand things?

# Daochen: why is len_game = MYN
# (generate_session writes one letter per step and stops when step == MYN, so a game lasts MYN steps.)
len_game = MYN
# Magnitude of the penalty calc_score returns for words that do not decode to a basis.
INF = 1000000

#Model structure: a sequential network with three hidden layers, sigmoid activation in the output.
#I usually used relu activation in the hidden layers but play around to see what activation function and what optimizer works best.
#It is important that the loss is binary cross-entropy if alphabet size is 2.

model = Sequential()
model.add(Dense(FIRST_LAYER_NEURONS,  activation="relu"))
model.add(Dense(SECOND_LAYER_NEURONS, activation="relu"))
model.add(Dense(THIRD_LAYER_NEURONS, activation="relu"))
model.add(Dense(1, activation="sigmoid"))
model.build((None, observation_space))
#Adam with a small learning rate is used here; SGD is imported as the alternative optimizer to try.
model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate = LEARNING_RATE))

# summary() prints the table itself and returns None, so it must not be wrapped in print().
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 31)                3937      
_________________________________________________________________
dense_1 (Dense)              (None, 15)                480       
_________________________________________________________________
dense_2 (Dense)              (None, 7)                 112       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 8         
Total params: 4,537
Trainable params: 4,537
Non-trainable params: 0
_________________________________________________________________
None


In [11]:
def calc_score(state):
    """
    Score the word stored at the front of a finished game state.

    The first MYN entries of `state` are decoded with bitarray_to_basis(word, n_qubits, chi).
    For a decodable word the score is the norm of utils.orthogonal_projector(basis) * target.

    :param state: sequence whose first MYN entries are the 0-1 word built by the network.

    :returns: a pair holding the same value twice (generate_session unpacks it as target, score):
        (-INF, -INF) when bitarray_to_basis returns -1,
        (-1, -1)     after printing a message, when the score is numerically equal to 1,
        (score, score) otherwise. Higher is better; the network tries to maximize it.

    NOTE(review): a perfect match (score == 1) is reported with -1, i.e. below every other
    decodable word -- confirm this sentinel is what the training loop should learn from.
    NOTE(review): bitarray_to_basis is not among the names imported in the visible import
    cell; on a fresh kernel it has to come from somewhere else -- TODO confirm.
    """
    word = state[:MYN]
    basis = bitarray_to_basis(word, n_qubits, chi)
    if basis == -1:
        return -INF, -INF

    # `*` is relied on to act as a matrix product here (operands are expected to be matrix objects).
    score = np.linalg.norm(utils.orthogonal_projector(basis) * target)

    if not np.allclose(score, 1):
        return score, score

    print('You found a stabilizer decomposition with (n_qubits,chi) = ', [n_qubits,chi])
    return -1, -1

####No need to change anything below here. 
# Daochen: the agent argument will be the "model"
def generate_session(agent, n_sessions, verbose = 1):
    """
    Play n_sessions games in lockstep using the agent neural network, one letter per step.

    Each game builds a 0-1 word of length MYN. At every step the agent sees, per game, an
    observation of size observation_space: the first MYN entries hold the partial word and
    the last MYN entries one-hot encode the position being decided. The agent's output is
    used as the probability of writing a 1 at that position. All games end after MYN steps,
    when each finished word is passed to calc_score.

    Code inspired by https://github.com/yandexdataschool/Practical_RL/blob/master/week01_intro/deep_crossentropy_method.ipynb

    :param agent: object with a Keras-style predict(x, batch_size=...) method (the "model" in this notebook).
    :param n_sessions: number of games to play.
    :param verbose: if truthy, print the accumulated wall-clock time of each phase.

    :returns: (states, actions, total_score, total_target)
        states       -- int array (n_sessions, observation_space, len_game): observation fed to the agent at each step
        actions      -- int array (n_sessions, len_game): letter chosen at each step
        total_score  -- float array (n_sessions,): second value returned by calc_score for each finished word
        total_target -- float array (n_sessions,): first value returned by calc_score for each finished word
    """
    states =  np.zeros([n_sessions, observation_space, len_game], dtype=int)
    actions = np.zeros([n_sessions, len_game], dtype = int)
    state_next = np.zeros([n_sessions,observation_space], dtype = int)
    # Every game starts with an empty word and the one-hot marker on position 0.
    states[:,MYN,0] = 1
    total_target = np.zeros([n_sessions])
    total_score = np.zeros([n_sessions])
    recordsess_time = 0
    play_time = 0
    scorecalc_time = 0
    pred_time = 0
    step = 0
    while True:
        step += 1
        # The word is complete once MYN letters have been written. This is the same for every
        # session, so it is decided once per step (and is defined even for an empty batch).
        terminal = step == MYN

        tic = time.time()
        prob = agent.predict(states[:,:,step-1], batch_size = n_sessions)
        pred_time += time.time()-tic

        for i in range(n_sessions):
            # choose action 1 with probability prob[i]
            action = 1 if np.random.rand() < prob[i] else 0
            actions[i][step-1] = action

            tic = time.time()
            state_next[i] = states[i,:,step-1]
            play_time += time.time()-tic

            # Write the chosen letter, then move the one-hot position marker one slot forward.
            if action > 0:
                state_next[i][step-1] = action
            state_next[i][MYN + step-1] = 0
            if step < MYN:
                state_next[i][MYN + step] = 1

            tic = time.time()
            if terminal:
                total_target[i], total_score[i] = calc_score(state_next[i])
            scorecalc_time += time.time()-tic

            tic = time.time()
            if not terminal:
                states[i,:,step] = state_next[i]
            recordsess_time += time.time()-tic

        if terminal:
            break
    #If you want, print out how much time each step has taken. This is useful to find the bottleneck in the program.
    if (verbose):
        print("Predict: "+str(pred_time)+", play: " + str(play_time) +", scorecalc: " + str(scorecalc_time) +", recordsess: " + str(recordsess_time))
    return states, actions, total_score, total_target

def select_elites(states_batch, actions_batch, rewards_batch, percentile=50):
    """
    Pick the (state, action) training pairs of the best-rewarded games.

    A game qualifies when its reward reaches the `percentile`-th percentile of rewards_batch
    (within a tolerance of 1e-7). Games sitting on the threshold are only taken while the
    budget of n_sessions * (100 - percentile) / 100 games is not used up; games clearly above
    the threshold are always taken. n_sessions is read from the notebook globals.

    :param states_batch: states_batch[session_i][t] is the observation (a NumPy array) at step t
    :param actions_batch: actions_batch[session_i][t] is the action taken at step t
    :param rewards_batch: rewards_batch[session_i] is the reward of that game
    :param percentile: percentile of rewards_batch used as the threshold

    :returns: elite_states, elite_actions -- int arrays with one entry per (game, step) of the
        selected games, flattened across games in batch order

    This function was mostly taken from https://github.com/yandexdataschool/Practical_RL/blob/master/week01_intro/deep_crossentropy_method.ipynb
    If this function is the bottleneck, it can easily be sped up using numba
    """
    tolerance = 0.0000001
    budget = n_sessions * (100.0 - percentile) / 100.0
    cutoff = np.percentile(rewards_batch, percentile)
    lower, upper = cutoff - tolerance, cutoff + tolerance

    chosen_states = []
    chosen_actions = []
    for idx in range(len(states_batch)):
        reward = rewards_batch[idx]
        if not (reward >= lower):
            continue
        if budget > 0 or reward >= upper:
            chosen_states.extend(obs.tolist() for obs in states_batch[idx])
            chosen_actions.extend(actions_batch[idx])
        # Every game at or above the threshold uses up budget, whether or not it was taken.
        budget -= 1

    return np.array(chosen_states, dtype=int), np.array(chosen_actions, dtype=int)

def select_super_sessions(states_batch, actions_batch, rewards_batch, targets_batch, percentile=90):
    """
    Pick the whole games that survive into the next generation.

    Uses the same threshold rule as select_elites: a game is kept when its reward reaches the
    `percentile`-th percentile of rewards_batch (tolerance 1e-7); games on the threshold are
    kept only while fewer than n_sessions * (100 - percentile) / 100 games have been kept.
    n_sessions is read from the notebook globals. Unlike select_elites, games are returned
    whole (not flattened into steps) together with their reward and target.

    :returns: super_states, super_actions, super_rewards, super_targets -- arrays with one
        entry per kept game, in batch order

    If this function is the bottleneck, it can easily be sped up using numba
    """
    tolerance = 0.0000001
    slots = n_sessions * (100.0 - percentile) / 100.0
    cutoff = np.percentile(rewards_batch, percentile)

    kept = []
    for idx in range(len(states_batch)):
        reward = rewards_batch[idx]
        reaches_cutoff = reward >= cutoff - tolerance
        if reaches_cutoff and (slots > 0 or reward >= cutoff + tolerance):
            kept.append(idx)
            slots -= 1

    super_states = np.array([states_batch[k] for k in kept], dtype=int)
    super_actions = np.array([actions_batch[k] for k in kept], dtype=int)
    super_rewards = np.array([rewards_batch[k] for k in kept])
    super_targets = np.array([targets_batch[k] for k in kept])
    return super_states, super_actions, super_rewards, super_targets

In [12]:
# Survivor ("super session") buffers carried from one generation to the next; all empty before the first one.
super_states =  np.empty((0,len_game,observation_space), dtype = int)
# Shaped (0, len_game), like super_states, so it can be concatenated along axis 0
# with a (n, len_game) action batch.
super_actions = np.empty((0,len_game), dtype = int)
super_rewards = np.array([])
super_targets= np.array([])
# Wall-clock timers overwritten by the training loop on every generation.
sessgen_time = 0
fit_time = 0
score_time = 0

# NOTE(review): no file is written in the visible cells -- confirm this is still needed.
myRand = random.randint(0,1000) #used in the filename

In [13]:
# Cross-entropy-method training loop: play, merge with survivors, select, fit, repeat.
# NOTE(review): there is no stopping criterion. calc_score prints a message and returns -1
# when a decomposition is found, but nothing here reacts to it -- interrupt the kernel to stop.
for i in range(1000000): #1000000 generations should be plenty
    #generate new sessions
    #performance can be improved with joblib
    tic = time.time()
    # sessions = (states, actions, total_score, total_target)
    sessions = generate_session(model,n_sessions,0) #change 0 to 1 to print out how much time each step in generate_session takes
    sessgen_time = time.time()-tic
    tic = time.time()

    states_batch = np.array(sessions[0], dtype = int)
    actions_batch = np.array(sessions[1], dtype = int)
    rewards_batch = np.array(sessions[2])
    targets_batch = np.array(sessions[3])

    # generate_session returns states as (session, observation, step); the selectors
    # iterate over steps, so reorder to (session, step, observation).
    states_batch = np.transpose(states_batch,axes=[0,2,1])

    # Merge in the survivors of the previous generation. All four arrays are extended
    # together, and only when survivors exist, so the batch stays aligned even when this
    # cell is re-run with survivors left over from an earlier run.
    if len(super_actions) > 0:
        states_batch = np.append(states_batch,super_states,axis=0)
        actions_batch = np.append(actions_batch,np.array(super_actions),axis=0)
        rewards_batch = np.append(rewards_batch,super_rewards)
        targets_batch = np.append(targets_batch,super_targets)

    randomcomp_time = time.time()-tic
    tic = time.time()

    elite_states, elite_actions = select_elites(states_batch, actions_batch, rewards_batch, percentile=percentile) #pick the sessions to learn from
    select1_time = time.time()-tic

    tic = time.time()
    super_sessions = select_super_sessions(states_batch, actions_batch, rewards_batch, targets_batch, percentile=super_percentile) #pick the sessions to survive
    select2_time = time.time()-tic

    tic = time.time()
    # Regroup the four parallel arrays into one (states, actions, reward, target) tuple per
    # surviving session, best reward first.
    super_sessions = sorted(zip(*super_sessions), key=lambda session: session[2], reverse=True)
    select3_time = time.time()-tic

    tic = time.time()
    model.fit(elite_states, elite_actions, verbose=0) #learn from the elite sessions
    fit_time = time.time()-tic

    tic = time.time()

    super_states = [session[0] for session in super_sessions]
    super_actions = [session[1] for session in super_sessions]
    super_rewards = [session[2] for session in super_sessions]
    super_targets = [session[3] for session in super_sessions]

    # Leaves rewards_batch sorted ascending (no longer aligned with states_batch) for inspection.
    rewards_batch.sort()
    score_time = time.time()-tic

#     if i%5 == 0 and i > 1:
    print("\n" + str(i) +  ". Best individuals (reward): " + str(np.flip(np.sort(super_rewards))))
#     Daochen: note that sometimes it makes sense to add/remove np.flip below
#     print("\n" + str(i) +  ". Best individuals (target): " + str(np.flip(np.sort(super_targets))))


0. Best individuals (reward): [ 4.24166805e-01 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.0000000


8. Best individuals (reward): [ 8.09203129e-01  6.16486021e-01  5.97238791e-01  5.00000000e-01
  4.86122964e-01  4.24166805e-01 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.0000000


16. Best individuals (reward): [ 8.09203129e-01  6.16486021e-01  5.97238791e-01  5.09841259e-01
  5.00000000e-01  4.86122964e-01  4.81346210e-01  4.70564370e-01
  4.24166805e-01 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


24. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  7.27269796e-01  6.80726338e-01
  6.16486021e-01  6.10618816e-01  5.97238791e-01  5.09841259e-01
  5.00000000e-01  4.86122964e-01  4.81346210e-01  4.70564370e-01
  4.63975018e-01  4.24166805e-01 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


31. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  7.27269796e-01  6.80726338e-01
  6.54482305e-01  6.16486021e-01  6.10618816e-01  5.97238791e-01
  5.50611114e-01  5.09841259e-01  5.00000000e-01  4.86122964e-01
  4.81346210e-01  4.78713554e-01  4.70564370e-01  4.63975018e-01
  4.41091985e-01  4.24166805e-01 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


38. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  7.83758059e-01  7.27269796e-01
  6.84653197e-01  6.80726338e-01  6.54482305e-01  6.16486021e-01
  6.13829958e-01  6.10618816e-01  5.97238791e-01  5.85631970e-01
  5.50611114e-01  5.09841259e-01  5.00000000e-01  4.86122964e-01
  4.82974705e-01  4.81346210e-01  4.78713554e-01  4.70564370e-01
  4.63975018e-01  4.41091985e-01  4.28460784e-01  4.24166805e-01
  3.54187356e-01 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


44. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  7.83758059e-01  7.27269796e-01
  6.84653197e-01  6.80726338e-01  6.61437828e-01  6.54482305e-01
  6.24703814e-01  6.16486021e-01  6.13829958e-01  6.10618816e-01
  5.97238791e-01  5.85631970e-01  5.50611114e-01  5.09841259e-01
  5.00000000e-01  4.86122964e-01  4.82974705e-01  4.81346210e-01
  4.78713554e-01  4.70564370e-01  4.63975018e-01  4.41091985e-01
  4.28460784e-01  4.24166805e-01  3.54187356e-01 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


50. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  7.83758059e-01  7.74949886e-01
  7.27269796e-01  7.09456364e-01  6.84653197e-01  6.80726338e-01
  6.80264795e-01  6.61437828e-01  6.54482305e-01  6.24703814e-01
  6.16486021e-01  6.13829958e-01  6.10618816e-01  5.97238791e-01
  5.85631970e-01  5.50611114e-01  5.09841259e-01  5.00000000e-01
  4.86122964e-01  4.82974705e-01  4.81346210e-01  4.78713554e-01
  4.70564370e-01  4.63975018e-01  4.41091985e-01  4.28460784e-01
  4.24166805e-01  3.54187356e-01 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


56. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.02659917e-01  7.83758059e-01
  7.74949886e-01  7.38698012e-01  7.27269796e-01  7.09456364e-01
  6.97674127e-01  6.84653197e-01  6.80726338e-01  6.80726338e-01
  6.80264795e-01  6.61437828e-01  6.54482305e-01  6.24703814e-01
  6.16486021e-01  6.13829958e-01  6.10618816e-01  5.97238791e-01
  5.85631970e-01  5.50611114e-01  5.09841259e-01  5.00000000e-01
  4.86122964e-01  4.82974705e-01  4.81346210e-01  4.78713554e-01
  4.70564370e-01  4.63975018e-01  4.41091985e-01  4.40053233e-01
  4.28460784e-01  4.24166805e-01  3.54187356e-01 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


62. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.02659917e-01  7.83758059e-01
  7.74949886e-01  7.38698012e-01  7.27269796e-01  7.09456364e-01
  6.97674127e-01  6.84653197e-01  6.80726338e-01  6.80726338e-01
  6.80264795e-01  6.61437828e-01  6.54482305e-01  6.24703814e-01
  6.16486021e-01  6.13829958e-01  6.10618816e-01  6.07042582e-01
  5.97238791e-01  5.85631970e-01  5.50611114e-01  5.09841259e-01
  5.00000000e-01  4.86122964e-01  4.82974705e-01  4.81346210e-01
  4.78713554e-01  4.70564370e-01  4.63975018e-01  4.41091985e-01
  4.40053233e-01  4.28460784e-01  4.24166805e-01  4.15445722e-01
  3.54187356e-01  3.13296244e-01 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


68. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.02659917e-01  7.83758059e-01
  7.74949886e-01  7.38698012e-01  7.27269796e-01  7.09456364e-01
  6.97674127e-01  6.84653197e-01  6.80726338e-01  6.80726338e-01
  6.80264795e-01  6.61437828e-01  6.60085436e-01  6.54482305e-01
  6.24703814e-01  6.16486021e-01  6.13829958e-01  6.10618816e-01
  6.07042582e-01  5.97238791e-01  5.85631970e-01  5.75708572e-01
  5.50611114e-01  5.09841259e-01  5.00000000e-01  4.86122964e-01
  4.82974705e-01  4.81346210e-01  4.78713554e-01  4.70564370e-01
  4.63975018e-01  4.41091985e-01  4.40053233e-01  4.28460784e-01
  4.24166805e-01  4.15445722e-01  3.54187356e-01  3.13296244e-01
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


73. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.02659917e-01  7.83758059e-01
  7.74949886e-01  7.38698012e-01  7.27269796e-01  7.09456364e-01
  6.97674127e-01  6.84653197e-01  6.80726338e-01  6.80726338e-01
  6.80264795e-01  6.61437828e-01  6.60085436e-01  6.54482305e-01
  6.24703814e-01  6.16486021e-01  6.13829958e-01  6.10618816e-01
  6.07042582e-01  5.97238791e-01  5.85631970e-01  5.75708572e-01
  5.54200638e-01  5.50611114e-01  5.09841259e-01  5.00000000e-01
  4.86122964e-01  4.82974705e-01  4.81346210e-01  4.78713554e-01
  4.70564370e-01  4.63975018e-01  4.41091985e-01  4.40053233e-01
  4.28460784e-01  4.24166805e-01  4.15445722e-01  3.54187356e-01
  3.13296244e-01 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


78. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.02659917e-01  7.83758059e-01
  7.74949886e-01  7.38698012e-01  7.27269796e-01  7.09456364e-01
  6.97674127e-01  6.84653197e-01  6.80726338e-01  6.80726338e-01
  6.80264795e-01  6.61437828e-01  6.60085436e-01  6.54482305e-01
  6.24703814e-01  6.16486021e-01  6.13829958e-01  6.10618816e-01
  6.07042582e-01  5.97238791e-01  5.96618709e-01  5.85631970e-01
  5.75708572e-01  5.54200638e-01  5.50611114e-01  5.09841259e-01
  5.01045834e-01  5.00000000e-01  4.86122964e-01  4.82974705e-01
  4.81346210e-01  4.78713554e-01  4.70564370e-01  4.63975018e-01
  4.41091985e-01  4.40053233e-01  4.28460784e-01  4.24166805e-01
  4.15445722e-01  3.54187356e-01  3.13296244e-01 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


83. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.02659917e-01  7.83758059e-01
  7.74949886e-01  7.38698012e-01  7.27269796e-01  7.09456364e-01
  6.97674127e-01  6.84653197e-01  6.80726338e-01  6.80726338e-01
  6.80264795e-01  6.61437828e-01  6.60085436e-01  6.54482305e-01
  6.24703814e-01  6.16486021e-01  6.13829958e-01  6.10618816e-01
  6.07042582e-01  5.97238791e-01  5.96618709e-01  5.85631970e-01
  5.75708572e-01  5.62840405e-01  5.54200638e-01  5.50611114e-01
  5.09841259e-01  5.09070637e-01  5.01045834e-01  5.00000000e-01
  4.86122964e-01  4.82974705e-01  4.81346210e-01  4.81346210e-01
  4.78713554e-01  4.70564370e-01  4.63975018e-01  4.41091985e-01
  4.40053233e-01  4.28460784e-01  4.24166805e-01  4.23293266e-01
  4.15445722e-01  3.54187356e-01  3.13296244e-01 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06
 -1.00000000e+06 -1.00000000e+06 -1.00000000e+06 -1.000000


88. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.02659917e-01  7.83758059e-01
  7.74949886e-01  7.54197224e-01  7.38698012e-01  7.27269796e-01
  7.22472831e-01  7.09456364e-01  6.97674127e-01  6.94041911e-01
  6.84653197e-01  6.80726338e-01  6.80726338e-01  6.80264795e-01
  6.61437828e-01  6.60085436e-01  6.54482305e-01  6.41569678e-01
  6.37725783e-01  6.24703814e-01  6.16486021e-01  6.13829958e-01
  6.10618816e-01  6.09446934e-01  6.07042582e-01  5.97238791e-01
  5.97238791e-01  5.96618709e-01  5.85631970e-01  5.75708572e-01
  5.62840405e-01  5.54200638e-01  5.50611114e-01  5.09841259e-01
  5.09070637e-01  5.01045834e-01  5.00000000e-01  4.86122964e-01
  4.82974705e-01  4.81346210e-01  4.81346210e-01  4.78713554e-01
  4.74965506e-01  4.70564370e-01  4.63975018e-01  4.41091985e-01
  4.40053233e-01  4.28460784e-01  4.24166805e-01  4.23293266e-01
  4.15445722e-01  3.79248322e-01  3.54187356e-01  3.13296244e-01
  2.66869371e-01 -1.00000000e+06 -1.00000000e+06 -1.000000


93. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.02659917e-01  7.83758059e-01
  7.74949886e-01  7.54197224e-01  7.38698012e-01  7.27269796e-01
  7.22472831e-01  7.09456364e-01  6.97674127e-01  6.94041911e-01
  6.84653197e-01  6.80726338e-01  6.80726338e-01  6.80264795e-01
  6.61437828e-01  6.60085436e-01  6.54482305e-01  6.41569678e-01
  6.37725783e-01  6.24703814e-01  6.16486021e-01  6.13829958e-01
  6.10618816e-01  6.09446934e-01  6.07042582e-01  5.97238791e-01
  5.97238791e-01  5.96618709e-01  5.85631970e-01  5.75708572e-01
  5.62840405e-01  5.54200638e-01  5.50611114e-01  5.49423796e-01
  5.09841259e-01  5.09070637e-01  5.01045834e-01  5.00000000e-01
  4.86122964e-01  4.82974705e-01  4.81346210e-01  4.81346210e-01
  4.78713554e-01  4.74965506e-01  4.70564370e-01  4.63975018e-01
  4.41091985e-01  4.40053233e-01  4.28460784e-01  4.24166805e-01
  4.23293266e-01  4.15445722e-01  3.80626087e-01  3.79248322e-01
  3.54187356e-01  3.13296244e-01  2.66869371e-01 -1.000000


98. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.02659917e-01  7.83758059e-01
  7.74949886e-01  7.54197224e-01  7.38698012e-01  7.27269796e-01
  7.22472831e-01  7.09456364e-01  6.97674127e-01  6.94041911e-01
  6.84653197e-01  6.80726338e-01  6.80726338e-01  6.80264795e-01
  6.61437828e-01  6.60085436e-01  6.54482305e-01  6.41569678e-01
  6.37725783e-01  6.24703814e-01  6.16486021e-01  6.13829958e-01
  6.10618816e-01  6.09446934e-01  6.07042582e-01  5.97238791e-01
  5.97238791e-01  5.96618709e-01  5.85631970e-01  5.75708572e-01
  5.62840405e-01  5.54200638e-01  5.50611114e-01  5.49423796e-01
  5.09841259e-01  5.09070637e-01  5.01045834e-01  5.00570726e-01
  5.00000000e-01  4.86122964e-01  4.82974705e-01  4.81346210e-01
  4.81346210e-01  4.78713554e-01  4.74965506e-01  4.70564370e-01
  4.63975018e-01  4.41091985e-01  4.40053233e-01  4.28460784e-01
  4.24166805e-01  4.23293266e-01  4.15445722e-01  3.80626087e-01
  3.79248322e-01  3.61056301e-01  3.54187356e-01  3.132962


103. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.02659917e-01  7.83758059e-01
  7.74949886e-01  7.54197224e-01  7.38698012e-01  7.27269796e-01
  7.22472831e-01  7.09456364e-01  6.97674127e-01  6.94041911e-01
  6.84653197e-01  6.80726338e-01  6.80726338e-01  6.80264795e-01
  6.61437828e-01  6.60085436e-01  6.54482305e-01  6.41569678e-01
  6.37725783e-01  6.24703814e-01  6.16486021e-01  6.13829958e-01
  6.10618816e-01  6.09446934e-01  6.07042582e-01  5.97238791e-01
  5.97238791e-01  5.96618709e-01  5.85631970e-01  5.75708572e-01
  5.62840405e-01  5.54200638e-01  5.50611114e-01  5.49423796e-01
  5.47148831e-01  5.09841259e-01  5.09070637e-01  5.01045834e-01
  5.00570726e-01  5.00000000e-01  4.86122964e-01  4.82974705e-01
  4.81346210e-01  4.81346210e-01  4.78713554e-01  4.74965506e-01
  4.70564370e-01  4.63975018e-01  4.41091985e-01  4.40053233e-01
  4.28460784e-01  4.24166805e-01  4.23293266e-01  4.15445722e-01
  3.80626087e-01  3.79248322e-01  3.61056301e-01  3.54187


108. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.05056228e-01  8.02659917e-01
  7.83758059e-01  7.74949886e-01  7.54197224e-01  7.38698012e-01
  7.27269796e-01  7.22472831e-01  7.09456364e-01  6.97674127e-01
  6.94041911e-01  6.84653197e-01  6.80726338e-01  6.80726338e-01
  6.80264795e-01  6.61437828e-01  6.61437828e-01  6.60085436e-01
  6.54482305e-01  6.41569678e-01  6.37725783e-01  6.24703814e-01
  6.16486021e-01  6.13829958e-01  6.10618816e-01  6.09446934e-01
  6.07042582e-01  5.97238791e-01  5.97238791e-01  5.96618709e-01
  5.86458850e-01  5.85631970e-01  5.75708572e-01  5.62840405e-01
  5.54200638e-01  5.50611114e-01  5.49423796e-01  5.47148831e-01
  5.09841259e-01  5.09070637e-01  5.01045834e-01  5.00570726e-01
  5.00000000e-01  4.86122964e-01  4.82974705e-01  4.81346210e-01
  4.81346210e-01  4.78713554e-01  4.74965506e-01  4.70564370e-01
  4.63975018e-01  4.43708525e-01  4.41091985e-01  4.40053233e-01
  4.28460784e-01  4.24166805e-01  4.23293266e-01  4.15445


118. Best individuals (reward): [0.84768273 0.80920313 0.80505623 0.80265992 0.78375806 0.77494989
 0.75419722 0.73869801 0.7272698  0.72247283 0.70945636 0.69767413
 0.69404191 0.6846532  0.68072634 0.68072634 0.68026479 0.66143783
 0.66143783 0.66008544 0.6544823  0.65328148 0.64156968 0.63772578
 0.6363853  0.63480879 0.62470381 0.61648602 0.61382996 0.61061882
 0.60944693 0.60704258 0.59723879 0.59723879 0.59661871 0.58645885
 0.58563197 0.57570857 0.56284041 0.55618246 0.55420064 0.55061111
 0.5494238  0.54714883 0.52674352 0.50984126 0.50907064 0.50104583
 0.50057073 0.5        0.48612296 0.4829747  0.48134621 0.48134621
 0.47871355 0.47496551 0.47285113 0.47056437 0.46397502 0.44370852
 0.44109199 0.44005323 0.42846078 0.42416681]

119. Best individuals (reward): [0.84768273 0.80920313 0.80505623 0.80265992 0.78375806 0.77494989
 0.75419722 0.73869801 0.7272698  0.72247283 0.70945636 0.69767413
 0.69404191 0.6846532  0.68072634 0.68072634 0.68026479 0.66143783
 0.66143783 0.660


129. Best individuals (reward): [0.84768273 0.80920313 0.80505623 0.80265992 0.78375806 0.77494989
 0.75419722 0.73869801 0.7272698  0.72247283 0.70945636 0.69767413
 0.69404191 0.6846532  0.68072634 0.68072634 0.68026479 0.66143783
 0.66143783 0.66008544 0.6544823  0.65328148 0.64156968 0.63772578
 0.6363853  0.63480879 0.62470381 0.61648602 0.61382996 0.61237244
 0.61061882 0.60944693 0.60704258 0.59723879 0.59723879 0.59661871
 0.59322224 0.58645885 0.58645885 0.58563197 0.57570857 0.56284041
 0.55618246 0.55420064 0.55061111 0.5494238  0.54714883 0.54034554
 0.52674352 0.50984126 0.50907064 0.50104583 0.50057073 0.5
 0.48612296 0.4829747  0.48134621 0.48134621 0.48134621 0.47871355
 0.47496551 0.47285113 0.47056437 0.46397502]

130. Best individuals (reward): [0.84768273 0.80920313 0.80505623 0.80265992 0.78375806 0.77494989
 0.75419722 0.73869801 0.7272698  0.72247283 0.70945636 0.69767413
 0.69404191 0.6846532  0.68072634 0.68072634 0.68026479 0.66143783
 0.66143783 0.66008544 0


140. Best individuals (reward): [0.84768273 0.80920313 0.80505623 0.80265992 0.78375806 0.77494989
 0.75419722 0.73869801 0.7272698  0.72247283 0.70945636 0.69767413
 0.69404191 0.6846532  0.68072634 0.68072634 0.68026479 0.66143783
 0.66143783 0.66008544 0.6544823  0.65328148 0.64156968 0.63772578
 0.6363853  0.63480879 0.6317483  0.62470381 0.61648602 0.61382996
 0.61237244 0.61061882 0.60944693 0.60704258 0.59723879 0.59723879
 0.59661871 0.59322224 0.58955895 0.58645885 0.58645885 0.58563197
 0.57570857 0.56651487 0.56284041 0.559485   0.55618246 0.55420064
 0.55061111 0.5494238  0.54714883 0.54034554 0.52674352 0.50984126
 0.50907064 0.50104583 0.50104583 0.50057073 0.5        0.48612296
 0.4829747  0.48134621 0.48134621 0.48134621]

141. Best individuals (reward): [0.84768273 0.80920313 0.80505623 0.80265992 0.78375806 0.77494989
 0.75419722 0.73869801 0.7272698  0.72247283 0.70945636 0.69767413
 0.69404191 0.6846532  0.68072634 0.68072634 0.68026479 0.66143783
 0.66143783 0.660


146. Best individuals (reward): [ 8.47682734e-01  8.09203129e-01  8.05056228e-01  8.02659917e-01
  7.83758059e-01  7.74949886e-01  7.54197224e-01  7.38698012e-01
  7.27269796e-01  7.22472831e-01  7.09456364e-01  6.97674127e-01
  6.94041911e-01  6.84653197e-01  6.80726338e-01  6.80726338e-01
  6.80264795e-01  6.61437828e-01  6.61437828e-01  6.60085436e-01
  6.54482305e-01  6.53281482e-01  6.41569678e-01  6.37725783e-01
  6.36385297e-01  6.34808793e-01  6.31748299e-01  6.24703814e-01
  6.16486021e-01  6.13829958e-01  6.12372436e-01  6.10618816e-01
  6.09446934e-01  6.07042582e-01  5.97238791e-01  5.97238791e-01
  5.96618709e-01  5.93222236e-01  5.89558947e-01  5.86458850e-01
  5.86458850e-01  5.85631970e-01  5.75708572e-01  5.66514871e-01
  5.62840405e-01  5.59484999e-01  5.56182457e-01  5.54200638e-01
  5.50611114e-01  5.49423796e-01  5.47148831e-01  5.40345542e-01
  5.36190265e-01  5.26743523e-01  5.15388203e-01  5.09841259e-01
  5.09070637e-01  5.01045834e-01  5.01045834e-01  5.00570

KeyboardInterrupt: 