In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports edited
# modules before each cell executes, so changes to local modules such as
# `utils` are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import multiprocessing as mp
import utils as utils
import time

In [None]:
# Show how many CPUs multiprocessing reports; the pool timing cell uses this as its worker count.
mp.cpu_count()

In [None]:
# Time a parallel map of utils.f over ten inputs, one worker per reported CPU.
# The pool is used as a context manager so that its worker processes are shut
# down when the block exits; the previous version never closed the pool, so
# every re-run of this cell left another full set of idle workers behind.
tic = time.time()
with mp.Pool(mp.cpu_count()) as p:
    p.map(utils.f, range(10))
    # Stop the clock before the pool is torn down so the measurement still
    # covers exactly what it covered before: pool start-up plus the map call.
    elapsed = time.time() - tic
print(elapsed)

In [None]:
# Sequential baseline for the pool timing: the same ten calls to utils.f,
# executed one after another in this process.
tic = time.time()
list(map(utils.f, range(10)))
print(time.time() - tic)

In [1]:
# Code to accompany the paper "Constructions in combinatorics via neural networks and LP solvers" by A Z Wagner
# Code for conjecture 2.1, without the use of numba 
#
# Please keep in mind that I am far from being an expert in reinforcement learning. 
# If you know what you are doing, you might be better off writing your own code.
#
# This code works on tensorflow version 1.14.0 and python version 3.6.3
# It mysteriously breaks on other versions of python.
# For later versions of tensorflow there seems to be a massive overhead in the predict function for some reason, and/or it produces mysterious errors.
# Debugging these was way above my skill level.
# If the code doesn't work, make sure you are using these versions of tf and python.
# I used keras version 2.3.1, not sure if this is important, but I recommend this just to be safe.

import networkx as nx #for various graph parameters, such as eigenvalues, macthing number, etc
import random
import numpy as np
import copy
import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD, Adam
from keras.models import load_model
from statistics import mean
from math import sqrt
from numpy.random import choice
import pickle
import time
import math
import sympy
import matplotlib.pyplot as plt

In [3]:
from stabilizer_search.search.brute_force import *
from stabilizer_search.mat import X, Z, T
from stabilizer_search.mat import tensor
import randstab as rs

In [4]:
# Problem size: number of qubits, and chi = how many stabilizer states a
# candidate decomposition may use (later cells set the word length to chi).
# NOTE: the following cell reassigns n_qubits, chi and target_state.
n_qubits = 4
chi = 4
# Single-qubit column vector (cos(pi/8), sin(pi/8)) written as a 2x1 nested list.
H = [[np.cos(np.pi/8)],[np.sin(np.pi/8)]]
# Combine n_qubits copies of H with the project's `tensor` helper
# (presumably a Kronecker/tensor product -- confirm in stabilizer_search.mat).
target_state = tensor(*([H]*n_qubits))

In [5]:
# Problem size used by the rest of the notebook: number of qubits, and
# chi = how many stabilizer states a candidate decomposition may use.
n_qubits = 4
chi = 2
# Single-qubit column vector (cos(pi/8), sin(pi/8)); only the commented-out
# alternative target below refers to it.
H_state = [[np.cos(np.pi/8)],[np.sin(np.pi/8)]]
# target_state = tensor(*([H_state]*n_qubits))
# NOTE(review): T_state and T_perp_state are not defined in any cell visible
# here, so on a fresh kernel this line raises NameError unless they come from
# the star import or a deleted cell -- define them explicitly before this line.
# The target is an equal-weight sum of the two n_qubits-fold tensor powers,
# scaled by 1/sqrt(2).
target_state = (tensor(*([T_state]*n_qubits)) + tensor(*([T_perp_state]*n_qubits)))/np.sqrt(2)

In [6]:
# Daochen: is there a way of knowing the number of real stabilizer states? Or of randomly generating only real ones?
# rs.number_of_states(n_qubits) returns something summable; its total is reported as the
# number of all stabilizer states on n_qubits qubits.
print('number of all stabilizer states =', sum(rs.number_of_states(n_qubits)))
# How many random stabilizer states the next cell draws (before duplicates are removed).
n_stabilizers_target = 20000

number of all stabilizer states = 36720


In [5]:
# Draw n_stabilizers_target random stabilizer states and keep one copy of each
# distinct vector. Vectors are compared through their raw bytes, so two vectors
# count as duplicates only when they are bit-for-bit identical.
sampled_stabilizers = [rs.random_stabilizer_state(n_qubits) for _ in range(n_stabilizers_target)]
L = {array.tobytes(): array for array in sampled_stabilizers}
unique_stabilizers = list(L.values())

# NOTE(review): is_target_state_real is not defined in any visible cell; it
# must be set before this cell runs.
if is_target_state_real:
    # For a real target, restrict the alphabet to stabilizer states whose
    # amplitudes are all real.
    unique_real_stabilizers = [state for state in unique_stabilizers if np.all(np.isreal(state))]
    stabilizers = np.array(unique_real_stabilizers)
else:
    # Previously this assignment ran unconditionally and silently discarded the
    # real-only selection made above.
    stabilizers = np.array(unique_stabilizers)
n_stabilizers = len(stabilizers)

# why can this be sometimes bigger than number of all stabilizer states???
# NOTE(review): possibly because the byte-level comparison above treats vectors
# that differ only by a global phase or by floating-point rounding as distinct
# -- to be confirmed against randstab.
print('number of stabilizer states considered =', n_stabilizers)

number of considered stabilizer states = 1979


In [6]:
# Sanity check that the target is in the span of all candidate stabilizer states.
# Each row of `stabilizers` is wrapped and transposed into a column vector.
candidate_stabilizer_basis = [np.array([stabilizers[i,:]]).transpose() for i in range(n_stabilizers)]
# ortho_projector is not defined in this notebook; presumably it comes from the
# stabilizer_search star import and builds the orthogonal projector onto the
# span of the given column vectors -- confirm.
projector = ortho_projector(candidate_stabilizer_basis)
# NOTE(review): `*` is a matrix product only if `projector` is an np.matrix;
# for a plain ndarray it would multiply element-wise -- confirm the return type.
# The 2-norm of the result is the quantity inspected in the next cell.
projection = np.linalg.norm(projector*target_state, 2)

In [7]:
# Display the norm computed by the sanity check; calcScore later treats a value
# numerically equal to 1 as "the target is captured by the chosen states".
projection

1.0000000000000002

In [10]:
# DON'T DO THIS! E.g. say you had |0>, |1>, |+> and you killed off |+> because it's linearly dependent, then you get a bad stabilizer decomposition of |+>!
# _, inds = sympy.Matrix(candidate_stabilizer_states).T.rref()
# candidate_stabilizer_states = candidate_stabilizer_states[inds,:]
# n_candidates_actual = candidate_stabilizer_states.shape[0]

In [8]:
# Daochen: would be easier if could enforce the use of combinations
# (Original code, for reference: the word was a Boolean function on n bits, i.e. a 0-1 word of length 2^n.)
# MYN = int(2**n)

# Length of the word the network generates. Here one letter is one stabilizer
# index, so a word of length chi is a candidate set of chi stabilizer states.
MYN = int(chi)

# LEARNING_RATE = 0.0001 #Increase this to make convergence faster, decrease if the algorithm gets stuck in local optima too often.
LEARNING_RATE = 0.00001
n_sessions = 1000 #number of new sessions per iteration
# default 93, 94 respectively
percentile = 93 #sessions at or above this reward percentile are learned from (top 100-X percent)
super_percentile = 97 #sessions at or above this reward percentile survive to the next iteration

# These are hyperparameters
FIRST_LAYER_NEURONS = 128 #Number of neurons in the hidden layers.
SECOND_LAYER_NEURONS = 64
THIRD_LAYER_NEURONS = 32

# n_actions = 2
# Daochen: note that this parameter is not actually used anywhere.
# Size of the alphabet: one action per candidate stabilizer state. The code
# below sizes the output layer and samples actions with n_stabilizers directly.
n_actions = n_stabilizers

# Leave this at 2*MYN. The input vector has size 2*MYN: the first MYN entries
# hold the stabilizer indices chosen so far (zero where nothing has been written
# yet), and the next MYN entries one-hot encode which position is being chosen now.
# So e.g. with MYN = 2, [17,0,   0,1] means index 17 was chosen for the first
# letter and the second letter is being considered now.
# Is there a better way to format the input to make it easier for the neural network to understand things?
observation_space = 2*MYN

# Daochen: why should len_game have anything to do with MYN
# (generate_session chooses exactly one letter per step, so a game lasts MYN steps.)
len_game = MYN
# NOTE: state_dim and INF are not referenced by any other visible cell.
state_dim = (observation_space,)

INF = 1000000

# Model structure: a sequential network with three relu hidden layers and a
# softmax output over the n_stabilizers possible actions.
# The loss is sparse categorical cross-entropy, so training targets are integer
# action indices rather than one-hot vectors.
# I usually used relu activation in the hidden layers but play around to see what activation function and what optimizer works best.

model = Sequential()
model.add(Dense(FIRST_LAYER_NEURONS,  activation="relu"))
model.add(Dense(SECOND_LAYER_NEURONS, activation="relu"))
model.add(Dense(THIRD_LAYER_NEURONS, activation="relu"))
model.add(Dense(n_stabilizers, activation="softmax"))
# Building with an explicit input shape lets summary() report parameter counts.
model.build((None, observation_space))
model.compile(loss="sparse_categorical_crossentropy", optimizer=Adam(learning_rate = LEARNING_RATE)) #Adam with a low learning rate

print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               1152      
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_3 (Dense)              (None, 1979)              65307     
Total params: 76,795
Trainable params: 76,795
Non-trainable params: 0
_________________________________________________________________
None


In [12]:
# candidate_stabilizer_basis = [np.array([candidate_stabilizer_states[i,:]]).transpose() for i in range(MYN)];

In [9]:
def calcScore(state):
    """
    Score a finished word by the norm of the target's projection onto its stabilizers.

    The first MYN entries of ``state`` are used as row indices into the global
    ``stabilizers`` array. The selected rows are turned into column vectors and
    handed to ``ortho_projector``; the 2-norm of ``projector*target_state`` is
    the score. Higher is better, the network will try to maximize this.

    :param state: sequence whose first MYN entries are stabilizer indices.

    :returns: a pair ``(target, score)`` carrying the same norm twice, or
        ``(-1, -1)`` when the norm is numerically 1. In that case the chosen
        indices are printed first; callers use the -1 as a "decomposition
        found" signal.
    """
    word = state[:MYN]
    columns = []
    for position in range(MYN):
        row = stabilizers[word[position], :]
        columns.append(np.array([row]).transpose())

    overlap = np.linalg.norm(ortho_projector(columns) * target_state, 2)

    if not np.allclose(overlap, 1):
        return overlap, overlap

    print('You found a stabilizer decomposition with (n_qubits,chi) = ', [n_qubits,chi])
    print('The set of stabilizers is: ', word)
    return -1, -1

####No need to change anything below here. 
# Daochen: the agent argument will be the "model"
def generate_session(agent, n_sessions, verbose = 1):
    """
    Play n_sessions games in lockstep using the agent neural network.

    Every game builds a word of MYN letters, one letter per step. At each step
    the agent predicts, for all games at once, a probability row over the
    n_stabilizers possible letters; one letter is sampled per game and written
    into that game's state. After the last letter each game is scored with
    calcScore.

    :param agent: object with a ``predict(inputs, batch_size=...)`` method whose
        i-th output row is used as the sampling distribution for game i
        (the training loop passes the Keras ``model``).
    :param n_sessions: number of games to play (this parameter shadows the
        notebook-level variable of the same name).
    :param verbose: if truthy, print the accumulated time spent in each phase.

    :returns: ``(states, actions, total_score, total_target)`` where states has
        shape [n_sessions, observation_space, len_game] and actions has shape
        [n_sessions, len_game]; or the integer -1 as soon as calcScore reports
        a decomposition (its first return value is -1).

    Code inspired by https://github.com/yandexdataschool/Practical_RL/blob/master/week01_intro/deep_crossentropy_method.ipynb
    """
    # states[i, :, t] is the network input for game i at step t.
    states =  np.zeros([n_sessions, observation_space, len_game], dtype=int)
    actions = np.zeros([n_sessions, len_game], dtype = int)
    # Scratch buffer holding each game's state after the current step.
    state_next = np.zeros([n_sessions,observation_space], dtype = int)
    prob = np.zeros(n_sessions)
    # In the second half of the input (one-hot position marker), mark position 0
    # as the letter being chosen at the first step.
    states[:,MYN,0] = 1
    step = 0
    total_target = np.zeros([n_sessions])
#     total_target = np.zeros([n_sessions], dtype=complex)
    total_score = np.zeros([n_sessions])
    # Accumulated wall-clock time per phase, only reported when verbose is set.
    recordsess_time = 0
    play_time = 0
    scorecalc_time = 0
    pred_time = 0
    while (True):
        step += 1
        tic = time.time()
        # One batched prediction for all games at the current step.
        prob = agent.predict(states[:,:,step-1], batch_size = n_sessions) 
        pred_time += time.time()-tic

        for i in range(n_sessions):
            # Sample this game's next letter (a stabilizer index) from the predicted distribution.
            action = choice(n_stabilizers, p=prob[i])
            actions[i][step-1] = action
            tic = time.time()
            state_next[i] = states[i,:,step-1]
            # Note: the "play" timer only covers the state copy above.
            play_time += time.time()-tic
            # Action 0 leaves the slot at its initial 0, so index 0 looks the
            # same as "not chosen yet" in the network input.
            if (action > 0):
                state_next[i][step-1] = action
            # Move the one-hot position marker forward by one.
            state_next[i][MYN + step-1] = 0
            if (step < MYN):
                state_next[i][MYN + step] = 1
#                 Daochen: terminal equals whether step equals MYN: I suppose meaning that an entire state has been generated
            terminal = step == MYN
            tic = time.time()
            if terminal:
#                 print('state_next[i]', state_next[i])
                total_target[i], total_score[i] = calcScore(state_next[i])
                # calcScore signals a found decomposition with -1; abort all
                # remaining games and pass the signal up to the caller.
                if total_target[i] == -1:
                    return -1
#                 print("total_score", total_score[i])
            scorecalc_time += time.time()-tic
            tic = time.time()
            if not terminal:
                # Record the updated state as the network input for the next step.
                states[i,:,step] = state_next[i]
            recordsess_time += time.time()-tic
        # All games advance together, so they all finish on the same step.
        if terminal:
            break
    #If you want, print out how much time each step has taken. This is useful to find the bottleneck in the program.
    if (verbose):
        print("Predict: "+str(pred_time)+", play: " + str(play_time) +", scorecalc: " + str(scorecalc_time) +", recordsess: " + str(recordsess_time))
    return states, actions, total_score, total_target

def select_elites(states_batch, actions_batch, rewards_batch, percentile=50):
    """
    Pick the sessions to learn from and flatten them into training pairs.

    A session is elite when its reward reaches the given percentile of
    rewards_batch (within a 1e-7 tolerance). Sessions that merely tie with the
    threshold are only accepted while a quota of
    n_sessions * (100 - percentile) / 100 is not used up; sessions clearly above
    the threshold are always accepted. n_sessions is the notebook-level setting.

    :param states_batch: states per session, states_batch[session_i][t]
    :param actions_batch: actions per session, actions_batch[session_i][t]
    :param rewards_batch: one reward per session, rewards_batch[session_i]
    :param percentile: reward percentile a session has to reach

    :returns: elite_states, elite_actions -- integer arrays in which the steps of
        all elite sessions are concatenated, states and actions aligned

    This function was mostly taken from https://github.com/yandexdataschool/Practical_RL/blob/master/week01_intro/deep_crossentropy_method.ipynb
    If this function is the bottleneck, it can easily be sped up using numba
    """
    quota = n_sessions * (100.0 - percentile) / 100.0
    cutoff = np.percentile(rewards_batch, percentile)
    tolerance = 0.0000001

    picked_states = []
    picked_actions = []
    for session in range(len(states_batch)):
        reward = rewards_batch[session]
        if not (reward >= cutoff - tolerance):
            continue
        if quota > 0 or reward >= cutoff + tolerance:
            picked_states.extend(step_state.tolist() for step_state in states_batch[session])
            picked_actions.extend(actions_batch[session])
        # Every session at or above the cutoff uses up quota, accepted or not.
        quota -= 1

    return np.array(picked_states, dtype=int), np.array(picked_actions, dtype=int)

def select_super_sessions(states_batch, actions_batch, rewards_batch, targets_batch, percentile=90):
    """
    Pick the sessions that survive unchanged into the next generation.

    Uses the same acceptance rule as select_elites: a session must reach the
    given reward percentile (within a 1e-7 tolerance); ties with the threshold
    are only accepted while a quota of n_sessions * (100 - percentile) / 100 is
    left, clear winners are always accepted. Unlike select_elites the sessions
    are kept whole, not flattened into single steps.

    :returns: super_states, super_actions, super_rewards, super_targets -- one
        entry per surviving session, in the order the sessions appear in the batch

    If this function is the bottleneck, it can easily be sped up using numba
    """
    quota = n_sessions * (100.0 - percentile) / 100.0
    cutoff = np.percentile(rewards_batch, percentile)
    tolerance = 0.0000001

    survivors = []
    for session in range(len(states_batch)):
        reward = rewards_batch[session]
        if reward >= cutoff - tolerance and (quota > 0 or reward >= cutoff + tolerance):
            survivors.append(session)
            # Only accepted sessions use up quota here.
            quota -= 1

    kept_states = np.array([states_batch[s] for s in survivors], dtype=int)
    kept_actions = np.array([actions_batch[s] for s in survivors], dtype=int)
    kept_rewards = np.array([rewards_batch[s] for s in survivors])
    kept_targets = np.array([targets_batch[s] for s in survivors])
    return kept_states, kept_actions, kept_rewards, kept_targets

In [10]:
# Survivors ("super sessions") carried from one generation to the next.
# They start out empty; super_states already has the per-session shape
# (len_game, observation_space) so it can be appended to the first batch.
super_states =  np.empty((0,len_game,observation_space), dtype = int)
super_actions = np.array([], dtype = int)
super_rewards = np.array([])
super_targets= np.array([])
# Per-phase timings, overwritten on every generation of the training loop.
sessgen_time = 0
fit_time = 0
score_time = 0

# Random tag that makes this run's log file names distinct from other runs.
myRand = random.randint(0,1000) #used in the filename
# NOTE: index is not referenced by any other visible cell.
index = 0 #used to index generation

In [15]:
# Cross-entropy training loop. Each generation:
#   1. plays n_sessions new games with the current model,
#   2. merges them with the survivors ("super sessions") of the previous generation,
#   3. selects the elite sessions and fits the model on them,
#   4. selects and sorts the survivors for the next generation,
#   5. prints progress and appends it to the Run<myRand>.txt log.
# The loop ends early when generate_session returns -1, which it does as soon as
# calcScore reports a stabilizer decomposition.
for i in range(5000): #upper bound on the number of generations
    #generate new sessions
    #performance can be improved with joblib
    tic = time.time()
#     sessions = states, actions, total_score, total_target
    sessions = generate_session(model,n_sessions,0) #change 0 to 1 to print out how much time each step in generate_session takes 
    if sessions == -1:
        break
    sessgen_time = time.time()-tic
    tic = time.time()

    states_batch = np.array(sessions[0], dtype = int)
    actions_batch = np.array(sessions[1], dtype = int)
    rewards_batch = np.array(sessions[2])
    targets_batch = np.array(sessions[3])

    # generate_session returns states as [session, observation, step]; the
    # selection functions expect [session, step, observation].
    states_batch = np.transpose(states_batch,axes=[0,2,1])
    states_batch = np.append(states_batch,super_states,axis=0)

    # Append the survivors' actions whenever there are survivors. The initial
    # empty super_actions is 1-D and cannot be appended along axis 0, hence the
    # guard. Testing the survivors themselves instead of `i > 0` keeps states,
    # actions and rewards aligned when this cell is re-run with survivors left
    # over from an earlier run (the old guard skipped the actions at i == 0
    # while the states and rewards were still appended).
    if len(super_actions) > 0:
        actions_batch = np.append(actions_batch,np.array(super_actions),axis=0)

    rewards_batch = np.append(rewards_batch,super_rewards)
    targets_batch = np.append(targets_batch,super_targets)

    randomcomp_time = time.time()-tic 
    tic = time.time()

    elite_states, elite_actions = select_elites(states_batch, actions_batch, rewards_batch, percentile=percentile) #pick the sessions to learn from
    select1_time = time.time()-tic

    tic = time.time()
    super_sessions = select_super_sessions(states_batch, actions_batch, rewards_batch, targets_batch, percentile=super_percentile) #pick the sessions to survive
    select2_time = time.time()-tic

    tic = time.time()
    # Regroup the four parallel arrays into one (state, action, reward, target)
    # tuple per survivor and order the survivors by reward, best first.
    super_sessions = [(super_sessions[0][k], super_sessions[1][k], super_sessions[2][k], super_sessions[3][k]) for k in range(len(super_sessions[2]))]
    super_sessions.sort(key=lambda session: session[2],reverse=True)
    select3_time = time.time()-tic

    tic = time.time()
    model.fit(elite_states, elite_actions, verbose=0) #learn from the elite sessions
    fit_time = time.time()-tic

    tic = time.time()

    super_states = [session[0] for session in super_sessions]
    super_actions = [session[1] for session in super_sessions]
    super_rewards = [session[2] for session in super_sessions]
    super_targets = [session[3] for session in super_sessions]

    #     print(super_states)

    rewards_batch.sort()
#     Daochen: why is it -100?
    # After the ascending sort the last 100 entries are the 100 highest rewards
    # of this generation, so the "Mean reward" reported below is the mean of the top 100.
    mean_all_reward = np.mean(rewards_batch[-100:])
    mean_best_reward = np.mean(super_rewards)

    score_time = time.time()-tic

#Aarthi: Formatting output for logs and display. PLEASE DON'T DELETE CODE. Comment it out if not needed so it doesn't get lost and need to be re-written each time
    reward_line = "\n" + str(i) +  ". Best individuals (reward): " + str(np.flip(np.sort(super_rewards)))
    stats_line = "Mean reward: " + str(mean_all_reward) + "\nSessgen: " + str(sessgen_time) + ", other: " + str(randomcomp_time) + ", select1: " + str(select1_time) + ", select2: " + str(select2_time) + ", select3: " + str(select3_time) +  ", fit: " + str(fit_time) + ", score: " + str(score_time)

    print(reward_line)
#     Daochen: note that sometimes it makes sense to add/remove np.flip below
    print("\n" + str(i) +  ". Best individuals (target): " + str(np.flip(np.sort(super_targets))))

    #comment out the line below to stop printing how much time each step in this loop takes.
    print(stats_line)

    with open('Run'+str(myRand)+'.txt', 'a') as f:
        f.write(reward_line)
        # Unlike the printed version, the logged targets are in ascending order (no np.flip).
        f.write("\n" + str(i) +  ". Best individuals (target): " + str(np.sort(super_targets)))
        # Start the statistics on their own line; without the leading newline
        # they were glued to the end of the target array in the log file.
        f.write("\n" + stats_line + "\n")

    if (i%20 == 1): #Write all important info to files every 20 iterations
#         with open('best_species_pickle_'+str(myRand)+'.txt', 'wb') as fp:
#             pickle.dump(super_actions, fp)
        with open('super_actions_'+str(myRand)+'.txt', 'a') as f:
            f.write("At i = "+str(i)+":"+"\n")
            for item in super_actions:
                f.write(str(item))
                f.write("\n")
        with open('super_rewards_'+str(myRand)+'.txt', 'a') as f:
            f.write("At i = "+str(i)+":"+"\n")
            for item in super_rewards:
                f.write(str(item))
                f.write("\n")
        with open('mean_rewards_'+str(myRand)+'.txt', 'a') as f:
            f.write("At i = "+str(i)+": "+ str(mean_all_reward)+"\n")
        with open('mean_best_rewards_'+str(myRand)+'.txt', 'a') as f:
            f.write("At i = "+str(i)+": "+ str(mean_best_reward)+"\n")


0. Best individuals (reward): [0.82591667 0.7900654  0.7823587  0.78089519 0.77846915 0.7743958
 0.77283455 0.77153396 0.77052333 0.76582526 0.75688207 0.74682724
 0.7465197  0.74264776 0.74023465 0.73002377 0.71702947 0.71299977
 0.71299977 0.71224277 0.70710678 0.70605375 0.70605375 0.70465908
 0.69637403 0.69052284 0.68718921 0.68602884 0.68540342 0.68405053]

0. Best individuals (target): [0.82591667 0.7900654  0.7823587  0.78089519 0.77846915 0.7743958
 0.77283455 0.77153396 0.77052333 0.76582526 0.75688207 0.74682724
 0.7465197  0.74264776 0.74023465 0.73002377 0.71702947 0.71299977
 0.71299977 0.71224277 0.70710678 0.70605375 0.70605375 0.70465908
 0.69637403 0.69052284 0.68718921 0.68602884 0.68540342 0.68405053]
Mean reward: 0.6619758610458323
Sessgen: 6.53383207321167, other: 0.0, select1: 0.004628896713256836, select2: 0.0, select3: 0.0, fit: 1.4876270294189453, score: 0.0

1. Best individuals (reward): [0.82591667 0.7900654  0.7823587  0.7823587  0.78089519 0.77846915
 0.7


9. Best individuals (reward): [0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.83545601 0.82591667 0.82591667 0.81340183 0.7900654  0.7900654
 0.7900654  0.7900654  0.7900654  0.78965667 0.78603507 0.78574758
 0.7823587  0.7823587  0.7823587  0.7823587  0.7823587  0.78089519
 0.78089519 0.77846915 0.7743958  0.7743958  0.77283455 0.77283455]

9. Best individuals (target): [0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.83545601 0.82591667 0.82591667 0.81340183 0.7900654  0.7900654
 0.7900654  0.7900654  0.7900654  0.78965667 0.78603507 0.78574758
 0.7823587  0.7823587  0.7823587  0.7823587  0.7823587  0.78089519
 0.78089519 0.77846915 0.7743958  0.7743958  0.77283455 0.77283455]
Mean reward: 0.7219566648546666
Sessgen: 6.048006296157837, other: 0.0009982585906982422, select1: 0.0063533782958984375, select2: 0.0023169517517089844, select3: 0.0, fit: 0.1676781177520752, score: 0.0

10. Best individuals (reward): [0.85751384 0.85751384 0.8575138


18. Best individuals (reward): [0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.83545601
 0.83545601 0.8299668  0.82591667 0.82591667 0.82496145 0.82379897
 0.81669309 0.81340183 0.81328204 0.80670211 0.80487557 0.80257474
 0.79289112 0.7900654  0.7900654  0.7900654  0.7900654  0.7900654
 0.7900654 ]

18. Best individuals (target): [0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.83545601
 0.83545601 0.8299668  0.82591667 0.82591667 0.82496145 0.82379897
 0.81669309 0.81340183 0.81328204 0.80670211 0.80487557 0.80257474
 0.79289112 0.7900654  0.7900654  0.7900654  0.7900654  0.7900654
 0.7900654 ]
Mean reward: 0.7401206417659217
Sessgen: 7.552574634552002, other: 0.0, select1: 0.0055234432220458984, select2: 0.0, select3: 0.0, fit: 0.19816827774047852, score: 0.0

19. Best individuals (reward): [0.85751384 0.85751384 0.85751384 0.85751


27. Best individuals (reward): [0.87263064 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.83545601 0.83545601 0.83545601 0.83139288 0.83139288
 0.83139288 0.83139288 0.83011011 0.8299668  0.82591667 0.82591667
 0.82591667 0.82512803 0.82512803 0.82512803 0.82512803 0.82512803
 0.82512803]

27. Best individuals (target): [0.87263064 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.83545601 0.83545601 0.83545601 0.83139288 0.83139288
 0.83139288 0.83139288 0.83011011 0.8299668  0.82591667 0.82591667
 0.82591667 0.82512803 0.82512803 0.82512803 0.82512803 0.82512803
 0.82512803]
Mean reward: 0.7700834683974921
Sessgen: 7.73996114730835, other: 0.0, select1: 0.004164218902587891, select2: 0.0, select3: 0.0, fit: 0.18094921112060547, score: 0.0

28. Best individuals (reward): [0.87263064 0.85751384 0.85751384 0.85751


36. Best individuals (reward): [0.87263064 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.83633011 0.83545601 0.83545601
 0.83545601 0.83545601 0.83545601 0.83545601 0.83545601 0.83139288
 0.83139288 0.83139288 0.83139288 0.83139288 0.83139288 0.83139288]

36. Best individuals (target): [0.87263064 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.83633011 0.83545601 0.83545601
 0.83545601 0.83545601 0.83545601 0.83545601 0.83545601 0.83139288
 0.83139288 0.83139288 0.83139288 0.83139288 0.83139288 0.83139288]
Mean reward: 0.775709943515539
Sessgen: 7.451101779937744, other: 0.0, select1: 0.0062274932861328125, select2: 0.003999948501586914, select3: 0.0, fit: 0.2077336311340332, score: 0.0

37. Best individuals (reward): [0.87263064 0.85751384 0.85751384 0.85751384 0.8


45. Best individuals (reward): [0.88813808 0.87263064 0.86588376 0.86350691 0.85981201 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85445029 0.85097735
 0.84062824 0.83633011 0.83614083 0.83545601 0.83545601 0.83545601
 0.83545601]

45. Best individuals (target): [0.88813808 0.87263064 0.86588376 0.86350691 0.85981201 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85445029 0.85097735
 0.84062824 0.83633011 0.83614083 0.83545601 0.83545601 0.83545601
 0.83545601]
Mean reward: 0.7754737408900119
Sessgen: 7.291345834732056, other: 0.0, select1: 0.010705709457397461, select2: 0.0, select3: 0.0, fit: 0.18487095832824707, score: 0.003795146942138672

46. Best individuals (reward): [0.88813808 0.87263064 


54. Best individuals (reward): [0.88813808 0.88459188 0.87263064 0.86604355 0.86588376 0.86454464
 0.86350691 0.85987947 0.85981201 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85720981
 0.85505209]

54. Best individuals (target): [0.88813808 0.88459188 0.87263064 0.86604355 0.86588376 0.86454464
 0.86350691 0.85987947 0.85981201 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85720981
 0.85505209]
Mean reward: 0.7836121128729832
Sessgen: 5.826666593551636, other: 0.001003265380859375, select1: 0.0, select2: 0.004178285598754883, select3: 0.0, fit: 0.1502542495727539, score: 0.0

55. Best individuals (reward): [0.88813808 0.88459188 0


63. Best individuals (reward): [0.90299286 0.90081071 0.88813808 0.88459188 0.874636   0.87263064
 0.86745773 0.86623413 0.86604355 0.86604355 0.86588376 0.86454464
 0.86350691 0.85987947 0.85981201 0.85892071 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384]

63. Best individuals (target): [0.90299286 0.90081071 0.88813808 0.88459188 0.874636   0.87263064
 0.86745773 0.86623413 0.86604355 0.86604355 0.86588376 0.86454464
 0.86350691 0.85987947 0.85981201 0.85892071 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.85751384]
Mean reward: 0.796078886212295
Sessgen: 5.888380765914917, other: 0.0, select1: 0.002809286117553711, select2: 0.003935337066650391, select3: 0.0, fit: 0.1531362533569336, score: 0.0009982585906982422

64. Best individuals (reward): [0.9029


72. Best individuals (reward): [0.91817706 0.90299286 0.90081071 0.88813808 0.88459188 0.874636
 0.87263064 0.87036556 0.86745773 0.86726029 0.86623413 0.86604355
 0.86604355 0.86604355 0.86604355 0.86604355 0.86588376 0.86454464
 0.86350691 0.85987947 0.85987145 0.85987145 0.85981201 0.85892071
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.8546298 ]

72. Best individuals (target): [0.91817706 0.90299286 0.90081071 0.88813808 0.88459188 0.874636
 0.87263064 0.87036556 0.86745773 0.86726029 0.86623413 0.86604355
 0.86604355 0.86604355 0.86604355 0.86604355 0.86588376 0.86454464
 0.86350691 0.85987947 0.85987145 0.85987145 0.85981201 0.85892071
 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384 0.85751384
 0.8546298 ]
Mean reward: 0.7987873322756865
Sessgen: 6.33967924118042, other: 0.0, select1: 0.0, select2: 0.004174470901489258, select3: 0.0, fit: 0.17007660865783691, score: 0.0010046958923339844

73. Best individuals (reward): [0.91817706 0.90299286 0.90


81. Best individuals (reward): [0.91817706 0.90299286 0.90081071 0.88813808 0.88547828 0.88459188
 0.88371    0.87788383 0.874636   0.87277118 0.87263064 0.87036556
 0.86745773 0.86726029 0.86623413 0.86604355 0.86604355 0.86604355
 0.86604355 0.86604355 0.86604355 0.86604355 0.86604355 0.86588376
 0.86454464 0.86350691 0.86348066 0.85987947 0.85987145 0.85987145]

81. Best individuals (target): [0.91817706 0.90299286 0.90081071 0.88813808 0.88547828 0.88459188
 0.88371    0.87788383 0.874636   0.87277118 0.87263064 0.87036556
 0.86745773 0.86726029 0.86623413 0.86604355 0.86604355 0.86604355
 0.86604355 0.86604355 0.86604355 0.86604355 0.86604355 0.86588376
 0.86454464 0.86350691 0.86348066 0.85987947 0.85987145 0.85987145]
Mean reward: 0.8124785079324771
Sessgen: 5.753612995147705, other: 0.001007080078125, select1: 0.005911588668823242, select2: 0.003204822540283203, select3: 0.0, fit: 0.12115359306335449, score: 0.0

82. Best individuals (reward): [0.91817706 0.90299286 0.90081071


90. Best individuals (reward): [0.95470293 0.92291179 0.91817706 0.90299286 0.90081071 0.88813808
 0.88547828 0.88459188 0.88371    0.88020349 0.87993471 0.87788383
 0.87613691 0.87482726 0.874636   0.87277118 0.87263064 0.87036556
 0.86745773 0.86726029 0.86623413 0.86604355 0.86604355 0.86604355
 0.86604355 0.86604355 0.86604355 0.86604355 0.86604355 0.86604355]

90. Best individuals (target): [0.95470293 0.92291179 0.91817706 0.90299286 0.90081071 0.88813808
 0.88547828 0.88459188 0.88371    0.88020349 0.87993471 0.87788383
 0.87613691 0.87482726 0.874636   0.87277118 0.87263064 0.87036556
 0.86745773 0.86726029 0.86623413 0.86604355 0.86604355 0.86604355
 0.86604355 0.86604355 0.86604355 0.86604355 0.86604355 0.86604355]
Mean reward: 0.8137028318914062
Sessgen: 5.579805135726929, other: 0.0, select1: 0.004167079925537109, select2: 0.0, select3: 0.0, fit: 0.15011167526245117, score: 0.0

91. Best individuals (reward): [0.95470293 0.92291179 0.91817706 0.90299286 0.90081071 0.888138


99. Best individuals (reward): [0.95470293 0.92291179 0.91817706 0.90299286 0.90199628 0.90081071
 0.88813808 0.88547828 0.88502214 0.88459188 0.88371    0.88020349
 0.87993471 0.87788383 0.87687981 0.87613691 0.87482726 0.874636
 0.87277118 0.87263064 0.87036556 0.87036556 0.86745773 0.86726029
 0.86637879 0.86623413 0.86604355 0.86604355 0.86604355 0.86604355]

99. Best individuals (target): [0.95470293 0.92291179 0.91817706 0.90299286 0.90199628 0.90081071
 0.88813808 0.88547828 0.88502214 0.88459188 0.88371    0.88020349
 0.87993471 0.87788383 0.87687981 0.87613691 0.87482726 0.874636
 0.87277118 0.87263064 0.87036556 0.87036556 0.86745773 0.86726029
 0.86637879 0.86623413 0.86604355 0.86604355 0.86604355 0.86604355]
Mean reward: 0.813784610916991
Sessgen: 5.770811557769775, other: 0.0, select1: 0.004170417785644531, select2: 0.0, select3: 0.0, fit: 0.15006303787231445, score: 0.0

100. Best individuals (reward): [0.95470293 0.92291179 0.91817706 0.90299286 0.90199628 0.90081071
 


108. Best individuals (reward): [0.95470293 0.92291179 0.91817706 0.90689399 0.90299286 0.90199628
 0.90104757 0.90081071 0.89555999 0.88813808 0.88773712 0.88547828
 0.88502214 0.88459188 0.88371    0.88235673 0.88020349 0.87993471
 0.87896226 0.87788383 0.87765591 0.87687981 0.87613691 0.87482726
 0.874636   0.87277118 0.87263064 0.87036556 0.87036556 0.86745773
 0.86726029]

108. Best individuals (target): [0.95470293 0.92291179 0.91817706 0.90689399 0.90299286 0.90199628
 0.90104757 0.90081071 0.89555999 0.88813808 0.88773712 0.88547828
 0.88502214 0.88459188 0.88371    0.88235673 0.88020349 0.87993471
 0.87896226 0.87788383 0.87765591 0.87687981 0.87613691 0.87482726
 0.874636   0.87277118 0.87263064 0.87036556 0.87036556 0.86745773
 0.86726029]
Mean reward: 0.8210829071149445
Sessgen: 4.0085039138793945, other: 0.0, select1: 0.0, select2: 0.0, select3: 0.0, fit: 0.0802755355834961, score: 0.0

109. Best individuals (reward): [0.95470293 0.92291179 0.91817706 0.90689399 0.9029928


117. Best individuals (reward): [0.95470293 0.93390028 0.92291179 0.92279075 0.91817706 0.90689399
 0.90299286 0.90199628 0.90104757 0.90081071 0.89989077 0.89975107
 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107
 0.89975107 0.89975107 0.89975107 0.8964114  0.89555999 0.89256702
 0.89256702 0.89256702 0.88813808 0.88773712 0.8861232  0.8861232
 0.88547828]

117. Best individuals (target): [0.95470293 0.93390028 0.92291179 0.92279075 0.91817706 0.90689399
 0.90299286 0.90199628 0.90104757 0.90081071 0.89989077 0.89975107
 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107
 0.89975107 0.89975107 0.89975107 0.8964114  0.89555999 0.89256702
 0.89256702 0.89256702 0.88813808 0.88773712 0.8861232  0.8861232
 0.88547828]
Mean reward: 0.8550594769847895
Sessgen: 5.197877407073975, other: 0.0039403438568115234, select1: 0.0022745132446289062, select2: 0.001764535903930664, select3: 0.0, fit: 0.1469733715057373, score: 0.0

118. Best individuals (reward): [0.9


126. Best individuals (reward): [0.95470293 0.93390028 0.92291179 0.92279075 0.91817706 0.90689399
 0.90549458 0.90299286 0.90199628 0.90130619 0.90104757 0.90092007
 0.90092007 0.90092007 0.90092007 0.90081071 0.89989077 0.89975107
 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107
 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107
 0.89975107]

126. Best individuals (target): [0.95470293 0.93390028 0.92291179 0.92279075 0.91817706 0.90689399
 0.90549458 0.90299286 0.90199628 0.90130619 0.90104757 0.90092007
 0.90092007 0.90092007 0.90092007 0.90081071 0.89989077 0.89975107
 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107
 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107
 0.89975107]
Mean reward: 0.8608647322051542
Sessgen: 5.698013782501221, other: 0.0, select1: 0.0009984970092773438, select2: 0.0, select3: 0.0, fit: 0.1583080291748047, score: 0.0

127. Best individuals (reward): [0.95470293 0.93390028 0.92291179 0.9


135. Best individuals (reward): [0.95470293 0.94356669 0.93390028 0.92291179 0.92279075 0.91817706
 0.90689399 0.90549458 0.90299286 0.90199628 0.90130619 0.90104757
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90081071 0.90033598
 0.89989077 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107]

135. Best individuals (target): [0.95470293 0.94356669 0.93390028 0.92291179 0.92279075 0.91817706
 0.90689399 0.90549458 0.90299286 0.90199628 0.90130619 0.90104757
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90081071 0.90033598
 0.89989077 0.89975107 0.89975107 0.89975107 0.89975107 0.89975107]
Mean reward: 0.8567714763413489
Sessgen: 4.53678297996521, other: 0.0, select1: 0.003942012786865234, select2: 0.0, select3: 0.0, fit: 0.09742236137390137, score: 0.0

136. Best individuals (reward): [0.95470293 0.94356669 0.93390028 0.92291179 0.92279075 0.9181


144. Best individuals (reward): [0.95470293 0.94356669 0.93390028 0.92291179 0.92279075 0.91817706
 0.90689399 0.90549458 0.90299286 0.90284315 0.90199628 0.90130619
 0.90104757 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90081071 0.90033598
 0.89989077]

144. Best individuals (target): [0.95470293 0.94356669 0.93390028 0.92291179 0.92279075 0.91817706
 0.90689399 0.90549458 0.90299286 0.90284315 0.90199628 0.90130619
 0.90104757 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90081071 0.90033598
 0.89989077]
Mean reward: 0.867590713312724
Sessgen: 4.400766372680664, other: 0.0, select1: 0.003998756408691406, select2: 0.0018243789672851562, select3: 0.0, fit: 0.15164923667907715, score: 0.0

145. Best individuals (reward): [0.95470293 0.943566


153. Best individuals (reward): [0.96632247 0.95470293 0.94356669 0.93390028 0.92291179 0.92291179
 0.92279075 0.91817706 0.91405598 0.90689399 0.90549458 0.90299286
 0.90284315 0.90199628 0.90130619 0.90104757 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007]

153. Best individuals (target): [0.96632247 0.95470293 0.94356669 0.93390028 0.92291179 0.92291179
 0.92279075 0.91817706 0.91405598 0.90689399 0.90549458 0.90299286
 0.90284315 0.90199628 0.90130619 0.90104757 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007]
Mean reward: 0.8639046882692492
Sessgen: 5.738512754440308, other: 0.0, select1: 0.006134033203125, select2: 0.004578113555908203, select3: 0.0, fit: 0.18464207649230957, score: 0.0

154. Best individuals (reward): [0.96632247 0.95470293 0.94356669 0.93390028 0.


162. Best individuals (reward): [0.96632247 0.95470293 0.94356669 0.93390028 0.92291179 0.92291179
 0.92279075 0.91817706 0.91405598 0.91405598 0.90689399 0.90607007
 0.90549458 0.90501449 0.90299286 0.90284315 0.90199628 0.90130619
 0.90104757 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007]

162. Best individuals (target): [0.96632247 0.95470293 0.94356669 0.93390028 0.92291179 0.92291179
 0.92279075 0.91817706 0.91405598 0.91405598 0.90689399 0.90607007
 0.90549458 0.90501449 0.90299286 0.90284315 0.90199628 0.90130619
 0.90104757 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.90092007]
Mean reward: 0.8647630745043753
Sessgen: 6.155763387680054, other: 0.0, select1: 0.006452798843383789, select2: 0.001051187515258789, select3: 0.0, fit: 0.14665699005126953, score: 0.0

163. Best individuals (reward): [0.96632247 0.954702


171. Best individuals (reward): [0.96632247 0.95470293 0.94356669 0.93390028 0.92291179 0.92291179
 0.92279075 0.91817706 0.91455844 0.91405598 0.91405598 0.91186324
 0.90689399 0.90607007 0.90549458 0.90501449 0.90299286 0.90284315
 0.90225911 0.90206702 0.90199628 0.90130619 0.90104757 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.8861232 ]

171. Best individuals (target): [0.96632247 0.95470293 0.94356669 0.93390028 0.92291179 0.92291179
 0.92279075 0.91817706 0.91455844 0.91405598 0.91405598 0.91186324
 0.90689399 0.90607007 0.90549458 0.90501449 0.90299286 0.90284315
 0.90225911 0.90206702 0.90199628 0.90130619 0.90104757 0.90092007
 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007 0.90092007
 0.8861232 ]
Mean reward: 0.8627745295517975
Sessgen: 5.823999643325806, other: 0.0009932518005371094, select1: 0.0020508766174316406, select2: 0.0, select3: 0.0, fit: 0.15518951416015625, score: 0.0

172. Best individuals (reward): [0.96632247 0.9547


180. Best individuals (reward): [0.96632247 0.95470293 0.94356669 0.93390028 0.92291179 0.92291179
 0.92279075 0.91817706 0.91455844 0.91405598 0.91405598 0.91405598
 0.91186324 0.90689399 0.90607007 0.90573379 0.90549458 0.90501449
 0.90299286 0.90284315 0.90228567 0.90225911 0.90225911 0.90206702
 0.90199628 0.90130619 0.90104757 0.90092007 0.90092007 0.90092007]

180. Best individuals (target): [0.96632247 0.95470293 0.94356669 0.93390028 0.92291179 0.92291179
 0.92279075 0.91817706 0.91455844 0.91405598 0.91405598 0.91405598
 0.91186324 0.90689399 0.90607007 0.90573379 0.90549458 0.90501449
 0.90299286 0.90284315 0.90228567 0.90225911 0.90225911 0.90206702
 0.90199628 0.90130619 0.90104757 0.90092007 0.90092007 0.90092007]
Mean reward: 0.8670258995440814
Sessgen: 5.918442487716675, other: 0.0, select1: 0.004170656204223633, select2: 0.003957509994506836, select3: 0.0, fit: 0.15486645698547363, score: 0.0

181. Best individuals (reward): [0.96632247 0.95470293 0.94356669 0.93390028


189. Best individuals (reward): [0.96632247 0.95470293 0.94356669 0.93390028 0.93372698 0.92566811
 0.92291179 0.92291179 0.92279075 0.9186918  0.91817706 0.91455844
 0.91405598 0.91405598 0.91405598 0.91405598 0.9137489  0.91186324
 0.90689399 0.90607007 0.90573379 0.90549458 0.90501449 0.90299286
 0.90284315 0.9026584  0.90228567 0.90225911 0.90225911 0.90225911
 0.90206702]

189. Best individuals (target): [0.96632247 0.95470293 0.94356669 0.93390028 0.93372698 0.92566811
 0.92291179 0.92291179 0.92279075 0.9186918  0.91817706 0.91455844
 0.91405598 0.91405598 0.91405598 0.91405598 0.9137489  0.91186324
 0.90689399 0.90607007 0.90573379 0.90549458 0.90501449 0.90299286
 0.90284315 0.9026584  0.90228567 0.90225911 0.90225911 0.90225911
 0.90206702]
Mean reward: 0.8614728789391778
Sessgen: 5.871695280075073, other: 0.0, select1: 0.0, select2: 0.0041790008544921875, select3: 0.0, fit: 0.15790200233459473, score: 0.0

190. Best individuals (reward): [0.96632247 0.95470293 0.94356669 0.


198. Best individuals (reward): [0.96632247 0.95470293 0.94356669 0.94350934 0.93390028 0.93372698
 0.93067058 0.92869745 0.92869745 0.92566811 0.92346349 0.92291179
 0.92291179 0.92279075 0.92221992 0.9199782  0.91940981 0.91940981
 0.91940981 0.91940981 0.9186918  0.91820959 0.91820959 0.91820959
 0.91817706 0.91594324 0.91594324 0.91594324 0.91455844 0.91405598]

198. Best individuals (target): [0.96632247 0.95470293 0.94356669 0.94350934 0.93390028 0.93372698
 0.93067058 0.92869745 0.92869745 0.92566811 0.92346349 0.92291179
 0.92291179 0.92279075 0.92221992 0.9199782  0.91940981 0.91940981
 0.91940981 0.91940981 0.9186918  0.91820959 0.91820959 0.91820959
 0.91817706 0.91594324 0.91594324 0.91594324 0.91455844 0.91405598]
Mean reward: 0.8979366429362804
Sessgen: 6.004518985748291, other: 0.0, select1: 0.004185676574707031, select2: 0.0, select3: 0.0, fit: 0.1494002342224121, score: 0.0

199. Best individuals (reward): [0.96632247 0.96082867 0.95470293 0.94356669 0.94350934 0.9339


207. Best individuals (reward): [0.96632247 0.96082867 0.96082867 0.96082867 0.95470293 0.95093899
 0.94493278 0.94493278 0.94493278 0.94356669 0.94350934 0.93390028
 0.93372698 0.93067058 0.93067058 0.93067058 0.92869745 0.92869745
 0.92869745 0.92803324 0.92803324 0.92741498 0.92566811 0.92346349
 0.92346349 0.92291179 0.92291179 0.92279075 0.92221992 0.92221992
 0.92221992]

207. Best individuals (target): [0.96632247 0.96082867 0.96082867 0.96082867 0.95470293 0.95093899
 0.94493278 0.94493278 0.94493278 0.94356669 0.94350934 0.93390028
 0.93372698 0.93067058 0.93067058 0.93067058 0.92869745 0.92869745
 0.92869745 0.92803324 0.92803324 0.92741498 0.92566811 0.92346349
 0.92346349 0.92291179 0.92291179 0.92279075 0.92221992 0.92221992
 0.92221992]
Mean reward: 0.9067191938899665
Sessgen: 5.031508922576904, other: 0.0, select1: 0.006511211395263672, select2: 0.0, select3: 0.0, fit: 0.14688730239868164, score: 0.0

208. Best individuals (reward): [0.96632247 0.96082867 0.96082867 0.9


216. Best individuals (reward): [0.96632247 0.96082867 0.96082867 0.96082867 0.96082867 0.96082867
 0.96082867 0.96082867 0.95838627 0.95470293 0.95093899 0.95093899
 0.95093899 0.95093899 0.95093899 0.95093899 0.94493278 0.94493278
 0.94493278 0.94493278 0.94356669 0.94350934 0.94350934 0.94350934
 0.94350934 0.94350934 0.94350934 0.93390028 0.93372698 0.93076674
 0.93076674]

216. Best individuals (target): [0.96632247 0.96082867 0.96082867 0.96082867 0.96082867 0.96082867
 0.96082867 0.96082867 0.95838627 0.95470293 0.95093899 0.95093899
 0.95093899 0.95093899 0.95093899 0.95093899 0.94493278 0.94493278
 0.94493278 0.94493278 0.94356669 0.94350934 0.94350934 0.94350934
 0.94350934 0.94350934 0.94350934 0.93390028 0.93372698 0.93076674
 0.93076674]
Mean reward: 0.9154740646067109
Sessgen: 5.891501426696777, other: 0.0, select1: 0.003992319107055664, select2: 0.0, select3: 0.0, fit: 0.1552591323852539, score: 0.0

217. Best individuals (reward): [0.96632247 0.96082867 0.96082867 0.96


225. Best individuals (reward): [0.96632247 0.96082867 0.96082867 0.96082867 0.96082867 0.96082867
 0.96082867 0.96082867 0.96082867 0.96082867 0.95838627 0.95838627
 0.95838627 0.95838627 0.95838627 0.95838627 0.95470293 0.95093899
 0.95093899 0.95093899 0.95093899 0.95093899 0.95093899 0.94493278
 0.94493278 0.94493278 0.94493278 0.94493278 0.94493278 0.94493278]

225. Best individuals (target): [0.96632247 0.96082867 0.96082867 0.96082867 0.96082867 0.96082867
 0.96082867 0.96082867 0.96082867 0.96082867 0.95838627 0.95838627
 0.95838627 0.95838627 0.95838627 0.95838627 0.95470293 0.95093899
 0.95093899 0.95093899 0.95093899 0.95093899 0.95093899 0.94493278
 0.94493278 0.94493278 0.94493278 0.94493278 0.94493278 0.94493278]
Mean reward: 0.9190932563802714
Sessgen: 6.09162163734436, other: 0.0, select1: 0.006136655807495117, select2: 0.0021746158599853516, select3: 0.0, fit: 0.14829182624816895, score: 0.0

226. Best individuals (reward): [0.96632247 0.96082867 0.96082867 0.96082867