# Optimization of the snake parameters

The goal is to find the best parametrization for Cobra, i.e get the parameters that make the learninship as efficient as possible. First let import the files we need : 

In [270]:
import os
import sys
import math
import pygame
import random
import time

import tensorflow
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
ù
from collections import deque

import numpy as np

from Env import Env
from Viper import Viper

### To delete :

In [240]:
class Cobra :
    def __init__(self, epsilon_decay = 0.995, learning_rate = 0.001, batch_size = 32):
        
        self.memory  = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        self.batch_size = batch_size

        self.model = self.create_model()
        # "hack" implemented by DeepMind to improve convergence
        self.target_model = self.create_model()

    def create_model(self):
        model = Sequential()
        model.add(Dense(10, input_dim=8, activation="relu"))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(4, activation='linear'))
        model.compile(loss="mean_squared_error", optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, new_state, done):
        self.memory.append([state, action, reward, new_state, done])

    def replay(self):
        batch_size = 32
        if len(self.memory) < batch_size:
            return

        samples = random.sample(self.memory, batch_size)
        for sample in samples:
            state, action, reward, new_state, done = sample
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(new_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)

    def target_train(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i]
        self.target_model.set_weights(target_weights)


    def act(self, state):
        # Exploration
        if np.random.random() < self.epsilon:
            return random.randint(0,3)

        return np.argmax(self.model.predict(state)[0])

In [251]:
# Cobra main function
def cobra_main(dqn_agent = Cobra(), nb_gens=10, max_action=500, show=True):
    """
    Learns and play Snake
    dqn_agent : Agent with preset
    nb_gens = number of games for training
    max_action = max number of action before next game
    show : True if you want to play with no randomness after training
    """
    env = Env(False)

    # Nb games played with no random move
    nb_games = 1

    # Recording score
    gen_scores = [0] * nb_gens
    real_scores = [0] * nb_games

    for trial in range(nb_gens + nb_games):
        print("Game : ", trial, " | Epsilon is ", dqn_agent.epsilon)

        # Does not show game until this end of the traning
        if trial == nb_gens and show:
            print("Traning Done")
            env.init_pygame()
            pygame.event.get()
            dqn_agent.epsilon = 0
            print('--- Cobra is playing ---')

        cur_state = np.array(env.reset()).reshape(1,8)
        for step in range(max_action):
            # Get the action from Cobra based on the current state
            action = dqn_agent.act(cur_state)

            # Applying move to environement to get a reward and a new state
            new_state, reward, done = env.step(action)

            # reward = reward if not done else -20
            new_state = np.array(new_state).reshape(1,8)

            # Remember the state, the move and the reward it got.
            dqn_agent.remember(cur_state, action, reward, new_state, done)

            cur_state = new_state
            # If has lost/won
            if done:
                dqn_agent.replay()
                dqn_agent.target_train()
                break
        if trial < nb_gens:
            gen_scores[trial] = env.score
        else :
            real_scores[trial-nb_gens] = env.score
    print("Done")
    return gen_scores, real_scores

### ----------------------------------------------------

To get the best parameters, you will train different Cobras using different values. The parameters we are going to modify are the followings :
- self.epsilon_decay
- self.learning_rate
- self.batch_size

First, we are looking for the best couple (epsilon_decay, learning_rate) :



In [215]:
l_epsilon_rate = np.linspace(0.75,0.999,5)
l_rate = np.linspace(0.0005,0.0015,5)
print(l_epsilon_rate)
print(l_rate)

matrix_param = np.array([l_epsilon_rate, l_rate]).T
print(matrix_param)

[0.75    0.81225 0.8745  0.93675 0.999  ]
[0.0005  0.00075 0.001   0.00125 0.0015 ]
[[7.5000e-01 5.0000e-04]
 [8.1225e-01 7.5000e-04]
 [8.7450e-01 1.0000e-03]
 [9.3675e-01 1.2500e-03]
 [9.9900e-01 1.5000e-03]]


In [216]:
matrix_cobra = []

for eps in l_epsilon_rate :
    cobra_line = []
    for rate in l_rate :
        cobra_line.append(Cobra(epsilon_decay = eps, learning_rate = rate))
    matrix_cobra.append(cobra_line)
print('done')

done


**matrix_cobra** now contains a 2D-array composed by cobra using different parameters :

In [248]:
matrix_cobra = np.array(matrix_cobra)

print(matrix_cobra)
print(matrix_cobra.shape)

[[<__main__.Cobra object at 0x7fb0a0ada690>
  <__main__.Cobra object at 0x7fb08557ded0>
  <__main__.Cobra object at 0x7fb086be19d0>
  <__main__.Cobra object at 0x7fb0a3cc7dd0>
  <__main__.Cobra object at 0x7fb0a0866d90>]
 [<__main__.Cobra object at 0x7fb0a4247f50>
  <__main__.Cobra object at 0x7fb087e32890>
  <__main__.Cobra object at 0x7fb0857b4f10>
  <__main__.Cobra object at 0x7fb0a0a0d990>
  <__main__.Cobra object at 0x7fb087e4eb50>]
 [<__main__.Cobra object at 0x7fb087ed43d0>
  <__main__.Cobra object at 0x7fb087e89810>
  <__main__.Cobra object at 0x7fb0a0ada7d0>
  <__main__.Cobra object at 0x7fb087260e90>
  <__main__.Cobra object at 0x7fb085472450>]
 [<__main__.Cobra object at 0x7fb0a3f9ba10>
  <__main__.Cobra object at 0x7fb0a2df8410>
  <__main__.Cobra object at 0x7fb0877e60d0>
  <__main__.Cobra object at 0x7fb0a31d4650>
  <__main__.Cobra object at 0x7fb087c35750>]
 [<__main__.Cobra object at 0x7fb0a2d80490>
  <__main__.Cobra object at 0x7fb0a44ba690>
  <__main__.Cobra object at 

### Training part 

In the following cell, each **cobra** from **matrix_cobra** will have a training by using the **cobra_main** contained in **main.py**. After each training, a mean of ten games's score will be saved in **l_score**. Note that the training is composed by 200 generations with 500 maximum actions for each of them.

In [218]:
l_score = []
evol_score = []

for cobra_line in matrix_cobra :
    score_line = []
    score_evol_line = []
    for cobra in cobra_line  :
        
        print("epsilon_decay : ", cobra.epsilon_decay)
        print("learning_rate : ", cobra.learning_rate)
        
        score_evol, real_score = cobra_main(dqn_agent = cobra, nb_gens = 200, max_action = 500, show = False)
        
        score_line.append(np.mean(real_score))
        score_evol_line.append(score_evol)
        print('The score :', np.mean(real_score))
        time.sleep(2)
    l_score.append(score_line)
    evol_score.append(score_evol_line)
    

print(l_score)

epsilon_decay :  0.75
learning_rate :  0.0005
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  0.75
Game :  2  | Epsilon is  0.5625
Game :  3  | Epsilon is  0.421875
Game :  4  | Epsilon is  0.31640625
Game :  5  | Epsilon is  0.2373046875
Game :  6  | Epsilon is  0.177978515625
Game :  7  | Epsilon is  0.13348388671875
Game :  8  | Epsilon is  0.1001129150390625
Game :  9  | Epsilon is  0.07508468627929688
Game :  10  | Epsilon is  0.056313514709472656
Game :  11  | Epsilon is  0.04223513603210449
Game :  12  | Epsilon is  0.03167635202407837
Game :  13  | Epsilon is  0.023757264018058777
Game :  14  | Epsilon is  0.017817948013544083
Game :  15  | Epsilon is  0.017817948013544083
Game :  16  | Epsilon is  0.013363461010158062
Game :  17  | Epsilon is  0.010022595757618546
Game :  18  | Epsilon is  0.01
Game :  19  | Epsilon is  0.01
Game :  20  | Epsilon is  0.01
Game :  21  | Epsilon is  0.01
Game :  22  | Epsilon is  0.01
Game :  23  | Epsilon is  0.01
Game :  24  | Epsilon is

Game :  36  | Epsilon is  0.01
Game :  37  | Epsilon is  0.01
Game :  38  | Epsilon is  0.01
Game :  39  | Epsilon is  0.01
Game :  40  | Epsilon is  0.01
Game :  41  | Epsilon is  0.01
Game :  42  | Epsilon is  0.01
Game :  43  | Epsilon is  0.01
Game :  44  | Epsilon is  0.01
Game :  45  | Epsilon is  0.01
Game :  46  | Epsilon is  0.01
Game :  47  | Epsilon is  0.01
Game :  48  | Epsilon is  0.01
Game :  49  | Epsilon is  0.01
Game :  50  | Epsilon is  0.01
Game :  51  | Epsilon is  0.01
Game :  52  | Epsilon is  0.01
Game :  53  | Epsilon is  0.01
Game :  54  | Epsilon is  0.01
Game :  55  | Epsilon is  0.01
Game :  56  | Epsilon is  0.01
Game :  57  | Epsilon is  0.01
Game :  58  | Epsilon is  0.01
Game :  59  | Epsilon is  0.01
Game :  60  | Epsilon is  0.01
Game :  61  | Epsilon is  0.01
Game :  62  | Epsilon is  0.01
Game :  63  | Epsilon is  0.01
Game :  64  | Epsilon is  0.01
Game :  65  | Epsilon is  0.01
Game :  66  | Epsilon is  0.01
Game :  67  | Epsilon is  0.01
Game :  

Game :  59  | Epsilon is  0.010022595757618546
Game :  60  | Epsilon is  0.010022595757618546
Game :  61  | Epsilon is  0.010022595757618546
Game :  62  | Epsilon is  0.010022595757618546
Game :  63  | Epsilon is  0.010022595757618546
Game :  64  | Epsilon is  0.010022595757618546
Game :  65  | Epsilon is  0.010022595757618546
Game :  66  | Epsilon is  0.010022595757618546
Game :  67  | Epsilon is  0.010022595757618546
Game :  68  | Epsilon is  0.010022595757618546
Game :  69  | Epsilon is  0.010022595757618546
Game :  70  | Epsilon is  0.010022595757618546
Game :  71  | Epsilon is  0.010022595757618546
Game :  72  | Epsilon is  0.010022595757618546
Game :  73  | Epsilon is  0.010022595757618546
Game :  74  | Epsilon is  0.010022595757618546
Game :  75  | Epsilon is  0.010022595757618546
Game :  76  | Epsilon is  0.010022595757618546
Game :  77  | Epsilon is  0.010022595757618546
Game :  78  | Epsilon is  0.010022595757618546
Game :  79  | Epsilon is  0.01
Game :  80  | Epsilon is  0.0

Game :  92  | Epsilon is  0.01
Game :  93  | Epsilon is  0.01
Game :  94  | Epsilon is  0.01
Game :  95  | Epsilon is  0.01
Game :  96  | Epsilon is  0.01
Game :  97  | Epsilon is  0.01
Game :  98  | Epsilon is  0.01
Game :  99  | Epsilon is  0.01
Game :  100  | Epsilon is  0.01
Game :  101  | Epsilon is  0.01
Game :  102  | Epsilon is  0.01
Game :  103  | Epsilon is  0.01
Game :  104  | Epsilon is  0.01
Game :  105  | Epsilon is  0.01
Game :  106  | Epsilon is  0.01
Game :  107  | Epsilon is  0.01
Game :  108  | Epsilon is  0.01
Game :  109  | Epsilon is  0.01
Game :  110  | Epsilon is  0.01
Game :  111  | Epsilon is  0.01
Game :  112  | Epsilon is  0.01
Game :  113  | Epsilon is  0.01
Game :  114  | Epsilon is  0.01
Game :  115  | Epsilon is  0.01
Game :  116  | Epsilon is  0.01
Game :  117  | Epsilon is  0.01
Game :  118  | Epsilon is  0.01
Game :  119  | Epsilon is  0.01
Game :  120  | Epsilon is  0.01
Game :  121  | Epsilon is  0.01
Game :  122  | Epsilon is  0.01
Game :  123  | E

Game :  134  | Epsilon is  0.01
Game :  135  | Epsilon is  0.01
Game :  136  | Epsilon is  0.01
Game :  137  | Epsilon is  0.01
Game :  138  | Epsilon is  0.01
Game :  139  | Epsilon is  0.01
Game :  140  | Epsilon is  0.01
Game :  141  | Epsilon is  0.01
Game :  142  | Epsilon is  0.01
Game :  143  | Epsilon is  0.01
Game :  144  | Epsilon is  0.01
Game :  145  | Epsilon is  0.01
Game :  146  | Epsilon is  0.01
Game :  147  | Epsilon is  0.01
Game :  148  | Epsilon is  0.01
Game :  149  | Epsilon is  0.01
Game :  150  | Epsilon is  0.01
Game :  151  | Epsilon is  0.01
Game :  152  | Epsilon is  0.01
Game :  153  | Epsilon is  0.01
Game :  154  | Epsilon is  0.01
Game :  155  | Epsilon is  0.01
Game :  156  | Epsilon is  0.01
Game :  157  | Epsilon is  0.01
Game :  158  | Epsilon is  0.01
Game :  159  | Epsilon is  0.01
Game :  160  | Epsilon is  0.01
Game :  161  | Epsilon is  0.01
Game :  162  | Epsilon is  0.01
Game :  163  | Epsilon is  0.01
Game :  164  | Epsilon is  0.01
Game :  

Game :  172  | Epsilon is  0.01
Game :  173  | Epsilon is  0.01
Game :  174  | Epsilon is  0.01
Game :  175  | Epsilon is  0.01
Game :  176  | Epsilon is  0.01
Game :  177  | Epsilon is  0.01
Game :  178  | Epsilon is  0.01
Game :  179  | Epsilon is  0.01
Game :  180  | Epsilon is  0.01
Game :  181  | Epsilon is  0.01
Game :  182  | Epsilon is  0.01
Game :  183  | Epsilon is  0.01
Game :  184  | Epsilon is  0.01
Game :  185  | Epsilon is  0.01
Game :  186  | Epsilon is  0.01
Game :  187  | Epsilon is  0.01
Game :  188  | Epsilon is  0.01
Game :  189  | Epsilon is  0.01
Game :  190  | Epsilon is  0.01
Game :  191  | Epsilon is  0.01
Game :  192  | Epsilon is  0.01
Game :  193  | Epsilon is  0.01
Game :  194  | Epsilon is  0.01
Game :  195  | Epsilon is  0.01
Game :  196  | Epsilon is  0.01
Game :  197  | Epsilon is  0.01
Game :  198  | Epsilon is  0.01
Game :  199  | Epsilon is  0.01
Game :  200  | Epsilon is  0.01
Game :  201  | Epsilon is  0.01
Game :  202  | Epsilon is  0.01
Game :  

Done
The score : 9.4
epsilon_decay :  0.81225
learning_rate :  0.001
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  1.0
Game :  2  | Epsilon is  0.81225
Game :  3  | Epsilon is  0.6597500625
Game :  4  | Epsilon is  0.535881988265625
Game :  5  | Epsilon is  0.435270144968754
Game :  6  | Epsilon is  0.35354817525087046
Game :  7  | Epsilon is  0.28716950534751956
Game :  8  | Epsilon is  0.23325343071852278
Game :  9  | Epsilon is  0.18946009910112013
Game :  10  | Epsilon is  0.15388896549488484
Game :  11  | Epsilon is  0.12499631222322022
Game :  12  | Epsilon is  0.10152825460331062
Game :  13  | Epsilon is  0.08246632480153905
Game :  14  | Epsilon is  0.0669832723200501
Game :  15  | Epsilon is  0.05440716294196069
Game :  16  | Epsilon is  0.04419221809960757
Game :  17  | Epsilon is  0.03589512915140625
Game :  18  | Epsilon is  0.02915581865322973
Game :  19  | Epsilon is  0.02368181370108585
Game :  20  | Epsilon is  0.019235553178706982
Game :  21  | Epsilon is  0.01

Game :  27  | Epsilon is  0.01
Game :  28  | Epsilon is  0.01
Game :  29  | Epsilon is  0.01
Game :  30  | Epsilon is  0.01
Game :  31  | Epsilon is  0.01
Game :  32  | Epsilon is  0.01
Game :  33  | Epsilon is  0.01
Game :  34  | Epsilon is  0.01
Game :  35  | Epsilon is  0.01
Game :  36  | Epsilon is  0.01
Game :  37  | Epsilon is  0.01
Game :  38  | Epsilon is  0.01
Game :  39  | Epsilon is  0.01
Game :  40  | Epsilon is  0.01
Game :  41  | Epsilon is  0.01
Game :  42  | Epsilon is  0.01
Game :  43  | Epsilon is  0.01
Game :  44  | Epsilon is  0.01
Game :  45  | Epsilon is  0.01
Game :  46  | Epsilon is  0.01
Game :  47  | Epsilon is  0.01
Game :  48  | Epsilon is  0.01
Game :  49  | Epsilon is  0.01
Game :  50  | Epsilon is  0.01
Game :  51  | Epsilon is  0.01
Game :  52  | Epsilon is  0.01
Game :  53  | Epsilon is  0.01
Game :  54  | Epsilon is  0.01
Game :  55  | Epsilon is  0.01
Game :  56  | Epsilon is  0.01
Game :  57  | Epsilon is  0.01
Game :  58  | Epsilon is  0.01
Game :  

Game :  66  | Epsilon is  0.01
Game :  67  | Epsilon is  0.01
Game :  68  | Epsilon is  0.01
Game :  69  | Epsilon is  0.01
Game :  70  | Epsilon is  0.01
Game :  71  | Epsilon is  0.01
Game :  72  | Epsilon is  0.01
Game :  73  | Epsilon is  0.01
Game :  74  | Epsilon is  0.01
Game :  75  | Epsilon is  0.01
Game :  76  | Epsilon is  0.01
Game :  77  | Epsilon is  0.01
Game :  78  | Epsilon is  0.01
Game :  79  | Epsilon is  0.01
Game :  80  | Epsilon is  0.01
Game :  81  | Epsilon is  0.01
Game :  82  | Epsilon is  0.01
Game :  83  | Epsilon is  0.01
Game :  84  | Epsilon is  0.01
Game :  85  | Epsilon is  0.01
Game :  86  | Epsilon is  0.01
Game :  87  | Epsilon is  0.01
Game :  88  | Epsilon is  0.01
Game :  89  | Epsilon is  0.01
Game :  90  | Epsilon is  0.01
Game :  91  | Epsilon is  0.01
Game :  92  | Epsilon is  0.01
Game :  93  | Epsilon is  0.01
Game :  94  | Epsilon is  0.01
Game :  95  | Epsilon is  0.01
Game :  96  | Epsilon is  0.01
Game :  97  | Epsilon is  0.01
Game :  

Game :  78  | Epsilon is  0.013687181997271458
Game :  79  | Epsilon is  0.013687181997271458
Game :  80  | Epsilon is  0.013687181997271458
Game :  81  | Epsilon is  0.011969440656613891
Game :  82  | Epsilon is  0.011969440656613891
Game :  83  | Epsilon is  0.011969440656613891
Game :  84  | Epsilon is  0.011969440656613891
Game :  85  | Epsilon is  0.011969440656613891
Game :  86  | Epsilon is  0.011969440656613891
Game :  87  | Epsilon is  0.011969440656613891
Game :  88  | Epsilon is  0.011969440656613891
Game :  89  | Epsilon is  0.011969440656613891
Game :  90  | Epsilon is  0.011969440656613891
Game :  91  | Epsilon is  0.011969440656613891
Game :  92  | Epsilon is  0.011969440656613891
Game :  93  | Epsilon is  0.011969440656613891
Game :  94  | Epsilon is  0.01046727585420885
Game :  95  | Epsilon is  0.01046727585420885
Game :  96  | Epsilon is  0.01046727585420885
Game :  97  | Epsilon is  0.01046727585420885
Game :  98  | Epsilon is  0.01046727585420885
Game :  99  | Epsi

Game :  100  | Epsilon is  0.01
Game :  101  | Epsilon is  0.01
Game :  102  | Epsilon is  0.01
Game :  103  | Epsilon is  0.01
Game :  104  | Epsilon is  0.01
Game :  105  | Epsilon is  0.01
Game :  106  | Epsilon is  0.01
Game :  107  | Epsilon is  0.01
Game :  108  | Epsilon is  0.01
Game :  109  | Epsilon is  0.01
Game :  110  | Epsilon is  0.01
Game :  111  | Epsilon is  0.01
Game :  112  | Epsilon is  0.01
Game :  113  | Epsilon is  0.01
Game :  114  | Epsilon is  0.01
Game :  115  | Epsilon is  0.01
Game :  116  | Epsilon is  0.01
Game :  117  | Epsilon is  0.01
Game :  118  | Epsilon is  0.01
Game :  119  | Epsilon is  0.01
Game :  120  | Epsilon is  0.01
Game :  121  | Epsilon is  0.01
Game :  122  | Epsilon is  0.01
Game :  123  | Epsilon is  0.01
Game :  124  | Epsilon is  0.01
Game :  125  | Epsilon is  0.01
Game :  126  | Epsilon is  0.01
Game :  127  | Epsilon is  0.01
Game :  128  | Epsilon is  0.01
Game :  129  | Epsilon is  0.01
Game :  130  | Epsilon is  0.01
Game :  

Game :  131  | Epsilon is  0.01
Game :  132  | Epsilon is  0.01
Game :  133  | Epsilon is  0.01
Game :  134  | Epsilon is  0.01
Game :  135  | Epsilon is  0.01
Game :  136  | Epsilon is  0.01
Game :  137  | Epsilon is  0.01
Game :  138  | Epsilon is  0.01
Game :  139  | Epsilon is  0.01
Game :  140  | Epsilon is  0.01
Game :  141  | Epsilon is  0.01
Game :  142  | Epsilon is  0.01
Game :  143  | Epsilon is  0.01
Game :  144  | Epsilon is  0.01
Game :  145  | Epsilon is  0.01
Game :  146  | Epsilon is  0.01
Game :  147  | Epsilon is  0.01
Game :  148  | Epsilon is  0.01
Game :  149  | Epsilon is  0.01
Game :  150  | Epsilon is  0.01
Game :  151  | Epsilon is  0.01
Game :  152  | Epsilon is  0.01
Game :  153  | Epsilon is  0.01
Game :  154  | Epsilon is  0.01
Game :  155  | Epsilon is  0.01
Game :  156  | Epsilon is  0.01
Game :  157  | Epsilon is  0.01
Game :  158  | Epsilon is  0.01
Game :  159  | Epsilon is  0.01
Game :  160  | Epsilon is  0.01
Game :  161  | Epsilon is  0.01
Game :  

Game :  163  | Epsilon is  0.01
Game :  164  | Epsilon is  0.01
Game :  165  | Epsilon is  0.01
Game :  166  | Epsilon is  0.01
Game :  167  | Epsilon is  0.01
Game :  168  | Epsilon is  0.01
Game :  169  | Epsilon is  0.01
Game :  170  | Epsilon is  0.01
Game :  171  | Epsilon is  0.01
Game :  172  | Epsilon is  0.01
Game :  173  | Epsilon is  0.01
Game :  174  | Epsilon is  0.01
Game :  175  | Epsilon is  0.01
Game :  176  | Epsilon is  0.01
Game :  177  | Epsilon is  0.01
Game :  178  | Epsilon is  0.01
Game :  179  | Epsilon is  0.01
Game :  180  | Epsilon is  0.01
Game :  181  | Epsilon is  0.01
Game :  182  | Epsilon is  0.01
Game :  183  | Epsilon is  0.01
Game :  184  | Epsilon is  0.01
Game :  185  | Epsilon is  0.01
Game :  186  | Epsilon is  0.01
Game :  187  | Epsilon is  0.01
Game :  188  | Epsilon is  0.01
Game :  189  | Epsilon is  0.01
Game :  190  | Epsilon is  0.01
Game :  191  | Epsilon is  0.01
Game :  192  | Epsilon is  0.01
Game :  193  | Epsilon is  0.01
Game :  

Game :  195  | Epsilon is  0.01
Game :  196  | Epsilon is  0.01
Game :  197  | Epsilon is  0.01
Game :  198  | Epsilon is  0.01
Game :  199  | Epsilon is  0.01
Game :  200  | Epsilon is  0.01
Game :  201  | Epsilon is  0.01
Game :  202  | Epsilon is  0.01
Game :  203  | Epsilon is  0.01
Game :  204  | Epsilon is  0.01
Game :  205  | Epsilon is  0.01
Game :  206  | Epsilon is  0.01
Game :  207  | Epsilon is  0.01
Game :  208  | Epsilon is  0.01
Game :  209  | Epsilon is  0.01
Done
The score : 0.1
epsilon_decay :  0.93675
learning_rate :  0.0005
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  1.0
Game :  2  | Epsilon is  0.93675
Game :  3  | Epsilon is  0.8775005625
Game :  4  | Epsilon is  0.8219986519218749
Game :  5  | Epsilon is  0.7700072371878163
Game :  6  | Epsilon is  0.7213042794356869
Game :  7  | Epsilon is  0.6756817837613797
Game :  8  | Epsilon is  0.6329449109384724
Game :  9  | Epsilon is  0.592911145321614
Game :  10  | Epsilon is  0.5554095153800219
Game :  11  |

Game :  208  | Epsilon is  0.01
Game :  209  | Epsilon is  0.01
Done
The score : 2.0
epsilon_decay :  0.93675
learning_rate :  0.00075
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  1.0
Game :  2  | Epsilon is  0.93675
Game :  3  | Epsilon is  0.8775005625
Game :  4  | Epsilon is  0.8219986519218749
Game :  5  | Epsilon is  0.7700072371878163
Game :  6  | Epsilon is  0.7213042794356869
Game :  7  | Epsilon is  0.6756817837613797
Game :  8  | Epsilon is  0.6329449109384724
Game :  9  | Epsilon is  0.592911145321614
Game :  10  | Epsilon is  0.5554095153800219
Game :  11  | Epsilon is  0.5202798635322355
Game :  12  | Epsilon is  0.4873721621638216
Game :  13  | Epsilon is  0.4565458729069599
Game :  14  | Epsilon is  0.4276693464455947
Game :  15  | Epsilon is  0.4006192602829108
Game :  16  | Epsilon is  0.37528009207001667
Game :  17  | Epsilon is  0.3515436262465881
Game :  18  | Epsilon is  0.32930849188649136
Game :  19  | Epsilon is  0.30847972977467075
Game :  20  | Epsilo

Game :  8  | Epsilon is  0.6329449109384724
Game :  9  | Epsilon is  0.592911145321614
Game :  10  | Epsilon is  0.5554095153800219
Game :  11  | Epsilon is  0.5202798635322355
Game :  12  | Epsilon is  0.4873721621638216
Game :  13  | Epsilon is  0.4565458729069599
Game :  14  | Epsilon is  0.4276693464455947
Game :  15  | Epsilon is  0.4006192602829108
Game :  16  | Epsilon is  0.37528009207001667
Game :  17  | Epsilon is  0.3515436262465881
Game :  18  | Epsilon is  0.32930849188649136
Game :  19  | Epsilon is  0.30847972977467075
Game :  20  | Epsilon is  0.28896838686642284
Game :  21  | Epsilon is  0.2706911363971216
Game :  22  | Epsilon is  0.25356992202000367
Game :  23  | Epsilon is  0.23753162445223844
Game :  24  | Epsilon is  0.22250774920563435
Game :  25  | Epsilon is  0.20843413406837796
Game :  26  | Epsilon is  0.19525067508855304
Game :  27  | Epsilon is  0.18290106988920204
Game :  28  | Epsilon is  0.17133257721871
Game :  29  | Epsilon is  0.1604957917096266
Game 

Game :  9  | Epsilon is  0.5554095153800219
Game :  10  | Epsilon is  0.5202798635322355
Game :  11  | Epsilon is  0.4873721621638216
Game :  12  | Epsilon is  0.4565458729069599
Game :  13  | Epsilon is  0.4276693464455947
Game :  14  | Epsilon is  0.4006192602829108
Game :  15  | Epsilon is  0.37528009207001667
Game :  16  | Epsilon is  0.3515436262465881
Game :  17  | Epsilon is  0.32930849188649136
Game :  18  | Epsilon is  0.30847972977467075
Game :  19  | Epsilon is  0.28896838686642284
Game :  20  | Epsilon is  0.2706911363971216
Game :  21  | Epsilon is  0.25356992202000367
Game :  22  | Epsilon is  0.23753162445223844
Game :  23  | Epsilon is  0.22250774920563435
Game :  24  | Epsilon is  0.20843413406837796
Game :  25  | Epsilon is  0.19525067508855304
Game :  26  | Epsilon is  0.18290106988920204
Game :  27  | Epsilon is  0.17133257721871
Game :  28  | Epsilon is  0.1604957917096266
Game :  29  | Epsilon is  0.1503444328839927
Game :  30  | Epsilon is  0.14083514750408016
Ga

Game :  21  | Epsilon is  0.2706911363971216
Game :  22  | Epsilon is  0.25356992202000367
Game :  23  | Epsilon is  0.23753162445223844
Game :  24  | Epsilon is  0.22250774920563435
Game :  25  | Epsilon is  0.20843413406837796
Game :  26  | Epsilon is  0.19525067508855304
Game :  27  | Epsilon is  0.18290106988920204
Game :  28  | Epsilon is  0.17133257721871
Game :  29  | Epsilon is  0.1604957917096266
Game :  30  | Epsilon is  0.1503444328839927
Game :  31  | Epsilon is  0.14083514750408016
Game :  32  | Epsilon is  0.13192732442444707
Game :  33  | Epsilon is  0.12358292115460079
Game :  34  | Epsilon is  0.11576630139157229
Game :  35  | Epsilon is  0.10844408282855535
Game :  36  | Epsilon is  0.10158499458964922
Game :  37  | Epsilon is  0.0951597436818539
Game :  38  | Epsilon is  0.08914088989397663
Game :  39  | Epsilon is  0.0835027286081826
Game :  40  | Epsilon is  0.07822118102371504
Game :  41  | Epsilon is  0.07327369132396507
Game :  42  | Epsilon is  0.06863913034772

Game :  33  | Epsilon is  0.9694605362958227
Game :  34  | Epsilon is  0.9684910757595269
Game :  35  | Epsilon is  0.9675225846837673
Game :  36  | Epsilon is  0.9665550620990835
Game :  37  | Epsilon is  0.9655885070369844
Game :  38  | Epsilon is  0.9646229185299474
Game :  39  | Epsilon is  0.9636582956114175
Game :  40  | Epsilon is  0.9626946373158061
Game :  41  | Epsilon is  0.9617319426784903
Game :  42  | Epsilon is  0.9607702107358118
Game :  43  | Epsilon is  0.959809440525076
Game :  44  | Epsilon is  0.9588496310845509
Game :  45  | Epsilon is  0.9578907814534664
Game :  46  | Epsilon is  0.9569328906720129
Game :  47  | Epsilon is  0.9559759577813409
Game :  48  | Epsilon is  0.9550199818235596
Game :  49  | Epsilon is  0.9540649618417361
Game :  50  | Epsilon is  0.9531108968798944
Game :  51  | Epsilon is  0.9521577859830145
Game :  52  | Epsilon is  0.9512056281970315
Game :  53  | Epsilon is  0.9502544225688344
Game :  54  | Epsilon is  0.9493041681462656
Game :  55 

Game :  3  | Epsilon is  0.997002999
Game :  4  | Epsilon is  0.996005996001
Game :  5  | Epsilon is  0.995009990004999
Game :  6  | Epsilon is  0.994014980014994
Game :  7  | Epsilon is  0.993020965034979
Game :  8  | Epsilon is  0.9920279440699441
Game :  9  | Epsilon is  0.9910359161258742
Game :  10  | Epsilon is  0.9900448802097482
Game :  11  | Epsilon is  0.9890548353295385
Game :  12  | Epsilon is  0.988065780494209
Game :  13  | Epsilon is  0.9870777147137147
Game :  14  | Epsilon is  0.986090636999001
Game :  15  | Epsilon is  0.9851045463620021
Game :  16  | Epsilon is  0.98411944181564
Game :  17  | Epsilon is  0.9831353223738244
Game :  18  | Epsilon is  0.9821521870514506
Game :  19  | Epsilon is  0.9811700348643991
Game :  20  | Epsilon is  0.9801888648295347
Game :  21  | Epsilon is  0.9792086759647052
Game :  22  | Epsilon is  0.9782294672887405
Game :  23  | Epsilon is  0.9772512378214517
Game :  24  | Epsilon is  0.9762739865836303
Game :  25  | Epsilon is  0.9752977

Game :  185  | Epsilon is  0.8310273589761736
Game :  186  | Epsilon is  0.8301963316171974
Game :  187  | Epsilon is  0.8293661352855802
Game :  188  | Epsilon is  0.8285367691502946
Game :  189  | Epsilon is  0.8277082323811443
Game :  190  | Epsilon is  0.8268805241487632
Game :  191  | Epsilon is  0.8260536436246144
Game :  192  | Epsilon is  0.8252275899809898
Game :  193  | Epsilon is  0.8244023623910088
Game :  194  | Epsilon is  0.8235779600286178
Game :  195  | Epsilon is  0.8227543820685892
Game :  196  | Epsilon is  0.8219316276865206
Game :  197  | Epsilon is  0.8211096960588341
Game :  198  | Epsilon is  0.8202885863627752
Game :  199  | Epsilon is  0.8194682977764125
Game :  200  | Epsilon is  0.818648829478636
Game :  201  | Epsilon is  0.8178301806491574
Game :  202  | Epsilon is  0.8170123504685082
Game :  203  | Epsilon is  0.8161953381180397
Game :  204  | Epsilon is  0.8153791427799216
Game :  205  | Epsilon is  0.8145637636371417
Game :  206  | Epsilon is  0.813749

Game :  156  | Epsilon is  0.8554924148377159
Game :  157  | Epsilon is  0.8546369224228781
Game :  158  | Epsilon is  0.8537822855004553
Game :  159  | Epsilon is  0.8529285032149548
Game :  160  | Epsilon is  0.8520755747117399
Game :  161  | Epsilon is  0.8512234991370281
Game :  162  | Epsilon is  0.8503722756378911
Game :  163  | Epsilon is  0.8495219033622532
Game :  164  | Epsilon is  0.8486723814588909
Game :  165  | Epsilon is  0.847823709077432
Game :  166  | Epsilon is  0.8469758853683546
Game :  167  | Epsilon is  0.8461289094829862
Game :  168  | Epsilon is  0.8452827805735033
Game :  169  | Epsilon is  0.8444374977929298
Game :  170  | Epsilon is  0.8435930602951368
Game :  171  | Epsilon is  0.8427494672348417
Game :  172  | Epsilon is  0.8419067177676068
Game :  173  | Epsilon is  0.8410648110498392
Game :  174  | Epsilon is  0.8402237462387894
Game :  175  | Epsilon is  0.8393835224925505
Game :  176  | Epsilon is  0.838544138970058
Game :  177  | Epsilon is  0.8377055

Game :  127  | Epsilon is  0.8806777104745716
Game :  128  | Epsilon is  0.8797970327640969
Game :  129  | Epsilon is  0.8789172357313328
Game :  130  | Epsilon is  0.8780383184956015
Game :  131  | Epsilon is  0.8771602801771059
Game :  132  | Epsilon is  0.8762831198969288
Game :  133  | Epsilon is  0.8754068367770318
Game :  134  | Epsilon is  0.8745314299402548
Game :  135  | Epsilon is  0.8736568985103146
Game :  136  | Epsilon is  0.8727832416118043
Game :  137  | Epsilon is  0.8719104583701925
Game :  138  | Epsilon is  0.8710385479118223
Game :  139  | Epsilon is  0.8701675093639105
Game :  140  | Epsilon is  0.8692973418545467
Game :  141  | Epsilon is  0.8684280445126921
Game :  142  | Epsilon is  0.8675596164681794
Game :  143  | Epsilon is  0.8666920568517111
Game :  144  | Epsilon is  0.8658253647948594
Game :  145  | Epsilon is  0.8649595394300645
Game :  146  | Epsilon is  0.8640945798906344
Game :  147  | Epsilon is  0.8632304853107438
Game :  148  | Epsilon is  0.86236

Game :  98  | Epsilon is  0.9075119613694457
Game :  99  | Epsilon is  0.9066044494080763
Game :  100  | Epsilon is  0.9056978449586682
Game :  101  | Epsilon is  0.9047921471137096
Game :  102  | Epsilon is  0.9038873549665959
Game :  103  | Epsilon is  0.9029834676116293
Game :  104  | Epsilon is  0.9020804841440176
Game :  105  | Epsilon is  0.9011784036598737
Game :  106  | Epsilon is  0.9002772252562138
Game :  107  | Epsilon is  0.8993769480309576
Game :  108  | Epsilon is  0.8984775710829266
Game :  109  | Epsilon is  0.8975790935118436
Game :  110  | Epsilon is  0.8966815144183318
Game :  111  | Epsilon is  0.8957848329039134
Game :  112  | Epsilon is  0.8948890480710096
Game :  113  | Epsilon is  0.8939941590229386
Game :  114  | Epsilon is  0.8931001648639156
Game :  115  | Epsilon is  0.8922070646990518
Game :  116  | Epsilon is  0.8913148576343527
Game :  117  | Epsilon is  0.8904235427767183
Game :  118  | Epsilon is  0.8895331192339416
Game :  119  | Epsilon is  0.8886435

The score is evaluated on ten games. It is now contained in an array. Each line refers to an epsilon_decay, each column refers to a learning_rate :

In [219]:
l_score = np.array(l_score)
print(l_score)

#save_score = [[1.0, 13.3, 0.6, 9.3, 0.3], [5.9, 9.4, 0.6, 0.0, 13.4], [0.0, 10.8, 0.6, 13.4, 0.1], [2.0, 3.3, 1.3, 18.3, 9.3], [0.1, 0.3, 0.2, 0.3, 0.3]]

[[ 1.  13.3  0.6  9.3  0.3]
 [ 5.9  9.4  0.6  0.  13.4]
 [ 0.  10.8  0.6 13.4  0.1]
 [ 2.   3.3  1.3 18.3  9.3]
 [ 0.1  0.3  0.2  0.3  0.3]]


### 3D-Representation of the results

#### !!! Warning : The following line needs to be run once only !!!

In [254]:
l_epsilon_rate, l_rate = np.meshgrid(l_epsilon_rate,l_rate)
print(l_epsilon_rate)

[[0.75    0.81225 0.8745  0.93675 0.999  ]
 [0.75    0.81225 0.8745  0.93675 0.999  ]
 [0.75    0.81225 0.8745  0.93675 0.999  ]
 [0.75    0.81225 0.8745  0.93675 0.999  ]
 [0.75    0.81225 0.8745  0.93675 0.999  ]]


#### -----------------------------------------------------------

In [255]:
%matplotlib

graph = plt.axes(projection='3d', xlabel = 'Epsilon_decay', ylabel = 'Learnin_rate')
graph.plot_surface(l_epsilon_rate, l_rate, l_score, cmap = 'plasma') 
plt.title('The score')

Using matplotlib backend: MacOSX


Text(0.5, 0.92, 'The score')

### Get the best parametrization 

In [276]:
#Cell to delete the meshgrid effect 
l_epsilon_rate = np.linspace(0.75,0.999,5)
l_rate = np.linspace(0.0005,0.0015,5)

We want to get the best parametrization values which can be severals. In order to do that, let use **np.where(l_score == l_score.max())**. It returns a tuple containing the coordonates (length = 2). The problem is that the first element contains the line and the second, the column. The following code provides a solution to switch it into an array with the form : [param1[line, column], param2[column], .. etc ..]

In [277]:
best_parametrization_index = np.where(l_score == l_score.max()) #Il peut y avoir plusieurs max 
print('The best score is : ', l_score.max())
print(np.where(l_score == l_score.max())) 

#In order to use zip, we need to retype the arrays into lists
array0 = list(best_parametrization_index[0])
print('Index of the best epsilon :', array0)
array1 = list(best_parametrization_index[1])
print('Index of the best learning rate :', array1)

best_parametrization = []
for eps_index, learning_index in zip(array0,array1):
    best_parametrization.append((l_epsilon_rate[eps_index], l_rate[learning_index]))
print('List of best parametrization:', best_parametrization)
    
#best_parametrization[0][i] for i in {0,1} are respectively the epsilon_decay and the learning_rate which provide the most efficient learning

The best score is :  18.3
(array([3]), array([3]))
Index of the best epsilon : [3]
Index of the best learning rate : [3]
List of best parametrization: [(0.93675, 0.00125)]


The best parametrizations are now contained in **best_parametrization**. These value will provide the most efficient learning for Cobra. Now we want to find the best batch_size. We will use the same process, first let creates a cobra_matrix :

In [280]:
matrix_cobra_bis = [] #As the previous matrix : it will contains cobras using different init parameters

l_batch_size = np.linspace(30,250,11)
print(l_batch_size)
for param in best_parametrization :
    cobra_line_bis = []
    for batch_sz in l_batch_size :
        cobra_line_bis.append(Cobra(epsilon_decay = param[0], learning_rate = param[1], batch_size = batch_sz))
    matrix_cobra_bis.append(cobra_line_bis)
print(matrix_cobra_bis)

[ 30.  52.  74.  96. 118. 140. 162. 184. 206. 228. 250.]
[[<__main__.Cobra object at 0x7fb0750f6a50>, <__main__.Cobra object at 0x7fb04ab83950>, <__main__.Cobra object at 0x7fb037f98bd0>, <__main__.Cobra object at 0x7fb03c278f50>, <__main__.Cobra object at 0x7fb0272bedd0>, <__main__.Cobra object at 0x7fb0502c7950>, <__main__.Cobra object at 0x7fb0575b1f90>, <__main__.Cobra object at 0x7fb060216490>, <__main__.Cobra object at 0x7fb027208b10>, <__main__.Cobra object at 0x7fb060c7fd50>, <__main__.Cobra object at 0x7fb0502a3a90>]]


In [281]:
batch_score = []
batch_score_progress = []


for cobra_line_bis in matrix_cobra_bis :
    batch_score_line = []
    batch_score_progress_line = []
    for cobra in cobra_line_bis  :
        
        print("epsilon_decay : ", cobra.epsilon_decay)
        print("learning_rate : ", cobra.learning_rate)
        print("batch size : ", cobra.batch_size)
        
        score_evol, real_score = cobra_main(dqn_agent = cobra, nb_gens = 200, max_action = 500, show = False)
        
        batch_score_line.append(np.mean(real_score))
        batch_score_progress_line.append(score_evol)
        print('The score :', np.mean(real_score))
    batch_score.append(batch_score_line)
    batch_score_progress.append(batch_score_progress_line)
    

print(batch_score)

epsilon_decay :  0.93675
learning_rate :  0.00125
batch size :  30.0
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  1.0
Game :  2  | Epsilon is  1.0
Game :  3  | Epsilon is  0.93675
Game :  4  | Epsilon is  0.8775005625
Game :  5  | Epsilon is  0.8219986519218749
Game :  6  | Epsilon is  0.7700072371878163
Game :  7  | Epsilon is  0.7213042794356869
Game :  8  | Epsilon is  0.6756817837613797
Game :  9  | Epsilon is  0.6329449109384724
Game :  10  | Epsilon is  0.592911145321614
Game :  11  | Epsilon is  0.5554095153800219
Game :  12  | Epsilon is  0.5202798635322355
Game :  13  | Epsilon is  0.4873721621638216
Game :  14  | Epsilon is  0.4565458729069599
Game :  15  | Epsilon is  0.4276693464455947
Game :  16  | Epsilon is  0.4006192602829108
Game :  17  | Epsilon is  0.37528009207001667
Game :  18  | Epsilon is  0.3515436262465881
Game :  19  | Epsilon is  0.32930849188649136
Game :  20  | Epsilon is  0.30847972977467075
Game :  21  | Epsilon is  0.28896838686642284
Game :  22

Game :  176  | Epsilon is  0.01176011910052657
Game :  177  | Epsilon is  0.01176011910052657
Game :  178  | Epsilon is  0.01176011910052657
Game :  179  | Epsilon is  0.01176011910052657
Game :  180  | Epsilon is  0.01176011910052657
Game :  181  | Epsilon is  0.01176011910052657
Game :  182  | Epsilon is  0.011016291567418263
Game :  183  | Epsilon is  0.011016291567418263
Game :  184  | Epsilon is  0.011016291567418263
Game :  185  | Epsilon is  0.011016291567418263
Game :  186  | Epsilon is  0.011016291567418263
Game :  187  | Epsilon is  0.011016291567418263
Game :  188  | Epsilon is  0.011016291567418263
Game :  189  | Epsilon is  0.011016291567418263
Game :  190  | Epsilon is  0.011016291567418263
Game :  191  | Epsilon is  0.011016291567418263
Game :  192  | Epsilon is  0.011016291567418263
Game :  193  | Epsilon is  0.011016291567418263
Game :  194  | Epsilon is  0.011016291567418263
Game :  195  | Epsilon is  0.011016291567418263
Game :  196  | Epsilon is  0.01101629156741826

Game :  181  | Epsilon is  0.01
Game :  182  | Epsilon is  0.01
Game :  183  | Epsilon is  0.01
Game :  184  | Epsilon is  0.01
Game :  185  | Epsilon is  0.01
Game :  186  | Epsilon is  0.01
Game :  187  | Epsilon is  0.01
Game :  188  | Epsilon is  0.01
Game :  189  | Epsilon is  0.01
Game :  190  | Epsilon is  0.01
Game :  191  | Epsilon is  0.01
Game :  192  | Epsilon is  0.01
Game :  193  | Epsilon is  0.01
Game :  194  | Epsilon is  0.01
Game :  195  | Epsilon is  0.01
Game :  196  | Epsilon is  0.01
Game :  197  | Epsilon is  0.01
Game :  198  | Epsilon is  0.01
Game :  199  | Epsilon is  0.01
Game :  200  | Epsilon is  0.01
Done
The score : 2.0
epsilon_decay :  0.93675
learning_rate :  0.00125
batch size :  74.0
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  1.0
Game :  2  | Epsilon is  0.93675
Game :  3  | Epsilon is  0.8775005625
Game :  4  | Epsilon is  0.8219986519218749
Game :  5  | Epsilon is  0.7700072371878163
Game :  6  | Epsilon is  0.7213042794356869
Game :  7

Game :  3  | Epsilon is  0.93675
Game :  4  | Epsilon is  0.8775005625
Game :  5  | Epsilon is  0.8219986519218749
Game :  6  | Epsilon is  0.7700072371878163
Game :  7  | Epsilon is  0.7213042794356869
Game :  8  | Epsilon is  0.6756817837613797
Game :  9  | Epsilon is  0.6329449109384724
Game :  10  | Epsilon is  0.592911145321614
Game :  11  | Epsilon is  0.5554095153800219
Game :  12  | Epsilon is  0.5202798635322355
Game :  13  | Epsilon is  0.4873721621638216
Game :  14  | Epsilon is  0.4565458729069599
Game :  15  | Epsilon is  0.4276693464455947
Game :  16  | Epsilon is  0.4006192602829108
Game :  17  | Epsilon is  0.37528009207001667
Game :  18  | Epsilon is  0.3515436262465881
Game :  19  | Epsilon is  0.32930849188649136
Game :  20  | Epsilon is  0.30847972977467075
Game :  21  | Epsilon is  0.28896838686642284
Game :  22  | Epsilon is  0.2706911363971216
Game :  23  | Epsilon is  0.25356992202000367
Game :  24  | Epsilon is  0.23753162445223844
Game :  25  | Epsilon is  0.2

Game :  21  | Epsilon is  0.2706911363971216
Game :  22  | Epsilon is  0.25356992202000367
Game :  23  | Epsilon is  0.23753162445223844
Game :  24  | Epsilon is  0.22250774920563435
Game :  25  | Epsilon is  0.20843413406837796
Game :  26  | Epsilon is  0.19525067508855304
Game :  27  | Epsilon is  0.18290106988920204
Game :  28  | Epsilon is  0.17133257721871
Game :  29  | Epsilon is  0.1604957917096266
Game :  30  | Epsilon is  0.1503444328839927
Game :  31  | Epsilon is  0.14083514750408016
Game :  32  | Epsilon is  0.13192732442444707
Game :  33  | Epsilon is  0.12358292115460079
Game :  34  | Epsilon is  0.11576630139157229
Game :  35  | Epsilon is  0.10844408282855535
Game :  36  | Epsilon is  0.10158499458964922
Game :  37  | Epsilon is  0.0951597436818539
Game :  38  | Epsilon is  0.08914088989397663
Game :  39  | Epsilon is  0.0835027286081826
Game :  40  | Epsilon is  0.07822118102371504
Game :  41  | Epsilon is  0.07327369132396507
Game :  42  | Epsilon is  0.06863913034772

Game :  38  | Epsilon is  0.0951597436818539
Game :  39  | Epsilon is  0.08914088989397663
Game :  40  | Epsilon is  0.0835027286081826
Game :  41  | Epsilon is  0.07822118102371504
Game :  42  | Epsilon is  0.07327369132396507
Game :  43  | Epsilon is  0.06863913034772427
Game :  44  | Epsilon is  0.0642977053532307
Game :  45  | Epsilon is  0.06023087548963886
Game :  46  | Epsilon is  0.0564212726149192
Game :  47  | Epsilon is  0.05285262712202556
Game :  48  | Epsilon is  0.04950969845655744
Game :  49  | Epsilon is  0.04637821002918018
Game :  50  | Epsilon is  0.04344478824483453
Game :  51  | Epsilon is  0.04069690538834875
Game :  52  | Epsilon is  0.03812282612253569
Game :  53  | Epsilon is  0.03571155737028531
Game :  54  | Epsilon is  0.033452801366614764
Game :  55  | Epsilon is  0.031336911680176376
Game :  56  | Epsilon is  0.029354852016405218
Game :  57  | Epsilon is  0.02749815762636759
Game :  58  | Epsilon is  0.02575889915649984
Game :  59  | Epsilon is  0.0241296

Game :  55  | Epsilon is  0.031336911680176376
Game :  56  | Epsilon is  0.029354852016405218
Game :  57  | Epsilon is  0.02749815762636759
Game :  58  | Epsilon is  0.02575889915649984
Game :  59  | Epsilon is  0.024129648784851222
Game :  60  | Epsilon is  0.02260344849920938
Game :  61  | Epsilon is  0.02260344849920938
Game :  62  | Epsilon is  0.021173780381634387
Game :  63  | Epsilon is  0.01983453877249601
Game :  64  | Epsilon is  0.018580004195135637
Game :  65  | Epsilon is  0.017404818929793307
Game :  66  | Epsilon is  0.017404818929793307
Game :  67  | Epsilon is  0.01630396413248388
Game :  68  | Epsilon is  0.01630396413248388
Game :  69  | Epsilon is  0.015272738401104272
Game :  70  | Epsilon is  0.014306737697234426
Game :  71  | Epsilon is  0.014306737697234426
Game :  72  | Epsilon is  0.014306737697234426
Game :  73  | Epsilon is  0.013401836537884348
Game :  74  | Epsilon is  0.012554170376863164
Game :  75  | Epsilon is  0.012554170376863164
Game :  76  | Epsilo

Game :  66  | Epsilon is  0.014306737697234426
Game :  67  | Epsilon is  0.014306737697234426
Game :  68  | Epsilon is  0.013401836537884348
Game :  69  | Epsilon is  0.012554170376863164
Game :  70  | Epsilon is  0.01176011910052657
Game :  71  | Epsilon is  0.01176011910052657
Game :  72  | Epsilon is  0.011016291567418263
Game :  73  | Epsilon is  0.010319511125779057
Game :  74  | Epsilon is  0.01
Game :  75  | Epsilon is  0.01
Game :  76  | Epsilon is  0.01
Game :  77  | Epsilon is  0.01
Game :  78  | Epsilon is  0.01
Game :  79  | Epsilon is  0.01
Game :  80  | Epsilon is  0.01
Game :  81  | Epsilon is  0.01
Game :  82  | Epsilon is  0.01
Game :  83  | Epsilon is  0.01
Game :  84  | Epsilon is  0.01
Game :  85  | Epsilon is  0.01
Game :  86  | Epsilon is  0.01
Game :  87  | Epsilon is  0.01
Game :  88  | Epsilon is  0.01
Game :  89  | Epsilon is  0.01
Game :  90  | Epsilon is  0.01
Game :  91  | Epsilon is  0.01
Game :  92  | Epsilon is  0.01
Game :  93  | Epsilon is  0.01
Game :

Game :  87  | Epsilon is  0.01
Game :  88  | Epsilon is  0.01
Game :  89  | Epsilon is  0.01
Game :  90  | Epsilon is  0.01
Game :  91  | Epsilon is  0.01
Game :  92  | Epsilon is  0.01
Game :  93  | Epsilon is  0.01
Game :  94  | Epsilon is  0.01
Game :  95  | Epsilon is  0.01
Game :  96  | Epsilon is  0.01
Game :  97  | Epsilon is  0.01
Game :  98  | Epsilon is  0.01
Game :  99  | Epsilon is  0.01
Game :  100  | Epsilon is  0.01
Game :  101  | Epsilon is  0.01
Game :  102  | Epsilon is  0.01
Game :  103  | Epsilon is  0.01
Game :  104  | Epsilon is  0.01
Game :  105  | Epsilon is  0.01
Game :  106  | Epsilon is  0.01
Game :  107  | Epsilon is  0.01
Game :  108  | Epsilon is  0.01
Game :  109  | Epsilon is  0.01
Game :  110  | Epsilon is  0.01
Game :  111  | Epsilon is  0.01
Game :  112  | Epsilon is  0.01
Game :  113  | Epsilon is  0.01
Game :  114  | Epsilon is  0.01
Game :  115  | Epsilon is  0.01
Game :  116  | Epsilon is  0.01
Game :  117  | Epsilon is  0.01
Game :  118  | Epsilo

Game :  104  | Epsilon is  0.01
Game :  105  | Epsilon is  0.01
Game :  106  | Epsilon is  0.01
Game :  107  | Epsilon is  0.01
Game :  108  | Epsilon is  0.01
Game :  109  | Epsilon is  0.01
Game :  110  | Epsilon is  0.01
Game :  111  | Epsilon is  0.01
Game :  112  | Epsilon is  0.01
Game :  113  | Epsilon is  0.01
Game :  114  | Epsilon is  0.01
Game :  115  | Epsilon is  0.01
Game :  116  | Epsilon is  0.01
Game :  117  | Epsilon is  0.01
Game :  118  | Epsilon is  0.01
Game :  119  | Epsilon is  0.01
Game :  120  | Epsilon is  0.01
Game :  121  | Epsilon is  0.01
Game :  122  | Epsilon is  0.01
Game :  123  | Epsilon is  0.01
Game :  124  | Epsilon is  0.01
Game :  125  | Epsilon is  0.01
Game :  126  | Epsilon is  0.01
Game :  127  | Epsilon is  0.01
Game :  128  | Epsilon is  0.01
Game :  129  | Epsilon is  0.01
Game :  130  | Epsilon is  0.01
Game :  131  | Epsilon is  0.01
Game :  132  | Epsilon is  0.01
Game :  133  | Epsilon is  0.01
Game :  134  | Epsilon is  0.01
Game :  

Game :  110  | Epsilon is  0.010319511125779057
Game :  111  | Epsilon is  0.01
Game :  112  | Epsilon is  0.01
Game :  113  | Epsilon is  0.01
Game :  114  | Epsilon is  0.01
Game :  115  | Epsilon is  0.01
Game :  116  | Epsilon is  0.01
Game :  117  | Epsilon is  0.01
Game :  118  | Epsilon is  0.01
Game :  119  | Epsilon is  0.01
Game :  120  | Epsilon is  0.01
Game :  121  | Epsilon is  0.01
Game :  122  | Epsilon is  0.01
Game :  123  | Epsilon is  0.01
Game :  124  | Epsilon is  0.01
Game :  125  | Epsilon is  0.01
Game :  126  | Epsilon is  0.01
Game :  127  | Epsilon is  0.01
Game :  128  | Epsilon is  0.01
Game :  129  | Epsilon is  0.01
Game :  130  | Epsilon is  0.01
Game :  131  | Epsilon is  0.01
Game :  132  | Epsilon is  0.01
Game :  133  | Epsilon is  0.01
Game :  134  | Epsilon is  0.01
Game :  135  | Epsilon is  0.01
Game :  136  | Epsilon is  0.01
Game :  137  | Epsilon is  0.01
Game :  138  | Epsilon is  0.01
Game :  139  | Epsilon is  0.01
Game :  140  | Epsilon i

In [260]:
print(evol_score)
print(len(evol_score))

[[[0, 0, 0, 3, 0, 0, 0, 2, 2, 2, 1, 5, 0, 0, 2, 3, 3, 2, 4, 2, 7, 9, 5, 9, 2, 2, 3, 0, 2, 3, 3, 4, 4, 5, 2, 2, 2, 0, 1, 3, 4, 2, 3, 2, 2, 0, 1, 1, 0, 1, 2, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 0, 1, 2, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 2, 2, 1, 1, 2, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 3, 0], [0, 0, 2, 1, 0, 0, 3, 4, 3, 3, 3, 1, 6, 0, 5, 0, 0, 3, 1, 3, 0, 9, 3, 3, 5, 4, 5, 8, 4, 7, 5, 3, 4, 4, 14, 6, 15, 14, 14, 6, 13, 5, 5, 8, 14, 19, 11, 18, 11, 15, 10, 13, 9, 5, 17, 8, 9, 17, 10, 26, 8, 23, 16, 3, 2, 11, 11, 9, 20, 18, 20, 11, 21, 9, 6, 11, 7, 9, 12, 18, 13, 8, 10, 11, 19, 8, 12, 3, 16, 16, 7, 11, 15, 18, 12, 4, 4, 7, 13, 18, 9, 3, 10, 11, 3, 11, 17, 7, 1, 7, 13, 22, 4, 9, 3, 12

In [261]:
print(evol_score[3][3])

[0, 0, 1, 0, 1, 0, 0, 2, 0, 0, 0, 2, 1, 1, 1, 1, 0, 4, 1, 3, 5, 6, 2, 3, 5, 1, 4, 2, 4, 4, 8, 8, 2, 4, 3, 5, 1, 6, 9, 11, 6, 13, 5, 15, 8, 13, 7, 2, 22, 11, 18, 15, 9, 9, 8, 1, 13, 20, 9, 13, 12, 7, 7, 10, 9, 13, 19, 9, 9, 13, 11, 11, 10, 10, 1, 14, 8, 9, 16, 13, 19, 9, 5, 15, 9, 10, 19, 13, 17, 3, 14, 7, 11, 18, 15, 9, 12, 14, 8, 12, 21, 10, 14, 8, 6, 17, 4, 16, 20, 22, 9, 23, 13, 17, 8, 13, 25, 25, 13, 16, 17, 23, 17, 10, 6, 13, 13, 14, 9, 8, 6, 16, 7, 23, 24, 17, 19, 14, 19, 17, 8, 14, 8, 12, 21, 13, 10, 7, 14, 7, 29, 16, 7, 20, 10, 13, 9, 17, 13, 14, 10, 15, 8, 20, 22, 9, 31, 26, 13, 16, 18, 23, 14, 21, 25, 17, 28, 16, 25, 22, 13, 24, 18, 14, 22, 1, 19, 27, 30, 18, 15, 29, 17, 20, 18, 19, 19, 12, 24, 15]


In [274]:
print(evol_score[3][3])

[0, 0, 1, 0, 1, 0, 0, 2, 0, 0, 0, 2, 1, 1, 1, 1, 0, 4, 1, 3, 5, 6, 2, 3, 5, 1, 4, 2, 4, 4, 8, 8, 2, 4, 3, 5, 1, 6, 9, 11, 6, 13, 5, 15, 8, 13, 7, 2, 22, 11, 18, 15, 9, 9, 8, 1, 13, 20, 9, 13, 12, 7, 7, 10, 9, 13, 19, 9, 9, 13, 11, 11, 10, 10, 1, 14, 8, 9, 16, 13, 19, 9, 5, 15, 9, 10, 19, 13, 17, 3, 14, 7, 11, 18, 15, 9, 12, 14, 8, 12, 21, 10, 14, 8, 6, 17, 4, 16, 20, 22, 9, 23, 13, 17, 8, 13, 25, 25, 13, 16, 17, 23, 17, 10, 6, 13, 13, 14, 9, 8, 6, 16, 7, 23, 24, 17, 19, 14, 19, 17, 8, 14, 8, 12, 21, 13, 10, 7, 14, 7, 29, 16, 7, 20, 10, 13, 9, 17, 13, 14, 10, 15, 8, 20, 22, 9, 31, 26, 13, 16, 18, 23, 14, 21, 25, 17, 28, 16, 25, 22, 13, 24, 18, 14, 22, 1, 19, 27, 30, 18, 15, 29, 17, 20, 18, 19, 19, 12, 24, 15]


In [2]:
batch_score = [[1.0, 2.0, 0.0, 14.0, 2.0, 1.0, 0.0, 0.0, 8.0, 10.0, 0.0]]

On remarque que le batch de taille 228 a été le plus efficace.

