# Optimization of the snake parameters

The goal is to find the best parametrization for Cobra, i.e get the parameters that make the learninship as efficient as possible. First let import the files we need : 

In [199]:
import os
import sys
import math
import pygame
import random
import time

import tensorflow
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from collections import deque

import numpy as np

from Env import Env
from Viper import Viper

### To delete :

In [195]:
class Cobra :
    def __init__(self, epsilon_decay = 0.995, learning_rate = 0.001, batch_size = 32):
        
        self.memory  = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        self.batch_size = batch_size

        self.model = self.create_model()
        # "hack" implemented by DeepMind to improve convergence
        self.target_model = self.create_model()

    def create_model(self):
        model = Sequential()
        model.add(Dense(10, input_dim=8, activation="relu"))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(4, activation='linear'))
        model.compile(loss="mean_squared_error", optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, new_state, done):
        self.memory.append([state, action, reward, new_state, done])

    def replay(self):
        batch_size = 32
        if len(self.memory) < batch_size:
            return

        samples = random.sample(self.memory, batch_size)
        for sample in samples:
            state, action, reward, new_state, done = sample
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(new_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)

    def target_train(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i]
        self.target_model.set_weights(target_weights)


    def act(self, state):
        # Exploration
        if np.random.random() < self.epsilon:
            return random.randint(0,3)

        return np.argmax(self.model.predict(state)[0])

In [182]:
# Cobra main function
def cobra_main(dqn_agent = Cobra(), nb_gens=10, max_action=500, show=True):
    """
    Learns and play Snake
    dqn_agent : Agent with preset
    nb_gens = number of games for training
    max_action = max number of action before next game
    show : True if you want to play with no randomness after training
    """
    env = Env(False)

    # Nb games played with no random move
    nb_games = 2

    # Recording score
    gen_scores = [0] * nb_gens
    real_scores = [0] * nb_games

    for trial in range(nb_gens + nb_games):
        print("Game : ", trial, " | Epsilon is ", dqn_agent.epsilon)

        # Does not show game until this end of the traning
        if trial == nb_gens and show:
            print("Traning Done")
            env.init_pygame()
            pygame.event.get()
            dqn_agent.epsilon = 0
            print('--- Cobra is playing ---')

        cur_state = np.array(env.reset()).reshape(1,8)
        for step in range(max_action):
            # Get the action from Cobra based on the current state
            action = dqn_agent.act(cur_state)

            # Applying move to environement to get a reward and a new state
            new_state, reward, done = env.step(action)

            # reward = reward if not done else -20
            new_state = np.array(new_state).reshape(1,8)

            # Remember the state, the move and the reward it got.
            dqn_agent.remember(cur_state, action, reward, new_state, done)

            cur_state = new_state
            # If has lost/won
            if done:
                dqn_agent.replay()
                dqn_agent.target_train()
                break
        if trial < nb_gens:
            gen_scores[trial] = env.score
        else :
            real_scores[trial-nb_gens] = env.score
    print("Done")
    return gen_scores, real_scores

### ----------------------------------------------------

To get the best parameters, you will train different Cobras using different values. The parameters we are going to modify are the followings :
- self.epsilon_decay



In [183]:
l_epsilon_rate = np.linspace(0.75,0.999,2)
l_rate = np.linspace(0.0005,0.0015,2)
print(l_epsilon_rate)
print(l_rate)

matrix_param = np.array([l_epsilon_rate, l_rate]).T
print(matrix_param)

[0.75  0.999]
[0.0005 0.0015]
[[7.50e-01 5.00e-04]
 [9.99e-01 1.50e-03]]


In [184]:
'''


for eps in l_epsilon_rate :
    matrix_cobra.append(Cobra(epsilon_decay = eps))
'''
matrix_cobra = []

for eps in l_epsilon_rate :
    cobra_line = []
    for rate in l_rate :
        cobra_line.append(Cobra(epsilon_decay = eps, learning_rate = rate))
    matrix_cobra.append(cobra_line)
print('done')

done


In [185]:
matrix_cobra = np.array(matrix_cobra)
print(matrix_cobra)
print(matrix_cobra.shape)

[[<__main__.Cobra object at 0x7fb087e254d0>
  <__main__.Cobra object at 0x7fb084ddff50>]
 [<__main__.Cobra object at 0x7fb0874e5a10>
  <__main__.Cobra object at 0x7fb0a4853910>]]
(2, 2)


In [207]:
'''
for cobra in matrix_cobra :
    print("epsilon_decay : ", cobra.epsilon_decay)
    score_evol, real_score = cobra_main(dqn_agent = cobra, nb_gens = 50, max_action = 300, show = False)
    l_score.append(np.mean(real_score))
    score_evols.append(score_evol)
print(l_score)
'''

l_score = []
evol_score = []

for cobra_line in matrix_cobra :
    score_line = []
    score_evol_line = []
    for cobra in cobra_line  :
        
        print("epsilon_decay : ", cobra.epsilon_decay)
        print("learning_rate : ", cobra.learning_rate)
        
        score_evol, real_score = cobra_main(dqn_agent = cobra, nb_gens = 2, max_action = 100, show = False)
        
        score_line.append(np.mean(real_score))
        score_evol_line.append(score_evol)
        print('The score :', np.mean(real_score))
        time.sleep(2)
    l_score.append(score_line)
    evol_score.append(score_evol_line)
    

print(l_score)

epsilon_decay :  0.75
learning_rate :  0.0005
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  0.75
Game :  2  | Epsilon is  0.5625
Game :  3  | Epsilon is  0.421875
Done
The score : 0.0
epsilon_decay :  0.75
learning_rate :  0.0015
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  1.0
Game :  2  | Epsilon is  1.0
Game :  3  | Epsilon is  0.75
Done
The score : 0.0
epsilon_decay :  0.999
learning_rate :  0.0005
Game :  0  | Epsilon is  1.0


KeyboardInterrupt: 

Remark : the score is evaluated on two games.

In [186]:
l_score = np.array(l_score)
print(l_score)

[[0.5 0.5]
 [0.  0.5]]


#### !!! Warning : The following line needs to be run once only !!!

In [137]:
l_epsilon_rate, l_rate = np.meshgrid(l_epsilon_rate,l_rate)
print(l_epsilon_rate)

[[0.75  0.999]
 [0.75  0.999]]


#### -----------------------------------------------------------

In [95]:
%matplotlib

l_score = np.array(l_score)


graph = plt.axes(projection='3d', xlabel = 'Epsilon_decay', ylabel = 'Learnin_rate')
graph.plot_surface(l_epsilon_rate, l_rate, l_score, cmap = 'plasma') 
plt.title('The score')

Using matplotlib backend: MacOSX


Text(0.5, 0.92, 'The score')

### Get the best parametrization : (not finished)

In [138]:
#Cell to delete the meshgrid effect 
l_epsilon_rate = np.linspace(0.75,0.999,2)
l_rate = np.linspace(0.0005,0.0015,2)

##### Welcome in the dark side

In [187]:
best_parametrization_index = np.where(l_score == l_score.max()) #Il peut y avoir plusieurs max 
print(l_score.max())
print(np.where(l_score == l_score.max()))

array0 = list(best_parametrization_index[0])
print(array0)
array1 = list(best_parametrization_index[1])
print(array1)

best_parametrization = []
for eps_index, learning_index in zip(array0,array1):
    best_parametrization.append((l_epsilon_rate[eps_index], l_rate[learning_index]))
print('List of best parametrization:', best_parametrization)
    
#best_parametrization[0][i] for i in {0,1} are respectively the epsilon_decay and the learning_rate which provide the most efficient learning

0.5
(array([0, 0, 1]), array([0, 1, 1]))
[0, 0, 1]
[0, 1, 1]
List of best parametrization: [(0.75, 0.0005), (0.75, 0.0015), (0.999, 0.0015)]


best_parametrization_index contains the index of the parametrizations which probide the most efficient learning. Now we want to find the best batch_size. We will use the same process, first we create a cobra_matrix :

In [188]:
matrix_cobra_bis = []

l_batch_size = np.linspace(32,100,5)

for param in best_parametrization :
    cobra_line_bis = []
    for batch_sz in l_batch_size :
        cobra_line_bis.append(Cobra(epsilon_decay = param[0], learning_rate = param[1], batch_size = batch_sz))
    matrix_cobra_bis.append(cobra_line_bis)
print(matrix_cobra_bis)

[[<__main__.Cobra object at 0x7fb0a44b72d0>, <__main__.Cobra object at 0x7fb0854ce090>, <__main__.Cobra object at 0x7fb0a16ad150>, <__main__.Cobra object at 0x7fb0a0b73650>, <__main__.Cobra object at 0x7fb0a070c450>], [<__main__.Cobra object at 0x7fb0a3aafb50>, <__main__.Cobra object at 0x7fb087872e90>, <__main__.Cobra object at 0x7fb0a2daee10>, <__main__.Cobra object at 0x7fb086823190>, <__main__.Cobra object at 0x7fb0866fd550>], [<__main__.Cobra object at 0x7fb0a1281e10>, <__main__.Cobra object at 0x7fb0a2fce250>, <__main__.Cobra object at 0x7fb0a3c6e310>, <__main__.Cobra object at 0x7fb087e96a90>, <__main__.Cobra object at 0x7fb0a167c150>]]


In [208]:
batch_score = []
batch_score_progress = []


for cobra_line_bis in matrix_cobra_bis :
    batch_score_line = []
    batch_score_progress_line = []
    for cobra in cobra_line  :
        
        print("epsilon_decay : ", cobra.epsilon_decay)
        print("learning_rate : ", cobra.learning_rate)
        print("btach size : ", cobra.batch_size)
        
        score_evol, real_score = cobra_main(dqn_agent = cobra, nb_gens = 50, max_action = 100, show = False)
        
        batch_score_line.append(np.mean(real_score))
        batch_score_progress_line.append(score_evol)
        print('The score :', np.mean(real_score))
    batch_score.append(batch_score_line)
    batch_score_progress.append(batch_score_progress_line)
    

print(batch_score)

epsilon_decay :  0.999
learning_rate :  0.0005


AttributeError: 'Cobra' object has no attribute 'batch_size'