# Optimization of the snake parameters

The goal is to find the best parametrization for Cobra, i.e get the parameters that make the learninship as efficient as possible. First let import the files we need : 

In [188]:
import os
import sys
import math
import pygame
import random

import tensorflow
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

import numpy as np

from collections import deque

import numpy as np

from Env import Env
from Viper import Viper

In [189]:
class Cobra :
    def __init__(self, epsilon_decay = 0.995, learning_rate = 0.001):
        self.memory  = deque(maxlen=2000)

        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate

        self.model = self.create_model()
        # "hack" implemented by DeepMind to improve convergence
        self.target_model = self.create_model()

    def create_model(self):
        model = Sequential()
        model.add(Dense(10, input_dim=8, activation="relu"))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(4, activation='linear'))
        model.compile(loss="mean_squared_error", optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, new_state, done):
        self.memory.append([state, action, reward, new_state, done])

    def replay(self):
        batch_size = 32
        if len(self.memory) < batch_size:
            return

        samples = random.sample(self.memory, batch_size)
        for sample in samples:
            state, action, reward, new_state, done = sample
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(new_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)

    def target_train(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i]
        self.target_model.set_weights(target_weights)


    def act(self, state):
        # Exploration
        if np.random.random() < self.epsilon:
            return random.randint(0,3)

        return np.argmax(self.model.predict(state)[0])

In [190]:
def cobra_main(dqn_agent = Cobra(), nb_gens=10, max_action=500, show=True):
    """
    Learns and play Snake
    dqn_agent : Agent with preset
    nb_gens = number of games for training
    max_action = max number of action before next game
    show : True if you want to play with no randomness after training
    """
    env = Env(False)

    # Nb games played with no random move
    nb_games = 2

    # Recording score
    scores = [0] * nb_gens

    for trial in range(nb_gens + nb_games):
        print("Game : ", trial, " | Epsilon is ", dqn_agent.epsilon)

        # Does not show game until this end of the traning
        if trial == nb_gens and show:
            print("Traning Done")
            env.init_pygame()
            pygame.event.get()
            dqn_agent.epsilon = 0
            print('--- Cobra is playing ---')

        cur_state = np.array(env.reset()).reshape(1,8)
        for step in range(max_action):
            # Get the action from Cobra based on the current state
            action = dqn_agent.act(cur_state)

            # Applying move to environement to get a reward and a new state
            new_state, reward, done = env.step(action)

            # reward = reward if not done else -20
            new_state = np.array(new_state).reshape(1,8)

            # Remember the state, the move and the reward it got.
            dqn_agent.remember(cur_state, action, reward, new_state, done)

            cur_state = new_state
            # If has lost/won
            if done:
                dqn_agent.replay()
                dqn_agent.target_train()
                break
        if trial < nb_gens:
            scores[trial] = env.score
    print("Done")
    return scores

To get the best parameters, you will train different Cobras using different values. The parameters we are going to modify are the followings :
- self.epsilon_decay
- self.learning_rate

In [201]:
l_epsilon_rate = np.linspace(0,1,5)
l_rate = np.linspace(0.0005,0.0015,3)
print(l_epsilon_rate)
print(l_rate)

[0.   0.25 0.5  0.75 1.  ]
[0.0005 0.001  0.0015]


In [202]:
matrix_cobra = []

for eps in l_epsilon_rate :
    matrix_rate = []
    for rate in l_rate :
        matrix_rate.append(Cobra(epsilon_decay = eps, learning_rate = rate))
    matrix_cobra.append(matrix_rate)

for line in matrix_cobra :
    print(line)

[<__main__.Cobra object at 0x7fd69b422690>, <__main__.Cobra object at 0x7fd69b9b4dd0>, <__main__.Cobra object at 0x7fd69b441750>]
[<__main__.Cobra object at 0x7fd697b91510>, <__main__.Cobra object at 0x7fd697c78610>, <__main__.Cobra object at 0x7fd69aaea810>]
[<__main__.Cobra object at 0x7fd69b3c5190>, <__main__.Cobra object at 0x7fd69b7a5e10>, <__main__.Cobra object at 0x7fd69793b790>]
[<__main__.Cobra object at 0x7fd697c54350>, <__main__.Cobra object at 0x7fd69a40a250>, <__main__.Cobra object at 0x7fd697d5e650>]
[<__main__.Cobra object at 0x7fd697c56d50>, <__main__.Cobra object at 0x7fd697c6f550>, <__main__.Cobra object at 0x7fd69b8ed590>]


In [203]:
l_score = []
for line in matrix_cobra :
    for agent in matrix_rate :
        
        print("epsilon_decay : ", agent.epsilon_decay)
        print("learning_rate : ", agent.learning_rate)
        
        score = np.mean(cobra_main(dqn_agent = agent, nb_gens = 10, max_action = 500, show = False))
        l_score.append(score)
        print('The score :', score)

print(l_score)

epsilon_decay :  1.0
learning_rate :  0.0005
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  1.0
Game :  2  | Epsilon is  1.0
Game :  3  | Epsilon is  1.0
Game :  4  | Epsilon is  1.0
Game :  5  | Epsilon is  1.0
Game :  6  | Epsilon is  1.0
Game :  7  | Epsilon is  1.0
Game :  8  | Epsilon is  1.0
Game :  9  | Epsilon is  1.0
Game :  10  | Epsilon is  1.0
Game :  11  | Epsilon is  1.0
Done
The score : 0.1
epsilon_decay :  1.0
learning_rate :  0.001
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  1.0
Game :  2  | Epsilon is  1.0
Game :  3  | Epsilon is  1.0
Game :  4  | Epsilon is  1.0
Game :  5  | Epsilon is  1.0
Game :  6  | Epsilon is  1.0
Game :  7  | Epsilon is  1.0
Game :  8  | Epsilon is  1.0
Game :  9  | Epsilon is  1.0
Game :  10  | Epsilon is  1.0
Game :  11  | Epsilon is  1.0
Done
The score : 0.2
epsilon_decay :  1.0
learning_rate :  0.0015
Game :  0  | Epsilon is  1.0
Game :  1  | Epsilon is  1.0
Game :  2  | Epsilon is  1.0
Game :  3  | Epsilon is  1.0
Game :  

KeyboardInterrupt: 

Remark : the score is evaluated on two games.