# Jupyter Snake

## General Imports

In [3]:
import os

from algorithms import *
from snake_environment import *
from states_bracket import *
from epsilon_scheduler import *
from states_bracket import NeighPlusFoodDirectionPlusTailBracket
from utils import *

In [4]:
# Directory prefix for saved models, relative to the notebook's working
# directory. The trailing slash lets later cells append a file name directly.
current_path = os.getcwd()
models_path = f"{current_path}/models/"

In [None]:
# Hyperparameters reused by the QLearning, SARSA and Monte Carlo sections.
gamma, lr_v = 0.99, 0.15
n_episodes = 25_000

# State bracketer passed to the agents' learning/play calls.
bracketer = NeighPlusFoodDirectionBracket(neigh="V", radius=1)

# Exploration schedule handed to the learning calls.
epsilon_schedule = LinearEpsilonDecay(
    eps=1,
    coefficient=0.999,
    minimum=0.15,
)

## QLearning

In [None]:
# Build a training environment (render_mode="nonhuman", presumably headless)
# and fit a Q-learning agent with the shared settings defined above.
env = SnakeEnv(render_mode="nonhuman", max_step=1000)
Q_p = QLearning(
    env.action_space.n,
    gamma=gamma,
    lr_v=lr_v,
)
Q_p.learning(env, epsilon_schedule, n_episodes, bracketer)

In [None]:
# Free-text tag describing the training setup; it is embedded in the file
# name that the save/upload cells below use.
name_specs = "linear from 1 to 015 with 0999 as coefficient vn1 plus fd 1000 max iterations"
model_path = (
    f"{models_path}QLearning gamma {gamma} lr {lr_v} "
    f"epsilon {name_specs} 2nd"
)

In [None]:
# Save the Q-learning agent under the path built in the previous cell.
Q_p.save(model_path)

In [None]:
# Presumably restores the agent's saved state from model_path; confirm against
# the `upload` implementation.
Q_p.upload(model_path)

In [None]:
# Fresh environment with render_mode="human" so the game can be watched.
env = SnakeEnv(
    render_mode="human",
    max_step=1000,
)
Q_p.play(env, bracketer)

In [None]:
# Print the Q-learning agent's Q-values, using the bracketer it was trained with.
Q_p.print_q_values(bracketer)

## SARSA

In [None]:
# This cell rebinds `epsilon_schedule` and `bracketer`; cells that run after it
# see these values, not the ones from the shared-settings cell.
epsilon_schedule = LinearEpsilonDecay(
    eps=1,
    coefficient=0.9999,
    minimum=0.30,
)
env = SnakeEnv(render_mode="nonhuman", max_step=1000)
SARSA_p = SARSA(env.action_space.n, gamma=gamma, lr_v=lr_v)
bracketer = FoodDirectionBracket()

# The episode count is given literally here; the global `n_episodes` is not used.
SARSA_p.learning(
    env,
    epsilon_schedule,
    n_episodes=50000,
    bracketer=bracketer,
)

In [None]:
# Free-text tag embedded in the SARSA model file name.
# NOTE(review): the tag says "vn1 plus fd", but the SARSA training cell uses
# FoodDirectionBracket() -- confirm the tag matches the model actually saved.
name_specs = "linear from 1 to 030 with 09999 as coefficient vn1 plus fd 1000 max iterations"
model_path = (
    f"{models_path}SARSA gamma {gamma} lr {lr_v} "
    f"epsilon {name_specs}"
)

In [None]:
# Save the trained agent. This must be called on the instance `SARSA_p`;
# calling it on the class `SARSA` would bind the path string as `self`.
SARSA_p.save(model_path)

In [None]:
# Presumably restores saved state into the agent from model_path. This must be
# called on the instance `SARSA_p`, not the class `SARSA`, so that the same
# object used by the play cell is the one updated.
SARSA_p.upload(model_path)

In [None]:
# Fresh environment with render_mode="human" so the SARSA agent can be watched.
env = SnakeEnv(
    render_mode="human",
    max_step=1000,
)
SARSA_p.play(env, bracketer)

In [None]:
# Print the trained agent's Q-values. This must be called on the instance
# `SARSA_p`; calling it on the class `SARSA` would bind the bracketer as `self`.
SARSA_p.print_q_values(bracketer)

## Monte Carlo

In [None]:
# Environment
env = SnakeEnv(render_mode="nonhuman")
n_episodes = 5000
bracketer = NeighPlusFoodDirectionBracket(neigh="V", radius=1)
MC = Montecarlo(env.action_space.n, gamma=gamma, lr_v=lr_v)

In [None]:
# Train the Monte Carlo agent.
# NOTE(review): `epsilon_schedule` is not defined in this section; it is whatever
# an earlier cell last bound (the SARSA cell when run top to bottom) -- confirm
# that this is the schedule intended for Monte Carlo.
MC.learning(env, epsilon_schedule, n_episodes, bracketer)

In [None]:
# File name for the Monte Carlo model, assembled from the current settings.
# NOTE(review): `{epsilon_schedule}` embeds the object's string form; unless
# LinearEpsilonDecay defines __str__/__repr__, this may differ between
# sessions -- confirm that save and upload resolve to the same path.
model_path = (
    f"{models_path}MC gamma {gamma} lr {lr_v} "
    f"epsilon {epsilon_schedule} episodes {n_episodes} "
    f"bracketer {bracketer.__class__.__name__}"
)

In [None]:
# Save the Monte Carlo agent under the path built in the previous cell.
MC.save(model_path)

In [None]:
# Presumably restores the agent's saved state from model_path; confirm against
# the `upload` implementation.
MC.upload(model_path)

In [None]:
# Fresh environment with render_mode="human" so the Monte Carlo agent can be
# watched. No max_step is passed here, so the environment's default applies.
env = SnakeEnv(
    render_mode="human",
)
MC.play(env, bracketer)

In [None]:
# Evaluate the saved Monte Carlo model over 1000 episodes. The call is the
# cell's last expression, so the notebook displays its return value.
get_model_average_performance(
    model_name='MC',
    action_space=env.action_space.n,
    gamma=gamma,
    lr_v=lr_v,
    model_path=model_path,
    bracketer=bracketer,
    num_episodes=1000,
    render_mode='nonhuman',
)

## Double Deep Q-Learning

In [43]:
# DDQL-specific settings (passed to DeepDoubleQLearning in the next cell)
batch_size = 128
memory_size = 10000
target_update_freq = 200

# Bracketer
bracketer = NeighPlusFoodRelativePositionPlusTailBracket(neigh='M', radius=5)

# General Settings
gamma = 0.95
lr_v = 0.001
# In the visible cells `epsilon` is only used when building the model file
# name; exploration during training is driven by `epsilon_schedule`.
epsilon = 0.1
n_episodes = 5000
epsilon_schedule = LinearEpsilonDecay(eps=1, coefficient=0.999, minimum=0.05)

# Device selection: CUDA if available, otherwise CPU; an available XPU backend
# takes precedence over both (same priority as before). `torch.xpu` does not
# exist on every PyTorch build, so guard the attribute access instead of
# letting the cell fail with AttributeError.
device = "cuda" if torch.cuda.is_available() else "cpu"
if hasattr(torch, "xpu") and torch.xpu.is_available():
    device = "xpu"
print(f"Using device: {device}")

# Environment
env = SnakeEnv(render_mode="nonhuman")
state_dim = bracketer.get_state_dim()

Using device: xpu


In [44]:
# Gather the keyword settings first, then build the agent from them.
ddql_settings = dict(
    state_dim=state_dim,
    gamma=gamma,
    lr_v=lr_v,
    batch_size=batch_size,
    memory_size=memory_size,
    target_update_freq=target_update_freq,
    device=device,
)
ddql = DeepDoubleQLearning(env.action_space.n, **ddql_settings)

In [7]:
# Training environment capped at 1000 steps (max_step), then the DDQL
# training run itself.
env = SnakeEnv(
    render_mode='nonhuman',
    max_step=1000,
)
ddql.learning(env, epsilon_schedule, n_episodes, bracketer)

KeyboardInterrupt: 

In [45]:
# File name for the DDQL model, assembled from the current settings.
# NOTE(review): the name records the standalone `epsilon` value, not the
# `epsilon_schedule` passed to training -- confirm this is intentional.
model_path = (
    f"{models_path}DDQL gamma {gamma} lr {lr_v} "
    f"epsilon {epsilon} episodes {n_episodes} "
    f"bracketer {bracketer.__class__.__name__} Moore radius 5"
)

In [26]:
# Save the DDQL agent under the path built in the previous cell.
ddql.save(path=model_path)

In [46]:
# Presumably restores the agent's saved state from model_path; confirm against
# the `upload` implementation.
ddql.upload(model_path)

In [10]:
# Rendered environment with a larger step budget (5000) for watching the
# DDQL agent play.
env = SnakeEnv(
    render_mode='human',
    max_step=5000,
)
ddql.play(env, bracketer)

157.0

In [47]:
# Evaluate the saved DDQL model over 500 episodes. The call is the cell's
# last expression, so the notebook displays its return value.
get_model_average_performance(
    # which model to evaluate and where it is stored
    model_name='DDQL',
    model_path=model_path,
    # evaluation setup
    bracketer=bracketer,
    num_episodes=500,
    render_mode='nonhuman',
    # agent settings shared with the tabular models
    action_space=env.action_space.n,
    gamma=gamma,
    lr_v=lr_v,
    # DDQL-specific settings
    state_dim=state_dim,
    batch_size=batch_size,
    memory_size=memory_size,
    target_update_freq=target_update_freq,
    device=device,
)

Episode 500/500


(33.984,
 15.596,
 array([114.,   4.,   5.,   6.,   6.,   2.,   3.,   7.,   4.,   7.,   6.,
         11.,   8.,   7.,  14.,  12.,  14.,  23.,  13.,  15.,  16.,  13.,
         20.,  22.,  24.,  23.,  13.,  19.,   9.,  14.,  14.,   8.,   2.,
          8.,   5.,   3.,   3.,   1.,   1.,   1.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.]))