# Jupyter Snake

## General Imports

In [1]:
import os

from algorithms import *
from snake_environment import *
from states_bracket import *
from epsilon_scheduler import *
from utils import *

In [2]:
# Directory the notebook is executed from.
current_path = os.getcwd()
# Keep the trailing slash: the model file name is appended directly to this string.
models_path = f"{current_path}/models/"

In [3]:
# Hyper-parameters shared by the cells below
gamma = 0.99
lr_v = 0.15
n_episodes = 25000
epsilon_schedule = LinearEpsilonDecay(
    eps=1,
    coefficient=0.999,
    minimum=0.15,
)

# State bracketer handed to the agents' learning / play calls
bracketer = NeighPlusFoodDirectionBracket(neigh="V", radius=1)

## Q-Learning

In [None]:
# Training environment (render_mode="nonhuman", episodes capped via max_step)
env = SnakeEnv(render_mode="nonhuman", max_step=1000)

# Tabular Q-learning agent sized by the environment's action space
Q_p = QLearning(
    env.action_space.n,
    gamma=gamma,
    lr_v=lr_v,
)
Q_p.learning(env, epsilon_schedule, n_episodes, bracketer)

Episode 9400/25000 : epsilon 0.15 : Average performance -3.975


In [None]:
name_specs = f'linear from 1 to 015 with 0999 as coefficient vn1 plus fd 1000 max iterations'
model_path = f"{models_path}QLearning gamma {gamma} lr {lr_v} epsilon {name_specs} 2nd"

In [None]:
Q_p.save(f"{model_path}")

In [None]:
Q_p.upload(f"{model_path}")

In [None]:
env = SnakeEnv(render_mode="human", max_step=1000)
Q_p.play(env, bracketer)

In [None]:
Q_p.print_q_values(bracketer)

## SARSA

In [None]:
# SARSA run. This cell rebinds the notebook-wide `epsilon_schedule` and
# `bracketer`, so cells executed afterwards see these values.
epsilon_schedule = LinearEpsilonDecay(
    eps=1,
    coefficient=0.9999,
    minimum=0.30,
)
env = SnakeEnv(render_mode="nonhuman", max_step=1000)
SARSA_p = SARSA(
    env.action_space.n,
    gamma=gamma,
    lr_v=lr_v,
)
bracketer = FoodDirectionBracket()
SARSA_p.learning(
    env,
    epsilon_schedule,
    n_episodes=50000,
    bracketer=bracketer,
)

In [None]:
name_specs = f'linear from 1 to 030 with 09999 as coefficient vn1 plus fd 1000 max iterations'
model_path = f"{models_path}SARSA gamma {gamma} lr {lr_v} epsilon {name_specs}"

In [None]:
SARSA.save(f"{model_path}")

In [None]:
SARSA.upload(f"{model_path}")

In [None]:
env = SnakeEnv(render_mode="human", max_step=1000)
SARSA_p.play(env, bracketer)

In [None]:
SARSA.print_q_values(bracketer)

## Monte Carlo

In [None]:
# Environment
env = SnakeEnv(render_mode="nonhuman")
n_episodes = 5000
bracketer = NeighPlusFoodDirectionBracket(neigh="V", radius=1)
MC = Montecarlo(env.action_space.n, gamma=gamma, lr_v=lr_v)

In [None]:
MC.learning(env, epsilon_schedule, n_episodes, bracketer)

In [None]:
model_path = f"{models_path}MC gamma {gamma} lr {lr_v} epsilon {epsilon_schedule} episodes {n_episodes} bracketer {bracketer.__class__.__name__}"

In [None]:
MC.save(f"{model_path}")

In [None]:
MC.upload(f"{model_path}")

In [None]:
env = SnakeEnv(render_mode="human")
MC.play(env, bracketer)

In [None]:
get_model_average_score(model_name='MC', action_space=env.action_space.n, gamma=gamma, lr_v=lr_v, model_path=model_path, bracketer=bracketer, num_episodes=1000, render_mode='nonhuman')

## Double Deep Q-Learning

In [None]:
# Explicit import: `torch` is otherwise only reachable if one of the wildcard
# imports at the top of the notebook happens to re-export it.
import torch

# Batch / memory / target-update settings handed to the agent
batch_size = 128
memory_size = 10000
target_update_freq = 200

# Bracketer
bracketer = NeighPlusFoodDirectionBracket(neigh="V", radius=1)

# General Settings
gamma = 0.95
lr_v = 0.001
# NOTE(review): in the cells shown, `epsilon` is only used in the saved model's
# file name; training is driven by `epsilon_schedule`.
epsilon = 0.1
n_episodes = 5001
epsilon_schedule = LinearEpsilonDecay(eps=1, coefficient=0.999, minimum=0.05)

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# device = 'xpu' if torch.xpu.is_available() else device
print(f"Using device: {device}")

# Environment
env = SnakeEnv(render_mode="nonhuman")
# State size as reported by the bracketer; passed to the agent as `state_dim`.
state_dim = bracketer.get_state_dim()

In [None]:
ddql = DeepDoubleQLearning(
    env.action_space.n,
    state_dim=state_dim,
    gamma=gamma,
    lr_v=lr_v,
    batch_size=batch_size,
    memory_size=memory_size,
    target_update_freq=target_update_freq,
    device=device
)

In [None]:
ddql.learning(env, epsilon_schedule, n_episodes, bracketer)

In [None]:
model_path = f"{models_path}DDQN gamma {gamma} lr {lr_v} epsilon {epsilon} episodes {n_episodes} bracketer {bracketer.__class__.__name__}"

In [None]:
ddql.save(path=f'{model_path}')

In [None]:
ddql.upload(model_path)

In [None]:
# Fresh environment with render_mode="human" for watching the agent.
env = SnakeEnv(render_mode="human", max_step=2000)
# Must start at column 0: a leading space here raises IndentationError.
ddql.play(env, bracketer)

In [None]:
# Score the saved DDQN model over num_episodes runs in a render_mode="nonhuman"
# environment. All arguments are passed by keyword.
get_model_average_score(
    # which model to evaluate
    model_name="DDQN",
    model_path=model_path,
    # agent construction settings
    action_space=env.action_space.n,
    state_dim=state_dim,
    gamma=gamma,
    lr_v=lr_v,
    batch_size=batch_size,
    memory_size=memory_size,
    target_update_freq=target_update_freq,
    device=device,
    # evaluation settings
    bracketer=bracketer,
    num_episodes=1000,
    render_mode="nonhuman",
)

## Atari-like DDQL

In [None]:
# Explicit import: `torch` is otherwise only reachable if one of the wildcard
# imports at the top of the notebook happens to re-export it.
import torch

# Batch / memory / target-update settings handed to the agent
batch_size = 32
memory_size = 10000
target_update_freq = 200

# Bracketer
width = 20
height = 20
bracketer = FullGrid(width=width, height=height)

# General Settings
gamma = 0.95
lr_v = 0.001
# NOTE(review): in the cells shown, `epsilon` is only used in the saved model's
# file name; training is driven by `epsilon_schedule`.
epsilon = 0.1
n_episodes = 20001
epsilon_schedule = LinearEpsilonDecay(eps=1, coefficient=0.9999, minimum=0.05)

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# device = 'xpu' if torch.xpu.is_available() else device

# Environment
# Create a dedicated training environment for this section, as the DDQN section
# does. Without it, the agent is built and trained on whatever `env` an earlier
# cell left behind (a render_mode='human' one when the notebook runs top to bottom).
# NOTE(review): assumes SnakeEnv's default grid matches the width x height given
# to FullGrid -- confirm in snake_environment.
env = SnakeEnv(render_mode="nonhuman")

In [None]:
atari = AtariDQL(
    env.action_space.n,
    width=width,
    height=height,
    n_layers=1,
    gamma=gamma,
    lr_v=lr_v,
    batch_size=batch_size,
    memory_size=memory_size,
    target_update_freq=target_update_freq,
    device=device
)

In [None]:
atari.learning(env, epsilon_schedule, n_episodes, bracketer)

In [None]:
model_path = f"{models_path}AtariDQL gamma {gamma} lr {lr_v} epsilon {epsilon} episodes {n_episodes} bracketer {bracketer.__class__.__name__}"

In [None]:
atari.save(path=f'{model_path}')

In [None]:
atari.upload(model_path)

In [None]:
env = SnakeEnv(render_mode='human', max_step=2000)
atari.play(env, bracketer)

In [None]:
# Score the saved Atari-style model over num_episodes runs in a
# render_mode="nonhuman" environment. All arguments are passed by keyword.
# NOTE(review): model_name is 'AtariDDQL' while the class and the file prefix are
# "AtariDQL" -- confirm which key get_model_average_score expects.
get_model_average_score(
    # which model to evaluate
    model_name="AtariDDQL",
    model_path=model_path,
    # agent construction settings
    action_space=env.action_space.n,
    width=width,
    height=height,
    n_layers=1,
    gamma=gamma,
    lr_v=lr_v,
    batch_size=batch_size,
    memory_size=memory_size,
    target_update_freq=target_update_freq,
    device=device,
    # evaluation settings
    bracketer=bracketer,
    num_episodes=1000,
    render_mode="nonhuman",
)