# Jupyter Snake

In [1]:
import os

from algorithms import *
from snake_environment import *
from states_bracket import *
from epsilon_scheduler import *
from utils import *

In [2]:
# Directory holding saved models: "<current working directory>/models/".
current_path = os.getcwd()
models_path = f"{current_path}/models/"

In [5]:
# --- Shared settings for the tabular learners below -------------------------
# General settings
gamma = 0.99
lr_v = 0.15
n_episodes = 25000

# Bracketer: passed to learning()/play() together with the environment.
bracketer = NeighPlusFoodDirectionBracket(neigh="V", radius=1)

# Exploration schedule: starts at eps=1 with coefficient 0.999 and minimum 0.15.
epsilon_schedule = LinearEpsilonDecay(eps=1, coefficient=0.999, minimum=0.15)

## QLearning

In [6]:
# Environment
env = SnakeEnv(render_mode="nonhuman", max_step=1000)
Q_p = QLearning(env.action_space.n, gamma=gamma, lr_v=lr_v)
Q_p.learning(env, epsilon_schedule, n_episodes, bracketer)

Episode 24900/25000 : epsilon 0.15


Learning finished


Episode 0 : Performance -47.0
Episode 100 : Performance -11.5
Episode 200 : Performance -10.0
Episode 300 : Performance -23.5
Episode 400 : Performance -15.5
Episode 500 : Performance -21.0
Episode 600 : Performance -24.0
Episode 700 : Performance -21.0
Episode 800 : Performance -16.0
Episode 900 : Performance -45.5
Episode 1000 : Performance -15.5
Episode 1100 : Performance 19.0
Episode 1200 : Performance 2.0
Episode 1300 : Performance -6.5
Episode 1400 : Performance -11.0
Episode 1500 : Performance -26.5
Episode 1600 : Performance -10.5
Episode 1700 : Performance 11.0
Episode 1800 : Performance -15.0
Episode 1900 : Performance -3.0
Episode 2000 : Performance -10.5
Episode 2100 : Performance -15.0
Episode 2200 : Performance -31.0
Episode 2300 : Performance -14.5
Episode 2400 : Performance 4.0
Episode 2500 : Performance 28.5
Episode 2600 : Performance -18.0
Episode 2700 : Performance -19.5
Episode 2800 : Performance -21.5
Episode

In [9]:
# Human-readable tag describing this run; it is only used to build the
# checkpoint file name below. Plain string literal: it has no placeholders,
# so an f-prefix would be a no-op.
name_specs = 'linear from 1 to 015 with 0999 as coefficient vn1 plus fd 1000 max iterations'
# Build the checkpoint path from models_path (defined in the paths cell at the
# top of the notebook) rather than a hard-coded "./models/", so this section
# resolves the models directory the same way the SARSA / DDQN / Atari
# sections do.
model_path = f"{models_path}QLearning gamma {gamma} lr {lr_v} epsilon {name_specs} 2nd"

In [10]:
# Persist the trained Q_p agent at model_path (already a str, so it is passed
# directly).
Q_p.save(model_path)

In [11]:
# Counterpart of Q_p.save: called with the same model_path.
# NOTE(review): despite its name, upload() appears to load the saved model
# back into Q_p — confirm in algorithms.
Q_p.upload(model_path)

In [12]:
# Replay the current Q_p agent in a rendered ("human") environment with the
# same max_step=1000 used for training. The number recorded under the cell
# comes from play() — presumably the episode score; confirm in algorithms.
env = SnakeEnv(render_mode="human", max_step=1000)
Q_p.play(env, bracketer)

52.5

In [8]:
# Dump the learned Q-table. The recorded output is a defaultdict keyed by
# integer tuples; expect a very large output for a trained agent.
Q_p.print_q_values(bracketer)

defaultdict(<class 'int'>, {(1, 1, 0, 0, 0, 0, 0, 0, 0, 0): np.float64(37.11972384589169), (1, 1, 0, 0, 0, 0, 0, 0, 0, 1): np.float64(37.7995422901348), (1, 1, 0, 0, 0, 0, 0, 0, 0, 2): np.float64(38.741721421504444), (1, 1, 0, 0, 0, 0, 0, 0, 0, 3): np.float64(40.881465806268785), (1, 0, 0, 0, 0, 0, 0, 0, 0, 0): np.float64(42.559451575383996), (1, 0, 0, 0, 0, 0, 0, 0, 0, 1): np.float64(43.421302199841016), (1, 0, 0, 0, 0, 0, 0, 0, 0, 2): np.float64(37.3916146383655), (1, 0, 0, 0, 0, 0, 0, 0, 0, 3): np.float64(37.02112221580351), (1, 0, 0, 1, 0, 0, 0, 0, 0, 0): np.float64(36.49040184904307), (1, 0, 0, 1, 0, 0, 0, 0, 0, 1): np.float64(36.82669685927253), (1, 0, 0, 1, 0, 0, 0, 0, 0, 2): np.float64(39.779955203778364), (1, 0, 0, 1, 0, 0, 0, 0, 0, 3): np.float64(37.70513754730227), (1, 1, 0, 0, 0, 1, 0, 0, 0, 0): np.float64(36.7396394075434), (1, 1, 0, 0, 0, 1, 0, 0, 0, 1): np.float64(36.80620555367401), (1, 1, 0, 0, 0, 1, 0, 0, 0, 2): -9.999999999999995, (1, 1, 0, 0, 0, 1, 0, 0, 0, 3): np.f

## SARSA

In [None]:
# Environment
epsilon_schedule = LinearEpsilonDecay(eps = 1, coefficient=0.9999, minimum=0.30)
env = SnakeEnv(render_mode="nonhuman", max_step=1000)
SARSA_p = SARSA(env.action_space.n, gamma=gamma, lr_v=lr_v)
bracketer = FoodDirectionBracket()
SARSA_p.learning(env, epsilon_schedule, n_episodes = 50000, bracketer = bracketer)

In [None]:
# Checkpoint label for the SARSA run (plain string: it has no placeholders).
# NOTE(review): the label says "vn1 plus fd" although the SARSA training cell
# uses FoodDirectionBracket() — confirm the tag is what you want on disk.
name_specs = 'linear from 1 to 030 with 09999 as coefficient vn1 plus fd 1000 max iterations'
model_path = f"{models_path}SARSA gamma {gamma} lr {lr_v} epsilon {name_specs}"
# model_path is already a str, so it is handed to save() as-is.
SARSA_p.save(model_path)

In [None]:
# Counterpart of SARSA_p.save: called with the same model_path.
# NOTE(review): upload() appears to load the saved model — confirm in algorithms.
SARSA_p.upload(model_path)

In [None]:
# Replay the SARSA agent in a rendered ("human") environment with
# max_step=1000, using whichever `bracketer` is currently bound.
env = SnakeEnv(render_mode="human", max_step=1000)
SARSA_p.play(env, bracketer)

In [None]:
# Dump the Q-values learned by the SARSA agent (same call as in the
# Q-learning section; output may be large).
SARSA_p.print_q_values(bracketer)

## Double Deep Q-Learning


In [2]:
# --- Double DQN configuration ----------------------------------------------
# Settings forwarded to DeepDoubleQLearning below.
BATCH_SIZE = 128
MEMORY_SIZE = 10000
TARGET_UPDATE_FREQ = 200

# General settings (these rebind the names used by the tabular sections above).
gamma = 0.95
lr_v = 0.001
epsilon = 0.1  # in this notebook it is only interpolated into checkpoint file names
n_episodes = 5001
epsilon_schedule = LinearEpsilonDecay(eps=1, coefficient=0.999, minimum=0.05)

# Bracketer
bracketer = NeighPlusFoodDirectionBracket(neigh="V", radius=1)

# Device selection: CUDA when available, CPU otherwise.
# NOTE(review): `torch` is never imported explicitly in this notebook —
# presumably it arrives through one of the star imports; confirm.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# XPU selection is left commented out for this section:
# device = 'xpu' if torch.xpu.is_available() else device
print(f"Using device: {device}")

# Environment
env = SnakeEnv(render_mode="nonhuman")
state_dim = bracketer.get_state_dim()

# Agent: sized from the environment's action space and the bracketer's state
# dimension.
ddqn = DeepDoubleQLearning(
    env.action_space.n,
    state_dim=state_dim,
    gamma=gamma,
    lr_v=lr_v,
    batch_size=BATCH_SIZE,
    memory_size=MEMORY_SIZE,
    target_update_freq=TARGET_UPDATE_FREQ,
    device=device,
)

Using device: cpu


In [7]:
# Train the DDQN agent on a fresh headless environment (no max_step passed
# here, unlike the tabular sections).
# NOTE: the recorded output ends in KeyboardInterrupt around episode 3000 of
# 5001, so the agent used by the cells below was not trained for the full
# n_episodes.
env = SnakeEnv(render_mode="nonhuman")
ddqn.learning(env, epsilon_schedule, n_episodes, bracketer)

Episode 3000/5001 : epsilon 0.05


KeyboardInterrupt: 

In [9]:
# Checkpoint path for the DDQN run. The file name ends with a trailing space;
# it is reproduced exactly so save and upload keep agreeing on the same file.
# NOTE(review): `epsilon` here is the scalar 0.1, not the epsilon_schedule
# actually passed to learning() — confirm that is the intended label.
model_path = (
    f"{models_path}DDQN gamma {gamma} lr {lr_v} epsilon {epsilon} "
    f"episodes {n_episodes} bracketer {type(bracketer).__name__} "
)

In [10]:
# Persist the DDQN agent at model_path (already a str, so no f-string wrapper
# is needed).
ddqn.save(path=model_path)

In [11]:
# Counterpart of ddqn.save: called with the same model_path.
# NOTE(review): despite its name, upload() appears to load the saved model
# back into ddqn — confirm in algorithms.
ddqn.upload(model_path)

In [13]:
# Watch the DDQN agent in a rendered ("human") environment; this run uses
# max_step=2000, whereas the training environment was created without an
# explicit max_step.
env = SnakeEnv(render_mode='human', max_step=2000)
ddqn.play(env, bracketer)

97.5

In [14]:
# Evaluate the saved DDQN checkpoint with get_model_average_score over 1000
# headless episodes. The constructor-style arguments mirror the ones used to
# build `ddqn` in the configuration cell.
get_model_average_score(
    model_name='DDQN',
    action_space=env.action_space.n,
    gamma=gamma,
    lr_v=lr_v,
    model_path=model_path,
    bracketer=bracketer,
    num_episodes=1000,
    render_mode='nonhuman',
    state_dim=state_dim,
    batch_size=BATCH_SIZE,
    memory_size=MEMORY_SIZE,
    target_update_freq=TARGET_UPDATE_FREQ,
    device=device,
)

Episode 1000/1000


16.211

## Atari-like DDQL

In [3]:
# --- Atari-like (full-grid) deep Q-learning configuration -------------------
# Settings forwarded to AtariDeepQLearning below.
BATCH_SIZE = 32
MEMORY_SIZE = 10000
TARGET_UPDATE_FREQ = 200

# Bracketer: FullGrid built for a width x height board.
width = 20
height = 20
bracketer = FullGrid(width=width, height=height)

# General Settings (these rebind the names used by the earlier sections).
gamma = 0.95
lr_v = 0.001
epsilon = 0.1  # in this notebook it is only interpolated into checkpoint file names
n_episodes = 20001
epsilon_schedule = LinearEpsilonDecay(eps=1, coefficient=0.9999, minimum=0.05)

# Device selection with the same priority as before: xpu > cuda > cpu.
# `torch.xpu` is not present on every PyTorch build, so check for the
# attribute first instead of letting the cell fail with AttributeError on
# installs without XPU support.
# NOTE(review): `torch` is never imported explicitly in this notebook —
# presumably it arrives through one of the star imports; confirm.
if hasattr(torch, "xpu") and torch.xpu.is_available():
    device = "xpu"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Environment
env = SnakeEnv(render_mode="nonhuman")
# Kept for parity with the DDQN section; note that it is not passed to
# AtariDeepQLearning below, which receives width/height instead.
state_dim = bracketer.get_state_dim()

atari = AtariDeepQLearning(
    env.action_space.n,
    gamma=gamma,
    lr_v=lr_v,
    batch_size=BATCH_SIZE,
    memory_size=MEMORY_SIZE,
    target_update_freq=TARGET_UPDATE_FREQ,
    device=device,
    width=width,
    height=height,
    n_layers=1
)

Using device: xpu


In [None]:
# Train the Atari-style agent on a fresh headless environment with the
# schedule, episode count and FullGrid bracketer from the configuration cell.
# This cell has no recorded execution count or output in the notebook.
env = SnakeEnv(render_mode="nonhuman")
atari.learning(env, epsilon_schedule, n_episodes, bracketer)

In [7]:
# Checkpoint path for the Atari-style run. The file name ends with a trailing
# space; it is reproduced exactly so save and upload keep agreeing on the file.
# NOTE(review): `epsilon` here is the scalar 0.1, not the epsilon_schedule
# actually passed to learning() — confirm that is the intended label.
model_path = (
    f"{models_path}AtariDQL gamma {gamma} lr {lr_v} epsilon {epsilon} "
    f"episodes {n_episodes} bracketer {type(bracketer).__name__} "
)

In [8]:
# Persist the Atari-style agent at model_path (already a str, so no f-string
# wrapper is needed).
atari.save(path=model_path)

In [9]:
# Counterpart of atari.save: called with the same model_path.
# NOTE(review): despite its name, upload() appears to load the saved model
# back into atari — confirm in algorithms.
atari.upload(model_path)

In [18]:
# Watch the Atari-style agent in a rendered ("human") environment with
# max_step=2000. The recorded result under this cell is -10.
env = SnakeEnv(render_mode='human', max_step=2000)
atari.play(env, bracketer)

-10

In [16]:
# Evaluate the saved Atari-style checkpoint over 1000 headless episodes.
# NOTE(review): the recorded output of this cell is
# "Error uploading model DDQN. Returning...", i.e. the evaluation did not run
# as written — check model_name / model_path against what
# get_model_average_score expects.
get_model_average_score(
    model_name='AtariDDQL',
    action_space=env.action_space.n,
    gamma=gamma,
    lr_v=lr_v,
    model_path=model_path,
    bracketer=bracketer,
    num_episodes=1000,
    render_mode='nonhuman',
    batch_size=BATCH_SIZE,
    memory_size=MEMORY_SIZE,
    target_update_freq=TARGET_UPDATE_FREQ,
    device=device,
    height=height,
    width=width,
    n_layers=1,
)

Error uploading model DDQN. Returning...


## Monte Carlo

In [15]:
# Monte Carlo setup.
# NOTE: gamma, lr_v and epsilon_schedule are not set in this section; they are
# whatever the most recently executed configuration cell left behind.
n_episodes = 5000
bracketer = NeighPlusFoodDirectionBracket(neigh="V", radius=1)

env = SnakeEnv(render_mode="nonhuman")
MC = Montecarlo(env.action_space.n, gamma=gamma, lr_v=lr_v)

In [16]:
# Train the Monte Carlo agent with the currently bound epsilon_schedule.
# NOTE: the recorded output ends in KeyboardInterrupt around episode 1300 of
# 5000, so the agent was not trained for the full n_episodes.
MC.learning(env, epsilon_schedule, n_episodes, bracketer)

Episode 1300/5000 : epsilon 0.05


KeyboardInterrupt: 

In [6]:
# Save the Monte Carlo agent under ./models/ (this section uses its own `path`
# rather than models_path).
# NOTE(review): the epsilon_schedule object itself is interpolated into the
# file name; unless LinearEpsilonDecay defines __str__/__repr__, that yields a
# default object repr — confirm the resulting name is stable across sessions.
path = "./models/"
MC.save(
    f"{path}MC gamma {gamma} lr {lr_v} epsilon {epsilon_schedule} "
    f"episodes {n_episodes} bracketer {type(bracketer).__name__}"
)

In [19]:
# Rendered ("human") environment for watching the Monte Carlo agent; no
# max_step is passed here, unlike the other play cells.
env = SnakeEnv(render_mode="human")
# Reloading the saved checkpoint is left disabled, so the MC.play cell that
# follows uses whatever MC currently holds in memory:
# MC.upload(f"{path}MC gamma {gamma} lr {lr_v} epsilon {epsilon_schedule} episodes {n_episodes} bracketer {bracketer.__class__.__name__}")

In [20]:
# Play one rendered game with the in-memory Monte Carlo agent; the recorded
# result under this cell is -5.5.
MC.play(env, bracketer)

-5.5

In [7]:
# Rebuild the same checkpoint path that the MC.save cell used, then evaluate
# that checkpoint over 1000 headless episodes.
# NOTE(review): the recorded output shows "Episode 100/100", which does not
# match num_episodes=1000 — the stored output looks stale; re-run to refresh.
model_path = (
    f"{path}MC gamma {gamma} lr {lr_v} epsilon {epsilon_schedule} "
    f"episodes {n_episodes} bracketer {type(bracketer).__name__}"
)

get_model_average_score(
    model_name='MC',
    action_space=env.action_space.n,
    gamma=gamma,
    lr_v=lr_v,
    model_path=model_path,
    bracketer=bracketer,
    num_episodes=1000,
    render_mode='nonhuman',
)

Episode 100/100


-20.965