# Jupyter Snake

In [1]:
import os

import torch.xpu

from algorithms import *
from snake_environment import *
from states_bracket import *
from epsilon_scheduler import *
from DeepDQL import *

In [2]:
# Shared configuration for the tabular-agent cells below. Every name bound here
# is a notebook global; the DDQN setup cell later rebinds bracketer, gamma,
# lr_v, n_episodes and epsilon_schedule, so results depend on which
# configuration cell ran last.
# Bracketer: passed to learning()/play()/print_q_values() alongside the env
# (presumably turns raw observations into discrete states - TODO confirm).
bracketer = VonNeumann1NeighPlusFoodDirectionBracket()
# General settings
gamma = 0.99  # forwarded as `gamma=` to the agent constructors
lr_v = 0.15  # forwarded as `lr_v=` to the agent constructors
n_episodes = 25000  # episode budget handed to learning()
# Exploration schedule handed to learning(); parameter meanings live in
# epsilon_scheduler (presumably decays from 1 toward a 0.15 floor - confirm).
epsilon_schedule = LinearEpsilonDecay(eps = 1, coefficient=0.999, minimum=0.15)

## QLearning

In [None]:
# Build a training environment and fit a QLearning agent on it.
# Relies on gamma, lr_v, epsilon_schedule, n_episodes and bracketer from the
# configuration cell.
# NOTE(review): render_mode="nonhuman" presumably disables the game window
# during training - confirm in snake_environment.
env = SnakeEnv(render_mode="nonhuman", max_step=1000)
# The agent is sized from the environment's number of discrete actions.
Q_p = QLearning(env.action_space.n, gamma=gamma, lr_v=lr_v)
Q_p.learning(env, epsilon_schedule, n_episodes, bracketer)

In [None]:
# Directory prefix for saved models. The trailing slash is required because
# later cells append the file name by plain string concatenation.
path = "./models/"

In [None]:
# File name for the Q-learning model. gamma and lr_v are interpolated; the
# epsilon-schedule, bracketer and max-step parts are written out by hand and
# must be edited manually if the configuration cell changes.
name = (
    f"QLearning gamma {gamma} lr {lr_v} "
    "epsilon linear from 1 to 015 with 0999 as coefficient "
    "vn1 plus fd 1000 max iterations"
)
# `name` stays a notebook global: the upload cell reuses it.
Q_p.save(f"{path}{name}")

In [None]:
# Points the agent at the same location the save cell wrote to; needs `path`
# and `name` from the cells above.
# NOTE(review): presumably `upload` loads the stored model back into Q_p
# despite its name - confirm in algorithms.
Q_p.upload(f"{path}{name}")

In [None]:
# Rebind `env` to a render_mode="human" environment and let the Q-learning
# agent play in it (presumably opens a visible game window - confirm).
env = SnakeEnv(render_mode="human", max_step=1000)
Q_p.play(env, bracketer)

In [None]:
# Inspect the Q-learning agent's values, using the current global `bracketer`.
Q_p.print_q_values(bracketer)

## SARSA

In [None]:
# SARSA training run. This cell REBINDS the notebook globals epsilon_schedule,
# env and bracketer, so any cell executed afterwards sees the SARSA versions.
# Exploration schedule specific to this run.
epsilon_schedule = LinearEpsilonDecay(eps = 1, coefficient=0.9999, minimum=0.30)
env = SnakeEnv(render_mode="nonhuman", max_step=1000)
# gamma and lr_v come from whichever configuration cell ran last.
SARSA_p = SARSA(env.action_space.n, gamma=gamma, lr_v=lr_v)
bracketer = FoodDirectionBracket()
# The episode count is a literal here, not the shared n_episodes global; the
# save cell's file name hard-codes the same 50000.
SARSA_p.learning(env, epsilon_schedule, n_episodes = 50000, bracketer = bracketer)

In [None]:
# Where SARSA models are stored; the trailing slash is needed because the file
# name is appended by string concatenation.
path = "./models/"
# gamma and lr_v are interpolated; the other parts are written out by hand.
# NOTE(review): the name says "vn1 plus fd", but the SARSA training cell sets
# bracketer = FoodDirectionBracket() - confirm which one is intended.
name = (
    f"SARSA 50000 episodes gamma {gamma} lr {lr_v} "
    "epsilon linear from 1 to 030 with 09999 as coefficient "
    "vn1 plus fd 1000 max iterations"
)
# `name` stays a notebook global: the upload cell reuses it.
SARSA_p.save(f"{path}{name}")

In [None]:
# Points the agent at the location the SARSA save cell wrote to; needs `path`
# and `name` from that cell.
# NOTE(review): presumably `upload` loads the stored model back into SARSA_p
# despite its name - confirm in algorithms.
SARSA_p.upload(f"{path}{name}")

In [None]:
# Rebind `env` to a render_mode="human" environment and let the SARSA agent
# play in it, using whatever `bracketer` is currently bound.
env = SnakeEnv(render_mode="human", max_step=1000)
SARSA_p.play(env, bracketer)

In [None]:
# Inspect the SARSA agent's values, using the current global `bracketer`.
SARSA_p.print_q_values(bracketer)

## DDQN


In [2]:
# --- DDQN hyper-parameters (forwarded to DeepDoubleQLearning below) ---
BATCH_SIZE = 128
MEMORY_SIZE = 10000
TARGET_UPDATE_FREQ = 200

# --- Bracketer used by the DDQN agent (rebinds the global `bracketer`) ---
bracketer = FoodRelativePositionBracket()

# --- General settings ---
# These rebind gamma, lr_v, n_episodes and epsilon_schedule from the tabular
# configuration cell. `epsilon` is first defined here; it is not passed to
# ddqn.learning, but it is used in the DDQN file names and by the Monte Carlo
# cells.
gamma = 0.95
lr_v = 0.001
epsilon = 0.1
n_episodes = 5001
epsilon_schedule = LinearEpsilonDecay(eps=1, coefficient=0.999, minimum=0.05)

# --- Compute device: CUDA when torch reports it as available, else CPU ---
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# device = 'xpu' if torch.xpu.is_available() else device
print(f"Using device: {device}")

# --- Environment and agent ---
env = SnakeEnv(render_mode="nonhuman")
# The network input size is whatever the bracketer reports.
state_dim = bracketer.get_state_dim()

ddqn = DeepDoubleQLearning(
    env.action_space.n,
    state_dim=state_dim,
    gamma=gamma,
    lr_v=lr_v,
    batch_size=BATCH_SIZE,
    memory_size=MEMORY_SIZE,
    target_update_freq=TARGET_UPDATE_FREQ,
    device=device,
)

Using device: cpu


In [3]:
# Train the DDQN agent. A fresh environment is created here even though the
# setup cell already built one, so re-running this cell starts from a new env.
env = SnakeEnv(render_mode="nonhuman")
# Exploration follows epsilon_schedule; the standalone `epsilon` constant from
# the setup cell is not used by this call.
ddqn.learning(env, epsilon_schedule, n_episodes, bracketer)

iteration 0 : epsilon 0.999
iteration 500 : epsilon 0.6057725659163237
iteration 1000 : epsilon 0.36732772934619257
iteration 1500 : epsilon 0.22273980093919937
iteration 2000 : epsilon 0.13506472547210188
iteration 2500 : epsilon 0.08190040571973876
iteration 3000 : epsilon 0.05
iteration 3500 : epsilon 0.05
iteration 4000 : epsilon 0.05
iteration 4500 : epsilon 0.05
iteration 5000 : epsilon 0.05


Learning finished


Episode 0 : Performance -11.5
Episode 100 : Performance -54.5
Episode 200 : Performance -40.5
Episode 300 : Performance -29.0
Episode 400 : Performance -21.0
Episode 500 : Performance -18.5
Episode 600 : Performance -29.0
Episode 700 : Performance -14.5
Episode 800 : Performance 2.5
Episode 900 : Performance -18.5
Episode 1000 : Performance -10.5
Episode 1100 : Performance -1.0
Episode 1200 : Performance -21.0
Episode 1300 : Performance 3.0
Episode 1400 : Performance -12.5
Episode 1500 : Performance 23.5
Episode 1600 : Performance 13.0
Episode 1700 : Performance 4.0
Epis

In [4]:
# Resolve the directory that holds saved models, relative to the directory the
# notebook kernel was started in.
current_path = os.getcwd()
# Joining with a trailing empty component makes os.path.join end the result
# with the platform's own separator. The previous "models/" literal produced
# mixed separators on Windows (e.g. "D:\...\models/"). The trailing separator
# is still required because later cells append the file name by plain string
# concatenation (f"{model_path}DDQN ...").
model_path = os.path.join(current_path, "models", "")
print(f"Model path: {model_path}")

Model path: D:\university\reinforcement\project_2\models/


In [5]:
# Store the DDQN agent under a file name that records the run settings.
# NOTE(review): the `epsilon` embedded here is the standalone constant from the
# setup cell, whereas ddqn.learning was driven by epsilon_schedule - confirm
# the name reflects the setting you intend to record.
# The upload cell rebuilds this exact string; keep the two in sync.
ddqn.save(
    path=(
        f"{model_path}DDQN gamma {gamma} lr {lr_v} epsilon {epsilon} "
        f"episodes {n_episodes} bracketer {type(bracketer).__name__}"
    )
)

In [6]:
# Point the agent at the file written by the DDQN save cell. The name is
# rebuilt from the current globals, so gamma, lr_v, epsilon, n_episodes and
# bracketer must still hold the values they had when the model was saved.
# NOTE(review): presumably `upload` loads the stored model back into ddqn
# despite its name - confirm in algorithms.
ddqn.upload(
    f"{model_path}DDQN gamma {gamma} lr {lr_v} epsilon {epsilon} "
    f"episodes {n_episodes} bracketer {type(bracketer).__name__}"
)

In [21]:
# Rebind `env` to a render_mode='human' environment capped at max_step=1000
# and let the DDQN agent play in it.
env = SnakeEnv(render_mode='human', max_step=1000)
ddqn.play(env, bracketer)

## DDQN - Ruben Edition

In [None]:
# Alternative DDQN implementation (DeepDQL): train, evaluate, plot returns.
# NOTE(review): every other training cell in this notebook uses
# render_mode="nonhuman"; training 5000 trajectories in "human" mode may be
# unintentionally slow - confirm this is deliberate.
env = SnakeEnv(render_mode="human")

# NOTE(review): `NN` and `plt` have no explicit import in this notebook; they
# presumably arrive through one of the `from ... import *` lines - confirm.
deepDQL = DeepDQL(env, NN)

# learnQ's return value is kept so it can be plotted below.
returns = deepDQL.learnQ(n_traj = 5000, n_traj_for_Qtarget_update=200, batch_size=128)
deepDQL.evaluation_averaged()
# Plot whatever learnQ returned (presumably one value per trajectory - confirm).
plt.plot(returns)
plt.show()

## Monte Carlo

In [None]:
# Monte Carlo agent setup. Rebinds the globals `env` and `n_episodes`.
env = SnakeEnv(render_mode="nonhuman")
n_episodes = 5000
# gamma and lr_v are not set in this section: they hold whatever the most
# recently executed configuration cell (tabular or DDQN) assigned.
MC = Montecarlo(env.action_space.n, gamma=gamma, lr_v=lr_v)

In [None]:
# Train the Monte Carlo agent. `epsilon` is only assigned in the DDQN setup
# cell, so that cell must have run first; `bracketer` is whichever one was
# bound last.
# NOTE(review): the other agents receive `epsilon_schedule` in this position,
# while this call passes the plain `epsilon` value - confirm that
# Montecarlo.learning expects a fixed epsilon rather than a schedule.
MC.learning(env, epsilon, n_episodes, bracketer)

In [None]:
# Directory prefix for saved models; the trailing slash is needed because the
# file name is appended by string concatenation.
path = "./models/"
# The file name records gamma, lr_v, epsilon and n_episodes as currently bound.
# The load cell rebuilds this exact string; keep the two in sync.
MC.save(f"{path}MC gamma {gamma} lr {lr_v} epsilon {epsilon} episodes {n_episodes}")

In [None]:
# Build a render_mode="human" environment and a fresh Montecarlo agent, then
# point it at the file the save cell wrote.
env = SnakeEnv(render_mode="human")
MC = Montecarlo(env.action_space.n, gamma=gamma, lr_v=lr_v)
# The file name is rebuilt from the current globals, so path, gamma, lr_v,
# epsilon and n_episodes must match the values used when saving.
# NOTE(review): presumably `upload` loads the stored model into MC despite its
# name - confirm in algorithms.
MC.upload(f"{path}MC gamma {gamma} lr {lr_v} epsilon {epsilon} episodes {n_episodes}")

In [None]:
# Let the Monte Carlo agent play in the current `env`, using the current
# global `bracketer`.
MC.play(env, bracketer)