# Jupyter Snake

In [2]:
import os

from algorithms import *
from snake_environment import *
from states_bracket import *
from epsilon_scheduler import *
from utils import *

In [5]:
# Resolve the checkpoint directory relative to the notebook's working directory.
current_path = os.getcwd()
# Build the path with os.path.join instead of concatenating a hard-coded "/" so the
# separator matches the platform. The trailing "" component makes the result end
# with a separator, because later cells append the file name directly to models_path.
models_path = os.path.join(current_path, "models", "")

In [4]:
# Hyper-parameters shared by the tabular agents created in the cells below.
gamma = 0.99
lr_v = 0.15
n_episodes = 25000

# Exploration schedule handed to the agents' learning() calls.
epsilon_schedule = LinearEpsilonDecay(eps=1, coefficient=0.999, minimum=0.15)

# Bracketer instance handed to the agents' learning/play/print calls.
bracketer = VonNeumann1NeighPlusFoodDirectionBracket()

## QLearning

In [None]:
# Create the training environment and a Q-learning agent sized to its action
# space, then train with the schedule, episode count and bracketer from the
# settings cell.
env = SnakeEnv(max_step=1000, render_mode="nonhuman")
Q_p = QLearning(env.action_space.n, lr_v=lr_v, gamma=gamma)
Q_p.learning(env, epsilon_schedule, n_episodes, bracketer)

In [None]:
# Free-text tag appended to the checkpoint name (kept in sync with the settings by hand).
name_specs = "linear from 1 to 015 with 0999 as coefficient vn1 plus fd 1000 max iterations"
# Checkpoint location: models directory + algorithm name + hyper-parameters + tag.
model_path = models_path + f"QLearning gamma {gamma} lr {lr_v} epsilon {name_specs}"

In [None]:
# Write the trained Q-learning agent to the checkpoint path built above.
Q_p.save(model_path)

In [None]:
# Call the agent's upload() with the same checkpoint path used for save()
# (presumably restores the saved agent — confirm against the QLearning class).
Q_p.upload(model_path)

In [None]:
# Fresh environment with render_mode="human" for the play call.
# NOTE: `bracketer` is rebound in the SARSA, DDQN and Monte Carlo sections; re-run
# the settings cell first if any of those has executed in this kernel.
env = SnakeEnv(max_step=1000, render_mode="human")
Q_p.play(env, bracketer)

In [None]:
# Print the Q-learning agent's Q-values, passing the currently bound bracketer.
Q_p.print_q_values(bracketer)

## SARSA

In [None]:
# SARSA-specific exploration schedule and bracketer.
# NOTE: both assignments rebind the notebook-wide names `epsilon_schedule` and
# `bracketer`, so cells from other sections that run afterwards will see these.
epsilon_schedule = LinearEpsilonDecay(eps=1, coefficient=0.9999, minimum=0.30)
bracketer = FoodDirectionBracket()

# Training environment and agent; this run uses 50000 episodes rather than the
# global n_episodes.
env = SnakeEnv(max_step=1000, render_mode="nonhuman")
SARSA_p = SARSA(env.action_space.n, lr_v=lr_v, gamma=gamma)
SARSA_p.learning(env, epsilon_schedule, bracketer=bracketer, n_episodes=50000)

In [None]:
# Free-text tag appended to the SARSA checkpoint name.
# NOTE(review): the tag says "vn1 plus fd", but the SARSA training cell builds a
# FoodDirectionBracket — confirm which one is intended before relying on the name.
name_specs = "linear from 1 to 030 with 09999 as coefficient vn1 plus fd 1000 max iterations"
# Checkpoint location: models directory + algorithm name + hyper-parameters + tag.
model_path = models_path + f"SARSA gamma {gamma} lr {lr_v} epsilon {name_specs}"
SARSA_p.save(model_path)

In [None]:
# Call the agent's upload() with the checkpoint path used for save()
# (presumably restores the saved agent — confirm against the SARSA class).
SARSA_p.upload(model_path)

In [None]:
# Fresh environment with render_mode="human" for the play call.
env = SnakeEnv(max_step=1000, render_mode="human")
SARSA_p.play(env, bracketer)

In [None]:
# Print the SARSA agent's Q-values, passing the currently bound bracketer.
SARSA_p.print_q_values(bracketer)

## DDQN


In [7]:
# Settings forwarded to DeepDoubleQLearning as batch_size / memory_size /
# target_update_freq.
BATCH_SIZE = 128
MEMORY_SIZE = 10000
TARGET_UPDATE_FREQ = 200

# Hyper-parameters for the DDQN run (these rebind the names used by the tabular sections).
gamma = 0.95
lr_v = 0.001
epsilon = 0.1  # in the visible notebook this is only used in the checkpoint file name
n_episodes = 5001
epsilon_schedule = LinearEpsilonDecay(eps=1, coefficient=0.999, minimum=0.05)

# Bracketer for this section.
bracketer = VonNeumann1NeighPlusFoodRelPosBracket()

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# device = 'xpu' if torch.xpu.is_available() else device
print(f"Using device: {device}")

# Environment and the state dimension reported by the bracketer.
env = SnakeEnv(render_mode="nonhuman")
state_dim = bracketer.get_state_dim()

ddqn = DeepDoubleQLearning(
    env.action_space.n,
    device=device,
    target_update_freq=TARGET_UPDATE_FREQ,
    memory_size=MEMORY_SIZE,
    batch_size=BATCH_SIZE,
    lr_v=lr_v,
    gamma=gamma,
    state_dim=state_dim,
)

Using device: cpu


In [10]:
# Recreate the training environment (render_mode="nonhuman") and train the DDQN
# agent with the schedule, episode count and bracketer from the configuration cell.
env = SnakeEnv(render_mode="nonhuman")
ddqn.learning(env, epsilon_schedule, n_episodes, bracketer)

Episode 169 : epsilon 0.8268805241487632


KeyboardInterrupt: 

In [9]:
# Checkpoint path for the DDQN agent: models directory + algorithm name +
# hyper-parameters + bracketer class name.
# The previous f-string ended with a stray trailing space, which produced a file
# name ending in whitespace (stripped or mishandled by the Win32 API and easy to
# miss when typing the path by hand). The name now ends at the class name.
# NOTE: checkpoints already saved under the old name may need to be renamed or re-saved.
model_path = (
    f"{models_path}DDQN gamma {gamma} lr {lr_v} epsilon {epsilon} "
    f"episodes {n_episodes} bracketer {bracketer.__class__.__name__}"
)

Model path: D:\university\reinforcement\project_3/models/DDQN gamma 0.95 lr 0.001 epsilon 0.1 episodes 5001 bracketer VonNeumann1NeighPlusFoodRelPosBracket 


In [12]:
# Write the DDQN agent to the checkpoint path built above.
ddqn.save(path=model_path)

In [13]:
# Call the agent's upload() with the checkpoint path used for save()
# (presumably restores the saved weights — confirm against DeepDoubleQLearning).
ddqn.upload(model_path)

In [14]:
# Fresh environment with render_mode="human" and max_step=200 for the play call.
env = SnakeEnv(max_step=200, render_mode="human")
ddqn.play(env, bracketer)

15.5

In [15]:
# Score the saved DDQN checkpoint over 1000 episodes with render_mode='nonhuman'.
# The DDQN settings are passed through again with the same values used to build `ddqn`.
get_model_average_score(
    model_name='DDQN',
    model_path=model_path,
    bracketer=bracketer,
    num_episodes=1000,
    render_mode='nonhuman',
    action_space=env.action_space.n,
    state_dim=state_dim,
    gamma=gamma,
    lr_v=lr_v,
    batch_size=BATCH_SIZE,
    memory_size=MEMORY_SIZE,
    target_update_freq=TARGET_UPDATE_FREQ,
    device=device,
)

Episode 1000/1000


6.7665

## DDQN - Ruben Edition

In [None]:
# Alternative DDQN implementation: build the agent, train it, evaluate it and
# plot the values returned by learnQ().
# NOTE(review): this is the only training cell that uses render_mode="human";
# the others train with "nonhuman" — confirm this is intentional.
env = SnakeEnv(render_mode="human")
deepDQL = DeepDQL(env, NN)

returns = deepDQL.learnQ(
    batch_size=128,
    n_traj_for_Qtarget_update=200,
    n_traj=5000,
)
deepDQL.evaluation_averaged()

plt.plot(returns)
plt.show()

## Monte Carlo

In [4]:
# Monte Carlo setup: episode count, bracketer, environment and agent.
# NOTE: `gamma` and `lr_v` are not set in this section; the agent receives
# whatever values an earlier cell left bound in the kernel.
n_episodes = 5000
bracketer = VonNeumann1NeighPlusFoodRelPosBracket()
env = SnakeEnv(render_mode="nonhuman")
MC = Montecarlo(env.action_space.n, lr_v=lr_v, gamma=gamma)

In [5]:
# Train the Monte Carlo agent. `epsilon_schedule` is not created in this section,
# so this uses whichever schedule object an earlier cell last bound.
# NOTE(review): if the scheduler keeps internal state and was already used by a
# previous training run, exploration may not start from eps=1 — confirm.
MC.learning(env, epsilon_schedule, n_episodes, bracketer)

4999


In [6]:
# Save the Monte Carlo agent under a name built from the current settings.
# NOTE(review): the name embeds str(epsilon_schedule); if LinearEpsilonDecay does
# not define __str__/__repr__, that text contains a memory address that changes
# between kernel sessions — confirm before reloading in a new session.
path = "./models/"
MC.save(
    f"{path}MC gamma {gamma} lr {lr_v} epsilon {epsilon_schedule} "
    f"episodes {n_episodes} bracketer {bracketer.__class__.__name__}"
)

In [61]:
# Switch to an environment with render_mode="human" and call upload() with the
# same file name expression that the save cell used.
env = SnakeEnv(render_mode="human")
MC.upload(
    f"{path}MC gamma {gamma} lr {lr_v} epsilon {epsilon_schedule} "
    f"episodes {n_episodes} bracketer {bracketer.__class__.__name__}"
)

In [64]:
# Run the Monte Carlo agent in the currently bound environment with the current bracketer.
MC.play(env, bracketer)

-27.5

In [7]:
# Rebuild the Monte Carlo checkpoint path (same expression as the save/upload
# cells) and score that checkpoint over 1000 episodes with render_mode='nonhuman'.
model_path = (
    f"{path}MC gamma {gamma} lr {lr_v} epsilon {epsilon_schedule} "
    f"episodes {n_episodes} bracketer {bracketer.__class__.__name__}"
)

get_model_average_score(
    model_name='MC',
    model_path=model_path,
    bracketer=bracketer,
    num_episodes=1000,
    render_mode='nonhuman',
    action_space=env.action_space.n,
    gamma=gamma,
    lr_v=lr_v,
)

Episode 100/100


-20.965