# Initialization

In [1]:
from agent_configs.dqn.rainbow_config import RainbowConfig
import gymnasium as gym
import torch

import sys
sys.path.append('..')

from dqn.rainbow.rainbow_agent import RainbowAgent

agent_callable = RainbowAgent # RainbowAgent etc
config_callable = RainbowConfig # RainbowConfig etc
base_name = "Rainbow"

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# K-Armed Bandits

## K-Armed Bandits

In [None]:
game_config = ArmedBanditsConfig()
config_dict = {}
config = config_callable(config_dict, game_config)

env = gym.make('ArmedBandits-v0')
agent = agent_callable(env, config, name=f'{base_name}_ArmedBandits-v0', device=device)
agent.train()

## Non Stationairy K-Armed Bandits

In [None]:
game_config = NonstationaryArmedBanditsConfig()
config_dict = {}
config = config_callable(config_dict, game_config)

env = gym.make('NonstationaryArmedBandits-v0')
agent = agent_callable(env, config, name=f'{base_name}_NonstationaryArmedBandits-v0', device=device)
agent.train()

# Classic Control

## CartPole-v1

In [None]:
from game_configs.cartpole_config import CartPoleConfig

game_config = CartPoleConfig()
config_dict = {}
config = config_callable(config_dict, game_config)

env = gym.make("CartPole-v1", render_mode="rgb_array")
agent = agent_callable(env, config, name=f'{base_name}_CartPole-v1', device=device)
agent.train()

## Discrete Classic Control
1. CartPole-v1
2. Acrobot-v1
3. LunarLander-v2
4. MountainCar-v0 (600 steps)

In [None]:
from game_configs.classiccontrol_config import ClassicControlConfig

game_config = ClassicControlConfig()
config_dict = {}
config = config_callable

env = gym.make("CartPole-v1", render_mode="rgb_array")
(config_dict, game_config)
agent = agent_callable(env, config, name=f'{base_name}_DiscreteClassicControl_CartPole-v1', device=device)
agent.train()

env = gym.make("Acrobot-v1", render_mode="rgb_array")
agent = agent_callable(env, config, name=f'{base_name}_DiscreteClassicControl_Acrobot-v1', device=device)
agent.train()

env = gym.make("LunarLander-v2", render_mode="rgb_array")
agent = agent_callable(env, config, name=f'{base_name}_DiscreteClassicControl_LunarLander-v2', device=device)
agent.train()

env = gym.make("MountainCar-v0", render_mode="rgb_array", max_episode_steps=600)
agent = agent_callable(env, config, name=f'{base_name}_DiscreteClassicControl_MountainCar-v0', device=device)
agent.train()

## Continuous Classic Control

# Toy Text
Includes highly random single player games (Blackjack)

## Blackjack

In [None]:
from game_configs.blackjack_config import BlackjackConfig
from utils.utils import KLDivergenceLoss

game_config = BlackjackConfig()
config_dict = {
    "dense_layers": [128, 128],
    "batch_size": 32,
    "learning_rate": 0.0001,
    "n_steps": 3,
    "loss_function": KLDivergenceLoss(),
    "training_steps": 100000,
    "atom_size": 51,
}
config = config_callable(config_dict, game_config)

env = gym.make("Blackjack-v1", render_mode="rgb_array")
agent = agent_callable(env, config, name=f'{base_name}_Blackjack-v1', device=device)
agent.checkpoint_trials = 1000
agent.train()

In [2]:
from game_configs.toytext_config import ToyTextConfig
from utils.utils import KLDivergenceLoss

game_config = ToyTextConfig()
config_dict = {
    "dense_layers": [128, 128],
    "batch_size": 32,
    "learning_rate": 0.005,
    "n_steps": 3,
    "loss_function": KLDivergenceLoss(),
    "training_steps": 30000,
    "atom_size": 51,
}
config = config_callable(config_dict, game_config)

# env = gym.make("Blackjack-v1", render_mode="rgb_array")
# agent = agent_callable(env, config, name=f'{base_name}_ToyText_Blackjack-v1', device=device)
# agent.checkpoint_trials = 1000
# agent.train()

# env = gym.make("FrozenLake-v1", render_mode="rgb_array")
# agent = agent_callable(env, config, name=f'{base_name}_ToyText_FrozenLake-v1', device=device)
# agent.checkpoint_trials = 10
# agent.train()

# env = gym.make("Taxi-v3", render_mode="rgb_array")
# agent = agent_callable(env, config, name=f'{base_name}_ToyText_Taxi-v3', device=device)
# agent.checkpoint_trials = 10
# agent.train()

env = gym.make("CliffWalking-v0", render_mode="rgb_array", max_episode_steps=500)
agent = agent_callable(env, config, name=f'{base_name}_ToyText_CliffWalking-v0', device=device)
agent.checkpoint_trials = 10
agent.train()

Using default save_intermediate_weights     : False
Using         training_steps                : 30000
Using default adam_epsilon                  : 1e-06
Using         learning_rate                 : 0.005
Using default clipnorm                      : 0
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using         loss_function                 : <utils.utils.KLDivergenceLoss object at 0x17efc4fe0>
Using default activation                    : relu
Using         kernel_initializer            : None
Using default minibatch_size                : 64
Using default replay_buffer_size            : 5000
Using default min_replay_buffer_size        : 64
Using default num_minibatches               : 1
Using default training_iterations           : 1
Using default conv_layers                   : []
Using default dense_layers_widths           : [128]
Using default value_hidden_layers_widths    : []
Using default advantage_hidden_layers_widths: []
Using default noisy_s

  logger.warn(


int64
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_1000/videos/Rainbow_ToyText_CliffWalking-v0/1000/Rainbow_ToyText_CliffWalking-v0-episode-9.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_1000/videos/Rainbow_ToyText_CliffWalking-v0/1000/Rainbow_ToyText_CliffWalking-v0-episode-9.mp4



  axs[row][col].legend()
  axs[row][col].set_xlim(1, len(values))
  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_1000/videos/Rainbow_ToyText_CliffWalking-v0/1000/Rainbow_ToyText_CliffWalking-v0-episode-9.mp4
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_2000/videos/Rainbow_ToyText_CliffWalking-v0/2000/Rainbow_ToyText_CliffWalking-v0-episode-19.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_2000/videos/Rainbow_ToyText_CliffWalking-v0/2000/Rainbow_ToyText_CliffWalking-v0-episode-19.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_2000/videos/Rainbow_ToyText_CliffWalking-v0/2000/Rainbow_ToyText_CliffWalking-v0-episode-19.mp4
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_3000/videos/Rainbow_ToyText_CliffWalking-v0/3000/Rainbow_ToyText_CliffWalking-v0-episode-29.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_3000/videos/Rainbow_ToyText_CliffWalking-v0/3000/Rainbow_ToyText_CliffWalking-v0-episode-29.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_3000/videos/Rainbow_ToyText_CliffWalking-v0/3000/Rainbow_ToyText_CliffWalking-v0-episode-29.mp4
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_4000/videos/Rainbow_ToyText_CliffWalking-v0/4000/Rainbow_ToyText_CliffWalking-v0-episode-39.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_4000/videos/Rainbow_ToyText_CliffWalking-v0/4000/Rainbow_ToyText_CliffWalking-v0-episode-39.mp4



                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_4000/videos/Rainbow_ToyText_CliffWalking-v0/4000/Rainbow_ToyText_CliffWalking-v0-episode-39.mp4
score:  -200


  axs[row][col].legend()


score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_5000/videos/Rainbow_ToyText_CliffWalking-v0/5000/Rainbow_ToyText_CliffWalking-v0-episode-49.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_5000/videos/Rainbow_ToyText_CliffWalking-v0/5000/Rainbow_ToyText_CliffWalking-v0-episode-49.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_5000/videos/Rainbow_ToyText_CliffWalking-v0/5000/Rainbow_ToyText_CliffWalking-v0-episode-49.mp4
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_6000/videos/Rainbow_ToyText_CliffWalking-v0/6000/Rainbow_ToyText_CliffWalking-v0-episode-59.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_6000/videos/Rainbow_ToyText_CliffWalking-v0/6000/Rainbow_ToyText_CliffWalking-v0-episode-59.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_6000/videos/Rainbow_ToyText_CliffWalking-v0/6000/Rainbow_ToyText_CliffWalking-v0-episode-59.mp4
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_7000/videos/Rainbow_ToyText_CliffWalking-v0/7000/Rainbow_ToyText_CliffWalking-v0-episode-69.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_7000/videos/Rainbow_ToyText_CliffWalking-v0/7000/Rainbow_ToyText_CliffWalking-v0-episode-69.mp4



                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_7000/videos/Rainbow_ToyText_CliffWalking-v0/7000/Rainbow_ToyText_CliffWalking-v0-episode-69.mp4
score:  -200


  axs[row][col].legend()


score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_8000/videos/Rainbow_ToyText_CliffWalking-v0/8000/Rainbow_ToyText_CliffWalking-v0-episode-79.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_8000/videos/Rainbow_ToyText_CliffWalking-v0/8000/Rainbow_ToyText_CliffWalking-v0-episode-79.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_8000/videos/Rainbow_ToyText_CliffWalking-v0/8000/Rainbow_ToyText_CliffWalking-v0-episode-79.mp4
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_9000/videos/Rainbow_ToyText_CliffWalking-v0/9000/Rainbow_ToyText_CliffWalking-v0-episode-89.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_9000/videos/Rainbow_ToyText_CliffWalking-v0/9000/Rainbow_ToyText_CliffWalking-v0-episode-89.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_9000/videos/Rainbow_ToyText_CliffWalking-v0/9000/Rainbow_ToyText_CliffWalking-v0-episode-89.mp4
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_10000/videos/Rainbow_ToyText_CliffWalking-v0/10000/Rainbow_ToyText_CliffWalking-v0-episode-99.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_10000/videos/Rainbow_ToyText_CliffWalking-v0/10000/Rainbow_ToyText_CliffWalking-v0-episode-99.mp4



                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_10000/videos/Rainbow_ToyText_CliffWalking-v0/10000/Rainbow_ToyText_CliffWalking-v0-episode-99.mp4
score:  -200


  axs[row][col].legend()


score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
score:  -200
Moviepy - Building video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_11000/videos/Rainbow_ToyText_CliffWalking-v0/11000/Rainbow_ToyText_CliffWalking-v0-episode-109.mp4.
Moviepy - Writing video checkpoints/Rainbow_ToyText_CliffWalking-v0/step_11000/videos/Rainbow_ToyText_CliffWalking-v0/11000/Rainbow_ToyText_CliffWalking-v0-episode-109.mp4



                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_ToyText_CliffWalking-v0/step_11000/videos/Rainbow_ToyText_CliffWalking-v0/11000/Rainbow_ToyText_CliffWalking-v0-episode-109.mp4
score:  -200


  axs[row][col].legend()


# MuJoCo

# Board Games

## 2 Player Zero-Sum Perfect Information Deterministic Competitive

### Tic Tac Toe

### Connect 4

### Chess

## 2-Player Zero-Sum Imperfect Information Non Deterministic Competitive

### 2 Player LeDuc Hold'em

### 2 Player Texas Hold'em

## 3+ Player Zero-Sum Imperfect Information Non Deterministic Competitive

## Mississippi Marbles

## Leduc Hold'em

## Texas Hold'em

## 3+ Player Zero-Sum Perfect Information Deterministic Competitive

## 4 Player Chess