# Initialization

In [1]:
from agent_configs.dqn.rainbow_config import RainbowConfig
import gymnasium as gym
import torch

import sys
sys.path.append('..')

from dqn.rainbow.rainbow_agent import RainbowAgent

# Agent class and its matching config class used by every experiment cell below;
# swap both together to run a different algorithm.
agent_callable = RainbowAgent
config_callable = RainbowConfig
# Prefix used when building each experiment's agent name.
base_name = "Rainbow"

# Use the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# K-Armed Bandits

## K-Armed Bandits

In [None]:
# Train the selected agent on the stationary k-armed bandit environment,
# passing an empty override dict to the agent config.
# NOTE(review): ArmedBanditsConfig is not imported anywhere in the visible
# notebook, and 'ArmedBandits-v0' is not a built-in Gymnasium id (presumably
# registered by a separate package) — confirm both are available before running.
game_config = ArmedBanditsConfig()
config_dict = dict()
config = config_callable(config_dict, game_config)

env = gym.make("ArmedBandits-v0")
agent = agent_callable(
    env,
    config,
    name=f"{base_name}_ArmedBandits-v0",
    device=device,
)
agent.train()

## Non-Stationary K-Armed Bandits

In [None]:
# Train the selected agent on the non-stationary k-armed bandit environment,
# passing an empty override dict to the agent config.
# NOTE(review): NonstationaryArmedBanditsConfig is not imported anywhere in the
# visible notebook, and 'NonstationaryArmedBandits-v0' is not a built-in
# Gymnasium id (presumably registered by a separate package) — confirm both
# are available before running.
game_config = NonstationaryArmedBanditsConfig()
config_dict = dict()
config = config_callable(config_dict, game_config)

env = gym.make("NonstationaryArmedBandits-v0")
agent = agent_callable(
    env,
    config,
    name=f"{base_name}_NonstationaryArmedBandits-v0",
    device=device,
)
agent.train()

# Classic Control

## CartPole-v1

In [None]:
from game_configs.cartpole_config import CartPoleConfig

# Train the selected agent on CartPole-v1 with the CartPole-specific game
# config and an empty override dict for the agent config.
game_config = CartPoleConfig()
config_dict = dict()
config = config_callable(config_dict, game_config)

env = gym.make(
    "CartPole-v1",
    render_mode="rgb_array",
)
agent = agent_callable(
    env,
    config,
    name=f"{base_name}_CartPole-v1",
    device=device,
)
agent.train()

## Discrete Classic Control
1. CartPole-v1
2. Acrobot-v1
3. LunarLander-v2
4. MountainCar-v0 (600 steps)

In [None]:
from game_configs.classiccontrol_config import ClassicControlConfig

# Train one agent per discrete classic-control environment, all sharing the
# same ClassicControlConfig-based agent config.
game_config = ClassicControlConfig()
config_dict = {}
# The config must be an *instance*: call the config class here in a single
# statement. Binding the bare class (and leaving the argument tuple as a
# separate no-op expression) hands every agent the class object instead.
config = config_callable(config_dict, game_config)

# (environment id, extra keyword arguments for gym.make)
# NOTE(review): newer Gymnasium releases register LunarLander-v3 rather than
# LunarLander-v2 — confirm the id against the installed version.
env_specs = [
    ("CartPole-v1", {}),
    ("Acrobot-v1", {}),
    ("LunarLander-v2", {}),
    ("MountainCar-v0", {"max_episode_steps": 600}),  # longer episodes than the default
]

for env_id, make_kwargs in env_specs:
    env = gym.make(env_id, render_mode="rgb_array", **make_kwargs)
    agent = agent_callable(
        env,
        config,
        name=f'{base_name}_DiscreteClassicControl_{env_id}',
        device=device,
    )
    agent.train()

## Continuous Classic Control

# Toy Text
Includes highly random single player games (Blackjack)

## Blackjack

In [None]:
from game_configs.blackjack_config import BlackjackConfig
from utils.utils import KLDivergenceLoss

# Train the selected agent on Blackjack-v1 with explicit hyperparameter
# overrides passed to the agent config.
game_config = BlackjackConfig()
config_dict = dict(
    dense_layers=[128, 128],
    batch_size=32,
    learning_rate=0.0001,
    n_steps=3,
    loss_function=KLDivergenceLoss(),
    training_steps=100000,
    atom_size=51,
)
config = config_callable(config_dict, game_config)

env = gym.make(
    "Blackjack-v1",
    render_mode="rgb_array",
)
agent = agent_callable(
    env,
    config,
    name=f"{base_name}_Blackjack-v1",
    device=device,
)
# Set on the agent after construction, before training starts.
agent.checkpoint_trials = 1000
agent.train()

In [1]:
from game_configs.toytext_config import ToyTextConfig
from utils.utils import KLDivergenceLoss

# Shared Toy Text experiment: one agent config reused across the Toy Text
# environments below. This cell needs the Initialization cell to have been run
# in the current kernel first — it defines config_callable, agent_callable,
# base_name and device.
game_config = ToyTextConfig()
config_dict = dict(
    dense_layers=[128, 128],
    batch_size=32,
    learning_rate=0.005,
    n_steps=3,
    loss_function=KLDivergenceLoss(),
    training_steps=30000,
    atom_size=51,
)
config = config_callable(config_dict, game_config)

# Currently disabled runs; uncomment a group to train on that environment.
# env = gym.make("Blackjack-v1", render_mode="rgb_array")
# agent = agent_callable(env, config, name=f'{base_name}_ToyText_Blackjack-v1', device=device)
# agent.checkpoint_trials = 1000
# agent.train()

# env = gym.make("FrozenLake-v1", render_mode="rgb_array")
# agent = agent_callable(env, config, name=f'{base_name}_ToyText_FrozenLake-v1', device=device)
# agent.checkpoint_trials = 10
# agent.train()

# env = gym.make("Taxi-v3", render_mode="rgb_array")
# agent = agent_callable(env, config, name=f'{base_name}_ToyText_Taxi-v3', device=device)
# agent.checkpoint_trials = 10
# agent.train()

# Active run: CliffWalking with episodes capped at 500 steps.
env = gym.make(
    "CliffWalking-v0",
    render_mode="rgb_array",
    max_episode_steps=500,
)
agent = agent_callable(
    env,
    config,
    name=f"{base_name}_ToyText_CliffWalking-v0",
    device=device,
)
agent.checkpoint_trials = 10
agent.train()

NameError: name 'config_callable' is not defined

# MuJoCo

# Board Games

## 2 Player Zero-Sum Perfect Information Deterministic Competitive

### Tic Tac Toe

### Connect 4

### Chess

## 2-Player Zero-Sum Imperfect Information Non Deterministic Competitive

### 2 Player Leduc Hold'em

### 2 Player Texas Hold'em

## 3+ Player Zero-Sum Imperfect Information Non Deterministic Competitive

### Mississippi Marbles

### Leduc Hold'em

### Texas Hold'em

## 3+ Player Zero-Sum Perfect Information Deterministic Competitive

### 4 Player Chess