# PitsAndOrbs Project

## Checking the raw Game

In [1]:
from pitsandorbs import PitsAndOrbs


# Build a game with its default settings and print the starting state.
# show_board() prints the board grid, the info dict, and the legends for
# directions, cell types and actions (see the output below).
game = PitsAndOrbs()
game.show_board()

[[0 3 3 0 0]
 [2 0 3 0 0]
 [2 0 0 0 2]
 [2 0 3 0 0]
 [3 0 2 1 0]]
{'player position': (4, 3), 'player direction': '3.south', 'player has orb': False, 'player movements#': 0}

Directions: ['0.west', '1.north', '2.east', '3.south']
Cell Types: ['0.nothing', '1.player', '2.orb', '3.pit', '4.player&orb', '5.player&pit', '6.orb&pit', '7.player&orb&pit', '8.out of bound']
Actions: ['0.turn right', '1.move forward', '2.pick orb up', '3.put orb down']



In [2]:
# Partial view of the board around the player: in the output below it is a 3x3
# uint8 array centred on the player's cell (1), with 8 marking out-of-bound cells.
game.get_observation()

array([[3, 0, 0],
       [2, 1, 0],
       [8, 8, 8]], dtype=uint8)

In [3]:
# The same info dict that show_board() prints: position, direction, orb flag
# and movement count.
game.get_info()

{'player position': (4, 3),
 'player direction': '3.south',
 'player has orb': False,
 'player movements#': 0}

In [4]:
# Action 1 is "move forward". The result below is a 4-tuple that looks like
# (observation, reward, done, info).
# NOTE(review): the player faces south on the bottom row and nothing changes,
# so the move is presumably rejected at the board edge - confirm.
game.step(1)

(array([[3, 0, 0],
        [2, 1, 0],
        [8, 8, 8]], dtype=uint8),
 0,
 False,
 {'player position': (4, 3),
  'player direction': '3.south',
  'player has orb': False,
  'player movements#': 0})

In [5]:
# Print the board again to confirm the step above left the state unchanged.
game.show_board()

[[0 3 3 0 0]
 [2 0 3 0 0]
 [2 0 0 0 2]
 [2 0 3 0 0]
 [3 0 2 1 0]]
{'player position': (4, 3), 'player direction': '3.south', 'player has orb': False, 'player movements#': 0}

Directions: ['0.west', '1.north', '2.east', '3.south']
Cell Types: ['0.nothing', '1.player', '2.orb', '3.pit', '4.player&orb', '5.player&pit', '6.orb&pit', '7.player&orb&pit', '8.out of bound']
Actions: ['0.turn right', '1.move forward', '2.pick orb up', '3.put orb down']



## Checking the gym Environment

In [6]:
from pitsandorbsenv import PitsAndOrbsEnv


# Create the gym environment version of the game. reset() returns a dict
# observation holding the local board view, the player's direction and the
# orb flag (see the output below).
env = PitsAndOrbsEnv()
obs = env.reset()
obs

OrderedDict([('board',
              array([[3, 2, 0],
                     [0, 1, 0],
                     [8, 8, 8]], dtype=uint8)),
             ('player_direction', 3),
             ('player_has_orb', 0)])

In [7]:
# Draw a random element of the observation space to inspect its structure;
# the sampled values are arbitrary and need not be a reachable game state.
env.observation_space.sample()

OrderedDict([('board',
              array([[5, 1, 6],
                     [7, 8, 6],
                     [8, 1, 1]], dtype=uint8)),
             ('player_direction', 1),
             ('player_has_orb', 0)])

In [8]:
# Action 0 is "turn right": in the output below the direction goes from
# 3 (south) to 0 (west) while the board view stays the same.
env.step(0)

(OrderedDict([('board',
               array([[3, 2, 0],
                      [0, 1, 0],
                      [8, 8, 8]], dtype=uint8)),
              ('player_direction', 0),
              ('player_has_orb', 0)]),
 0,
 False,
 {'player position': (4, 1),
  'player direction': '0.west',
  'player has orb': False,
  'player movements#': 0})

## Checking the gym Environment Wrapper

In [9]:
from pitsandorbsenv import PitsAndOrbsEnv

from onehot_wrapper import OnehotWrapper


# Wrap the environment so reset() returns one flat vector of 0./1. values
# instead of the dict observation shown earlier.
env = OnehotWrapper(PitsAndOrbsEnv())
env.reset()

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0.])

## Implementing Reinforcement Learning Approach

In [1]:
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3 import A2C

from utils import make_env

In [2]:
# Smoke-test the env factory used for training: with num_stack=4, reset()
# returns a gym LazyFrames object (see the output below).
env = make_env(num_stack=4)
env.reset()

<gym.wrappers.frame_stack.LazyFrames at 0x18767def9c0>

In [3]:
# Release the smoke-test env; the training cell below builds its own envs.
env.close()

# Drop the name so later cells cannot accidentally reuse the closed env.
del env

In [4]:
def _make_stacked_env():
    """Build a single training env configured with num_stack=4."""
    return make_env(num_stack=4)


# 1,000 copies of the env are created through the vectorised-env helper.
vec_env = make_vec_env(_make_stacked_env, n_envs=1_000)

# Training budget (total timesteps) and where the trained agent is saved.
n_steps = 10_000_000
model_path = "./models/PitsAndOrbs A2C (stable-baselines3) (10mil iters).zip"

In [5]:
# Train a fresh A2C agent with an MLP policy on the vectorised envs, then save it.
model = A2C(
    "MlpPolicy",
    vec_env,
    verbose=0,
    # `{n_steps:_}` renders as "10_000_000". The previous spec `{n_steps: _}`
    # used the space sign option, which produced a log directory name with a
    # leading space ("./logs/ 10_000_000").
    tensorboard_log=f"./logs/{n_steps:_}",
)

# log_interval=1 asks for a log entry on every logging opportunity.
model.learn(total_timesteps=n_steps, log_interval=1)

model.save(model_path)

In [6]:
# Training is finished and the model is saved; close the vectorised training envs.
vec_env.close()

In [7]:
# Reload the agent from the file saved above. No env is passed here; the
# evaluation cell creates its own.
model = A2C.load(model_path)

In [9]:
# Roll out the loaded agent for one episode (capped at MAX_EVAL_STEPS steps),
# rendering the board after every action and accumulating the reward.
MAX_EVAL_STEPS = 300

env = make_env(num_stack=4)

try:
    state = env.reset()
    env.render()  # show the initial board
    rewards = 0

    for _step in range(MAX_EVAL_STEPS):
        # deterministic=False: sample the action from the policy instead of
        # always taking its most likely action.
        action, _policy_state = model.predict(state, deterministic=False)
        state, reward, done, info = env.step(action)

        rewards += reward

        print("Chosen Action:", action)
        print()
        env.render()

        if done:
            print(f"Episode is done successfully with {info['player movements#']} movements.")
            break

    print("Total Reward:", rewards)
finally:
    # Always release the env, even if the rollout raises or is interrupted.
    env.close()

[[0 3 2 0 0]
 [1 0 0 0 0]
 [0 0 2 2 3]
 [3 0 3 0 0]
 [2 3 0 2 0]]
{'player position': (1, 0), 'player direction': '3.south', 'player has orb': False, 'player movements#': 0}

Directions: ['0.west', '1.north', '2.east', '3.south']
Cell Types: ['0.nothing', '1.player', '2.orb', '3.pit', '4.player&orb', '5.player&pit', '6.orb&pit', '7.player&orb&pit', '8.out of bound']
Actions: ['0.turn right', '1.move forward', '2.pick orb up', '3.put orb down']

Chosen Action: 0

[[0 3 2 0 0]
 [1 0 0 0 0]
 [0 0 2 2 3]
 [3 0 3 0 0]
 [2 3 0 2 0]]
{'player position': (1, 0), 'player direction': '0.west', 'player has orb': False, 'player movements#': 0}

Directions: ['0.west', '1.north', '2.east', '3.south']
Cell Types: ['0.nothing', '1.player', '2.orb', '3.pit', '4.player&orb', '5.player&pit', '6.orb&pit', '7.player&orb&pit', '8.out of bound']
Actions: ['0.turn right', '1.move forward', '2.pick orb up', '3.put orb down']

Chosen Action: 0

[[0 3 2 0 0]
 [1 0 0 0 0]
 [0 0 2 2 3]
 [3 0 3 0 0]
 [2 3 0 2 0]]
{

In [1]:
# Run the saved agent from the shell. The first argument is the model file
# (echoed back in the output below); the second is presumably the GIF to write.
# NOTE(review): `python ./` points at a directory, which only works if it
# contains a __main__.py - the script name may be missing here; confirm.
!python ./ "./models/PitsAndOrbs A2C (stable-baselines3) (10mil iters).zip" "./gifs/First phase (single agent with 10mil-iters-A2C model).gif"

pygame 2.4.0 (SDL 2.26.4, Python 3.10.10)
Hello from the pygame community. https://www.pygame.org/contribute.html

Using the agent located at: ./models/PitsAndOrbs A2C (stable-baselines3) (10mil iters).zip


Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
Total Rewards: 0, Current Reward: 0
