In [59]:
# Auto reload modules
%load_ext autoreload
%autoreload 2


from ScheduleGym import ScheduleGym
import gymnasium as gym
import numpy as np

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Create one environment

In [60]:
# Configuration of the small demo problem; kept in one dict so the sizes are
# easy to spot and tweak.
env_config = dict(
    num_days=2,
    num_hours=4,
    num_classes=1,
    num_subjects=2,
)
env = ScheduleGym(**env_config)


In [61]:
# Ask the environment for its sizes. Only the first entry of the state sizes
# is used as the state dimension; the action sizes are kept as returned.
state_sizes = env.get_state_sizes()
state_dim = state_sizes[0]
action_dims = env.get_action_sizes()

print(f'State dimensions: {state_dim}, Action dimensions: {action_dims}')

State dimensions: 10, Action dimensions: [1, 2, 4, 2]


In [62]:
# Start a fresh episode: reset() returns a pair that is unpacked into the
# initial observation and an info value.
state, info = env.reset()
# Bare last expression so the notebook displays the initial observation.
state

array([ 0.04225352,  0.05633803, -0.01408451, -0.01408451, -0.01408451,
       -0.01408451, -0.01408451, -0.01408451, -0.01408451, -0.01408451])

In [63]:
# Show the environment's current state. Per the captured output this prints
# the timetable grid per class/day, the fitness, the number of actions left,
# and the target hours per class.
env.render()

Class 1:
Day 1: [-1 -1 -1 -1]
Day 2: [-1 -1 -1 -1]

Fitness: -7.0, Actions left: 70
Target Hours:
Class 1: [3 4]


In [64]:
# Draw one random action: component j is sampled uniformly from
# [0, action_dims[j]).
action = np.random.randint(low=0, high=action_dims)
print(f'Performing action {action}')

# Apply the action; step() returns a 5-tuple that is unpacked here.
state, reward, done, truncated, info = env.step(action)
print(f'State: {state}')
print(f'Reward: {reward}, Done: {done}, Truncated: {truncated}')

# Render after the step to see how the action changed the environment.
env.render()


Performing action [0 1 1 0]
State: [ 0.02816901  0.05633803 -0.01408451 -0.01408451 -0.01408451 -0.01408451
 -0.01408451  0.         -0.01408451 -0.01408451]
Reward: -0.10000000000000009, Done: False, Truncated: False
Class 1:
Day 1: [-1 -1 -1 -1]
Day 2: [-1  0 -1 -1]

Fitness: -6.0, Actions left: 69
Target Hours:
Class 1: [2 4]


# Create several environments

In [75]:
num_envs = 5


def make_env():
    """Factory returning one ScheduleGym with the demo configuration (2 days, 4 hours, 1 class, 2 subjects)."""
    return ScheduleGym(num_days=2, num_hours=4, num_classes=1, num_subjects=2)


# The vector env is given one factory per sub-environment and calls each of
# them itself. gym.vector.AsyncVectorEnv takes the same list of factories and
# can be swapped in here instead of SyncVectorEnv.
envs = gym.vector.SyncVectorEnv([make_env for _ in range(num_envs)])

In [76]:
# Observation space of one sub-environment (not the batched space).
# NOTE(review): the displayed space is Box(0.0, 1.0, (10,), float32), yet the
# observations shown by reset()/step() in this notebook contain negative
# entries -- confirm the bounds ScheduleGym declares for its observations.
envs.single_observation_space

Box(0.0, 1.0, (10,), float32)

In [77]:
# Action space of one sub-environment (not the batched space); compare its
# per-component sizes with action_dims from env.get_action_sizes().
envs.single_action_space

MultiDiscrete([1 2 4 2])

In [78]:
# Display the action sizes obtained earlier from env.get_action_sizes(); they
# are used below as the exclusive upper bounds when sampling random actions.
action_dims

[1, 2, 4, 2]

In [79]:
# Reset all sub-environments at once. The result is not stored; the cell just
# displays the returned tuple (batched observations with one row per
# sub-environment, plus an infos dict).
envs.reset()

(array([[ 0.04225352,  0.05633803, -0.01408451, -0.01408451, -0.01408451,
         -0.01408451, -0.01408451, -0.01408451, -0.01408451, -0.01408451],
        [ 0.05882353,  0.03921569, -0.01960784, -0.01960784, -0.01960784,
         -0.01960784, -0.01960784, -0.01960784, -0.01960784, -0.01960784],
        [ 0.07317073,  0.02439024, -0.02439024, -0.02439024, -0.02439024,
         -0.02439024, -0.02439024, -0.02439024, -0.02439024, -0.02439024],
        [ 0.04225352,  0.05633803, -0.01408451, -0.01408451, -0.01408451,
         -0.01408451, -0.01408451, -0.01408451, -0.01408451, -0.01408451],
        [ 0.05882353,  0.03921569, -0.01960784, -0.01960784, -0.01960784,
         -0.01960784, -0.01960784, -0.01960784, -0.01960784, -0.01960784]],
       dtype=float32),
 {})

In [86]:
# Create a batch of random actions, one row per sub-environment. Component j
# of every row is drawn uniformly from [0, action_dims[j]); the upper bounds
# broadcast across the rows.
# The lower bound is the integer 0: randint expects integer bounds, and
# np.zeros() would hand it a float array instead.
actions = np.random.randint(low=0, high=action_dims, size=(num_envs, len(action_dims)))
# A bare `actions` expression in the middle of a cell displays nothing (only
# the last expression of a cell is shown), so print the batch explicitly.
print(f'Actions: {actions}')

# Step every sub-environment with its own row of `actions`. The returned
# states, rewards, dones and truncated flags hold one entry per
# sub-environment; infos is a single dict for the whole batch.
states, rewards, dones, truncated, infos = envs.step(actions)
print(f'States: {states}')
print(f'Rewards: {rewards}')
print(f'Dones: {dones}')
print(f'Truncated: {truncated}')
print(f'Infos: {infos}')

States: [[ 0.04225352  0.01408451 -0.01408451 -0.01408451 -0.01408451  0.01408451
   0.01408451 -0.01408451 -0.01408451  0.01408451]
 [ 0.03278688  0.06557377 -0.01639344 -0.01639344 -0.01639344 -0.01639344
  -0.01639344 -0.01639344 -0.01639344 -0.01639344]
 [ 0.05882353  0.03921569 -0.01960784 -0.01960784 -0.01960784 -0.01960784
  -0.01960784 -0.01960784 -0.01960784 -0.01960784]
 [ 0.02816901  0.02816901 -0.01408451 -0.01408451 -0.01408451 -0.01408451
   0.01408451  0.          0.01408451 -0.01408451]
 [ 0.01960784  0.          0.         -0.01960784  0.01960784 -0.01960784
  -0.01960784  0.01960784  0.         -0.01960784]]
Rewards: [-0.1  49.9  39.45 -2.1  -0.1 ]
Dones: [False  True  True False False]
Truncated: [False False False False False]
Infos: {'final_observation': array([None,
       array([ 0.        ,  0.        , -0.01960784,  0.        ,  0.01960784,
               0.01960784, -0.01960784, -0.01960784,  0.        ,  0.        ]),
       array([ 0.        ,  0.        ,  

In [87]:
# Inspect the sub-environment at index 1 directly through the vector env's
# `envs` list.
# NOTE(review): the step output above reports this sub-environment as done and
# infos carries a 'final_observation' entry for it, while the render shows an
# empty timetable -- presumably the vector env already auto-reset it, so this
# is the start of a new episode rather than the finished one. Confirm.
envs.envs[1].render()

Class 1:
Day 1: [-1 -1 -1 -1]
Day 2: [-1 -1 -1 -1]

Fitness: -6.0, Actions left: 60
Target Hours:
Class 1: [2 4]
