This Jupyter notebook gives a simple example of how to use the Simulated Network (asynchronous) environment. The environment is not meant as a training ground for your algorithms; it only lets you check whether your algorithm can be executed.

In [2]:
pip install gymnasium

Collecting gymnasium
  Downloading gymnasium-1.1.1-py3-none-any.whl.metadata (9.4 kB)
Collecting cloudpickle>=1.2.0 (from gymnasium)
  Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.1.1-py3-none-any.whl (965 kB)
   ---------------------------------------- 0.0/965.4 kB ? eta -:--:--
   ---------------------------------------- 965.4/965.4 kB 9.0 MB/s eta 0:00:00
Downloading cloudpickle-3.1.1-py3-none-any.whl (20 kB)
Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, cloudpickle, gymnasium
Successfully installed cloudpickle-3.1.1 farama-notifications-0.0.4 gymnasium-1.1.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
import gymnasium as gym
import numpy as np

# Make the parent of the current working directory importable, so that the
# project-local `Gyms` package can be found by the import below.
import sys
from pathlib import Path

current_dir = Path().resolve()
root_dir = current_dir.parent
root_dir_str = str(root_dir)
# Only prepend the path once, so re-running the cell does not add duplicates
if root_dir_str not in sys.path:
    sys.path.insert(0, root_dir_str)

from Gyms.SimulatedNetwork import SimulatedNetwork

In [4]:
# Sizes of the state and action spaces handed to the environment
state_dim = 4   # dimension of the reduced state space
action_dim = 2  # number of stimuli per action; each stimulus takes a value in {0, 1, 2, 3, 4}

In [5]:
# Build the simulated environment and reset it to obtain the initial state
env = SimulatedNetwork(action_dim=action_dim, state_dim=state_dim)
state, _ = env.reset()
# render() prints the current state and reward; both are 0 right after initialization
env.render()

Current state: [0. 0. 0. 0.], Reward: 0


In [6]:
# Display the environment's action space
env.action_space

MultiDiscrete([5 5])

In [7]:
# Display the environment's observation (state) space
env.observation_space

Box(-1.0, 1.0, (4,), float32)

In [8]:
# Draw a random action from the action space and display it
action = env.action_space.sample()
action
# The sampled action can then be applied to the environment with:
# state, reward, terminated, truncated, info = env.step(action)

array([3, 1])

In [9]:
# Shape of the sampled action array
action.shape

(2,)

In [10]:
# Apply the sampled action; the displayed result is the tuple
# (state, reward, terminated, truncated, info)
env.step(action)

(array([-0.5       , -0.08075128, -0.42857785,  0.        ]),
 4,
 False,
 False,
 {'spikes': array([ 2.88662547,  3.04964471,  7.641619  , 10.01748852, 12.27443785,
         13.79736249]),
  'elecs': array([1, 2, 3, 0, 1, 2]),
  'action': array([3, 1]),
  'missed_cyc': 0,
  'stim_id': 1,
  'simulated': True,
  'comment': 'none'})

In [11]:
# Example: stimulate the network repeatedly with randomly sampled actions
# while tracking the running average of the received reward.

n_steps = 100  # number of stimulations to perform

total_reward = 0
action_count = 0

for _ in range(n_steps):
    # For simplicity, choose a random action
    action = env.action_space.sample()
    print(f"Stimulate with action: {action}")

    state, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    action_count += 1

    print(f"Reward: {reward}, Avg. reward: {total_reward/action_count}")
    print(f"State: {state}")

    # For a more complete printout of each step, uncomment:
    # env.render()

    print("-----------------------------")

    # Gymnasium convention: once an episode has ended (terminated or truncated),
    # the environment has to be reset before it is stepped again.
    if terminated or truncated:
        state, _reset_info = env.reset()

Stimulate with action: [0 4]
Reward: -1, Avg. reward: -1.0
State: [ 0.5        -0.42282242  0.19129197  0.        ]
-----------------------------
Stimulate with action: [0 3]
Reward: 0, Avg. reward: -0.5
State: [ 0.        -0.3030573  0.         0.       ]
-----------------------------
Stimulate with action: [0 4]
Reward: 1, Avg. reward: 0.0
State: [ 0.5         0.         -0.44462879  0.        ]
-----------------------------
Stimulate with action: [1 0]
Reward: 0, Avg. reward: 0.0
State: [-1.         -0.43428782  0.          0.        ]
-----------------------------
Stimulate with action: [2 0]
Reward: 0, Avg. reward: 0.0
State: [-0.5        -0.47804017  0.          0.        ]
-----------------------------
Stimulate with action: [3 2]
Reward: 1, Avg. reward: 0.16666666666666666
State: [ 0.         -0.40090203  0.          0.        ]
-----------------------------
Stimulate with action: [1 0]
Reward: 0, Avg. reward: 0.14285714285714285
State: [-1.         -0.44515691  0.          0. 

In [12]:
# Apply the most recently sampled action once more and keep the unpacked
# results so they can be inspected in the following cells
state, reward, terminated, truncated, info = env.step(action)

In [13]:
# Show the state and reward returned by the step above
state,reward

(array([0.5       , 0.        , 0.37476053, 0.        ]), -2)

In [14]:
# Show the info dictionary returned by the same step
info

{'spikes': array([ 0.82112395,  4.50709643,  8.01823285,  8.09481998, 10.91126747]),
 'elecs': array([3, 1, 0, 2, 1]),
 'action': array([2, 1]),
 'missed_cyc': 0,
 'stim_id': 102,
 'simulated': True,
 'comment': 'none'}

In [15]:
# Average reward over the steps of the stimulation loop; total_reward and
# action_count are only updated inside that loop, so the single extra step
# taken afterwards is not included.
print(f"Average reward: {total_reward/action_count}")

Average reward: 0.34
