This Jupyter notebook teaches you how to interact with the real neuronal networks (static state function).

Before you run this code, set your group_id and password in "auth.py". Ask your mentor for your password/id.

In [1]:
import gymnasium as gym
import numpy as np

# Make the parent of the working directory importable, so that the local
# packages imported below can be resolved from there.
import sys
from pathlib import Path

current_dir = Path().resolve()   # absolute path of the current working directory
root_dir = current_dir.parent    # one level up
root_dir_entry = str(root_dir)   # sys.path stores plain strings
if root_dir_entry not in sys.path:
    # Guarded so that re-running this cell does not add a duplicate entry
    sys.path.insert(0, root_dir_entry)

from Gyms.RealNetworkSync import RealNetworkSync

In [2]:
# Define the size of the state and action spaces and select the circuit to use
state_dim   = 4   # Dimension of reduced state space
action_dim  = 5   # Number of stimuli in action space
circuit_id  = 0   # Each group has 4 biological/simulated circuits. You choose here which one you want to use. Must be in {0,1,2,3}

# Enforce the constraint documented above, so that an invalid choice is
# reported right here with a clear message.
VALID_CIRCUIT_IDS = (0, 1, 2, 3)
if circuit_id not in VALID_CIRCUIT_IDS:
    raise ValueError(
        f"circuit_id must be one of {VALID_CIRCUIT_IDS}, got {circuit_id!r}"
    )

In [3]:
# Build the environment for the selected circuit, then reset it to obtain the initial state
from Reward.TrainingReward import TrainingReward
env = RealNetworkSync(
    action_dim=action_dim,
    state_dim=state_dim,
    circuit_id=circuit_id,
)
state, _ = env.reset()
# Show the current state and reward; both are 0 directly after initialization
env.render()

Host/Port open and accessable
Current state: [0. 0. 0. 0.], Reward: 0


In [4]:
from Algorithms.MultiArmedBandit import MABAgent  # Update path if needed

# Hyperparameters of the multi-armed-bandit (MAB) agent, collected in one place
mab_settings = dict(
    n_actions=125,   # Number of actions, should be a power of 5
    # NOTE(review): originally labelled "optimistic initial values"; 0.0 is only
    # optimistic if rewards cannot exceed 0 - confirm against the reward function.
    initial_q=0.0,   # Initial value passed to the agent for its estimates
    alpha=0.1,       # Learning rate (constant step-size)
    epsilon=0.9,     # Initial exploration rate (90% random actions)
)

# Initialize MAB agent
agent = MABAgent(**mab_settings)

In [5]:
from tqdm import tqdm
# Example code: stimulate the network 100 times with actions selected by the MAB agent,
# feed every reward back into the agent, and accumulate the rewards so that the
# average reward can be computed afterwards.

rewards_over_time = []   # reward of every single stimulation, in order
total_reward = 0         # running sum of all rewards
action_count = 0         # number of stimulations performed

for _ in range(100):
    # Ask the agent for an action index and map it to the action handed to the environment
    action_idx = agent.select_action()
    action = agent.action_map(action_idx)
    print(f"Stimulate with action: {action}")

    # Apply the action to the network and read back the outcome
    state, reward, terminated, truncated, info = env.step(action)

    # Bookkeeping for the reward statistics
    rewards_over_time.append(reward)
    total_reward += reward
    action_count += 1

    # Let the agent update its estimate for the chosen action
    agent.update(action_idx, reward)

    # Print information about this step, followed by a separator line
    print(
        f"Info: {info}",
        f"State: {state}, Reward: {reward}",
        "-----------------------------",
        sep="\n",
    )

Stimulate with action: [0 0 1 0 0]
Info: {'spikes': array([ 0.4,  0.5,  1.5,  2.2,  4. ,  4.1,  4.6,  6. ,  6. ,  8.2,  8.5,
       10.4, 11. , 11.3, 12.1, 12.1, 13.5, 16.6, 16.9, 17.9]), 'elecs': array([1., 2., 3., 3., 0., 1., 2., 2., 0., 3., 0., 1., 2., 2., 2., 0., 3.,
       1., 0., 3.]), 'action': array([0, 0, 1, 0, 0]), 'missed_cyc': 0, 'stim_id': 4313, 'simulated': False, 'comment': 'none'}
State: [-0.5        -0.06324555 -0.2         0.        ], Reward: 3
-----------------------------
Stimulate with action: [0 0 1 4 2]
Info: {'spikes': array([], dtype=float64), 'elecs': array([], dtype=float64), 'action': array([0, 0, 1, 4, 2]), 'missed_cyc': 0, 'stim_id': 4314, 'simulated': False, 'comment': 'none'}
State: [0. 0. 0. 0.], Reward: 0
-----------------------------
Stimulate with action: [0 0 3 3 0]
Info: {'spikes': array([ 1.7,  1.8,  2.8,  3.1,  3.3,  4. ,  4. ,  5.3,  5.7,  7.6,  8.1,
        8.2,  8.6, 11. , 13. , 17.6]), 'elecs': array([1., 1., 3., 0., 2., 2., 0., 3., 0., 1., 

In [6]:
# Mean reward per stimulation over the run above
average_reward = total_reward / action_count
print(f"Average reward: {average_reward}")

Average reward: 6.41
