In [30]:
import gym
from gym import spaces
import numpy as np
import math

def sigmoid(x):
  """Return the logistic function 1 / (1 + exp(-x)) of a scalar ``x``.

  The computation is split on the sign of ``x`` so that ``math.exp`` is only
  ever called with a non-positive argument. The naive form calls
  ``math.exp(-x)``, which raises ``OverflowError`` for large negative ``x``.
  """
  if x >= 0:
    return 1 / (1 + math.exp(-x))
  # For negative x use the algebraically equivalent exp(x) / (1 + exp(x));
  # exp(x) underflows harmlessly to 0.0 instead of overflowing.
  exp_x = math.exp(x)
  return exp_x / (1 + exp_x)

In [22]:
# Draw five samples from a MultiBinary space of size 3 to inspect the action encoding.
space = spaces.MultiBinary(3)
print(list(space.sample() for _ in range(5)))

[array([0, 1, 0], dtype=int8), array([1, 0, 1], dtype=int8), array([1, 1, 1], dtype=int8), array([0, 0, 1], dtype=int8), array([1, 1, 0], dtype=int8)]


In [53]:
class MigraineEnv(gym.Env):
    """Two-state migraine environment controlled by binary trigger actions.

    The observation is a single integer: 0 (no migraine) or 1 (migraine).
    An action is a binary vector of length ``num_triggers``. The probability
    of being in the migraine state at the next step is

        sigmoid(state_coefs * state + dot(action_coefs, action) + shift)

    and is precomputed for every (action, state) pair in ``self.T``.
    The reward is always 0 and an episode ends after ``horizon`` steps.
    """

    def __init__(self, 
                 num_triggers: int = 2,
                 horizon: int = 20,
                 state_coefs: float = 1.0,
                 action_coefs: np.array = np.array([1.0, 2.0]),
                 shift: float = 0.0):
        """Build the spaces and the transition table.

        Args:
            num_triggers: Length of the binary action vector.
            horizon: Number of steps after which ``step`` reports done.
            state_coefs: Weight of the current state in the transition logit.
            action_coefs: One weight per trigger (length ``num_triggers``).
            shift: Constant offset added to the transition logit.

        Raises:
            ValueError: If ``action_coefs`` does not have exactly
                ``num_triggers`` entries.
        """
        # Copy into a fresh float array so the shared default argument (or a
        # caller-owned list/array) is never aliased by the instance.
        coefs = np.atleast_1d(np.array(action_coefs, dtype=float))
        if coefs.shape != (num_triggers,):
            raise ValueError(
                f"action_coefs must have {num_triggers} entries, got shape {coefs.shape}"
            )

        self.num_triggers = num_triggers
        self.horizon = horizon
        self.timestep = 0
        self.state_coefs = state_coefs
        self.action_coefs = coefs
        self.shift = shift
        self.observation_space = spaces.Discrete(2) # 0: no migraine, 1: migraine
        self.action_space = spaces.MultiBinary(num_triggers) # binary representation
        self.state = int(self.observation_space.sample())
        
        # Construct transition matrix
        self.T = np.zeros((2, 2**num_triggers, 2)) # (next_state, action, state)
        for a in range(2**num_triggers):
            # The binary encoding depends only on the action, so compute it once per action.
            binary_action = self._int_to_binary(a, self.num_triggers)
            action_term = np.dot(coefs, binary_action)
            for s in range(2):
                self.T[1, a, s] = sigmoid(float(state_coefs*s + action_term + shift))
                self.T[0, a, s] = 1 - self.T[1, a, s]

    def reset(self):
        """Start a new episode: sample an initial state, zero the step counter, return the state."""
        self.state = int(self.observation_space.sample())
        self.timestep = 0
        return self.state

    def step(self, action: np.array):
        """Advance one step.

        Args:
            action: Binary vector of length ``num_triggers`` (most significant bit first).

        Returns:
            A 3-tuple ``(state, reward, done)`` where ``state`` is an int in
            {0, 1}, ``reward`` is always 0, and ``done`` is True once
            ``horizon`` steps have been taken.
        """
        int_action = self._binary_to_int(action)
        prob = float(self.T[1, int_action, self.state])
        # Keep the state a plain int. Storing the shape-(1,) array returned by
        # ``np.random.choice(..., size=1)`` would make the next lookup in
        # ``self.T`` return an array, turning ``p`` into a 2-D list that
        # ``np.random.choice`` rejects.
        self.state = int(np.random.choice(2, p=[1.0 - prob, prob]))
        self.timestep += 1
        return self.state, 0, self.timestep >= self.horizon

    def render(self, mode='human'):
        """Rendering is not implemented; this is a no-op."""
        pass
    
    def _int_to_binary(self, input, desired_len):
        """Return ``input`` as a length-``desired_len`` 0/1 integer array, most significant bit first."""
        binary_representation = [int(x) for x in bin(input)[2:]]
        binary_array = np.zeros(desired_len, dtype=int)
        # Right-align the digits so shorter numbers are left-padded with zeros.
        binary_array[-len(binary_representation):] = binary_representation
        return binary_array
    
    def _binary_to_int(self, input):
        """Interpret a sequence of bits (most significant first) as a non-negative integer.

        Each bit is cast with ``int`` first so that float or bool entries
        (e.g. ``1.0`` or ``True``) are accepted as well as integer ones.
        """
        binary_string = ''.join(str(int(bit)) for bit in input)
        int_value = int(binary_string, 2)
        return int_value
        

In [54]:
# Instantiate the environment for a quick manual check: two triggers, ten-step episodes.
env = MigraineEnv(
    num_triggers=2,
    horizon=10,
    state_coefs=1.0,
    action_coefs=[1.0, 2.0],
    shift=0,
)

In [None]:
# Scaffold for collecting rollouts: one pair of per-trajectory buffers is
# created on each of the `num_trajs` iterations (nothing is filled in yet).
num_trajs = 10
dataset = {'observations': [], 'actions': []}
for n in range(num_trajs):
    traj_obs = []
    traj_act = []
    

In [55]:
# Start a fresh episode; the returned initial observation is shown as the cell output.
env.reset()

0