In [1]:
import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np
from gymnasium import spaces

class Notreal(gym.Env):
    """Deterministic version of the bioprocess environment (stochastic part removed).

    The state is ``[cx, cn, cq]`` and is advanced by one explicit-Euler step of
    size ``self.dt`` on every call to :meth:`step`. The action is ``[L, Fn]``;
    per the original notes the manipulated variables are the light intensity
    and the inflow rate.

    The environment follows the Gymnasium API: :meth:`reset` returns
    ``(observation, info)`` and :meth:`step` returns
    ``(observation, reward, terminated, truncated, info)``.
    """

    def __init__(self):
        super().__init__()
        # State is [cx, cn, cq]
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(3,), dtype=np.float64)

        # Actions are [L, Fn], both in the same control range
        self.action_space = spaces.Box(low=0, high=1000, shape=(2,), dtype=np.float64)

        # Time step for numerical integration
        self.dt = 0.01

        # Start from the same 3-component state that reset() uses, so that
        # step() is usable (and consistent with observation_space) even if
        # reset() has not been called yet.
        self.state = self._initial_state()

        # Model parameters used by the right-hand side in step()
        self.um = .0572
        self.ud = 0.0
        self.Kn = 393.1
        self.Ynx = 504.1
        # NOTE(review): THIRD_ENV_WITH_DISTURBENCE in this notebook uses 0.00016
        # for the corresponding coefficient -- confirm which value is intended.
        self.km = .000016
        self.kd = 0.281
        self.ks = 178.9
        self.ki = 447.1
        self.ksq = 23.51
        self.kiq = 800
        self.Knp = 16.89

    @staticmethod
    def _initial_state():
        """Return a fresh float64 copy of the initial state ``[cx, cn, cq]``."""
        # Original author's note: the initial value can be changed; a bigger
        # value helps the model, but 0 is optimal.
        return np.array([1.0, 150.0, 0.0], dtype=np.float64)

    def reset(self, seed=None, options=None):
        """Reset the state to its initial value.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset`` (the dynamics
                themselves are deterministic).
            options: Unused; accepted for Gymnasium API compatibility.

        Returns:
            ``(observation, info)`` where ``observation`` is the initial state
            and ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.state = self._initial_state()
        return self.state, {}

    def step(self, action):
        """Advance the model by one Euler step.

        Args:
            action: Two values ``[L, Fn]``.

        Returns:
            ``(state, reward, terminated, truncated, info)``. The reward is
            ``10 * cq``; if any state component becomes negative the reward is
            ``-1000`` and the episode terminates. ``truncated`` is always
            ``False`` and ``info`` is empty.
        """
        # Work in float64 regardless of the dtype the agent hands us.
        L, Fn = np.asarray(action, dtype=np.float64)

        cx, cn, cq = self.state
        dt = self.dt
        u_m = self.um
        u_d = self.ud
        K_N = self.Kn
        Y_nx = self.Ynx
        k_m = self.km
        k_d = self.kd
        k_s = self.ks
        k_i = self.ki
        k_sq = self.ksq
        k_iq = self.kiq
        K_Np = self.Knp
        x = cx
        n = cn
        q = cq

        dx = u_m * L/(L+k_s+L**2./k_i) * x * n/(n+K_N) - u_d*x
        dn = - Y_nx*u_m* L/(L+k_s+L**2./k_i) * x * n/(n+K_N) + Fn
        # (np.sign(z) + 1) / 2 acts as a switch: 1 for z > 0, 0.5 for z == 0 and
        # 0 for z < 0. dq is therefore only active while n < 500 and x > 10.
        dq = (k_m * L/(L+k_sq+L**2./k_iq) * x - k_d * q/(n+K_Np)) * (np.sign(500. - n)+1)/2 * (np.sign(x - 10.0)+1)/2

        # Explicit Euler update of the states
        cx += dx * dt
        cn += dn * dt
        cq += dq * dt

        self.state = np.array([cx, cn, cq], dtype=np.float64)

        # Reward is based on maximizing cq
        reward = float(cq * 10)

        # Terminate with a penalty if any value goes out of bounds (negative)
        terminated = False
        if cn < 0 or cx < 0 or cq < 0:
            reward = -1000
            terminated = True

        return self.state, reward, terminated, False, {}

    def render(self):
        """Print the current state (optional, for quick inspection only)."""
        print(self.state)

# With added disturbances as stated in the paper; re-check the equations.

In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np

class THIRD_ENV_WITH_DISTURBENCE(gym.Env):
    """Fixed-length batch environment with process disturbances and measurement noise.

    The state holds the concentrations of biomass (C_X), nitrate (C_N) and
    product (C_q). The action is ``[I, F_N]``: light intensity and inflow rate.
    Each step adds a sinusoidal-plus-Gaussian disturbance to the state update,
    and every returned observation carries Gaussian measurement noise.

    The environment follows the Gymnasium API: :meth:`reset` returns
    ``(observation, info)`` and :meth:`step` returns
    ``(observation, reward, terminated, truncated, info)``. All randomness is
    drawn from ``self.np_random`` so that ``reset(seed=...)`` makes episodes
    reproducible.

    NOTE(review): the dynamics in :meth:`step` differ from the ``Notreal`` model
    in this notebook (no ``u_m`` factor on the growth term, 0.0572 used as a
    decay rate, no ``L**2/k_i`` inhibition terms, no ``dt`` scaling). They are
    kept numerically as written -- re-check against the paper.

    NOTE(review): observations can be slightly negative because of measurement
    noise (e.g. C_q starts at 0), while ``observation_space`` declares
    ``low=0`` -- confirm whether the bound or the noise model should change.
    """

    def __init__(self):
        super().__init__()

        # State space: concentrations of biomass (C_X), nitrate (C_N), and product (C_q)
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(3,), dtype=np.float32)

        # Action space: light intensity (I) and inflow rate (F_N)
        self.action_space = spaces.Box(
            low=np.array([0, 0], dtype=np.float32),
            high=np.array([400, 40], dtype=np.float32),
            dtype=np.float32,
        )

        # The state is created by reset()
        self.state = None
        self.time_step = 0
        self.max_time_steps = 10  # Number of time steps in the batch

        # Standard deviations of the additive disturbance and measurement noise
        self.sigma_d = np.array([4e-3, 1.0, 1e-7])
        self.sigma_n = np.array([4e-4, 0.1, 1e-8])

        # Quadratic penalty weights on [I, F_N] for non-final steps
        self.penalty_coefficients = np.array([3.125e-8, 3.125e-6])

    def _observe(self):
        """Return the current state plus measurement noise, as float32."""
        noisy_state = self.state + self.np_random.normal(0.0, self.sigma_n)
        return noisy_state.astype(np.float32)

    def reset(self, seed=None, options=None):
        """Start a new batch.

        Args:
            seed: Optional seed for ``self.np_random`` (forwarded to
                ``gym.Env.reset``).
            options: Unused; accepted for Gymnasium API compatibility.

        Returns:
            ``(observation, info)``: the noisy initial state and an empty dict.
        """
        super().reset(seed=seed)
        # Initial concentrations; kept in float64 internally to avoid
        # accumulating rounding error, observations are cast to float32.
        self.state = np.array([1.0, 150.0, 0.0], dtype=np.float64)
        self.time_step = 0
        return self._observe(), {}

    def step(self, action):
        """Apply one control interval.

        Args:
            action: Two values ``[I, F_N]``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. On every
            step but the last the reward is the negative weighted sum of squared
            actions; on the last step it is the true (noise-free) C_q.
            ``terminated`` becomes ``True`` after ``max_time_steps`` steps;
            ``truncated`` is always ``False``.

        Raises:
            RuntimeError: if called before :meth:`reset`.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        action = np.asarray(action, dtype=np.float64)
        I, F_N = action
        C_X, C_N, C_q = self.state

        # System dynamics (see the NOTE(review) in the class docstring)
        dC_X = (I / (I + 178.9)) * C_X * (C_N / (C_N + 393.1)) - 0.0572 * C_X
        dC_N = -504.1 * (I / (I + 178.9)) * C_X * (C_N / (C_N + 393.1)) + F_N
        # Product is only formed while C_N <= 500 and C_X >= 10
        if C_N <= 500 and C_X >= 10:
            dC_q = 0.00016 * (I / (I + 23.51)) * C_X * (C_N / (C_N + 393.1))
        else:
            dC_q = 0.0

        # Additive disturbance: deterministic sinusoid plus Gaussian noise
        disturbance = (
            np.sin(self.time_step) * self.sigma_d
            + self.np_random.normal(0.0, self.sigma_d)
        )

        # Update state with dynamics and disturbances
        self.state = self.state + np.array([dC_X, dC_N, dC_q]) + disturbance

        # Add measurement noise when observing the state
        observation = self._observe()

        if self.time_step < self.max_time_steps - 1:
            # Penalise control effort
            reward = -float(np.dot(action, self.penalty_coefficients * action))
        else:
            # Final product concentration (C_q)
            reward = float(self.state[2])

        self.time_step += 1
        terminated = self.time_step >= self.max_time_steps

        return observation, reward, terminated, False, {}

    def render(self):
        """No rendering is implemented."""
        pass
