In [1]:

import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random
import matplotlib.pyplot as plt
from sb3_contrib import  RecurrentPPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3 import SAC


In [2]:
import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np
from gymnasium import spaces

class Notreal(gym.Env):
    """ will remove the stoichastic part of the system """
    def __init__(self):
        super(Notreal, self).__init__()
        # State is [y1, y2]
        self.observation_space = spaces.Box(low=0, high=1, shape=(3,), dtype=np.float64)
        
        # Actions are [u1, u2], both in some control range
        
        self.action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
        
                
        
        
        # Time step for numerical integration
        self.dt = 0.1
        
        # Initial values for state variables y1 and y2
        #self.state = np.array([1, 1])  # You  set this based on the problem
        self.state = np.array([1 , 150 , 0])
        
        self.um= .0572
        self.ud=0.0
        self.Kn= 393.1
        self.Ynx=504.1
        self.km=.000016
        self.kd=0.281
        self.ks=178.9
        self.ki=447.1
        self.ksq=23.51
        self.kiq=800
        self.Knp=16.89
        
        #the manipulated variables are light intelfnsity I and inflow rate Fn
        
        #values for normalizatrion
        self.action_low = np.array([0, 0])
        self.action_high = np.array([400, 40])
        
        
        self.max_CX=1000
        self.max_CN=10000
        self.max_CQ=100
        
        
        
        self.max_episodes=100000
        self.cureent_episode=0        
        
        
        
    def denormalize_action(self, action) -> np.ndarray:
        """ Convert the normalized action [-1, 1] to the original scale [0, 400] and [0, 40] """
        
        return 0.5 * (action + 1) * (self.action_high - self.action_low) + self.action_low

            
        
    def normalize_state(self, state) -> np.ndarray:
        """ Convert the state to a normalized form """
        return state / np.array([self.max_CX, self.max_CN, self.max_CQ])
        
    def dernomalize_state(self, state) -> np.ndarray:
        """ Convert the state to a normalized form """
        return state * np.array([self.max_CX, self.max_CN, self.max_CQ])
                
        
        
        
    def reset(self, seed=None, options=None):
        # Reset the state to initial values
        self.state = np.array([1, 150, 0])
        self.cureent_episode = 0
        
        # Return the observation and an empty info dictionary
        return self.state, {}

        
        #change self. state ?

    


    def step(self, action):
        
        L, Fn =  self.denormalize_action(action)
        
        cx,cn,cq = self.dernomalize_state( self.state)
        
        
        dt = self.dt
        u_m=self.um
        k_s=self.ks 
        K_N=self.Kn
        k_i=self.ki
        x=cx
        n=cn 
        u_d=self.ud
        Y_nx=self.Ynx
        k_m=self.km
        k_sq=self.ksq
        k_iq=self.kiq
        k_d=self.kd
        q=cq
        K_Np=self.Knp


        
                

        dx   = u_m * L/(L+k_s+L**2./k_i) * x * n/(n+K_N) - u_d*x
        dn   = - Y_nx*u_m* L/(L+k_s+L**2./k_i) * x * n/(n+K_N)+ Fn
        dq   = (k_m * L/(L+k_sq+L**2./k_iq) * x - k_d * q/(n+K_Np)) * (np.sign(500. - n)+1)/2 * (np.sign(x - 10.0)+1)/2       
         
        # Update states
        
        cx+=(dx * dt)
        cn+=(dn * dt)
        cq+=(dq * dt)
        
        
                
        #so what is the sign function ? 
        
        # Ensure non-negative concentrations
        #y1 = max(0, y1)
        #y2 = max(0, y2)
        
        self.state =self.normalize_state(np.array([cx,cn,cq]))
        
        # Reward is based on maximizing y2
        reward = cq * 100
        
        # Done if the system has run too long or if values go out of bounds
        done = False
        
        if cn<0  or cx < 0 or cq < 0 :
            reward = -1000
            done = True
        
        self.cureent_episode+=1
        if self.cureent_episode>=self.max_episodes:
            done = True
        
        
        return self.state, reward, done,False, {}
    

    def render(self):
        # Optional rendering for visualization, not essential
        print(self.state)

In [3]:
env_test=Notreal()


In [4]:
obs = env_test.reset()
obs[0]

array([  1, 150,   0])

In [7]:
from stable_baselines3 import PPO

In [9]:
path=r"PPOonNotrealV1.zip"
model_trained=PPO.load(path,env=env_test)
cx_values=[]
cN_values=[]
cQ_values=[]
u1_values=[]
time=[]
u2_values=[]
rewards=[]
obs = env_test.reset()[0]

for t in range(10000):
    action, _states = model_trained.predict(obs)
    obs, reward, done, _, _ = env_test.step(action)
    rewards.append(reward)
    cx_values.append(obs[0])
    cN_values.append(obs[1])
    cQ_values.append(obs[2])
    u1_values.append(action[1])
    u2_values.append(action[0])
    time.append(t)


# Plot the results
import matplotlib.pyplot as plt

# Assuming you have the rewards data in a variable called rewards
# Create a figure and three subplots
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True, figsize=(10, 12))

# Plot y1 and y2 in the first subplot
ax1.plot(time, cx_values, label='x')
ax1.plot(time, cN_values, label='n')
ax1.plot(time, cQ_values, label='q')

ax1.set_ylabel('y1 and y2 Values')
ax1.legend()

# Plot u1 and u2 in the second subplot
ax2.plot(time, u1_values, label='u1', color='orange')
ax2.plot(time, u2_values, label='u2', color='red')
ax2.set_ylabel('u1 and u2 Values')
ax2.legend()

# Plot rewards in the third subplot
ax3.plot(time, rewards, label='Rewards', color='green')
ax3.set_xlabel('Time')
ax3.set_ylabel('Rewards')
ax3.legend()

# Show the plot
plt.show()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


ValueError: Action spaces do not match: Box([120.   0.], [400.  40.], (2,), float32) != Box(-1.0, 1.0, (2,), float32)

In [21]:
cQ_values

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0