In [12]:
"""
CSTR (Continuously Stirred Tank Reactor) Environment Demo
========================================================

This demo showcases the CSTR environment from pc-gym, which simulates
a continuously stirred tank reactor for chemical process control.

The CSTR is a well-established model that's thoroughly tested and stable,
making it perfect for learning and experimentation.

State Variables (3 total):
- Ca: Concentration of reactant A (mol/L)
- T: Temperature (K)
- Cb: Concentration of reactant B (mol/L)

Action Variables (1 total):
- Tc: Coolant temperature (K)

Observations (3 total):
- Ca, T, Cb: Concentration A, Temperature, Concentration B
"""

import numpy as np
from pcgym import make_env
from rl.cstr.optimization.visualization import (
    plot_state_variables,
    plot_control_actions,
    plot_reward_evolution)
from rl.cstr.optimization.load_config_files import load_and_create_env_params
from rl.cstr.optimization.base_state_builder import denormalize_observations
from rl.cstr.optimization.base_action_adapter import denormalize_actions

In [2]:
# ============================================================================
# ENVIRONMENT SETUP
# ============================================================================

# Load configuration from YAML file
config_path = "/workspace/general_projects/rl_cstr_optimization/config/environments/cstr_environment.yaml"
env_params = load_and_create_env_params(config_path)

# Extract action space bounds from env_params for use in the notebook
a_space = env_params['a_space']
o_space = env_params['o_space']
nsteps = env_params['N']

print(f"Configuration loaded: {env_params}")
print(f"Action space: {a_space}")
print(f"Observation space: {o_space}")
print(f"Number of steps: {nsteps}")


Configuration loaded: {'N': 30, 'tsim': 26, 'SP': {'Ca': [0.85, 0.85, 0.85, 0.9, 0.9, 0.9, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87, 0.87]}, 'o_space': {'low': array([  0.7, 300. ,   0.8]), 'high': array([  1. , 350. ,   0.9])}, 'a_space': {'low': array([295]), 'high': array([302])}, 'x0': array([  0.8, 330. ,   0.8]), 'r_scale': {'ca': 1000}, 'model': 'cstr', 'normalise_a': True, 'normalise_o': True, 'noise': True, 'integration_method': 'casadi', 'noise_percentage': 0.001}
Action space: {'low': array([295]), 'high': array([302])}
Observation space: {'low': array([  0.7, 300. ,   0.8]), 'high': array([  1. , 350. ,   0.9])}
Number of steps: 30


In [3]:
# Create the CSTR environment with proper parameters
# The environment simulates a continuously stirred tank reactor for chemical process control
env = make_env(env_params)

# Reset the environment to get initial state
# This returns the initial observation (concentrations at reactor outlet)
initial_observation, initial_info = env.reset()

print("=" * 60)
print(f"CSTR REACTOR DEMO - {nsteps} STEP SIMULATION")
print("=" * 60)
print(f"Initial observation (normalized): {initial_observation}")

# Denormalize the initial observation for better understanding
initial_real = denormalize_observations(initial_observation, o_space)

print(f"Initial observation (real values):")
print(f"  Ca: {initial_real[0]}")
print(f"  T:  {initial_real[1]}")
print(f"  Cb: {initial_real[2]}")

print(f"Observation shape: {initial_observation.shape}")
print(f"Action space: {env.action_space}")
print(f"Observation space: {env.observation_space}")
print("=" * 60)

CSTR REACTOR DEMO - 30 STEP SIMULATION
Initial observation (normalized): [-0.33333328  0.2        -1.00000024]
Initial observation (real values):
  Ca: 0.8000000079472857
  T:  330.0
  Cb: 0.7999999880790669
Observation shape: (3,)
Action space: Box(-1.0, 1.0, (1,), float32)
Observation space: Box(-1.0, 1.0, (3,), float32)


  gym.logger.warn(
  gym.logger.warn(


In [4]:
# ============================================================================
# DATA STORAGE FOR ANALYSIS
# ============================================================================

# Lists to store data for visualization and analysis
observations = [initial_observation]  # Store all observations (concentrations at outlet)
actions = []       # Store all actions taken
rewards = []       # Store all rewards received
states = []        # Store full state information if available
denorm_actions = [] # Store denormalized actions
denorm_observations = [] # Store denormalized observations

In [5]:
# ============================================================================
# MAIN SIMULATION LOOP - nsteps STEPS
# ============================================================================

print(f"\nStarting {nsteps}-step simulation...")
print("-" * 40)

for step in range(nsteps):
    print(f"\nStep {step + 1}/{nsteps}:")

    # ========================================================================
    # ACTION SELECTION
    # ========================================================================
    
    # For this demo, we'll use a simple strategy:
    # - Moderate coolant temperature control
    # - Stay within the safe operating range
    
    # Since actions are normalized, we need to provide values between 0 and 1
    # These will be automatically scaled to the actual bounds defined in env_params
    action = np.array([
        0.5,    # Tc: Coolant temperature (normalized) - moderate value
    ])
    
    # Denormalize: actual_value = low + (normalized_value * (high - low))
    denorm_action = denormalize_actions(action, a_space)
    
    
    # ========================================================================
    # ENVIRONMENT STEP
    # ========================================================================
    
    # Execute the action in the environment
    # This advances the simulation by one time step
    # Returns: new_observation, reward, terminated, truncated, info
    observation, reward, terminated, truncated, info = env.step(action)
    
    # Denormalize observation for better understanding
    denorm_observation = denormalize_observations(observation, o_space)

    print(f" Normalized action: {action}")
    print(f" Denormalized action: {denorm_action}")
    print(f"  New observation (normalized): {observation}")
    print(f"  New observation (real values):")
    print(f"    Ca: {denorm_observation[0]}")
    print(f"    T:  {denorm_observation[1]}")
    print(f"    Cb: {denorm_observation[2]}")
    print(f"  Reward: {reward:.4f}")
    print(f"  Terminated: {terminated}")
    print(f"  Truncated: {truncated}")
    
    # ========================================================================
    # DATA STORAGE
    # ========================================================================
    
    # Store the data for later analysis
    observations.append(observation.copy())
    actions.append(action.copy())
    rewards.append(reward)
    denorm_actions.append(denorm_action.copy())
    denorm_observations.append(denorm_observation.copy())
    
    # ========================================================================
    # TERMINATION CHECK
    # ========================================================================
    
    # Check if the episode has ended
    if terminated or truncated:
        print(f"  Episode ended at step {step + 1}")
        break



Starting 30-step simulation...
----------------------------------------

Step 1/30:
 Normalized action: [0.5]
 Denormalized action: 298.5
  New observation (normalized): [-1.39431738e-01  4.27802928e-02  1.19209332e-07]
  New observation (real values):
    Ca: 0.8290852392990233
    T:  326.06950731944204
    Cb: 0.8500000059604667
  Reward: -0.0004
  Terminated: False
  Truncated: False

Step 2/30:
 Normalized action: [0.5]
 Denormalized action: 298.5
  New observation (normalized): [ 2.29272865e-02 -4.62370631e-03  1.19209332e-07]
  New observation (real values):
    Ca: 0.853439092977454
    T:  324.8844073421685
    Cb: 0.8500000059604667
  Reward: -0.0000
  Terminated: False
  Truncated: False

Step 3/30:
 Normalized action: [0.5]
 Denormalized action: 298.5
  New observation (normalized): [1.09029591e-01 2.90935272e-03 1.19209332e-07]
  New observation (real values):
    Ca: 0.8663544385765342
    T:  325.0727338181229
    Cb: 0.8500000059604667
  Reward: -0.0011
  Terminated: F

In [6]:
# ============================================================================
# SIMULATION COMPLETE - ANALYSIS AND VISUALIZATION
# ============================================================================

print("\n" + "=" * 60)
print("SIMULATION COMPLETE - ANALYSIS")
print("=" * 60)

# Convert lists to numpy arrays for easier analysis
observations = np.array(observations)
actions = np.array(actions)
rewards = np.array(rewards)
denorm_actions = np.array(denorm_actions)
denorm_observations = np.array(denorm_observations)

print(f"Total steps completed: {len(observations)}")
print(f"Average reward: {np.mean(rewards):.4f}")
print(f"Total reward: {np.sum(rewards):.4f}")
print(f"Denormalized actions: {denorm_actions}")
print(f"Denormalized observations: {denorm_observations}")


SIMULATION COMPLETE - ANALYSIS
Total steps completed: 30
Average reward: -0.0001
Total reward: -0.0034
Denormalized actions: [298.5 298.5 298.5 298.5 298.5 298.5 298.5 298.5 298.5 298.5 298.5 298.5
 298.5 298.5 298.5 298.5 298.5 298.5 298.5 298.5 298.5 298.5 298.5 298.5
 298.5 298.5 298.5 298.5 298.5]
Denormalized observations: [[  0.82908524 326.06950732   0.85000001]
 [  0.85343909 324.88440734   0.85000001]
 [  0.86635444 325.07273382   0.85000001]
 [  0.87129847 324.78704079   0.90000002]
 [  0.87346216 324.95111973   0.90000002]
 [  0.87477002 324.88607697   0.90000002]
 [  0.87389595 324.78487511   0.87000001]
 [  0.874501   325.2345453    0.87000001]
 [  0.87479813 324.77927038   0.87000001]
 [  0.8737542  324.24296449   0.87000001]
 [  0.87184104 324.48812758   0.87000001]
 [  0.87310073 325.23002714   0.87000001]
 [  0.87241006 325.03113477   0.87000001]
 [  0.8744231  325.220858     0.87000001]
 [  0.87232065 325.43443847   0.87000001]
 [  0.87379948 324.63847411   0.8700000

In [7]:
# ============================================================================
# VISUALIZATION
# ============================================================================

In [13]:
# Create all four plots
variable_definitions = [
    {
        'name': 'Ca (Concentration A)',
        'index': 0,
        'color': 'blue',
        'symbol': 'circle',
        'yaxis_title': 'Concentration (mol/L)'
    },
    {
        'name': 'T (Temperature)',
        'index': 1,
        'color': 'red',
        'symbol': 'square',
        'yaxis_title': 'Temperature (K)'
    },
    {
        'name': 'Cb (Concentration B)',
        'index': 2,
        'color': 'green',
        'symbol': 'triangle-up',
        'yaxis_title': 'Concentration (mol/L)'
    }
]

plot_state_variables(denorm_observations, variable_definitions)

TypeError: plot_state_variables() takes 1 positional argument but 2 were given

In [9]:
# Create all four plots
plot_control_actions(denorm_actions)


In [10]:
plot_reward_evolution(rewards)

In [11]:
# Close the environment to free resources
env.close()