In [2]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from custom_sim_RL import ProcessSimulation

class CrystallizationEnv(gym.Env):
    """One-shot Gymnasium environment wrapping ``ProcessSimulation``.

    A single action describes a complete run: ``profile_length`` temperature
    set-points followed by one runtime value. ``step`` runs the simulation
    once, turns the resulting ``(d50, span)`` pair into a reward and ends the
    episode immediately.

    The observation carries no information yet; it is a constant zero vector
    whose shape matches ``observation_space``.

    Args:
        profile_length: Number of temperature set-points in the action.
        runtime_bounds: ``(min, max)`` bounds applied to the runtime entry of
            the action.
    """

    def __init__(self, profile_length=100, runtime_bounds=(1000, 7200)):   # typical runtime is about 3600
        super().__init__()

        self.profile_length = profile_length
        # NOTE(review): presumably Kelvin (323.15 K == 50 degC) -- confirm
        # against ProcessSimulation.
        self.temp_min = 290.00
        self.temp_max = 323.15
        self.runtime_min, self.runtime_max = runtime_bounds

        # Action: [temperature profile..., runtime]
        # Bounds are built directly as float32 so they match the declared
        # dtype of the space instead of being down-cast from float64.
        action_low = np.concatenate((
            np.full(self.profile_length, self.temp_min, dtype=np.float32),
            np.array([self.runtime_min], dtype=np.float32),
        ))
        action_high = np.concatenate((
            np.full(self.profile_length, self.temp_max, dtype=np.float32),
            np.array([self.runtime_max], dtype=np.float32),
        ))
        self.action_space = spaces.Box(low=action_low, high=action_high, dtype=np.float32)

        # Observation can remain static for now
        self.observation_space = spaces.Box(
            low=np.array([0.0, 0.0], dtype=np.float32),
            high=np.array([5.0, 1000.0], dtype=np.float32),
            dtype=np.float32,
        )

    def _static_observation(self):
        """Return the constant placeholder observation.

        The array is shaped after ``observation_space`` so that the value
        returned by ``reset``/``step`` is always a member of the declared
        space (all zeros lies on the lower bound of the box).
        """
        return np.zeros(self.observation_space.shape, dtype=np.float32)

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset`` so that
                ``self.np_random`` is seeded as the Gymnasium API requires.
            options: Unused; accepted for API compatibility.

        Returns:
            ``(observation, info)`` with the static observation and an empty
            info dict.
        """
        super().reset(seed=seed)
        return self._static_observation(), {}

    def step(self, action):
        """Run one full simulation for ``action`` and finish the episode.

        Args:
            action: Sequence whose last element is the runtime and whose
                preceding elements are the temperature profile. Values are
                clipped to the configured bounds before use.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``terminated`` is always ``True`` and ``truncated`` is always
            ``False``.
        """
        action = np.asarray(action)

        # Split action into temperature profile and runtime
        temperature_profile = np.clip(action[:-1], self.temp_min, self.temp_max)
        runtime = float(np.clip(action[-1], self.runtime_min, self.runtime_max))

        # Simulate crystallization
        d50, span = self._simulate_crystallization(temperature_profile, runtime)

        # Reward: higher D50 and lower span
        # You can adjust: e.g., reward = 2 * d50 - 1.5 * span
        reward = float(d50 - span)

        terminated = True  # One-shot environment

        # NOTE(review): stable-baselines3's Monitor wrapper also writes
        # info["episode"] at episode end; confirm this manual entry is
        # still wanted when the env is wrapped.
        info = {
            "D50": d50,
            "span": span,
            "runtime": runtime,
            "episode": {
                "r": reward,
                "l": 1
            }
        }

        return self._static_observation(), reward, terminated, False, info

    def _simulate_crystallization(self, temperature_profile=None, runtime=None):
        """Run a fresh ``ProcessSimulation`` and return its ``(d50, span)``.

        Args:
            temperature_profile: Passed to ``setup_run`` as ``temp_program``.
            runtime: Passed to ``setup_run`` as ``runtime_cryst``.

        Returns:
            The two values produced by ``ProcessSimulation.output()``.
        """
        sim = ProcessSimulation()

        sim.setup_run(temp_program=temperature_profile, runtime_cryst=runtime)
        d50, span = sim.output()

        return d50, span

Could not find GLIMDA.


In [1]:
from stable_baselines3 import SAC
from stable_baselines3.common.monitor import Monitor

# Train a SAC agent on the one-shot crystallization environment.
# CrystallizationEnv is defined in a separate cell; that cell has to be
# executed before this one, otherwise this cell fails with a NameError.
env = Monitor(CrystallizationEnv(profile_length=100, runtime_bounds=(1000, 7200)))

model = SAC(policy="MlpPolicy", env=env, verbose=1)
model.learn(total_timesteps=10)


NameError: name 'CrystallizationEnv' is not defined