In [22]:
import matplotlib
import matplotlib.pyplot as plt
# import pandas as pd

import numpy as np
from scipy.integrate import solve_ivp
import time

import gymnasium as gym
from gymnasium import spaces
import stable_baselines3 as sb

from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
# from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_checker import check_env

matplotlib.rcParams['axes.labelsize'] = 15

In [None]:
# Physical constants (SI units) read as globals by `fun` and `Bubble`
patm        = 98000     # Pascals
T_amb       = 293       # Kelvin
# nR          = 800e-8   # n (moles) * R (gas constant)
N           = 6e15      # Number of particles
kb          = 1.38e-23  # J/K

rho_agua    = 1000      # kg/m^3
viscosidad  = 0.001      # Pa*s == kg/(m*s)
s_sup       = 7e-2  # N/m

# Acoustic pressure as the sum of several waves
def ps(t, A, w, d = 0):
    """
    Acoustic pressure: ``sum_i A[i] * sin(w[i] * t + d[i])``.

    Args:
        t: Time, scalar or NumPy array.
        A: Sequence of wave amplitudes.
        w: Sequence of angular frequencies, same length as ``A``.
        d: Phase offsets. Either a sequence with the same length as ``A`` or a
            scalar applied to every wave (default 0).

    Returns:
        The summed pressure (same shape as ``t``); 0 when ``A`` is empty.

    Raises:
        AssertionError: If ``A``, ``w`` and ``d`` have different lengths.
    """
    # A scalar phase (including the default) has no len(); expand it to one
    # entry per wave so the default argument is actually usable.
    if np.ndim(d) == 0:
        d = np.full(len(A), d)
    assert len(A) == len(w) == len(d), "Different shapes"
    return sum(amp * np.sin(t * freq + phase) for amp, freq, phase in zip(A, w, d))

# Differential equations
def fun(t, y, ps_args):
    """
    Right-hand side of the bubble ODE system, in the ``fun(t, y, *args)`` form
    that ``solve_ivp`` expects.

    Args:
        t: Current time.
        y: State ``(vel, radi, temp)``: wall velocity, radius and temperature.
        ps_args: Arguments unpacked into ``ps`` after ``t``.

    Returns:
        Tuple ``(vDot, rDot, tDot)`` with the time derivative of each entry of
        ``y``, in the same order.
    """
    vel, radi, temp = y

    # Ideal-gas pressure N*kb*T / V for a sphere of radius `radi`
    gas_pressure = 3/4 * N*kb*temp/(np.pi*np.power(radi,3))
    # Surface-tension and viscous terms at the bubble wall
    wall_terms = (2*s_sup+4*viscosidad*vel)/radi
    # Net pressure after removing the ambient and acoustic contributions
    net_pressure = gas_pressure - wall_terms - patm - ps(t, *ps_args)

    velocity_term = - 3/2 * np.power(vel,2)
    vDot = (velocity_term + net_pressure/rho_agua)/radi

    # The radius changes at the wall velocity
    rDot = vel
    tDot = - 2 * temp * vel/radi

    return vDot, rDot, tDot

In [None]:
def observations(R,RDot,T):
    """
    Build the (roughly normalized) observation vector from the bubble state.

    Scaling chosen by the author, who intends the observation space to be a
    bit wider than these ranges to leave some margin:
        - Radius: always positive and of order 1e-3, so it is multiplied by 100.
        - Velocity: wide range, so it is divided by 100.
        - Temperature: log10 scale, expected roughly between 0 and 7.

    Args:
        R: Bubble radius.
        RDot: Bubble wall velocity.
        T: Temperature.

    Returns:
        float32 array ``[R*100, RDot/100, log10(T)]``.
    """
    scaled_radius = R*100
    scaled_velocity = RDot/100
    log_temperature = np.log10(T)
    obs = np.array([scaled_radius, scaled_velocity, log_temperature])
    return obs.astype(np.float32)

In [20]:
# Parameters of two superposed waves: amplitudes, frequencies and phases.
# NOTE: Bubble.step reads A, w and d as globals, so this cell must run before
# the environment is stepped.
A  = np.array([2,1])
w  = np.array([2,1])
d  = np.array([np.pi/2,0])

# Time grid; not used by any code visible in this section -- TODO confirm it is still needed
t = np.linspace(0, 3*np.pi, 1000)

# Smoke test: derivatives at t = 0 for the state (vel, radi, temp) = (0.5, 1e-3, 300)
fun(0, [.5, 1e-3, 300], [A,w,d])

(np.float64(-92588.88682039597), 0.5, -300000.0)

In [None]:
class Bubble(gym.Env):
    """
    Gymnasium environment wrapping the bubble ODE system defined by ``fun``.

    State: ``agent_vars`` is ordered ``(RDot, R, T)`` -- the same order in which
    ``fun`` unpacks its ``y`` argument.
    Observation: ``observations(R, RDot, T)``, a float32 vector of length 3.
    Reward: ``log10(T)`` of the state reached by the step.

    NOTE: the action is not applied yet. ``step`` still drives the bubble with
    the notebook-level wave parameters ``A``, ``w`` and ``d``, which must be
    defined before the first ``step`` call.
    """
    # Gymnasium looks up the supported modes under the "render_modes" key.
    metadata = {'render_modes': ['data']}

    def __init__(self, dt = 1e-5, n_ondas = 1, render_mode = None):
        """
        Args:
            dt: Length in seconds of the interval integrated by each ``step``.
            n_ondas: Number of superposed waves the agent controls.
            render_mode: Stored for ``render``; only ``None`` is implemented.
        """
        super().__init__()

        # render() reads this attribute, so it must always exist.
        self.render_mode = render_mode

        # Time variables
        self.t  = 0
        self.dt = dt  # timestep in seconds

        # Internal state, ordered (RDot, R, T)
        self.agent_vars = self._sample_initial_state()

        # Actions: one (amplitude, frequency, phase) triple per wave, normalized
        # to [-1, 1]. TODO: map the normalized action to physical values in step().
        self.action_space = spaces.Box(low = -1.0, high = 1.0,
                                       shape=(n_ondas,3), dtype=np.float32)

        # Observations: the three scaled values returned by observations().
        # Left unbounded because log10(T) and the scaled velocity are not
        # confined to [-1, 1].
        # TODO: if actions become incremental, append the current A, w, d here.
        self.observation_space = spaces.Box(low = -np.inf, high = np.inf,
                                            shape=(3,), dtype=np.float32)

        # TODO: render initialisation

    def _sample_initial_state(self):
        """Draw a random initial ``(RDot, R, T)`` state from the env's own RNG."""
        rng = self.np_random  # seeded by reset(seed=...)
        return np.array([rng.normal(0, 1),
                         rng.normal(1e-3, 1e-4),
                         rng.normal(T_amb, 10)])

    def _get_obs(self):
        """Return the observation for the current state."""
        Rdot, R, T = self.agent_vars
        # observations() takes the radius first, unlike the state ordering.
        return observations(R, Rdot, T)

    def reset(self, seed = None, options = None):
        """
        Start a new episode from freshly sampled initial conditions.

        Args:
            seed: Optional seed for the environment RNG. ``None`` keeps the
                current RNG stream, so successive episodes differ.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            Tuple ``(observation, info)`` with an empty ``info`` dict.
        """
        super().reset(seed=seed)

        # New initial conditions, with the clock restarted
        self.t = 0
        self.agent_vars = self._sample_initial_state()
        # TODO: render reset

        return self._get_obs(), {}

    def step(self, action):
        """
        Integrate the bubble dynamics over one interval of length ``dt``.

        Args:
            action: Currently ignored (see the class NOTE).

        Returns:
            Tuple ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` is True only when the ODE solver reports failure, in
            which case ``info["solver_message"]`` holds the solver's message.
        """
        # TODO: validate `action` and use it to set the new A, w, d.

        t_end = self.t + self.dt
        sol = solve_ivp(fun, (self.t, t_end),
                        self.agent_vars, args = ((A, w, d),),
                        max_step = self.dt/10, rtol = 1e-12)
        # Only the last sample is needed; the full trajectory stays in `sol`
        # in case it is ever plotted.
        self.agent_vars = sol.y[:,-1]
        # Advance the clock so the next step sees the next slice of the
        # acoustic forcing instead of repeating the same window.
        self.t = t_end

        # TODO: final reward design. For now: log-temperature of the new state.
        T = self.agent_vars[2]
        reward = float(np.log10(T))

        # TODO: hypothesis checking
        terminated = not sol.success
        info = {} if sol.success else {"solver_message": sol.message}

        return self._get_obs(), reward, terminated, False, info

    def render(self, seed = 0):
        """Do nothing when ``render_mode`` is None; other modes are not implemented."""
        if self.render_mode is None:
            return
        raise NotImplementedError()

    def close(self):
        """Nothing to release."""
        pass