In [None]:
%reset -f
import gym
import numpy as np
import scipy.integrate as si
import matplotlib.pyplot as plt
from stable_baselines3 import PPO, DDPG
from stable_baselines3.common.env_checker import check_env

In [None]:
class Swing(gym.Env):
    """Pumped-swing environment: a pendulum whose length the agent controls.

    The state is the swing angle ``phi`` (radians) and angular velocity
    ``omega``. At every step the agent picks a new pendulum length and the
    dynamics are advanced by one time step ``dt``. An episode ends when
    ``phi`` gets close to ``target_phi`` (reward ``+10``) or when the step
    budget is exhausted (reward ``-1`` on every non-terminal-success step).

    The full trajectory of the current episode is kept in the lists
    ``self.phi``, ``self.omega`` and ``self.L``; they are emptied by
    :meth:`reset`.

    Args:
        L0: Initial pendulum length. The maximum length is ``2 * L0``.
        phi_0: Initial angle in **degrees** (converted to radians internally).
        omega_0: Initial angular velocity.
        target_phi: Angle in radians that has to be reached.
        dt: Integration time step.
        max_pumps: Step budget; the episode is ended once more than
            ``max_pumps`` steps have been taken.
    """

    def __init__(
        self,
        L0: float = 1.0,
        phi_0: float = 10.0,
        omega_0: float = -0.5,
        target_phi: float = np.pi / 2,
        dt: float = 0.01,
        max_pumps: int = 15,
    ):
        super().__init__()
        self.g = 10
        self.L0 = L0
        self.Lmax = 2 * L0
        self.phi_0 = np.deg2rad(phi_0)
        self.omega_0 = omega_0
        self.dt = dt
        self.target_phi = target_phi
        self.max_pumps = max_pumps

        # Per-episode histories; the last element is the current value.
        self.phi = [self.phi_0]
        self.omega = [self.omega_0]
        self.L = [self.L0]
        self.pumps = 0

        # The action is a fraction of Lmax; the observation is (phi, omega).
        self.action_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(1,), dtype=np.float32
        )
        self.observation_space = gym.spaces.Box(
            low=-10.0, high=10.0, shape=(2,), dtype=np.float32
        )

    def _observation(self) -> np.ndarray:
        """Return the current ``(phi, omega)`` in the observation space's dtype."""
        return np.array([self.phi[-1], self.omega[-1]], dtype=np.float32)

    def evolve(self) -> None:
        """Advance ``omega`` and ``phi`` by one time step.

        Requires at least two entries in ``self.L`` (the previous and the
        newly chosen length), which :meth:`step` guarantees. The update is

            omega' = omega - dt * (2 * L_dot / L * omega + g / L * sin(phi))
            phi'   = phi + dt * omega'

        i.e. the angle is advanced with the already-updated velocity.
        """
        length = self.L[-1]
        # Finite-difference estimate of the rate of change of the length.
        L_dot = (length - self.L[-2]) / self.dt
        omega_f = self.omega[-1] - self.dt * (
            2 * L_dot / length * self.omega[-1]
            + self.g / length * np.sin(self.phi[-1])
        )
        self.omega.append(omega_f)
        self.phi.append(self.phi[-1] + self.dt * omega_f)
        self.pumps += 1

    def step(self, action):
        """Apply a length choice and advance the simulation by one step.

        Args:
            action: A single value in ``[0, 1]`` (scalar or one-element
                array). The new length is ``(action + 0.01) * Lmax``; the
                small offset keeps the length strictly positive.

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is the
            state reached by this step (also on the final step of an episode).
            The caller is responsible for calling :meth:`reset` once ``done``
            is true.
        """
        # Accept Python scalars as well as arrays, and keep the value inside
        # the action space so the length can never become zero or negative.
        fraction = float(np.clip(np.asarray(action).item(), 0.0, 1.0))
        self.L.append((fraction + 0.01) * self.Lmax)
        self.evolve()

        reached_target = bool(
            np.isclose(self.phi[-1], self.target_phi, rtol=0.1)
        )
        out_of_steps = self.pumps > self.max_pumps

        reward = 10.0 if reached_target else -1.0
        done = reached_target or out_of_steps
        return self._observation(), reward, done, {}

    def reset(self):
        """Start a new episode and return its initial observation."""
        self.clear()
        self.phi.append(self.phi_0)
        self.omega.append(self.omega_0)
        self.L.append(self.L0)
        self.pumps = 0
        return self._observation()

    def render(self, mode="human"):
        """Rendering is not implemented; this is a no-op."""
        return None

    def clear(self) -> None:
        """Empty the per-episode history lists."""
        self.L.clear()
        self.omega.clear()
        self.phi.clear()

In [None]:
# Create the swing environment with its default constructor arguments.
env = Swing()

In [None]:
# PPO agent using the "MlpPolicy" policy on the swing environment.
model = PPO(policy="MlpPolicy", env=env, verbose=1)

In [None]:
# Train the agent for 200,000 timesteps.
model.learn(total_timesteps=200_000)