In [1]:
import non_dim_lderiv_control as ld
# import energy_reward as ld
import copy
import gym
import numpy as np
import pandas as pd
import scipy.integrate as si
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
from stable_baselines3 import A2C, PPO, SAC, TD3, DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import CheckpointCallback

In [2]:
# --- Experiment configuration (copied onto the Swing environment in later cells) ---

# Lower / upper values assigned to env.lmin / env.lmax.
lmin, lmax = 0.95, 1.05

# Initial angle and angular velocity assigned to env.phi_0 / env.phidot_0.
phi_0, phidot_0 = np.pi / 4, 0

# Assigned to env.ldot_max — presumably the bound on the length-rate control; TODO confirm.
ldot_max = 0.1

# Assigned to env.power_max; the `power_max < 1` check in the setup cell decides
# whether the environment is built with power_bounded=True.
power_max = 100

# Assigned to env.tau for the training environment.
# NOTE(review): an earlier inline note described this as (lmax - lmin) / 4, which is
# 0.025 for the bounds above, not 0.125 — confirm which value is intended.
tau = 0.125

In [3]:
# Build the training environment and copy the notebook-level configuration onto it.
# The power-bounded variant is only requested when power_max is below 1.
# NOTE(review): these attributes are overwritten after construction; confirm that
# ld.Swing does not derive its action/observation spaces from the defaults in __init__.
power_bounded = power_max < 1
env = ld.Swing(power_bounded=power_bounded)
env.lmin = lmin
env.lmax = lmax
env.ldot_max = ldot_max  # assigned once; the duplicate assignment was redundant
env.phi_0 = phi_0
env.phidot_0 = phidot_0
env.tau = tau
env.power_max = power_max

  "Box bound precision lowered by casting to {}".format(self.dtype)


In [4]:
# Callback handed to model.learn() so that intermediate models are written to
# ./big_state_unbounded_models/ with the "rl_model" file-name prefix.
checkpoint_callback = CheckpointCallback(
    name_prefix="rl_model",
    save_path="./big_state_unbounded_models/",
    save_freq=50_000,
)

In [None]:
# Train a fresh SAC agent on `env`, logging to TensorBoard and checkpointing through
# `checkpoint_callback`.
# Optional custom network size (currently unused):
# policy_kwargs = dict(net_arch=dict(pi=[256, 256]))
model = SAC("MlpPolicy", env, verbose=1, tensorboard_log="big_state_unbounded_logs/")
# Alternatives kept for reference — a discrete-action agent, or resuming from a checkpoint:
# model = DQN("MlpPolicy", env, verbose=1, tensorboard_log="discrete_unbounded_logs/")
# model = SAC.load("big_state_unbounded_models/rl_model_300000_steps.zip", env = env, tensorboard_log="big_state_unbounded_logs/")
# total_timesteps is an integer step count in the stable-baselines3 API, so pass an
# int (600_000) instead of the float literal 6e5.
model.learn(total_timesteps=600_000, callback=checkpoint_callback)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to big_state_unbounded_logs/SAC_8
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5e+03    |
|    ep_rew_mean     | -5e+03   |
| time/              |          |
|    episodes        | 4        |
|    fps             | 62       |
|    time_elapsed    | 320      |
|    total_timesteps | 20004    |
| train/             |          |
|    actor_loss      | 57.5     |
|    critic_loss     | 0.0125   |
|    ent_coef        | 0.00651  |
|    ent_coef_loss   | -0.926   |
|    learning_rate   | 0.0003   |
|    n_updates       | 19903    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5e+03    |
|    ep_rew_mean     | -5e+03   |
| time/              |          |
|    episodes        | 8        |
|    fps             | 62       |
|    time_elapsed    | 641      |
|    total_tim

In [None]:
# Rebuild the environment for evaluation and attach a saved checkpoint to it.
# The configuration matches the training environment except for tau, pump_limit
# and rtol, which are overridden below.
env = ld.Swing(power_bounded=power_bounded)
env.lmin = lmin
env.lmax = lmax
env.ldot_max = ldot_max  # assigned once; the duplicate assignment was redundant
env.phi_0 = phi_0
env.phidot_0 = phidot_0
env.power_max = power_max
# Finer step than the training value: 0.0025 == 0.125 / 50 (the earlier "tau/200"
# note did not match the training tau of 0.125).
env.tau = 0.0025
env.pump_limit = 500e3  # presumably a cap on the number of pumps/steps per episode — TODO confirm
env.rtol = 0.005  # presumably a relative tolerance used by the environment — TODO confirm
model = SAC.load("big_state_unbounded_models/rl_model_400000_steps.zip", env=env)

In [None]:
# Run one episode with the deterministic policy, then turn the recorded histories
# into Cartesian coordinates.
obs = env.reset()
env.L = [env.lmax]  # replace the length history right after reset with a single lmax entry
done = False
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, _ = env.step(action)

# Histories recorded by the environment during the rollout.
phi_hist = np.array(env.phi)
l_hist = np.array(env.L)

# Position computed from the recorded length and angle.
x_t = np.sin(phi_hist) * l_hist
y_t = -l_hist * np.cos(phi_hist)

# Reference curve: same angles, length held at lmax.
ref_x = np.array(env.lmax) * np.sin(phi_hist)
ref_y = -np.array(env.lmax) * np.cos(phi_hist)

In [None]:
# Shared label/title font settings (also used by the length plot cell).
fontdict = dict(fontsize=16)

# Angle history shifted by pi and wrapped into [0, 2*pi), against time (step index * tau).
plt.plot(
    np.arange(len(env.phi)) * env.tau,
    (np.asarray(env.phi) + np.pi) % (2 * np.pi),
)
plt.yticks([0, np.pi, 2 * np.pi], ["0", r"$\pi$", r"2$\pi$"])
plt.ylabel("Angle", fontdict=fontdict)
plt.xlabel("Time", fontdict=fontdict)
# plt.savefig("theta.png")
plt.title("Angle over time", fontdict=fontdict)

In [None]:
# Length history against time. The time axis is built from len(env.phi), so this
# relies on env.L and env.phi having the same number of entries.
plt.plot(np.arange(len(env.phi)) * env.tau, env.L)
plt.title("Length over time", fontdict=fontdict)
plt.ylabel("Length", fontdict=fontdict)
plt.xlabel("Time", fontdict=fontdict)
# plt.savefig("length.png")
plt.show()

In [None]:
# Overlay of wrapped angle (left axis, green) and length (right axis, dashed black)
# on a shared time axis.
fig, ax1 = plt.subplots(figsize=(14, 10))
ax2 = ax1.twinx()

# Left axis: angle shifted by pi and wrapped into [0, 2*pi).
ax1.plot(
    np.arange(len(env.phi)) * env.tau,
    (np.asarray(env.phi) + np.pi) % (2 * np.pi),
    "g-",
)
ax1.set_xlabel("Time", fontsize=16)
ax1.set_ylabel("Angles", color="g", fontsize=16)

# Right axis: length history.
ax2.plot(np.arange(len(env.phi)) * env.tau, env.L, "k--")
ax2.set_ylabel("Lengths", color="k", fontsize=16)

# plt.savefig("overlay.png")
plt.title("Lengths and Angles over time", fontsize=16)

In [None]:
# Series reused by the control/angle plot and the export cells below.
# Angle history shifted by pi and wrapped into [0, 2*pi), with its time axis.
phi = (np.asarray(env.phi) + np.pi) % (2 * np.pi)
t = np.arange(len(env.phi)) * env.tau

# Control history recorded by the environment, with its own time axis.
# NOTE(review): earlier revisions experimented with dropping the last sample ([:-1])
# on all four series — confirm the angle and control histories have equal length.
l_dot_hist = env.Ldot_hist
l_dot_hist_t = np.arange(len(env.Ldot_hist)) * env.tau
# Optional smoothing, currently disabled:
# smooth_angle = savgol_filter(phi, 9, 2)

In [None]:
# Overlay of the angle re-centred to [-pi, pi) (left axis, blue) and the control
# history (right axis, dashed black).
fig, ax1 = plt.subplots(figsize=(12, 7))
ax2 = ax1.twinx()

# `phi` was wrapped into [0, 2*pi) after a +pi shift; subtracting pi undoes the shift.
ax1.plot(t, phi - np.pi, "b")
ax2.plot(l_dot_hist_t, l_dot_hist, "k--")

ax1.set_xlabel("t", fontsize=16)
ax1.set_ylabel(r"$\theta$", fontsize=16, color="b", rotation=0)
ax1.set_yticks([-np.pi, -np.pi / 2, 0, np.pi / 2, np.pi])
# Raw strings throughout: "\p" is not a valid escape sequence, so the non-raw
# labels triggered an invalid-escape warning (the rendered text is unchanged).
ax1.set_yticklabels([r"-$\pi$", r"-$\pi/2$", "0", r"$\pi/2$", r"$\pi$"])
ax2.set_ylabel(r"$u = \frac{dL}{dt}$", color="k", fontsize=16, rotation=0)

plt.title("Control and Angle over time", fontsize=20)
# plt.show()
# plt.savefig("control_overlay.png")

In [None]:
# Separate column mappings for the control series and the re-centred angle series,
# each paired with its own time axis.
angle_dict = dict(time=t, angle=phi - np.pi)
control_dict = dict(time=l_dot_hist_t, control=l_dot_hist)

In [None]:
# Combined table of control and re-centred angle on the control time axis.
# pandas requires all three columns to have the same length, so this relies on
# the angle and control histories being equally long.
control_angle_dict = dict(
    time=l_dot_hist_t,
    control=l_dot_hist,
    angle=phi - np.pi,
)
control_angle_df = pd.DataFrame(data=control_angle_dict)

In [None]:
# ang_df = pd.DataFrame(angle_dict)
# ctrl_df = pd.DataFrame(control_dict)

In [None]:
# Display env.pumps scaled by the step size env.tau — presumably converts a
# step/pump count into elapsed time; TODO confirm against the Swing implementation.
env.pumps*env.tau

In [None]:
# Display the environment's `pumps` attribute after the rollout
# (presumably a pump/step counter — TODO confirm).
env.pumps

In [None]:
# Largest value in the recorded control history (env.Ldot_hist); can be compared
# with the configured ldot_max.
max(env.Ldot_hist)

In [None]:
# Relative difference between env.target and the last recorded angle env.phi[-1].
(env.target - env.phi[-1])/env.target

In [None]:
# Display the environment's target value used in the relative-difference check.
env.target

In [None]:
# control_angle_dict = {"time": l_dot_hist_t[:-37], "control": l_dot_hist[:-37], "angle": phi[:-37]-np.pi}
# control_angle_df = pd.DataFrame(control_angle_dict)

In [None]:
# Inspect the last rows of the combined control/angle table before exporting it.
control_angle_df.tail()

In [None]:
# Write the combined control/angle table to CSV in the working directory.
# NOTE(review): the file name says "bounded", while this notebook sets power_max = 100
# (so power_bounded is False) and uses "unbounded" model/log paths — confirm the name.
control_angle_df.to_csv("bounded_control_angle.csv")