In [None]:
import non_dim_lderiv_control as ld
import copy
import gym
import numpy as np
import scipy.integrate as si
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import CheckpointCallback
from scipy.signal import savgol_filter

In [None]:
# Instantiate the Swing environment from the project module; the PPO agent
# below is trained on this instance.
env = ld.Swing()

In [None]:
# Training callback that writes a model snapshot every save_freq steps.
# Snapshots go to ./logs/ and their filenames start with "rl_model".
checkpoint_callback = CheckpointCallback(
    name_prefix="rl_model",
    save_path="./logs/",
    save_freq=50_000,
)

In [None]:
# Train a PPO agent with an MLP policy on the swing environment. Training
# metrics are logged for TensorBoard under tb_logs/ and snapshots are written
# through checkpoint_callback.
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="tb_logs/")
# model.save("logs/rl_model_0_steps")
# total_timesteps is a step count, so pass an integer literal rather than the
# float 2.5e5. The value also matches the rl_model_250000_steps checkpoint
# that is loaded in the following cell.
model.learn(total_timesteps=250_000, callback=checkpoint_callback)

In [None]:
# model.save("trained_model_new")
# del model
# Build a fresh environment and load the saved 250000-step checkpoint from
# logs/, binding the loaded model to that environment.
env = ld.Swing()
model = PPO.load("logs/rl_model_250000_steps.zip", env=env)

In [None]:
# Roll out one episode with the loaded policy, stepping until the environment
# reports that the episode is done.
obs = env.reset()
done = False
while not done:
    action, _states = model.predict(obs)
    obs, reward, done, _ = env.step(action)

# Histories accumulated on the environment during the rollout, as arrays.
l_hist = np.array(env.L)
phi_hist = np.array(env.phi)

# Cartesian trajectory of the mass; phi = 0 corresponds to the point (0, -L).
x_t = np.sin(phi_hist) * l_hist
y_t = -(np.cos(phi_hist) * l_hist)

# Same trajectory computed with env.lmax in place of the recorded length.
ref_x = np.array(env.lmax) * np.sin(phi_hist)
ref_y = -np.array(env.lmax) * np.cos(phi_hist)

In [None]:
# Angle history of the rollout, shifted by pi and wrapped into [0, 2*pi).
# Samples of env.phi are placed tau/2 apart on the time axis.
fontdict = dict(fontsize=16)
plt.plot(
    np.arange(len(env.phi)) * (env.tau / 2),
    (np.array(env.phi) + np.pi) % (2 * np.pi),
)
plt.yticks(ticks=[0, np.pi, 2 * np.pi], labels=["0", r"$\pi$", r"2$\pi$"])
plt.title("Angle over time", fontdict=fontdict)
plt.xlabel("Time", fontdict=fontdict)
plt.ylabel("Angle", fontdict=fontdict)
plt.savefig("theta.png")

In [None]:
# Length history of the rollout on the same tau/2-spaced time axis as the
# angle plot; the figure is written to length.png before being shown.
plt.plot(np.arange(len(env.phi)) * (env.tau / 2), env.L)
plt.title("Length over time", fontdict=fontdict)
plt.xlabel("Time", fontdict=fontdict)
plt.ylabel("Length", fontdict=fontdict)
plt.savefig("length.png")
plt.show()

In [None]:
# Overlay on twin y-axes: wrapped angle on the left axis (solid green) and
# length on the right axis (dashed black), sharing one time axis.
fig, ax1 = plt.subplots(figsize=(14, 10))
ax2 = ax1.twinx()

ax1.plot(
    np.arange(len(env.phi)) * (env.tau / 2),
    (np.array(env.phi) + np.pi) % (2 * np.pi),
    "g-",
)
ax2.plot(np.arange(len(env.phi)) * (env.tau / 2), env.L, "k--")

ax1.set_xlabel("Time", fontsize=16)
ax2.set_ylabel("Lengths", color="k", fontsize=16)
ax1.set_ylabel("Angles", color="g", fontsize=16)

plt.title("Lengths and Angles over time", fontsize=16)
plt.savefig("overlay.png")

In [None]:
# Overlay on twin y-axes: wrapped angle (left, solid green) against the
# recorded length rate env.Ldot_hist (right, dashed black). The final sample of
# each series is dropped. Angle samples are tau/2 apart, control samples tau.
fig, ax1 = plt.subplots(figsize=(14, 10))
ax2 = ax1.twinx()

ax1.plot(
    np.arange(len(env.phi) - 1) * (env.tau / 2),
    (np.array(env.phi)[:-1] + np.pi) % (2 * np.pi),
    "g-",
)
ax2.plot(np.arange(len(env.Ldot_hist) - 1) * env.tau, env.Ldot_hist[:-1], "k--")

ax1.set_xlabel("Time", fontsize=16)
ax2.set_ylabel(r"$u = \frac{dL}{dt}$", color="k", fontsize=16)
ax1.set_ylabel("Angles", color="g", fontsize=16)

plt.title("Control and Angles over time", fontsize=16)
plt.savefig("control_overlay.png")

In [None]:
# Series for the final figures, each with its last sample dropped.
# Angle samples are spaced tau/2 apart; control samples are spaced tau apart.
t = env.tau / 2 * np.arange(len(env.phi))[:-1]
# Angle shifted by pi and wrapped into [0, 2*pi).
phi = np.mod(np.array(env.phi) + np.pi, 2 * np.pi)[:-1]
l_dot_hist_t = env.tau * np.arange(len(env.Ldot_hist))[:-1]
l_dot_hist = env.Ldot_hist[:-1]
# Savitzky-Golay smoothing (window 7, quadratic) must not see the 0 <-> 2*pi
# wrap jumps: fitting a polynomial across a jump yields spurious mid-range
# angles. Smooth the unwrapped (continuous) series, then wrap the result back
# into [0, 2*pi) so downstream plots keep the same range.
smooth_angle = np.mod(savgol_filter(np.unwrap(phi), 7, 2), 2 * np.pi)

In [None]:
# Quick look: wrapped angle re-centred on zero (phi - pi) drawn on the same
# axes as the control history, each against its own time axis.
plt.plot(t, phi - np.pi)
plt.plot(l_dot_hist_t, l_dot_hist)

In [None]:
# Final figure on twin y-axes: smoothed angle re-centred on zero (left, blue)
# against the control history (right, dashed black).
fig, ax1 = plt.subplots(figsize=(14, 10))

ax2 = ax1.twinx()
ax1.plot(t, smooth_angle - np.pi, "b")
ax2.plot(l_dot_hist_t, l_dot_hist, "k--")

ax1.set_xlabel("t", fontsize=16)
ax1.set_ylabel(r"$\theta$", fontsize=16, color="b", rotation=0)
ax1.set_yticks([-np.pi, -np.pi / 2, 0, np.pi / 2, np.pi])
# Raw strings throughout: "\p" is not a valid escape sequence, so the non-raw
# literals triggered an invalid-escape warning. The rendered text is unchanged.
ax1.set_yticklabels([r"-$\pi$", r"-$\pi/2$", "0", r"$\pi/2$", r"$\pi$"])
ax2.set_ylabel(r"$u = \frac{dL}{dt}$", color="k", fontsize=16, rotation=0)

plt.title("Control and Angle over time", fontsize=20)
# plt.show()
# NOTE: this writes to the same filename as the earlier unsmoothed
# control/angle overlay, so that earlier image is overwritten.
plt.savefig("control_overlay.png")