Example file meant to illustrate a basic use case for one of the Environments in ACME Gym.

In [1]:
import numpy as np
from scipy.optimize import root
from scipy.integrate import odeint
from scipy.linalg import solve_continuous_are as scare
import gym
import acme_gym

In [2]:
#These are the functions from Brooke's Inverted Pendulum lab
def linearized_init(M, m, l, q1, q2, q3, q4, r):
    '''
    Assemble the matrices of the linearized rickshaw/pendulum LQR problem.

    The state ordering implied by the weights is: position of the rickshaw,
    velocity of the rickshaw, angular displacement theta, change in theta.

    Parameters:
    ----------
    M, m: floats
    masses of the rickshaw and the present
    l   : float
    length of the rod
    q1, q2, q3, q4, r : floats
    relative weights of the position and velocity of the rickshaw,
    the angular displacement theta and the change in theta, and the control
    Return
    -------
    A : ndarray of shape (4,4), linearized state matrix
    B : ndarray of shape (4,1), control input matrix
    Q : ndarray of shape (4,4), diagonal matrix of the state weights
    R : ndarray of shape (1,1), the control weight
    '''
    gravity = 9.8

    # Only four entries of A are non-zero; the rows are written out in full
    # so the structure of the linearized dynamics is visible at a glance.
    A = np.array(
        [
            [0, 1, 0, 0],
            [0, 0, m*gravity/M, 0],
            [0, 0, 0, 1],
            [0, 0, gravity/(M*l)*(M+m), 0],
        ],
        dtype=float,
    )

    # The control enters the velocity row and the angular-velocity row.
    B = np.array([[0], [1/M], [0], [1/(M*l)]], dtype=float)

    Q = np.diag((q1, q2, q3, q4))
    R = np.array([[r]])
    return A, B, Q, R

def find_P(A, B, Q, R):
    '''
    Solve the algebraic Riccati equation
        P A + A^T P + Q - P B R^{-1} B^T P = 0
    for P with a general-purpose root finder.

    The solver starts from the all-ones matrix. The Riccati equation can have
    several solutions, so the returned P is *a* root of the equation and is
    not guaranteed to be the stabilizing one.

    Parameters:
    ----------
    A, Q    : array_like of shape (n,n)
    B       : array_like of shape (n,m)
    R       : array_like of shape (m,m)
    Returns
    -------
    P       : ndarray of shape (n,n), the matrix solution of the Riccati equation

    Warns
    -----
    RuntimeWarning if the root finder reports that it did not converge; the
    last iterate is still returned.
    '''
    import warnings

    A = np.asarray(A, dtype=float)
    Q = np.asarray(Q, dtype=float)
    R = np.atleast_2d(np.asarray(R, dtype=float))
    n = A.shape[0]
    B = np.asarray(B, dtype=float).reshape(n, -1)

    # R^{-1} B^T does not depend on P, so compute it once instead of on
    # every residual evaluation. Using solve() keeps this valid for any
    # invertible R, not only the 1x1 case.
    RinvBT = np.linalg.solve(R, B.T)

    def residual(p_flat):
        # The root finder works on flat vectors; view it as an (n,n) matrix.
        P = p_flat.reshape((n, n))
        return (P @ A + A.T @ P + Q - P @ B @ RinvBT @ P).ravel()

    solution = root(residual, np.ones(n * n))
    if not solution.success:
        warnings.warn(
            "find_P: root finder did not converge: {}".format(solution.message),
            RuntimeWarning,
        )
    return solution.x.reshape((n, n))

def rickshaw(tv, X0, A, B, Q, R, P):
    '''
    Simulate the closed-loop linear system z' = (A - B K) z with the LQR
    feedback gain K = R^{-1} B^T P, and return the states together with the
    control u = -K z applied at every time value.

    Parameters:
    ----------
    tv  : array_like of time values, with shape (n+1,)
    X0  : Initial conditions on state variables
    A   : array_like of shape (4,4)
    Q   : array_like of shape (4,4); not used here, kept so the signature
          matches the other functions
    B   : array_like of shape (4,1)
    R   : array_like of shape (1,1)
    P   : array_like of shape (4,4)
    Returns
    -------
    Z : ndarray of shape (n+1,4), the state vector at each time
    U : ndarray of shape (1,n+1), the control values (one row per control
        input, one column per time value)
    '''
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    P = np.asarray(P, dtype=float)
    R = np.atleast_2d(np.asarray(R, dtype=float))

    # The feedback gain and closed-loop matrix are constant, so build them
    # once rather than inside the ODE right-hand side. solve() keeps the
    # expression correct for any invertible R, not just a 1x1 one.
    K = np.linalg.solve(R, B.T @ P)
    closed_loop = A - B @ K

    def state_derivative(z, t):
        return closed_loop @ z

    Z = odeint(state_derivative, X0, tv)
    U = -K @ Z.T
    return Z, U

def stabilize(M, m, l, q1, q2, q3, q4, r, X0, tf, step):
    '''
    Compute the LQR-controlled trajectory of the linearized system.

    Builds the system and weight matrices with linearized_init, solves the
    continuous algebraic Riccati equation for them, and hands the result to
    rickshaw together with a uniform time grid on [0, tf].

    Parameters:
    ----------
    M, m, l, q1, q2, q3, q4, r : passed unchanged to linearized_init
    X0   : Initial conditions on state variables
    tf   : float, final time of the time grid
    step : int, number of points in the time grid (the `num` argument of
           np.linspace)
    Returns
    -------
    Z : the state vector at each time, as returned by rickshaw
    U : the control values, as returned by rickshaw
    '''
    lqr_matrices = linearized_init(M, m, l, q1, q2, q3, q4, r)
    riccati_solution = scare(*lqr_matrices)
    time_grid = np.linspace(0, tf, step)
    states, controls = rickshaw(time_grid, X0, *lqr_matrices, riccati_solution)
    return states, controls

In [11]:
def run_pendulum():
    """
    Run one LQR-controlled simulation of the 'CartPoleContinuous-v0'
    environment and render it.

    Implemented as a function so that we can properly exit early if needed
    without crashing our Kernel. The environment is always closed on the way
    out, including when the run is interrupted or an error is raised.

    The environment is reset and rendered, the initial observation is
    printed, and the simulation starts once the user presses Enter. It then
    runs for a fixed number of steps, applying the state-feedback control
    u = -K x computed from the most recent observation.
    """
    n_steps = 500
    q1, q2, q3, q4 = 90000., 1., 90000., 1
    r = 10.  # Weight on the control, how do we know what it should be?
    M, m, l = 1, .1, 1

    # The LQR gain K = R^{-1} B^T P depends only on the model and the
    # weights, so it is computed once here instead of re-solving the Riccati
    # equation and re-integrating a whole trajectory on every step just to
    # read off the control at time zero.
    A, B, Q, R = linearized_init(M, m, l, q1, q2, q3, q4, r)
    P = scare(A, B, Q, R)
    K = np.linalg.solve(R, B.T @ P)

    env = gym.make('CartPoleContinuous-v0')
    try:
        # Initial state is drawn randomly; show it and wait for the user
        # before starting the simulation.
        obs = env.reset()
        env.render()
        print("Initial State")
        print("X: {}, X': {}, θ: {}, θ': {}".format(obs[0], obs[1], obs[2], obs[3]))
        input("Enter to begin simulation\n")

        for _ in range(n_steps):
            # Feedback: rebuild the model state from the latest observation.
            # NOTE(review): the signs of θ and θ' are flipped, presumably
            # because the environment's angle convention is opposite to the
            # model's -- confirm. Previously the very first control was
            # computed without this flip; it is now applied on every step so
            # the controller sees one convention throughout.
            x = np.array([obs[0], obs[1], -obs[2], -obs[3]])
            action = -(K @ x)
            obs, _reward, _done, _info = env.step(np.array(action))
            env.render()
    finally:
        env.close()

In [12]:
# WARNING! The pop-up window used for rendering may not come to the front of
# your screen; check whether it opened underneath your other windows.
# The simulation waits for Enter at the input prompt before it starts stepping.
run_pendulum()

Initial State
X: -2.3427006592862094, X': 0.17925809654784236, θ: -0.3495924100385281, θ': -0.3050921396011868
