In [163]:
import sys
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
import gym
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import copy
sys.path.append('p3')
import gridworld as gw

In [164]:
env = gym.make('Pendulum-v0')

[2017-07-26 12:47:47,559] Making new env: Pendulum-v0


In [165]:
"""Engineered solution - no RL
Using PID controllers to keep pendulum inverted.
The CleverController is essentially an FSM with 3 states: 
    (1) zero pendulum (to stable equilibrium using PID on velocity), 
    (2) invert pendulum to unstable equilibrium using predefined procedure, 
    (3) keep pendulum inverted with a PID contoller
The inversion procedure executed as part of state (2) has three steps.
The entire controller has about 11 adjustable parameters, including 3 for theta PID coefficients, 3 for velocity PID coefficients, 3 for transitioning from state to state in controller, 2 for transitioning between states in inversion procedure.
I fiddled with the parameters for a while manually, and it almost works, the only problem is that the inversion procedure inverts too quickly so the controller isn't able to keep the pendulum inverted.
[All of this can essentially be ignored, its just fun to play with]
"""
def getAngle(y, x, v): # north is theta = 0
    theta = np.arccos(y)
    if x > 0:
        theta = -theta
    return theta

class PID_Control:
    def __init__(self, Kp=1., Ki=1., Kd=1., maxI=None):
        self.Kp = Kp
        self.Ki = Ki
        self.Kd = Kd
        self.maxI = maxI if maxI else Ki * np.pi
        self.I = 0
        self.D = 0

    def getSol(self, theta):
        # P
        sol = self.Kp * theta

        # D
        sol += self.Kd * (theta - self.D)
        D = theta

        # I
        self.I += theta
        if self.I > self.maxI:
            self.I = self.maxI
        if self.I < -self.maxI:
            self.I = -self.maxI
        sol += self.I

        return -sol
    
    def reset(self):
        self.I = 0
        self.D = 0

# 3 states of controller
S_ZERO = 1
S_INV = 2
S_BAL = 3

# 3 states of inversion procedure
I_PUSH = 1
#I_WAIT = 2
I_REV = 2
I_SLO = 3

class CleverControl:
    def __init__(self, w_cutoff=1e-3, theta_cutoff=5 * np.pi / 10):
        self.theta_pid = PID_Control(0.1, 1, 1) # for balancing
        self.vel_pid = PID_Control(10, 1, 1) # for zeroing velocity
        self.state = S_ZERO # control state
        self.i_state = I_PUSH # inversion state - just for inversion procedure
        self.w_cutoff = w_cutoff
        self.theta_cutoff = theta_cutoff
    
    def getSol(self, theta, w):
        if self.state == S_ZERO:
            if np.abs(w) < self.w_cutoff:
                self.vel_pid.reset()
                self.i_state = I_PUSH
                self.state = S_INV # perform inversion
            return self.vel_pid.getSol(w)
        if self.state == S_INV:
            if np.abs(theta) < 5 * np.pi / 10:
                self.vel_pid.reset()
                self.state = S_BAL # start balancing
            return self.complete_inversion(theta, w)
        # continue balancing
        if np.abs(theta) > (np.pi + self.theta_cutoff)/2 :
            self.theta_pid.reset()
            self.state = S_ZERO # dropped pendulum, reset
        return self.theta_pid.getSol(theta)
    
    def complete_inversion(self, theta, w): # move from stable equilibrium to unstable equilibrium
        adj_theta = theta if theta > 0 else theta + 2 * np.pi
        if self.i_state == I_PUSH: # initial push
            if adj_theta > 6 * np.pi / 5:
                self.i_state = I_REV
            return -2
        #if self.i_state == I_WAIT: # reverse
        #    if w < self.w_cutoff:
        #        self.i_state = I_REV
        #    return -2
        if self.i_state == I_REV:
            if adj_theta < 95 * np.pi / 100: # over halfway point
                self.i_state = I_SLO
            return 2
        # I_SLO
        return -2

In [166]:
"""Running the engineered solutions - no RL"""
env = gym.make('Pendulum-v0')
env.reset()
obs = []
rewards = []
calcs = []
pid = PID_Control(10, 1, 1)
clever = CleverControl()
sol = 0
for _ in range(1000):
    #env.render()
    observation, reward, done, info = env.step(np.array([sol])) # take a random action
    obs.append(observation) # [ypos, xpos, ang-speed] - r=1
    rewards.append(reward)
    theta = getAngle(*observation)
    #sol = pid.getSol(theta)
    #sol = pid.getSol(observation[2])
    sol = clever.getSol(theta, observation[2])
    calcs.append([theta/np.pi, sol, reward, clever.state+2, clever.i_state+2, (theta if theta > 0 else theta + 2 * np.pi)/np.pi])

obs = np.hstack([np.vstack(obs),np.array(calcs)])
print sum(rewards)

[2017-07-26 12:47:48,712] Making new env: Pendulum-v0


-6986.25004384


In [167]:
"""Plot results from episode"""
print obs[0], obs.shape
fig, ax = plt.subplots(1,1)
labels = ['y','x','v','theta','sol','R', 'state', 'i_state', 'adj_theta']
for i in range(obs.shape[1]):
    ax.plot(obs[:,i], label=labels[i])
ax.legend()
plt.show()

[ 0.98512142  0.17185979  0.13445841 -0.0549776  -1.61350093 -0.02756521
  3.          3.          1.9450224 ] (1000, 9)


<IPython.core.display.Javascript object>

In [168]:
"""Print critical portion of results"""
print labels[2:]
for i in range(80, 110):
    print i, obs[i,2:]

['v', 'theta', 'sol', 'R', 'state', 'i_state', 'adj_theta']
80 [  7.06490216   0.75343098 -80.85551646 -13.28902964   3.           4.
   0.75343098]
81 [  6.24031906   0.65411321 -71.78510229 -10.59784647   3.           4.
   0.65411321]
82 [  5.2765197    0.57013479 -61.18330931  -8.12100756   3.           4.
   0.57013479]
83 [  4.24465147   0.50257907 -49.83275886  -5.99631727   3.           4.
   0.50257907]
84 [  3.19467609   0.45173422 -38.28302965  -4.29862774   3.           4.
   0.45173422]
85 [  2.15328162   0.41746368 -26.82769044  -3.03862455   3.           4.
   0.41746368]
86 [  1.12835348   0.39950537 -15.55348093  -2.18769658   3.           4.
   0.39950537]
87 [ 0.11542209  0.39766837 -4.41123567 -1.70655186  3.          4.
  0.39766837]
88 [-0.89615351  0.4119311   7.61224943 -1.56611281  3.          4.          0.4119311 ]
89 [ -1.31762993   0.43290183  13.56612004  -1.75905495   3.           4.
   0.43290183]
90 [ -1.75102857   0.46077032  20.08453368  -2.02721817  


