In [1]:
%matplotlib inline
%autoreload 2
import os
os.environ["CGT_COMPAT_MODE"] = "theano"
os.environ["THEANO_FLAGS"] = "device=cpu"


ERROR: Line magic function `%autoreload` not found.


In [2]:
import tensorfuse as theano
import tensorfuse.tensor as TT
import numpy as np

Using Theano for TensorFuse


In [3]:
state_dim = 2
action_dim = 1

In [4]:
A = np.random.randn(state_dim, state_dim)
B = np.random.randn(state_dim, action_dim)
Q = np.random.randn(state_dim, state_dim)
R = np.random.randn(action_dim, action_dim)

for name in ["A", "B", "Q", "R"]:
    def gen(name):
        return lambda x: globals()[name]
    globals()["get_%s" % name] = gen(name)

In [5]:
class MultivariateGaussian(object):
    def __init__(self, mu, cov):
        self._mu = mu
        self._cov = cov
    
    @property
    def mu(self):
        return self._mu
    
    @property
    def cov(self):
        return self._cov
    
    @staticmethod
    def pluck(x):
        if isinstance(x, MultivariateGaussian):
            return x.mu, x.cov
        else:
            return x, 0

    @staticmethod
    def dot(a, b):
        b_mu, b_cov = MultivariateGaussian.pluck(b)
        return MultivariateGaussian(TT.dot(a, b_mu), TT.dot(a, b_cov).dot(a.T))

    @staticmethod
    def add(a, b):
        a_mu, a_cov = MultivariateGaussian.pluck(a)
        b_mu, b_cov = MultivariateGaussian.pluck(b)

        return MultivariateGaussian(a_mu + b_mu, a_cov + b_cov)
    
    def __str__(self):
        return "Mu: %s, Cov: %s" % (self.mu, self.cov)
    
    def __repr__(self):
        return self.__str__()
    
    

M = MultivariateGaussian

Sigma = np.identity(action_dim) * 0.01
control_noise = MultivariateGaussian(0, Sigma)
W = theano.shared(np.random.randn(action_dim, state_dim))
b = theano.shared(np.random.randn(action_dim, 1))

def linear_policy(x):
    return M.add(M.add(M.dot(W, x), b), control_noise)

def next_state(t, x_t, u_t):
    return M.add(
        M.dot(get_A(t), x_t),
        M.dot(get_B(t), u_t)
    )

def step_cost(t, x_t, u_t):
    Q = get_Q(t)
    R = get_R(t)
    static = x_t.mu.T.dot(Q).dot(x_t.mu) + u_t.mu.T.dot(R).dot(u_t.mu)
    var = TT.sum(Q * x_t.cov) + TT.sum(R * u_t.cov)
    return static + var


In [308]:
x0 = MultivariateGaussian(np.random.random((state_dim, 1)), 0)
H = 2
E_xs = [None for _ in xrange(H)]
E_xs[0] = x0
E_us = [None for _ in xrange(H)]
E_cs = [None for _ in xrange(H)]
for t in xrange(H):
    x_t = E_xs[t]
    u_t = linear_policy(x_t)
    x_tp = next_state(t, x_t, u_t)
    if t+1 < H:
        E_xs[t+1] = x_tp
    E_us[t] = u_t
    E_cs[t] = step_cost(t, x_t, u_t)

In [309]:
### print E_cs
s_cost = TT.sum(E_cs)
cost = s_cost.eval()
print cost
s_dW, s_db = TT.grad(s_cost, [W, b])
dW, db = s_dW.eval(), s_db.eval()
print dW, db

-15.8882516767
[[ 22.75858175  -6.08293098]] [[ 24.70739033]]


In [319]:
once(*[np.random.randn(action_dim).astype('float32') for _ in xrange(H)])

[array(-15.468293215428087),
 array([[ 0.0650786 ,  2.04510541]]),
 array([[ 5.28414415]])]

In [316]:
def normpdf(x, mean, var):
    pi = np.pi
    denom = (2*pi*var)**.5
    num = TT.exp(-(x-mean)**2/(2*var))
    return TT.exp(TT.sum(TT.log(num/denom)))

def sym_grad_log_norm(x, mean, var, thetas):
    return TT.grad(TT.log(normpdf(x, mean, var)), thetas)

def grad_log_given(x, u):
    pi = linear_policy(x)
    return sym_grad_log_norm(u, pi.mu, TT.diag(pi.cov), [W, b])

def gen_once():
        E_xs = [None for _ in xrange(H)]
        E_xs[0] = MultivariateGaussian(theano.shared(np.asarray(x0.mu)), theano.shared(np.asarray(x0.cov)))
        E_us = [None for _ in xrange(H)]
        E_cs = [None for _ in xrange(H)]
        random_noises = [TT.vector("w_%i" % i) for i in xrange(H)]
        for t in xrange(H):
            x_t = E_xs[t]
            u_t = M.add(deterministic_linear_policy(x_t), TT.dot(Sigma, random_noises[t]))
            x_tp = next_state(t, x_t, u_t)
            if t+1 < H:
                E_xs[t+1] = x_tp
            E_us[t] = u_t
            E_cs[t] = step_cost(t, x_t, u_t)
        cum_dWs = []
        cum_dbs = []
        import theano.tensor as OT
        cum_cs = OT.extra_ops.cumsum(E_cs[::-1])[::-1]
        for t in xrange(H-2, -1, -1):
            lW, lb = grad_log_given(xs[t], us[t])
            cum_dWs.append(lW * cum_cs[t])
            cum_dbs.append(lb * cum_cs[t])
        return theano.function(random_noises, [TT.sum(E_cs), TT.sum(cum_dWs, axis=0), TT.sum(cum_dbs, axis=0)])
    
once = gen_once()
def rollout(N = 100):
    
    return np.mean([once(*[np.random.randn(action_dim).astype('float32') for _ in xrange(H)]) for _ in xrange(N)], axis=0)


In [234]:
rollout()

ValueError: setting an array element with a sequence.

In [235]:
once(*[np.random.randn(action_dim).astype('float32') for _ in xrange(H)])

[array(-12.313152718894377),
 array([[ 0.06989664,  2.19651318]]),
 array([[ 5.67535161]])]

In [57]:
def gen_once():
    E_xs = [None for _ in xrange(H)]
    E_xs[0] = MultivariateGaussian(theano.shared(np.asarray(x0.mu)), theano.shared(np.asarray(x0.cov)))
    E_us = [None for _ in xrange(H)]
    E_cs = [None for _ in xrange(H)]
    random_noises = [TT.vector("w_%i" % i) for i in xrange(H)]
    for t in xrange(H):
        x_t = E_xs[t]
        u_t = M.add(deterministic_linear_policy(x_t), TT.dot(Sigma, random_noises[t]))
        x_tp = next_state(t, x_t, u_t)
        if t+1 < H:
            E_xs[t+1] = x_tp
        E_us[t] = u_t
        E_cs[t] = step_cost(t, x_t, u_t)
#         print E_cs
    return theano.function(random_noises, map(lambda l: TT.stack(map(lambda x: x.mu if isinstance(x, M) else (x), l)), [E_xs, E_us, E_cs]))
once = gen_once()
xs, us, cs = once(*[np.random.randn(action_dim).astype('float32') for _ in xrange(H)])
cum_dW = 0.
cum_db = 0.
cum_c = cs[-1]
for t in xrange(H-2, -1, -1):
    lW, lb = grad_log_given(xs[t], us[t])
    cum_dW += lW * cum_c
    cum_db += lb * cum_c
    cum_c += cs[t]


In [59]:
cum_dW.eval()


array([[  1.98006328e+11,   3.72782461e+11]])

In [60]:
cum_db.eval()

array([[ -8.58856607e+08]])