In [10]:
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import array_ops

x = tf.placeholder(tf.float32, (2, 1))
fun = tf.reduce_sum((3 * x[0] + x[1]) ** 2)

grad = tf.gradients(fun, x)[0]
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/gradients.py
hess = array_ops.pack([tf.gradients(g, x)[0] for g in array_ops.unpack(grad)])

with tf.Session() as sess:
    point = np.array([0.1, 0.3]).reshape(2, 1)
    print grad.eval({x: point})[:, 0]
    print hess.eval({x: point})[:, :, 0]

[ 3.60000014  1.20000005]
[[ 18.   6.]
 [  6.   2.]]


In [2]:
import numpy as np
import sympy as sm

x, y = sm.symbols('x, y')
fun = (3 * x + y) ** 2

def grad(fun):
    vars = list(fun.free_symbols)
    fns = [sm.lambdify(vars, sm.diff(fun, wrt)) for wrt in vars]

    def eval(x):
        x = np.array(x).astype(float)
        return [fn(*x) for fn in fns]
    return eval

def hess(fun):
    vars = list(fun.free_symbols)
    n = len(vars)
    gs = [sm.diff(fun, wrt) for wrt in vars]
    fns = [sm.lambdify(vars, sm.diff(gs[i], wrt)) for i, wrt in enumerate(vars)]

    def eval(x):
        x = np.array(x).astype(float)
        return np.array([fns[i](*x) for i in xrange(n) for j in xrange(n)]).reshape(n, n)
    return eval

grad = grad(fun)
hess = hess(fun)

point = [0.1, 0.3]
print grad(point)
print hess(point)

[3.5999999999999996, 1.2000000000000002]
[[18 18]
 [ 2  2]]


In [3]:
import theano
import theano.tensor as T

x = T.dvector()
fun = T.sum((3 * x[0] + x[1]) ** 2)

g = theano.function([x], T.grad(fun, x))
H = theano.function([x], theano.gradient.hessian(fun, x))

x = [0.1, 0.3]
print g(x)
print H(x)

[ 3.6  1.2]
[[ 18.   6.]
 [  6.   2.]]


In [4]:
import numpy as np
import scipy.optimize as opt
from converge import Converge
from numpy.linalg import inv

def minimize(f, x, g, H, epsilon = 1e-7, maxsteps = 100):
    x = np.array(x).astype(float)
    converge = Converge(f, epsilon, maxsteps)
    
    while not converge.done(x):
        d = -inv(H(x)).dot(g(x))
        x += d * opt.minimize(lambda eta: f(x + eta * d), 1).x
    return x

In [5]:
import numpy as np
from scipy import stats
import scipy.optimize as opt
from scipy.misc import derivative
# import numdifftools as nd

def partial(fun):
    def eval(x, i):
        # http://stackoverflow.com/questions/20708038/scipy-misc-derivative-for-mutiple-argument-function
        v = x[:]
        def wraps(x):
            v[i] = x
            return fun(v)    
        return derivative(wraps, x[i], dx = 1e-6)
    return eval

def grad(fun, wrt = None):
    part = partial(fun)

    def eval(x):
        x = np.array(x).astype(float)
        return np.array([part(x, i) for i in xrange(len(x))])
    return eval

def hess(fun, wrt = None):
    part = partial(grad(fun, wrt))

    def eval(x):
        x = np.array(x).astype(float)
        n = len(x)
        return np.array([part(x, i)[j] for i in xrange(n) for j in xrange(n)]).reshape((n, n))    
    return eval

pdf = stats.multivariate_normal((23, 7)).pdf
fun = lambda x: -pdf(x)
x = [21, 6]
g = grad(fun)
H = hess(fun)
print minimize(fun, x, g, H) 
print opt.minimize(fun, x).x
print opt.minimize(fun, x, method = 'Newton-CG', jac = g, hess = H).x

[ 23.00000001   7.        ]
[ 23.           6.99999999]
[ 22.99999977   6.99999988]


In [6]:
def my_numeric_grad(fun):
    def eval(x):
        # http://localhost:8888/edit/Desktop/assignment1/cs231n/gradient_check.py
        x = np.array(x).astype(float)
        h = 1e-6
        grad = np.zeros_like(x)
        it = np.nditer(x, flags = ['multi_index'], op_flags = ['readwrite'])

        while not it.finished:
            ix = it.multi_index
            oldval = x[ix]
            x[ix] = oldval + h
            fxph = fun(x)
            x[ix] = oldval - h
            fxmh = fun(x)
            x[ix] = oldval
            grad[ix] = (fxph - fxmh) / (2 * h)
            it.iternext()
        return grad
    return eval