In [14]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from optimization import Optimization
%matplotlib inline

# Methods

## Newton-Raphson

In [15]:
def cons(x):
    return tf.constant(x, dtype=tf.float32)

In [16]:
def compute_hessian(fn, vars):
    mat = []
    for v1 in vars:
        temp = []
        for v2 in vars:
            temp.append(tf.gradients(tf.gradients(fn, v2)[0], v1)[0])
        temp = [cons(0) if t == None else t for t in temp]
        temp = tf.stack(temp)
        mat.append(temp)
    mat = tf.stack(mat)
    return mat

In [17]:
def compute_grads(fn, vars):
    grads = []
    for v in vars:
        grads.append(tf.gradients(fn, v)[0])
    return tf.reshape(tf.stack(grads), shape=[len(vars), -1])

In [18]:
def optimize(all_variables, update):
    optmize_variables = []
    for i in range(len(all_variables)):
        optmize_variables.append(all_variables[i].assign(all_variables[i] - tf.squeeze(update[i])))
    return tf.stack(optmize_variables)

In [19]:
def NewtonRaphson(x, f, num_iterations):
    hessian = compute_hessian(f, x)
    hessian_inv = tf.matrix_inverse(hessian)
    g = compute_grads(f, x)
    update = tf.unstack(tf.matmul(hessian_inv, g))
    optimize_op = optimize(x, update)

    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    func = np.inf
    for i in range(num_iterations):
        prev = func
        v, func = sess.run([x, f])
        print( v, func )
        sess.run(optimize_op)
    sess.close()
    return v

### Test

In [20]:
x = []
for i in range( 2 ):
    x.append(tf.Variable(np.random.random() * 10.24 - 5.12))
print(x)

f = tf.pow(x, 4) + 5

print(NewtonRaphson(x,f,10))

[<tf.Variable 'Variable:0' shape=() dtype=float32_ref>, <tf.Variable 'Variable_1:0' shape=() dtype=float32_ref>]
[-2.8769305, -3.8552346] [ 73.50424 225.90384]
[-1.9179536, -2.5701563] [18.5317  48.63532]
[-1.2786357, -1.7134376] [ 7.672929 13.619323]
[-0.8524238, -1.1422918] [5.5279856 6.702583 ]
[-0.56828254, -0.7615279] [5.1042933 5.336313 ]
[-0.37885502, -0.5076853] [5.0206013 5.066432 ]
[-0.25257, -0.33845687] [5.0040693 5.0131226]
[-0.16837999, -0.22563791] [5.000804 5.002592]
[-0.11225332, -0.15042527] [5.000159 5.000512]
[-0.074835554, -0.1002835] [5.0000315 5.000101 ]
[-0.074835554, -0.1002835]


In [37]:
tf.reset_default_graph()

## Gradient Descent

In [38]:
def GradientDescent(X, f, num_iterations=100, lr=0.1):
    sess = tf.Session()
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(f)
    sess.run(tf.initialize_all_variables())
    X_opt = None
    for i in range(num_iterations):
        X_opt, _, cost = sess.run([X, optimizer, f])
        print(X_opt, cost)
    sess.close()
    return X_opt

### Test

In [39]:
x = tf.get_variable("x", initializer=np.random.random(2) * 10.24 - 5.12)

f2 = tf.pow(x, 2) + 5

print(GradientDescent(x, f2, 10, 0.1))

[ 3.89463273 -1.58813562] [28.70025659  8.94089806]
[ 3.11570617 -1.27050849] [20.1681641   7.52217474]
[ 2.49256493 -1.01640679] [14.70762495  6.61419182]
[ 1.99405194 -0.81312543] [11.21287992  6.03308276]
[ 1.59524154 -0.65050034] [8.97624312 5.66117296]
[ 1.27619323 -0.52040027] [7.54479558 5.42315069]
[ 1.02095458 -0.41632021] [6.62866916 5.27081644]
[ 0.81676366 -0.33305617] [6.04234825 5.17332252]
[ 0.65341093 -0.26644494] [5.66710288 5.11092641]
[ 0.52272874 -0.21315595] [5.42694584 5.0709929 ]
[ 0.52272874 -0.21315595]


In [40]:
tf.reset_default_graph()

## Gradient Descent With Momentum

In [41]:
def GradientDescentWithMomentum(X, f, num_iterations=100, lr=0.1, betha=0.4):
    sess = tf.Session()
    optimizer = tf.train.MomentumOptimizer(learning_rate=lr, momentum=betha).minimize(f)
    sess.run(tf.initialize_all_variables())
    X_opt = None
    for i in range(num_iterations):
        X_opt, _, cost = sess.run([X, optimizer, f])
        print(X_opt, cost)
    sess.close()
    return X_opt

### Test

In [42]:
X1 = tf.get_variable("X1", initializer=np.random.random(2) * 10.24 - 5.12)

f3 = tf.pow(X1, 2) + 5

print(GradientDescentWithMomentum(X1, f3, 10, 0.1, 0.4))

[1.58919709 0.09151493] [8.94616782 5.01308591]
[1.11243795 0.06406045] [7.52554739 5.00837498]
[0.6992467  0.04026657] [6.2375182  5.00410374]
[0.39412086 0.0226957 ] [5.48894595 5.0016214 ]
[0.19324634 0.01112821] [5.15533125 5.00051509]
[0.07424727 0.00427558] [5.03734415 5.00012384]
[0.01179818 0.00067941] [5.00551266 5.00001828]
[-0.01554109 -0.00089494] [5.0001392  5.00000046]
[-0.02336858 -0.00134569] [5.00024153 5.0000008 ]
[-0.02182586 -0.00125686] [5.00054609 5.00000181]
[-0.02182586 -0.00125686]


In [118]:
tf.reset_default_graph()

## Hill Climbing

In [78]:
alpha = 2.0

    def powerLawGenerator(x):
        dir = np.random.choice([-1,1])
        return dir * (1.0 - x)**(1 - alpha)

In [79]:
def HillClimbingStep(x, f, delta, powerLaw):
    n = len(x)
    y = np.array(x)
    fx = f(x)
    if not powerLaw:
        y += delta * np.random.randn(n)
    else:
        y += delta * powerLawGenerator(np.random.randn(n))
    fx_prime = f(y)
    return (y if fx_prime <= fx else x)

In [80]:
def HillClimbing(x, f, num_iterations=100, delta=0.6, powerLaw=True):
    for i in range(num_iterations):
        x_next = HillClimbingStep(x, f, delta, powerLaw)
        x = x_next
        print(x, f(x))
    return x

### Test

In [83]:
X = np.random.random(2) * 10.24 - 5.12

f4 = lambda x: np.sum(x**2) + 5

print(HillClimbing(X, f4))

[3.65677956 1.03322677] 19.439594331388676
[3.65677956 1.03322677] 19.439594331388676
[3.34126517 0.8068125 ] 16.814999336917182
[2.93513291 0.56513853] 13.934386764298297
[2.93513291 0.56513853] 13.934386764298297
[2.93513291 0.56513853] 13.934386764298297
[2.47139878 0.18822685] 11.143241261950283
[ 2.20538486 -0.73229751] 10.399982018750142
[ 1.82356526 -0.32961883] 8.43403883555031
[ 1.82356526 -0.32961883] 8.43403883555031
[ 1.46747482 -0.93396761] 8.025777854975306
[ 1.46747482 -0.93396761] 8.025777854975306
[ 1.46747482 -0.93396761] 8.025777854975306
[ 1.46747482 -0.93396761] 8.025777854975306
[-0.13782119 -0.29647545] 5.106892373950549
[-0.13782119 -0.29647545] 5.106892373950549
[-0.13782119 -0.29647545] 5.106892373950549
[-0.13782119 -0.29647545] 5.106892373950549
[-0.13782119 -0.29647545] 5.106892373950549
[-0.13782119 -0.29647545] 5.106892373950549
[-0.13782119 -0.29647545] 5.106892373950549
[-0.13782119 -0.29647545] 5.106892373950549
[-0.13782119 -0.29647545] 5.106892373950

## Simulated Annealing

In [128]:
def SimulatedAnnealing(x, f, t_max=1000, delta=0.6, powerLaw=True):
    for t in range(1,t_max):
        T = t / t_max
        x_next = np.array(x)
        if not powerLaw:
            x_next += delta * np.random.randn(len(x))
        else:
            x_next += delta * powerLawGenerator(np.random.randn(len(x)))
        DE = f(x_next) - f(x)
        q = min(1,np.exp(-DE/T))
        if DE < 0:
            x = x_next
        elif T != 0 and np.random.random() < q:
            x = x_next
        print(x, f(x))
        
    return x
        

In [129]:
X = np.random.random(2) * 10.24 - 5.12

f4 = lambda x: np.sum(x**2) + 5

print(SimulatedAnnealing(X, f4, t_max=100, delta=1, powerLaw=False))

[-0.25672219 -1.209186  ] 6.528037072705448
[-0.25672219 -1.209186  ] 6.528037072705448
[ 1.02931064 -0.37803281] 6.202389191216779
[ 0.69722451 -0.57459376] 5.816280016232588
[-0.17200824  0.05284908] 5.032379859679148
[-0.08558664 -0.12932628] 5.024050358594083
[-0.08558664 -0.12932628] 5.024050358594083
[-0.08558664 -0.12932628] 5.024050358594083
[-0.08558664 -0.12932628] 5.024050358594083
[-0.08558664 -0.12932628] 5.024050358594083
[-0.08558664 -0.12932628] 5.024050358594083
[-0.08558664 -0.12932628] 5.024050358594083
[-0.08558664 -0.12932628] 5.024050358594083
[-0.08558664 -0.12932628] 5.024050358594083
[-0.08558664 -0.12932628] 5.024050358594083
[0.17282324 0.28944834] 5.113648215238225
[0.17282324 0.28944834] 5.113648215238225
[0.17282324 0.28944834] 5.113648215238225
[0.17282324 0.28944834] 5.113648215238225
[0.17282324 0.28944834] 5.113648215238225
[0.17282324 0.28944834] 5.113648215238225
[0.17282324 0.28944834] 5.113648215238225
[0.29977796 0.12296171] 5.1049864082278
[0.299

# Rastrigin
On an n-dimensional domain it is defined by:
$$f(x) = Ad + \sum_{i=1}^{d}x_i-Acos(2\pi x_i)$$

## 2-Dimensions  

In [30]:
tf.train.MomentumOptimizer?

In [12]:
x = np.random.random(2) * 10.24 - 5.12
opt = Optimization()
print(opt.hillClimbing(x, 0.6, False))

(array([-3.36011995,  4.9642166 ]), 52.565585783768)
(array([-3.36011995,  4.9642166 ]), 52.565585783768)
(array([-3.36011995,  4.9642166 ]), 52.565585783768)
(array([-3.36011995,  4.9642166 ]), 52.565585783768)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.11918753,  5.06263097]), 38.79938351101021)
(array([-3.1191875

(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462623855)
(array([0.00957181, 0.00732803]), 0.028822926462

In [113]:
np.exp(-1)

0.36787944117144233