# Minimização com restrições de igualdade

$$
\min f(x) \qquad \mbox{s.a} \quad Ax = b
$$

Como já vimos, buscamos $(x,\lambda)$ tal que
$$
\nabla f(x) + A^T\lambda = 0 \\
Ax - b = 0
$$
Podemos definir a função Lagrangiana
$$
L(x,\lambda) = f(x) + \lambda^T(Ax-b),
$$
e imaginar que queremos um ponto crítico:
$$
\nabla_{x,\lambda} L(x,\lambda) = 0.
$$
Note, no entanto que $(x,\lambda)$ é um ponto de sela para $L$, de modo que
a busca linear em $L$ não funciona.

Então, vamos utilizar uma função diferente, geralmente da forma
$$
\Phi(x;\mu) = f(x) + \mu P(x),
$$
onde $P$ é uma função de penalidade, isto é, $P(x) \geq 0$ e
$P(x) = 0 \iff Ax = b$.

Um exemplo seria $P(x) = \left\Vert Ax - b\right\Vert$.

In [152]:
using ForwardDiff

In [153]:
# Quadratic test objective f(x) = xᵀx / 2.
function f(x)
    return dot(x, x) / 2
end

# Gradient of f: the point itself.
function g(x)
    return x
end

# Hessian of f: the 2×2 identity (the example lives in two dimensions).
function H(x)
    return eye(2)
end

# Single equality constraint x[1] + x[2] = 1.
A = ones(1, 2)
b = ones(1)

1-element Array{Float64,1}:
 1.0

In [154]:
# Penalty (merit) function: the objective plus μ times the 1-norm of the
# constraint residual A*x - b. Reads the notebook globals f, A and b.
function Φ(x, μ)
    residual = A*x - b
    return f(x) + μ*norm(residual, 1)
end

Φ (generic function with 1 method)

In [155]:
# Starting point for the iteration and initial multiplier estimate (both zero).
x = zeros(2)
λ = zeros(1)

1-element Array{Float64,1}:
 0.0

In [156]:
using Plots
gr()

Plots.GRBackend()

In [157]:
# Contour lines of f on [-1.5, 1.5] × [-0.5, 1.5].
contour(linspace(-1.5,1.5,100), linspace(-0.5,1.5,100), (x,y)->f([x;y]), levels=50)
# Constraint line A[1,1]*x + A[1,2]*y = b[1], solved for y.
plot!(x->(b[1]-A[1,1]*x)/A[1,2], -1.5, 1.5, lw=2, leg=false)
# Current point x, in blue.
scatter!([x[1]], [x[2]], c=:blue)
xlims!(-1.5,1.5)
ylims!(-0.5,1.5)

In [158]:
# Newton step on the optimality conditions: solve
#   [H(x) A'; A 0] * [dx; dλ] = -[g(x) + A'λ; A*x - b].
# The zero block is 1×1 because A has a single row here.
d = -[H(x) A'; A zeros(1,1)]\[g(x) + A'*λ; A*x-b]

3-element Array{Float64,1}:
  0.5
  0.5
 -0.5

In [159]:
# The first two entries of d are the step in x, the third is the step in λ.
dx, dλ = d[1:2], d[3:3]

([0.5,0.5],[-0.5])

In [82]:
# Contour lines of f on [-1.5, 1.5] × [-0.5, 1.5].
contour(linspace(-1.5,1.5,100), linspace(-0.5,1.5,100), (x,y)->f([x;y]), levels=50)
# Constraint line A[1,1]*x + A[1,2]*y = b[1], solved for y.
plot!(x->(b[1]-A[1,1]*x)/A[1,2], -1.5, 1.5, lw=2, leg=false)
# Current point x, in blue.
scatter!([x[1]], [x[2]], c=:blue)
# Point reached by the full step x + dx, in red.
scatter!([x[1]+dx[1]], [x[2]+dx[2]], c=:red)
xlims!(-1.5,1.5)
ylims!(-0.5,1.5)

In [160]:
# Lagrangian of the equality-constrained problem (reads the globals f, A and b).
function L(x, λ)
    return f(x) + dot(λ, A*x - b)
end

# Penalty function Φ restricted to the ray x + t*dx, where x and dx are the
# notebook globals holding the current point and the primal step.
function s(t, μ)
    trial = x + t*dx
    return Φ(trial, μ)
end

s (generic function with 1 method)

In [161]:
# With μ = 0 the penalty term vanishes, so s(t, 0) is f(x + t*dx).
plot(t->s(t,0), 0, 1, lab="f(x)")
# Lagrangian along the combined step in x and λ, for comparison.
plot!(t->L(x+t*dx,λ+t*dλ), 0, 1, c=:red, lab="L(x,lambda)")

In [162]:
# Penalty function along the step for μ = 0.2; the legend states the μ actually plotted.
plot(t->s(t,0.2), 0, 1, lab="Phi(x,0.2)")
#plot!(t->L(x+t*dx,λ+t*dλ), 0, 1, c=:red, lab="L(x,lambda)")

In [163]:
# Directional derivative of Φ(·, μ) at x along v, where
# Φ(x, μ) = f(x) + μ*norm(A*x - b, 1) with the notebook globals f, A and b.
#
# The 1-norm is not differentiable where a residual component is zero, so
# automatic differentiation of Φ can return the wrong one-sided slope there.
# The exact directional derivative of |r_i| along v is
#   sign(r_i) * (A*v)_i   if r_i != 0,
#   |(A*v)_i|             if r_i == 0,
# and only the smooth part f is differentiated with ForwardDiff.
function DΦ(x, v, μ)
    r = A*x - b
    Av = A*v
    penalty_slope = zero(eltype(Av))
    for i in eachindex(r)
        penalty_slope += r[i] == 0 ? abs(Av[i]) : sign(r[i])*Av[i]
    end
    return dot(ForwardDiff.gradient(f, x), v) + μ*penalty_slope
end

DΦ (generic function with 2 methods)

In [164]:
DΦ(x,dx,μ)

-0.2

In [165]:
# Armijo backtracking on the penalty function Φ(·, μ) along dx.
μ = 0.2
t = 1.0
# Neither the reference value nor the slope depends on t: evaluate them once.
Φ0 = Φ(x, μ)
slope = DΦ(x, dx, μ)
# Shrink t by 10% until the sufficient-decrease test holds. The lower bound on t
# stops the loop if dx is not a descent direction for Φ, instead of running forever.
while t > 1e-12 && Φ(x + t*dx, μ) > Φ0 + 0.5*t*slope
    t = t * 0.9
end
t

0.38742048900000015

In [166]:
"""
    newton_method(f, x0, A, b; tol=1e-5, max_iter=1000, max_time=60, μ=10.0, verbose=true)

Minimize `f(x)` subject to `A*x = b`.

Each iteration solves the Newton system of the optimality conditions,

    [H A'; A 0] * [d; dλ] = -[∇f(x) + A'λ; A*x - b],

and then backtracks along `d` until the penalty function
`Φ(x) = f(x) + μ*norm(A*x - b, 1)` satisfies an Armijo sufficient-decrease test.
Gradient and Hessian of `f` are computed with ForwardDiff.

# Keywords
- `tol`: stop when both `norm(∇f(x) + A'λ)` and `norm(A*x - b, 1)` are at most `tol`.
- `max_iter`: maximum number of iterations.
- `max_time`: maximum run time in seconds.
- `μ`: penalty parameter of the merit function.
- `verbose`: print the iterate after every step.

# Returns
`(x, fx, ∇fx, exit_flag, iter, elapsed_time)`, where `exit_flag` is
- `0`: tolerances satisfied,
- `1`: `max_iter` reached,
- `2`: `max_time` reached,
- `3`: the line search could not find an acceptable step.
"""
function newton_method(f, x0, A, b; tol = 1e-5, max_iter = 1000, max_time = 60,
                       μ = 10.0, verbose = true)
    exit_flag = 0
    ∇f(z) = ForwardDiff.gradient(f, z)
    H(z) = ForwardDiff.hessian(f, z)
    (m, n) = size(A)
    merit(z) = f(z) + μ*norm(A*z - b, 1)
    min_step = 1e-12 # smallest step length the backtracking is allowed to try

    x = copy(x0) # work on a copy so the caller's x0 is never aliased
    λ = zeros(m)
    iter = 0
    start_time = time()
    elapsed_time = 0.0
    fx = f(x)
    ∇fx = ∇f(x)
    B = H(x)
    r = A*x - b
    while norm(∇fx + A'*λ) > tol || norm(r, 1) > tol
        # Newton step for (x, λ) from the linearized optimality conditions.
        dtil = -([B A'; A zeros(m, m)] \ [∇fx + A'*λ; r])
        d = dtil[1:n]
        dλ = dtil[n+1:n+m]

        # Exact directional derivative of the merit function along d. The 1-norm
        # has a kink where a residual component is zero, so it is handled by hand
        # rather than by automatic differentiation.
        Ad = A*d
        slope = dot(∇fx, d)
        for i in eachindex(r)
            slope += μ*(r[i] == 0 ? abs(Ad[i]) : sign(r[i])*Ad[i])
        end

        # Armijo backtracking. The merit value at x does not depend on t, and the
        # lower bound on t keeps the loop finite when d is not a descent direction.
        Φx = merit(x)
        t = 1.0
        while t >= min_step && merit(x + t*d) > Φx + 0.5*t*slope
            t = t*0.9
        end
        if t < min_step
            exit_flag = 3
            elapsed_time = time() - start_time
            break
        end

        x = x + t*d
        λ = λ + t*dλ
        fx = f(x)
        ∇fx = ∇f(x)
        B = H(x)
        r = A*x - b
        iter = iter + 1
        verbose && println("x$iter = $x")

        # Refresh the clock before either limit can break out of the loop, so the
        # returned elapsed_time is always current.
        elapsed_time = time() - start_time
        if iter >= max_iter
            exit_flag = 1
            break
        end
        if elapsed_time >= max_time
            exit_flag = 2
            break
        end
    end
    return x, fx, ∇fx, exit_flag, iter, elapsed_time
end

newton_method (generic function with 1 method)

In [167]:
newton_method(f, zeros(2), A, ones(1))

x1 = [0.5,0.5]


([0.5,0.5],0.25,[0.5,0.5],0,1,0.029558181762695312)

In [168]:
# Rosenbrock function in two variables; its unconstrained minimizer is (1, 1).
function f(x)
    u, v = x[1], x[2]
    return (1 - u)^2 + 100*(v - u^2)^2
end

f (generic function with 1 method)

In [169]:
# Single linear constraint -2*x[1] + x[2] = -1; run the solver from the origin.
A = [-2.0 1.0]
b = [-1.0]
x, fx, gx, ef, iter, el_time = newton_method(f, zeros(2), A, b)

x1 = [0.4511221945137157,0.0022443890274314216]
x2 = [0.6589646309066077,0.3179292618132156]
x3 = [0.7758551110018301,0.5517102220036602]
x4 = [0.855368020479061,0.7107360409581219]
x5 = [0.9106940957503774,0.8213881915007547]
x6 = [0.950753812124178,0.901507624248356]
x7 = [0.9805416281922733,0.9610832563845467]
x8 = [0.9975985569147726,0.9951971138295453]
x9 = [0.9999944795211216,0.9999889590422433]
x10 = [0.9999999999999327,0.9999999999998654]


([0.9999999999999327,0.9999999999998654],4.526533177964742e-27,[-1.3455903058456897e-13,0.0],0,10,0.06076693534851074)

In [170]:
x

2-element Array{Float64,1}:
 1.0
 1.0

In [171]:
# Contour lines of f on [-1.5, 1.5] × [-0.5, 1.5].
contour(linspace(-1.5,1.5,100), linspace(-0.5,1.5,100), (x,y)->f([x;y]), levels=50)
# Constraint line A[1,1]*x + A[1,2]*y = b[1], solved for y.
plot!(x->(b[1]-A[1,1]*x)/A[1,2], -1.5, 1.5, lw=2, leg=false)
# Origin (the starting point passed to newton_method above), in blue.
scatter!([0.0], [0.0], c=:blue)
# Point x returned by the solver, in red.
scatter!([x[1]], [x[2]], c=:red)
xlims!(-1.5,1.5)
ylims!(-0.5,1.5)