In [1]:
using LinearAlgebra, Plots

In [2]:
function infty_norm(x)
    max = 0;
    for i in eachindex(x)
        if( max < abs(x[i]) )
            max = abs(x[i]);
        end
    end

    return max;
end

infty_norm (generic function with 1 method)

In [3]:
function gradient_descent(a::Float64, s::Float64, ϵ::Float64,
    M::Int64  , x::Vector{Float64}, λ::Float64, k1::Int64, 
    k2::Int64, flag::Bool)
    k     = 0; # COUNTER
    f, ∇f = loss_function(x, λ, k1, k2);
    μ     = norm(∇f, Inf);
    x_ant = 0;
    ∇f_ant= 0;

    while( (μ >= ϵ) && (k < M) )
        t_k = 1;
        if( flag == true && k != 0 )
            t_k = norm(x - x_ant, 2) / norm(∇f - ∇f_ant, 2);
        end

        armijo = a * dot(∇f, ∇f);
        while( loss_function(x - t_k * ∇f, λ, k1, k2)[1] > (f - t_k*armijo) )
            t_k = s * t_k;
        end

        x_ant  = x;
        ∇f_ant = ∇f;
       
        x      = x - t_k * ∇f;
        f, ∇f  = loss_function(x, λ, k1, k2);
        
        μ      = norm(∇f, Inf);
        k      += 1;
    end

    #println(k);
    return x;
end

gradient_descent (generic function with 1 method)

In [13]:
# Primeiro preciso ler os dados do problema:
global u₁, u₂, w;
u₁ = Float64[];
u₂ = Float64[];
w  = Float64[];

filepath = "dado_treino";
file     = open(filepath);
for line in eachline(file)
    row = parse.( Float64, split(line) );
    push!( u₁, row[1] );
    push!( u₂, row[2] );
    push!( w , row[3] );
end

In [14]:
# Função de Ativação do problema!
function ϕ(x::Float64, k::Int64)
    if( k == 1 )
        f = x;
        g = 1;
    elseif( k == 2 )
        aux = 2 / π;
        f = aux * atan(x);
        g = a / ( 1 + x * x );
    elseif( k == 3 )
        f = tanh(x);
        g = 1 - f * f;
    elseif( k == 4 )
        aux = sqrt( 1 + x * x );
        f = 0.5 * ( x + aux );
        g = f / aux;
    end

    return f, g
end

ϕ (generic function with 1 method)

In [15]:
function f(x::Vector{Float64}, u::Vector{Float64}, k1::Int64, k2::Int64)
    # k1 : define a função de ativação da primeira camada.
    # k2 : define a função de ativação da segunda camada.
    func = 0;
    grad = zeros(length(x));

    b = x[1] * u[1] + x[2] * u[2] + x[5];
    c = x[3] * u[1] + x[4] * u[2] + x[6];
    ϕ_b, ϕ_b_prime = ϕ(b, k1);
    ϕ_c, ϕ_c_prime = ϕ(c, k1);


    a = x[7] * ϕ_b + x[8] * ϕ_c + x[9];
    ϕ_a, ϕ_a_prime = ϕ(a, k2);

    func = ϕ_a;
    
    grad[9] = 1;
    grad[8] = ϕ_c;
    grad[7] = ϕ_b;
    grad[6] = x[8] * ϕ_c_prime;
    grad[5] = x[7] * ϕ_b_prime;
    grad[4] = x[8] * ϕ_c_prime * u[2];
    grad[3] = x[8] * ϕ_c_prime * u[1];
    grad[2] = x[7] * ϕ_b_prime * u[2];
    grad[1] = x[7] * ϕ_b_prime * u[1];

    grad = ϕ_a_prime * grad;

    return func, grad;
end

f (generic function with 1 method)

In [16]:
function loss_function( x::Vector{Float64}, λ::Float64, k1::Int64, k2::Int64 )
    func = 0;
    grad = zeros(9);
    N = length(u₁);

    for i in 1:N
        u = [u₁[i]; u₂[i]];
        func_aux, grad_aux = f(x, u, k1, k2);
        aux = func_aux - w[i];

        func = func + func_aux ^ 2;
        grad = grad + aux * grad_aux;
    end

    
    func = (1 / (2*N))*func + λ/2 * dot(x, x);
    grad = (1/N)*grad + λ*x;
    
    return func, grad
end

loss_function (generic function with 1 method)

In [17]:
function loop_cost()
    M = 2000;
    x_best = zeros(9);
    best_cost = Inf;
    for i = 1:M
        x_testing = (2 * rand(9) .- 1) * 10;
        cost_value = loss_function(x_testing, 0.0, 4, 1)[1];
        if( cost_value < best_cost )
            x_best = x_testing;
            best_cost = cost_value
        end
    end
    
    return x_best;
end

loop_cost (generic function with 1 method)

In [18]:
x_inicial = loop_cost()

9-element Vector{Float64}:
 -5.656207606734038
  9.060896440276787
 -2.550243314990157
  0.4361033405519654
  9.513025082240441
 -4.505546960850344
  1.9218412626114434
 -3.429008201348709
  0.012313068464593346

In [19]:
loss_function(x_inicial, 0.0, 4, 1)

(4.302187199352921e-5, [-0.005655289294445748, -0.00012601128516854807, 0.042489137499223506, 0.0009365538737288424, -7.31495803449667e-5, 0.000543925759823697, -0.01568804729698027, -0.03219628107689117, -27.777099426262534])

In [20]:
λ = [0; 0.001; 0.01; 0.1; 1; 10; 100]
for λ_i in λ
    Error = Inf;
    iteracoes = 10;
    x_inicial_list = x_inicial = (2 * rand(iteracoes, 9) .- 1) * 10;
    x_melhor = zeros(9);
    for j in 1:iteracoes
        x_inicial = (2 * rand(9) .- 1) * 10;
        a = 10e-4;
        s = 0.5  ;
        ϵ = 1e-6 ;
        M = 200 ;
        x_final = gradient_descent(a, s, ϵ, M, x_inicial_list[j, :], λ_i, 4, 1, false);
        Error_parcial = loss_function(x_inicial, λ_i, 4,1)[1];
        if( Error_parcial < Error )
             Error = Error_parcial;
             x_melhor = x_final;
        end
        println("λ = $λ_i e iter = $j Feito");
    end
    println("O erro total dos testes para λ = $λ_i foi $Error")
end

λ = 0.0 e iter = 1 Feito
λ = 0.0 e iter = 2 Feito
λ = 0.0 e iter = 3 Feito
λ = 0.0 e iter = 4 Feito
λ = 0.0 e iter = 5 Feito
λ = 0.0 e iter = 6 Feito
λ = 0.0 e iter = 7 Feito
λ = 0.0 e iter = 8 Feito
λ = 0.0 e iter = 9 Feito
λ = 0.0 e iter = 10 Feito
O erro total dos testes para λ = 0.0 foi 36.655421407154684
λ = 0.001 e iter = 1 Feito
λ = 0.001 e iter = 2 Feito
λ = 0.001 e iter = 3 Feito
λ = 0.001 e iter = 4 Feito
λ = 0.001 e iter = 5 Feito
λ = 0.001 e iter = 6 Feito
λ = 0.001 e iter = 7 Feito
λ = 0.001 e iter = 8 Feito
λ = 0.001 e iter = 9 Feito
λ = 0.001 e iter = 10 Feito
O erro total dos testes para λ = 0.001 foi 6.184698069360779
λ = 0.01 e iter = 1 Feito
λ = 0.01 e iter = 2 Feito
λ = 0.01 e iter = 3 Feito
λ = 0.01 e iter = 4 Feito
λ = 0.01 e iter = 5 Feito
λ = 0.01 e iter = 6 Feito
λ = 0.01 e iter = 7 Feito
λ = 0.01 e iter = 8 Feito
λ = 0.01 e iter = 9 Feito
λ = 0.01 e iter = 10 Feito
O erro total dos testes para λ = 0.01 foi 43.85429355298575
λ = 0.1 e iter = 1 Feito
λ = 0.1 e i

In [21]:
x_inicial = (2 * rand(9, 9) .- 1) * 10;
x_inicial[1, :]

9-element Vector{Float64}:
  2.5007762250894405
 -8.825052996453987
  9.708941856680545
  7.1492512897860605
 -5.049544470517651
  7.423026288764298
  3.269604928720833
 -3.520813861466394
  9.705542476396385

In [22]:
#Escolhi o λ = 0.001
function loop_cost()
    M = 2000;
    x_best = zeros(9);
    best_cost = Inf;
    for i = 1:M
        x_testing = (2 * rand(9) .- 1);
        cost_value = loss_function(x_testing, 0.001, 4, 1)[1];
        if( cost_value < best_cost )
            x_best = x_testing;
            best_cost = cost_value
        end
    end
    
    return x_best;
end

loop_cost (generic function with 1 method)

In [26]:
x_init = loop_cost()

9-element Vector{Float64}:
 -0.7573190799867995
  0.2989379700917085
 -0.21023118696713472
 -0.6578006856025655
 -0.05407835057654342
  0.040912187939805555
 -0.5206239934863017
  0.10313957291336306
 -0.006717457615087241

In [27]:
a = 10e-4;
s = 0.5  ;
ϵ = 1e-10 ;
M = 1000 ;
x_final = gradient_descent(a, s, ϵ, M, x_init, 0.001, 4, 1, false);

In [28]:
x_final

9-element Vector{Float64}:
 -0.7573190799867995
  0.2989379700917085
 -0.21023118696713472
 -0.6578006856025655
 -0.05407835057654342
  0.040912187939805555
 -0.5206239934863017
  0.10313957291336306
 -0.006717457615085395

In [37]:
# 0.000741018428845503
loss_function(x_final, 0.001, 4, 1)[1]

0.000741018428845503

In [38]:
# Primeiro preciso ler os dados do problema:
global u_1_teste, u_2_teste, w_teste;
u_1_teste = Float64[];
u_2_teste = Float64[];
w_teste  = Float64[];

filepath = "dado_teste";
file     = open(filepath);
for line in eachline(file)
    row = parse.( Float64, split(line) );
    push!( u_1_teste, row[1] );
    push!( u_2_teste, row[2] );
    push!( w_teste , row[3] );
end

In [30]:
function loss_function_teste( x::Vector{Float64}, λ::Float64, k1::Int64, k2::Int64 )
    func = 0;
    grad = zeros(9);
    N = length(u_1_teste);

    for i in 1:N
        u = [u_1_teste[i]; u_2_teste[i]];
        func_aux, grad_aux = f(x, u, k1, k2);
        aux = func_aux - w_teste[i];

        func = func + func_aux ^ 2;
        grad = grad + aux * grad_aux;
    end

    
    func = (1 / (2*N))*func + λ/2 * dot(x, x);
    grad = (1/N)*grad + λ*x;
    
    return func, grad
end

loss_function_teste (generic function with 1 method)

In [31]:
soma_diffTeste = 0;
soma_wTeste = 0;
previsao = zeros(300)
for i in 1:length(w_teste)
    u_teste = [u_1_teste[i]; u_2_teste[i]];
    previsao[i] = f(x_final, u_teste, 4, 1)[1] 
    soma_diffTeste += (f(x_final, u_teste, 4, 1)[1] - w[i])^2
    soma_wTeste += w[i] * w[i];
end

In [32]:
println(soma_diffTeste/(2*300))
boa = soma_diffTeste / soma_wTeste

422.40309251544545


1.0004883764674926

In [40]:
loss_function_teste(x_final, 0.001, 4, 1)[1]

0.000741018428845503