In [75]:
using Plots
plotly()

# Toy data set: each row of X is one sample with two input values, and y holds
# one target value per row.
X = [3 5; 5 1; 10 2];
y = [75; 82; 93];
# Random starting weights: w1 (2x3) takes the two input columns to three hidden
# units and w2 (3x1) takes those to a single output, as used by `forward`.
w1 = randn(2,3);
w2 = randn(3,1);

# Mean of `xs` for a caller-supplied element count `n`: sum(xs) / n.
function mu(xs, n)
    total = sum(xs)
    return total / n
end;
# Mean-normalize every element of `xs`: (x - μ) / (max - min).
# xs       - collection of values to rescale
# μ        - value subtracted from each element (typically the mean)
# min, max - bounds whose difference is used as the scale
# Returns a new collection; `xs` itself is not modified.
# NOTE(review): the name collides with the linear-algebra `norm`; confirm no
# caller in this notebook expects that function.
norm(xs, μ, min, max) = map(x -> (x - μ) / (max - min), xs);

# Simple normalization: divide X by its single largest entry (taken over the
# whole matrix, not per column) and divide y by the constant 100.
# Both statements rebind the globals to new floating-point arrays.
X = X./maximum(X);
y = y./100;

# Logistic sigmoid of a scalar: 1 / (1 + exp(-x)).
# Uses `exp` rather than the constant `e`, which is not defined in Base on
# Julia 0.7 and later.
function sigmoid(x)
    return 1 / (1 + exp(-x))
end;

# Derivative of the logistic sigmoid at a scalar `x`.
# Computed as s * (1 - s) with s = 1 / (1 + exp(-x)). This is algebraically the
# same as exp(-x) / (1 + exp(-x))^2, but it avoids the Inf / Inf = NaN that the
# quotient form produces once exp(-x) overflows for very negative x.
function sigmoidPrime(x)
    s = 1 / (1 + exp(-x))
    return s * (1 - s)
end;

# Activation function: applies `sigmoid` to every element of `xs` and returns
# the collection produced by `map`.
function g(xs)
    activations = map(sigmoid, xs)
    return activations
end;

# Activation slope: applies `sigmoidPrime` to every element of `xs` and returns
# the collection produced by `map`.
function gPrime(xs)
    slopes = map(sigmoidPrime, xs)
    return slopes
end;

# Forward pass through the network.
# X  - input matrix
# w1 - weights applied to X
# w2 - weights applied to the hidden activations
# Returns (yhat, z3, z2, a2): the prediction, the output-layer pre-activation,
# the hidden-layer pre-activation and the hidden-layer activation.
function forward(X, w1, w2)
    hidden_input = X * w1
    hidden_output = g(hidden_input)
    output_input = hidden_output * w2
    prediction = g(output_input)

    return prediction, output_input, hidden_input, hidden_output
end;

# Half the sum of squared differences between targets `y` and predictions
# `yhat`. `X` is accepted but not used.
function cost(X, y, yhat)
    residual = y - yhat
    squared = map(r -> r^2, residual)
    return 0.5 * sum(squared)
end;

# Gradients of the regularized cost with respect to both weight matrices
# (backpropagation for the network evaluated by `forward`).
# X, y               - inputs and targets
# yhat, z3, z2, a2   - values returned by `forward`
# lambda             - regularization parameter
# weights1, weights2 - weights the forward pass was run with; they default to
#                      the globals w1 / w2 so 7-argument calls keep working
# Returns (dJdW1, dJdW2), shaped like weights1 and weights2.
function costPrime(X, y, yhat, z3, z2, a2, lambda, weights1 = w1, weights2 = w2)
    # Error signal at the output layer.
    delta3 = -(y - yhat) .* gPrime(z3)
    # a2 has one row per sample, so it is transposed to sum over samples.
    dJdW2 = a2' * delta3 + lambda .* weights2

    # Push the error back through weights2, then scale it element-wise by the
    # slope of the hidden-layer activation.
    delta2 = (delta3 * weights2') .* gPrime(z2)
    dJdW1 = X' * delta2 + lambda .* weights1

    return dJdW1, dJdW2
end;

# Empty stub (returns nothing); presumably intended for a numerical gradient
# check of `costPrime` - TODO implement or remove.
function numericalGradient()

end;

# regularization term: (lambda / 2) * (sum of squared entries of w1 and w2)
function regTerm(lambda, w1, w2)
    return (lambda / 2) * (sum(w1 .^ 2) + sum(w2 .^ 2))
end

# gradient-descent training loop
# w1, w2 - weights
# alpha - learning rate
# lambda - regularization parameter
# history - values of cost J; one value is pushed onto it per update
# j - counter
# steps - counter value at which training stops; updates run while j < steps
# Returns (w1, w2, history). The inputs X and targets y are read from the
# global scope. Written as a loop so that the number of steps is not limited by
# the call stack and a counter already past `steps` returns immediately.
function train(w1, w2, alpha, lambda, history, j, steps)
    while j < steps
        yhat, z3, z2, a2 = forward(X, w1, w2)
        # The cost is recorded for the weights before this step's update.
        J = cost(X, y, yhat) + regTerm(lambda, w1, w2)
        # Pass the current weights explicitly so the gradient is not computed
        # from the globals.
        dJdW1, dJdW2 = costPrime(X, y, yhat, z3, z2, a2, lambda, w1, w2)
        w1 = w1 - alpha .* dJdW1
        w2 = w2 - alpha .* dJdW2
        push!(history, J)
        j += 1
    end

    return w1, w2, history
end;

# Run training with learning rate 0.3 and regularization 0.0001, starting from
# an empty Float64 cost history; the counter starts at 1 and stops at 30.
w1, w2, history = train(w1, w2, 0.3, 0.0001, Float64[], 1, 30);

In [12]:
# plot(history, title="Cost")

In [10]:
# scatter()

$ M = \begin{bmatrix}
       \frac{5}{6} & \frac{1}{6} & 0           \\[0.3em]
       \frac{5}{6} & 0           & \frac{1}{6} \\[0.3em]
       0           & \frac{5}{6} & \frac{1}{6}
     \end{bmatrix} $

In [59]:
# Scratch check: grow an empty Float64 vector one element at a time and print it.
arr = Float64[];
foreach(i -> push!(arr, i), 1:10)
println(arr)

[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]


In [94]:
# Scratch check of the built-in constant π; the cell output below is the result.
π/2 

1.5707963267948966