In [1]:
"""
    cross_entropy_error(y::Vector, t::Vector)

Return the cross-entropy `-Σᵢ t[i] * log(y[i] + 1e-7)` between the predicted
values `y` and the targets `t` of a single sample.
"""
function cross_entropy_error(y::Vector, t::Vector)
    ϵ = 1e-7  # small offset so that `log` is never evaluated at exactly zero
    log_probs = map(p -> log(p + ϵ), y)
    weighted = t .* log_probs
    return -sum(weighted)
end

"""
    cross_entropy_error(y::Matrix, t::Matrix)

Return the batch-averaged cross-entropy between `y` and `t`.

All entries of `t .* log.(y .+ 1e-7)` are summed, the sum is negated, and the
result is divided by the number of columns of `y`, which is used as the batch
size.
"""
function cross_entropy_error(y::Matrix, t::Matrix)
    ϵ = 1e-7  # small offset so that `log` is never evaluated at exactly zero
    n_samples = size(y, 2)
    elementwise = t .* log.(y .+ ϵ)
    total = sum(elementwise)
    return -total / n_samples
end

"""
    cross_entropy_error(y::Matrix, t::Vector)

Return the batch-averaged cross-entropy when the targets are class labels
instead of one-hot columns.

Column `j` of `y` holds the predicted values of sample `j`, and `t[j]` is that
sample's **zero-based** class label, so the value used for sample `j` is
`y[t[j] + 1, j]`. The result is `-Σⱼ log(y[t[j] + 1, j] + 1e-7) / size(y, 2)`.

Throws `DimensionMismatch` when `length(t)` differs from the number of columns
of `y`; without that check the labels and columns would be paired up only as
far as the shorter one reaches while still dividing by the full batch size.
"""
function cross_entropy_error(y::Matrix, t::Vector)
    δ = 1e-7  # small offset so that `log` is never evaluated at exactly zero
    batch_size = size(y, 2)
    if length(t) != batch_size
        throw(DimensionMismatch(
            "y has $batch_size columns but t has $(length(t)) labels"))
    end
    # For every sample, pick the predicted value of its labelled class.
    picked = [y[t[j] + 1, j] for j in 1:batch_size]
    return -sum(log.(picked .+ δ)) / batch_size
end

cross_entropy_error (generic function with 3 methods)

In [2]:
"""
    softmax(a)

Return the softmax of the collection `a`: `exp.(a .- maximum(a))` divided by
the sum of those exponentials.

Subtracting the maximum leaves the result mathematically unchanged but keeps
`exp` from overflowing when the inputs are large.
"""
function softmax(a)
    c = maximum(a)
    exp_a = exp.(a .- c)
    return exp_a ./ sum(exp_a)
end

"""
    softmax(A::AbstractMatrix)

Apply `softmax` independently to every column of `A` and return a matrix of the
same size with a floating-point element type.
"""
function softmax(A::AbstractMatrix)
    # An explicit column loop is used instead of `mapslices(softmax, A, 1)`:
    # the positional `dims` argument is not accepted by Julia 1.x, whereas this
    # form behaves the same on old and new releases.
    Y = similar(A, float(eltype(A)))
    for j in 1:size(A, 2)
        Y[:, j] = softmax(A[:, j])
    end
    return Y
end

softmax (generic function with 2 methods)

In [3]:
"""
    numerical_gradient(f, x::AbstractArray)

Estimate the gradient of the scalar-valued function `f` at `x` by central
differences, `(f(x + h*eᵢ) - f(x - h*eᵢ)) / 2h` with `h = 1e-4`, one element of
`x` at a time.

`x` is perturbed **in place** while `f` is evaluated, and each element is set
back to its original value before the next one is handled, so `x` is unchanged
once the function returns normally. Because the perturbation happens in place,
`f` also sees it when it reads the same array through another reference.

Returns an array with the same shape and element type as `x`.
"""
function numerical_gradient(f, x::AbstractArray)
    h = 1e-4 # 0.0001
    # `zero(x)` gives a zero-filled array shaped and typed like `x`; unlike the
    # array form `zeros(x)` it is available on both old and current Julia.
    grad = zero(x)

    for idx in eachindex(x)
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x) # f(x+h)

        x[idx] = tmp_val - h
        fxh2 = f(x) # f(x-h)
        grad[idx] = (fxh1 - fxh2) / 2h

        x[idx] = tmp_val # restore the original value
    end
    return grad
end

numerical_gradient (generic function with 1 method)

In [4]:
"""
    function_2(x)

Return the sum of the squares of the first two elements of `x`.
"""
function function_2(x)
    first_sq = x[1]^2
    second_sq = x[2]^2
    return first_sq + second_sq
end

function_2 (generic function with 1 method)

In [5]:
# Gradient of function_2 at (3, 4); analytically (2*x[1], 2*x[2]) = (6, 8).
numerical_gradient(function_2, [3.0, 4.0])

2-element Array{Float64,1}:
 6.0
 8.0

In [6]:
# Gradient of function_2 at (0, 2); analytically (2*x[1], 2*x[2]) = (0, 4).
numerical_gradient(function_2, [0.0, 2.0])

2-element Array{Float64,1}:
 0.0
 4.0

In [7]:
# Gradient of function_2 at (3, 0); analytically (2*x[1], 2*x[2]) = (6, 0).
numerical_gradient(function_2, [3.0, 0.0])

2-element Array{Float64,1}:
 6.0
 0.0

In [8]:
"""
    gradient_descent(f, init_x; lr=0.01, step_num=100)

Run `step_num` steps of plain gradient descent on `f`, starting from `init_x`.
Each step subtracts `lr` times `numerical_gradient(f, x)` from the current
point.

No copy of `init_x` is made: the array passed in is overwritten step by step,
and that same array is returned.
"""
function gradient_descent(f, init_x; lr=0.01, step_num=100)
    current = init_x  # alias, not a copy: the caller's array is updated in place

    for _ in 1:step_num
        current .= current .- lr .* numerical_gradient(f, current)
    end

    return current
end

gradient_descent (generic function with 1 method)

In [9]:
# lr = 0.1: after 100 steps x is within about 1e-9 of the minimum at (0, 0).
init_x = [-3.0, 4.0]
gradient_descent(function_2, init_x; lr=0.1, step_num=100)

2-element Array{Float64,1}:
 -6.11111e-10
  8.14814e-10

In [10]:
# init_x is rebuilt first because gradient_descent overwrites the array it is given.
# lr = 10.0 is too large: the iterates blow up (magnitudes around 1e12 to 1e13).
init_x = [-3.0, 4.0]
gradient_descent(function_2, init_x; lr=10.0, step_num=100)

2-element Array{Float64,1}:
 -2.58984e13
 -1.29525e12

In [11]:
# init_x is rebuilt first because gradient_descent overwrites the array it is given.
# lr = 1e-10 is too small: after 100 steps x has effectively not moved.
init_x = [-3.0, 4.0]
gradient_descent(function_2, init_x; lr=1e-10, step_num=100)

2-element Array{Float64,1}:
 -3.0
  4.0

In [16]:
"""
    SimpleNet()

Network whose only parameter is a weight matrix `W`. The no-argument
constructor fills `W` with a 3×2 matrix of `Float32` values drawn with `randn`.
"""
mutable struct SimpleNet
    W::AbstractMatrix

    # Only constructor: start from random 3×2 Float32 weights.
    SimpleNet() = new(randn(Float32, (3, 2)))
end

In [17]:
"""
    predict(self::SimpleNet, x::AbstractVector)

Return `self.W * x`, the product of the network's weight matrix and the input
vector `x`.
"""
function predict(self::SimpleNet, x::AbstractVector)
    # Read the weights from the network passed in as `self`, not from a global
    # variable, so the result is correct for any SimpleNet instance.
    return self.W * x
end

predict (generic function with 1 method)

In [18]:
"""
    loss(self::SimpleNet, x::AbstractVector, t::AbstractVector)

Return `cross_entropy_error` between the softmax of `predict(self, x)` and the
target vector `t`.
"""
function loss(self::SimpleNet, x::AbstractVector, t::AbstractVector)
    scores = predict(self, x)
    probabilities = softmax(scores)
    return cross_entropy_error(probabilities, t)
end

loss (generic function with 1 method)

In [19]:
# Build the network. W is drawn with randn, so the weights printed below vary
# between runs unless the random number generator is seeded.
net = SimpleNet()

SimpleNet(Float32[0.579896 -1.18736; 1.87794 -0.681736; -1.20426 -0.878191])

In [20]:
# Input with two features, one per column of the 3×2 weight matrix.
x = Float32[0.6, 0.9]

2-element Array{Float32,1}:
 0.6
 0.9

In [21]:
# Raw scores W * x, one per row of W; softmax has not been applied yet.
p = predict(net, x)

3-element Array{Float32,1}:
 -0.720689
  0.513201
 -1.51293 

In [22]:
# One-hot target: the second of the three classes is the correct one.
t = Float32[0, 1, 0]
# Cross-entropy of softmax(predict(net, x)) against t.
loss(net, x, t)

0.3527690704488135

In [23]:
# `f` deliberately ignores its argument: numerical_gradient perturbs the array
# it is given (here net.W) in place, and loss(net, x, t) picks those
# perturbations up because it reads the weights from `net`.
f(W) = loss(net, x, t)
# dW has the same shape as net.W; entry [i, j] estimates ∂loss/∂W[i, j].
dW = numerical_gradient(f, net.W)

3×2 Array{Float32,2}:
  0.122561    0.184054 
 -0.178117   -0.268023 
  0.0551314   0.0835453