In [10]:
# network structure rework: split into 3 different structures: network, batch_trainer and batch_tester for preallocation
# the whole run time is faster due to preallocation for evaluation batch (batch_tester)
using LinearAlgebra
using MLDatasets
using Random
using Plots
using Debugger
using CSV
using DataFrames

@inline σ(z) = 1/(1+exp(-z))        #sigmoid function
@inline σ_grad(z) = σ(z)*(1-σ(z))   #grad of sigmoid function

struct network
    num_layers::Int64
    sizearr::Array{Int64,1}
    biases::Array{Array{Float64,1},1}
    weights::Array{Array{Float64,2},1}
end
function network(sizes)
    num_layers = length(sizes)
    sizearr = sizes
    biases = [randn(y) for y in sizes[2:end]]
    weights = [randn(y, x) for (x, y) in zip(sizes[1:end-1], sizes[2:end])]
    network(num_layers, sizearr, biases, weights)
end
function (net::network)(a)
    for (w, b) in zip(net.weights, net.biases)
        a = σ.(w*a .+ b)
    end
    return a
end

struct batch_trainer
    η::Float64
    batch_size::Int64
    ∇_b::Array{Array{Float64,1},1}
    ∇_w::Array{Array{Float64,2},1}
    zs::Array{Array{Float64,2},1}
    activations::Array{Array{Float64,2},1}
    δs::Array{Array{Float64,2},1}
end
function batch_trainer(net::network, batch_size, η)
    sizes = net.sizearr
    ∇_b = [zeros(y) for y in sizes[2:end]]
    ∇_w = [zeros(y, x) for (x, y) in zip(sizes[1:end-1], sizes[2:end])]
    zs = [zeros(y, batch_size) for y in sizes[2:end]]
    activations = [zeros(y, batch_size) for y in sizes[2:end]]
    δs = [zeros(y, batch_size) for y in sizes[2:end]]
    batch_trainer(η, batch_size, ∇_b, ∇_w, zs, activations, δs)
end

struct batch_tester
    batch_size::Int64
    zs::Array{Array{Float64,2},1}
    activations::Array{Array{Float64,2},1}
    δs::Array{Array{Float64,2},1}
end
function batch_tester(net::network, batch_size)
    sizes = net.sizearr
    zs = [zeros(y, batch_size) for y in sizes[2:end]]
    activations = [zeros(y, batch_size) for y in sizes[2:end]]
    δs = [zeros(y, batch_size) for y in sizes[2:end]]
    batch_tester(batch_size, zs, activations, δs)
end

# forward pass for testing
function (tester::batch_tester)(net::network, x)
    activations = tester.activations
    zs = tester.zs
    len = length(activations)

	input = x
    for i in 1:len
        b, w, z = net.biases[i], net.weights[i], zs[i]
        mul!(z, w, input) # z = w * input
        z .+= b
        activations[i] .= σ.(z)
        input = activations[i]
    end
    return activations[end]
end




# forward and backprop for training
function (trainer::batch_trainer)(net::network, x, y)
    ∇_b = trainer.∇_b
    ∇_w = trainer.∇_w
    #println(∇_w)
	len = net.num_layers - 1
    activations = trainer.activations
    zs = trainer.zs
    δs = trainer.δs

    input = x

    for i in 1:len
        b, w, z = net.biases[i], net.weights[i], zs[i]
        mul!(z, w, input) # z = w * input
        z .+= b
        activations[i] .= σ.(z)
        input = activations[i]
    end


    δ = δs[end]
    δ .= (activations[end] .- y) .* σ_grad.(zs[end])

    # println("input1 ", length(input), "przykład ", sum(input))
    # println("δ1 ", length(δ), "przykład ", δ[1], "\n")

    sum!(∇_b[end], δ)


    #backprop, 3 layers = 1 loop (because u only need middle layer to update)
    for l in 1:len-1
        #print(len-1, "\n")
        mul!(∇_w[end-l+1], δ, activations[end-l]') # ∇_w[end-l+1] = δ * activations[end-l]'
        z = zs[end-l]
        mul!(δs[end-l], net.weights[end-l+1]', δ) # δs[end-l] = net.weights[end-l+1]' * δ
        δ = δs[end-l]
        δ .*= σ_grad.(z)
        sum!(∇_b[end-l], δ)
    end


    #println("δ2 ", length(δ), "przykład ", δ[1], "\n")

    #print(size(∇_w), "   /   ", size(∇_w[1]))
    mul!(∇_w[1], δ, x') # ∇_w[1] = δ * x'

    return nothing
end

function update_batch(net::network, trainer::batch_trainer, x, y)

    trainer(net, x, y)

    global coef = trainer.η/size(x,2) 

    for i in 1:length(trainer.∇_b)
        net.biases[i] .-= coef .* trainer.∇_b[i]
    end
    for i in 1:length(trainer.∇_w)
        net.weights[i] .-= coef .* trainer.∇_w[i]
    end
    return x, y
end

function SGDtrain(net::network, trainer::batch_trainer, traindata, epochs, tester, testdata=nothing)

    #println("len?", length(traindata[2]), "     ")

    n_test = testdata != nothing ? size(testdata[1], 2) : nothing
    n = size(traindata[1], 2)

    idx = randperm(n) # one time shuffle for performance, then only take random batch index
    # idx = 1:n
    train_x = traindata[1][:,idx]
    train_y = traindata[2][:,idx]
    test_x, test_y = testdata
	
	batch_size = trainer.batch_size
    # reorganize data in batches
    batch = [(train_x[:, k-batch_size+1 : k], train_y[:, k-batch_size+1 : k]) for k in batch_size:batch_size:n]
    #println(length(batch))

        #println("START")
    get_precision = []
    for j in 1:epochs
        idx = randperm(length(batch))
        #println(length(idx))
        for k in idx
            update_batch(net, trainer, batch[k]...)
        end

        if testdata != nothing
            true_positive = evaluate(tester(net, test_x), test_y) 
            #println(true_positive)
            #println(length(test_y))
            false_positive = true_positive - tester.batch_size
            #println("\n", true_positive, "    ", tester.batch_size,"\n")
            precision = true_positive/ length(test_y)
            push!(get_precision, precision)
            #println("Epoch ", j," with ", "precision: ",precision)
            #println(length(evaluate(tester(net, test_x))), ":::::", length(test_y))
            #println(tester(net, test_x), test_y[1:10])
        else
            #println("Epoch ", j," complete.")
        end
    end
    return get_precision
end

function evaluate(out, y)
    hits = 0
    for i = 1:size(out, 2)
        if (findmax(out[:,i])[2] - 1) == y[i]
            #println(findmax(out[:,i])[2] - 1, "lllllll", y[i])
            hits += 1
        end
    end
    hits
end

function loaddata_lib(rng = 1:60000)

    #print("czas ładowania samych danych bez przekształceń:")
    train_x, train_y = FashionMNIST.traindata(Float64, Vector(rng))
    train_x = reshape(train_x, size(train_x,1)*size(train_x,2), :) # 28 x 28 x N -> 28*28 x N
    train_y = vectorize(train_y)
    test_x, test_y = FashionMNIST.testdata(Float64)
    test_x = reshape(test_x, size(test_x,1)*size(test_x,2), :) # 28 x 28 x N -> 28*28 x N
    #println(size(train_x), size(train_y), size(test_x), size(test_y))
    #println(typeof(train_x), typeof(train_y), typeof(test_x), typeof(test_y))
    return (train_x, train_y), (test_x, test_y)
end

function loaddata_csv(rng = 1:60000)
    
    
    #train_x, train_y = FashionMNIST.traindata(Float64, Vector(rng))


    #print("czas ładowania samych danych bez przekształceń:")
    data_test = CSV.read("C:/Users/krukd/project_julia_net_scrath/fashion_mnist_test.csv", DataFrame)
    data_train = CSV.read("C:/Users/krukd/project_julia_net_scrath/fashion_mnist_train.csv", DataFrame)
    #println("/n")
    #print(typeof(data_test))
    #print(typeof(data_train))
    data_test = Matrix{Float64}(data_test)
    data_train = Matrix{Float64}(data_train)
    tr_m, tr_n = size(data_train)
    #print(tr_m, "  train  ",tr_n, "\n")
    ts_m, ts_n = size(data_test)
    #print(ts_m, "  tesst  ",ts_n, "\n")

    data_test = data_test[1:ts_m,:]' #transponowanie po to, aby każda kolumna to był przykład
    test_y = data_test[1,:]
    test_x = data_test[2:ts_n,:]
    #print(size(data_test), '\n', size(Y_test), '\n', size(X_test))
    test_x = test_x / 255.

    data_train = data_train[1:tr_m, :]'
    train_y = data_train[1, :]
    train_x = data_train[2:tr_n, :]
    train_x = train_x / 255.
    _,m_train = size(train_x)

    #train_x = reshape(train_x, size(train_x,1)*size(train_x,2), :) # 28 x 28 x N -> 28*28 x N
    train_y = vectorize(train_y)
    #test_x, test_y = FashionMNIST.testdata(Float64)
    #test_x = reshape(test_x, size(test_x,1)*size(test_x,2), :) # 28 x 28 x N -> 28*28 x N
    
    #println(size(train_x), size(train_y), size(test_x), size(test_y))
    #println(typeof(train_x), typeof(train_y), typeof(test_x), typeof(test_y))
    return (train_x, train_y), (test_x, test_y)
end

function vectorize(vec)
    N = 10
    len = length(vec)
    mtx = zeros(N, len)
    for i = 1:len
        mtx[Int64(vec[i])+1, i] = 1
    end
    return mtx
end

function main(epochs, batch_size, coefi)
    get_accuracy = []
    epochs = epochs
    batch_size = batch_size
    η = coefi
    net = network([784, 10, 10])
    #println("czas twania loaddata z biblio:")
    #println(epochs, " ", batch_size)
	trainer = batch_trainer(net, batch_size, η)
	tester = batch_tester(net, size(testdata[1],2))
    global prec = SGDtrain(net, trainer, traindata, epochs, tester, testdata)
    # @profiler SGDtrain(net, trainer, traindata, 1, tester, testdata)
end

main (generic function with 1 method)

In [2]:
traindata, testdata = loaddata_csv()

(([0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 1.0 0.0; 0.0 1.0 … 0.0 0.0]), ([0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.00392156862745098 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 8.0, 6.0, 5.0, 0.0  …  7.0, 9.0, 4.0, 8.0, 0.0, 0.0, 6.0, 8.0, 8.0, 1.0]))

In [3]:
traindata, testdata = loaddata_lib();

In [4]:
using UUIDs

In [5]:
function tester(epochs, batch_size, coefi, coef_step, tests)
    plot(titlefontsize = 12)
    for i=1:tests
        if i == 1
            coefi = coefi
        else 
            coefi = coefi + (coef_step * i)
        end
        a = @timed main(epochs, batch_size, coefi);
        plot!(1:epochs, prec, label = string(trunc(Int, round(a.time)), "sec epochs:", epochs, " batch_size:", batch_size, " alpha:", round(coef, digits = 6)), lw = 4)
        println("max prec in last 10 tests was: ", findmax(prec[end-20:end]), " alpha:", coef)
    end

    plot!(size=(1200,600))
    plot!(dpi=120)
    plot!(grid = true)

    png(string("C:/Users/krukd/project_julia_net_scrath/plots_bez_auto/", "tester_", uuid4()))
end

tester (generic function with 1 method)

In [None]:
#epochs, batch_size, coef, coef_step, tests
tester(100, 1, 1.25, 0.05, 5)

In [19]:
#epochs, batch_size, coef, coef_step, tests
tester(100, 5, 1.25, 0.4, 5)

max prec in last 10 tests was: (0.8424, 11) alpha:0.25
max prec in last 10 tests was: (0.8436, 8) alpha:0.41
max prec in last 10 tests was: (0.8315, 5) alpha:0.65
max prec in last 10 tests was: (0.8269, 10) alpha:0.97
max prec in last 10 tests was: (0.8289, 11) alpha:1.3699999999999999


In [20]:
#epochs, batch_size, coef, coef_step, tests
tester(100, 15, 1.25, 0.4, 5)

max prec in last 10 tests was: (0.8398, 9) alpha:0.08333333333333333
max prec in last 10 tests was: (0.84, 1) alpha:0.13666666666666666
max prec in last 10 tests was: (0.8448, 4) alpha:0.21666666666666667
max prec in last 10 tests was: (0.8443, 5) alpha:0.3233333333333333
max prec in last 10 tests was: (0.8399, 9) alpha:0.45666666666666667


In [21]:
#epochs, batch_size, coef, coef_step, tests
tester(100, 50, 1.25, 0.4, 5)

max prec in last 10 tests was: (0.8446, 2) alpha:0.025
max prec in last 10 tests was: (0.8454, 7) alpha:0.040999999999999995
max prec in last 10 tests was: (0.8432, 5) alpha:0.065
max prec in last 10 tests was: (0.8464, 10) alpha:0.09699999999999999
max prec in last 10 tests was: (0.8402, 3) alpha:0.13699999999999998


In [22]:
#epochs, batch_size, coef, coef_step, tests
tester(100, 500, 1.25, 0.4, 5)

max prec in last 10 tests was: (0.815, 9) alpha:0.0025
max prec in last 10 tests was: (0.7965, 10) alpha:0.0040999999999999995
max prec in last 10 tests was: (0.829, 10) alpha:0.0065
max prec in last 10 tests was: (0.8328, 9) alpha:0.009699999999999999
max prec in last 10 tests was: (0.8456, 7) alpha:0.013699999999999999


In [24]:
#epochs, batch_size, coef, coef_step, tests
tester(300, 10000, 1.25, 1, 5)

max prec in last 10 tests was: (0.6655, 11) alpha:0.000125
max prec in last 10 tests was: (0.7678, 7) alpha:0.000325
max prec in last 10 tests was: (0.794, 10) alpha:0.000625
max prec in last 10 tests was: (0.7743, 9) alpha:0.001025
max prec in last 10 tests was: (0.813, 5) alpha:0.001525


: 

In [9]:
# epochs = 11
# batch_size = 5
# coef = 1.25
# a = @timed main(epochs, batch_size, coef);
# plot(1:epochs, prec, title = string(a.time, "sec epochs:", epochs, " batch_size:", batch_size, " alpha:", coef), label = "")
# plot!(titlefontsize = 12)
# println(prec[end-10:end])

Any[7742, 8162, 8237, 8052, 8336, 8342, 8309, 8293, 8396, 8188, 8241]
