In [14]:
ENV["COLUMNS"]=72
using Pkg; for p in ("Knet","Plots"); haskey(Pkg.installed(),p) || Pkg.add(p); end
using Base.Iterators: flatten
using Statistics: mean
using AutoGrad
import .Iterators: cycle, Cycle, take, repeat
using Plots; default(fmt=:png,ls=:auto)
import Base: length, size, iterate, eltype, IteratorSize, IteratorEltype, haslength, @propagate_inbounds, repeat, rand, tail
import .Iterators: cycle, Cycle, take, repeat
using Knet: Knet, conv4, pool, adam!, mat, KnetArray, nll, zeroone, progress, sgd, param, param0, dropout, relu, Data, accuracy, progress!;

In [16]:
# Run the MNIST helper script found through Knet.dir, then ask it for the two
# datasets in minibatches of 100 (presumably the train and test splits, going
# by the variable names).
include(Knet.dir("data", "mnist.jl"))
dtrn, dtst = mnistdata(; batchsize=100)

# GPU variant: uncomment the next line to request KnetArray-typed inputs.
#dtrn, dtst = mnistdata(batchsize=100, xtype=Knet.KnetArray);

In [17]:
#Conv + Pool Layer
# Fields: w = convolution weights, b = bias, f = activation broadcast over the
# output, p = dropout probability handed to `dropout` on the layer input.
struct ConvLayer
    w
    b
    f
    p
end

# Forward pass: dropout on the input, convolve with w, add b, pool, and finally
# broadcast the activation f over the pooled result.
function (layer::ConvLayer)(x)
    dropped = dropout(x, layer.p)
    pooled = pool(conv4(layer.w, dropped) .+ layer.b)
    return layer.f.(pooled)
end

# Build a layer from sizes: weights come from param(w1,w2,cx,cy) and the bias
# from param0(1,1,cy,1). The activation defaults to relu, dropout to 0.
function ConvLayer(w1::Int, w2::Int, cx::Int, cy::Int, f=relu; pdrop=0)
    weights = param(w1, w2, cx, cy)
    bias = param0(1, 1, cy, 1)
    return ConvLayer(weights, bias, f, pdrop)
end

In [4]:
#Conv Layer
# Convolution layer without pooling or dropout.
# Fields: w = convolution weights, b = bias, f = activation broadcast over the
# output.
struct Conv4Layer
    w
    b
    f
end

# Forward pass: convolve the input x with w, add b, then broadcast f.
# (The input must be passed to conv4 together with the weights.)
function (c::Conv4Layer)(x)
    return c.f.(conv4(c.w, x) .+ c.b)
end

# Size-based constructor mirroring ConvLayer's: weights from
# param(w1,w2,cx,cy), bias from param0(1,1,cy,1), activation defaults to relu.
function Conv4Layer(w1::Int, w2::Int, cx::Int, cy::Int, f=relu)
    return Conv4Layer(param(w1, w2, cx, cy), param0(1, 1, cy, 1), f)
end

In [5]:
#Dense Layer (Linear)
# Fields: w = weight matrix, b = bias, f = activation broadcast over the
# output, p = dropout probability handed to `dropout` on the layer input.
struct DenseLayer
    w
    b
    f
    p
end

# Forward pass: dropout on the input, reshape with `mat`, multiply by w, add b,
# then broadcast the activation f.
function (layer::DenseLayer)(x)
    flat = mat(dropout(x, layer.p))
    return layer.f.(layer.w * flat .+ layer.b)
end

# Build a layer mapping i inputs to o outputs: weights from param(o,i), bias
# from param0(o). The activation defaults to relu, dropout to 0.
function DenseLayer(i::Int, o::Int, f=relu; pdrop=0)
    weights = param(o, i)
    bias = param0(o)
    return DenseLayer(weights, bias, f, pdrop)
end

#SoftmaxLayer
# Fields: w = weight matrix, b = bias. Calling the layer on x alone returns the
# affine scores w*x .+ b; the loss is computed by the two-argument method.
struct SoftmaxLayer
    w
    b
end

# Scores for input x (no reshaping is done here, so x must already be
# something w can be multiplied with).
function (s::SoftmaxLayer)(x)
    return s.w * x .+ s.b
end

# Loss for one batch: nll of the scores against the targets y.
function (s::SoftmaxLayer)(x, y)
    scores = s(x)
    return nll(scores, y)
end

# Average of the per-batch losses over every (x, y) pair yielded by d.
function (s::SoftmaxLayer)(d::Data)
    return mean(s(inputs, targets) for (inputs, targets) in d)
end

# Build a layer mapping i inputs to o outputs: weights from param(o,i), bias
# from param0(o).
function SoftmaxLayer(i::Int, o::Int)
    return SoftmaxLayer(param(o, i), param0(o))
end

#Chain
# Container holding a sequence of layers. The only constructor is the varargs
# one below, so `layers` is always stored as a tuple of the given arguments.
struct Chain
    layers

    function Chain(layers...)
        return new(layers)
    end
end

# Forward pass: feed x through every layer in order, each layer receiving the
# previous layer's output. An empty chain returns x unchanged.
function (c::Chain)(x)
    return foldl((activation, layer) -> layer(activation), c.layers; init=x)
end

# Loss for one batch: nll of the chain's output against the targets y.
function (c::Chain)(x, y)
    prediction = c(x)
    return nll(prediction, y)
end

# Average of the per-batch losses over every (x, y) pair yielded by d.
function (c::Chain)(d::Data)
    return mean(c(inputs, targets) for (inputs, targets) in d)
end

In [11]:
# Train the chain `c` on `dtrn` with sgd and record the loss curve.
#
# Arguments:
#   c         - model to train (updated in place by the optimizer)
#   dtrn      - training data; cycled so that max_iters+1 batches are consumed
#   dtst      - held-out data, only used to evaluate the loss
#   valid     - record the losses once every `valid` optimizer steps (must be > 0)
#   max_iters - last iteration number on the returned x-axis
#
# Returns (iters, trnloss, tstloss) where iters = 0:valid:max_iters and the two
# loss vectors hold one entry per element of iters.
#
# Throws ArgumentError when `valid` is not positive.
function mytrain!(c::Chain, dtrn, dtst, valid=10, max_iters=500)
    # Fail early: a zero step would otherwise only error after training, when
    # the result range is built (or inside the modulo below).
    valid > 0 || throw(ArgumentError("valid must be positive, got $valid"))

    trnloss = Float64[]
    tstloss = Float64[]

    # Append the model's current average loss on both datasets.
    function snapshot!()
        push!(trnloss, c(dtrn))
        push!(tstloss, c(dtst))
        return nothing
    end

    # Keep elements 1, n+1, 2n+1, ... of itr. Written as (i - 1) % n == 0 so
    # that n == 1 keeps every element; `i % 1 == 1` would never be true.
    takeevery(n, itr) = (x for (i, x) in enumerate(itr) if (i - 1) % n == 0)

    #change the optimizer here: sgd, adam, ... @doc Knet.sgd to see other options :
    # NOTE(review): relies on sgd returning a lazy iterator that performs one
    # update per element consumed, as described in the Knet docs.
    steps = sgd(c, take(cycle(dtrn), max_iters + 1))
    snapshots = (snapshot!() for _ in takeevery(valid, steps))
    progress!(snapshots)
    return 0:valid:max_iters, trnloss, tstloss
end
            
# Print the accuracy of `c` on both datasets, train it with mytrain!, print the
# accuracies again, and return a plot of the recorded training and test losses.
#
# Keywords:
#   valid     - forwarded to mytrain! (loss recording interval)
#   max_iters - forwarded to mytrain! (number of training iterations)
function tgraph(c::Chain, dtrn, dtst; valid=10, max_iters=500)
    # Accuracy before training. Each accuracy call is a full pass over the
    # data, so it is evaluated exactly once per printed line.
    println("Training Accuracy: ", accuracy(c, dtrn))
    println("Test Accuracy: ", accuracy(c, dtst))
    iters, trnloss, tstloss = mytrain!(c, dtrn, dtst, valid, max_iters)

    # Accuracy after training.
    println("Training Accuracy: ", accuracy(c, dtrn))
    println("Test Accuracy: ", accuracy(c, dtst))
    # Per-series attributes must be a row matrix in Plots, hence [:trn :tst].
    return plot(
        iters,
        [trnloss, tstloss];
        labels=[:trn :tst],
        xlabel="iterations",
        ylabel="loss",
    )
end

tgraph (generic function with 1 method)

In [12]:
#Various models to try out
# Every model ends in an `identity` output layer so that nll receives
# unclamped scores (a relu on the last layer would zero out negative logits).
# The DenseLayer input sizes below assume 28x28 single-channel inputs -
# TODO confirm against the data loader.

# One 5x5 conv+pool stage followed directly by the output layer.
simple_model = Chain(ConvLayer(5,5,1,30),
                     DenseLayer(4320,10,identity))

# Two 5x5 conv+pool stages, then two dense layers with dropout 0.3.
lenet = Chain(ConvLayer(5,5,1,20),
              ConvLayer(5,5,20,50),
              DenseLayer(800, 500, pdrop=0.3),
              DenseLayer(500,10,identity,pdrop=0.3))

# Two 3x3 conv+pool stages, then four dense layers without dropout.
simple_model2 = Chain(ConvLayer(3,3,1,30),
                      ConvLayer(3,3,30,50),
                      DenseLayer(1250,700),
                      DenseLayer(700,400, pdrop=0),
                      DenseLayer(400,100, pdrop=0),
                      DenseLayer(100,10,identity,pdrop=0));

In [13]:
#uncomment to train the model and graph loss functions, don't even try if you don't have a gpu 
#tgraph(simple_model, dtrn, dtst, max_iters = 1000)
#tgraph(lenet, dtrn, dtst, max_iters = 1000)
#tgraph(simple_model2, dtrn, dtst, max_iters = 1000)