In [1]:
using Knet

include("../src/data.jl")
include("../src/model.jl")

train! (generic function with 1 method)

In [2]:
# Location of the raw enwik8 text splits and of the cached, pre-batched data.
# NOTE: despite its name, `jld2dir` is the path of a single .jld2 file.
datadir = "../data/enwik8"
jld2dir = "../jld2/enwik8.jld2"

# Bound outside the branch so it exists whether or not the cache is present.
# NOTE(review): the cached branch loads whatever was saved and never looks at
# BATCHSIZE; delete the .jld2 file after changing this value.
BATCHSIZE = 64

if !isfile(jld2dir)
    println("Reading data from directory: $datadir")
    println("Setting batch size to $BATCHSIZE")
    # The vocabulary is constructed from train.txt and passed to all three readers.
    vocab = Vocab("$datadir/train.txt")
    trainfile = TextReader("$datadir/train.txt", vocab)
    validfile = TextReader("$datadir/valid.txt", vocab)
    testfile = TextReader("$datadir/test.txt", vocab)
    dtrn = TextData(trainfile, batchsize=BATCHSIZE)
    ddev = TextData(validfile, batchsize=BATCHSIZE)
    dtst = TextData(testfile, batchsize=BATCHSIZE)
    println("Saving data to $jld2dir")
    # Make sure the target directory exists before writing the cache file.
    mkpath(dirname(jld2dir))
    Knet.save(jld2dir, "dtrn", dtrn, "dtst", dtst, "ddev", ddev)
else
    println("Loading data from $jld2dir")
    # Keys are requested in the same order they are destructured.
    (dtrn, dtst, ddev) = Knet.load(jld2dir, "dtrn", "dtst", "ddev")
    # Recover the vocabulary from the reader stored inside the cached training data.
    vocab = dtrn.src.vocab
end

Loading data from ../jld2/enwik8.jld2


64

In [3]:
@info "Initializing and Training Language Model"

# Hyper-parameters for this run.
epochs, em_size, hidden_size, layers = 12, 1024, 1024, 4
println("embedding size: ", em_size)
println("hidden size: ", hidden_size)
println("layers: ", layers)

println("Collecting training data...")
println("epochs: ", epochs)

# Materialize the training batches once so they can be reshuffled per epoch.
ctrn = collect(dtrn)

# One freshly shuffled pass over `ctrn` per epoch, concatenated into a single
# list. The name `trnx10` is historical: it holds `epochs` passes, not 10.
trnx10 = collect(flatten(shuffle!(ctrn) for _ in 1:epochs))

# `shuffle!` reorders `ctrn` in place, so this is the head of the last shuffle.
# The upper bound is clamped so datasets with fewer than 20 batches do not
# raise a BoundsError.
trnmini = ctrn[1:min(20, length(ctrn))]
dev = collect(ddev)

model = SimpleLSTMModel(em_size, hidden_size, vocab; layers=layers, dropout=0.2)

┌ Info: Initializing and Training Language Model
└ @ Main In[3]:1


embedding size: 1024
hidden size: 1024
layers: 4
Collecting training data...
epochs: 12


SimpleLSTMModel(Embed(P(KnetArray{Float32,2}(1024,206))), LSTM(input=1024,hidden=1024,layers=4,dropout=0.2), Linear(P(KnetArray{Float32,2}(206,1024))), 0.2, Vocab(Dict("54" => 67,"101" => 4,"41" => 52,"65" => 38,"168" => 126,"159" => 175,"228" => 183,"190" => 117,"227" => 96,"88" => 104…), ["<s>", "<unk>", "32", "101", "116", "97", "105", "111", "110", "114"  …  "210", "239", "211", "198", "212", "240", "205", "220", "222", "200"], 2, 1, split))

In [None]:
# Run training over the epoch-repeated batch list `trnx10`. `dev` and `trnmini`
# are passed alongside it, presumably for the periodic dev/train loss reports
# seen in the progress log (confirm in ../src/model.jl).
# `model` is rebound to whatever train! returns.
model = train!(model, trnx10, dev, trnmini)


┣                    ┫ [0.01%, 1/16488, 00:49/226:25:51, 49.44s/i] (trnloss = (5.1319246f0,), trnppl = (169.34273f0,), trnbpc = (7.403802212779256,), devloss = 5.1315694f0, devppl = 169.28258f0, devbpc = 7.403289704479685)
┣                    ┫ [0.11%, 18/16488, 02:03/31:11:03, 4.30s/i] (trnloss = (3.5610683f0,), trnppl = (35.200783f0,), trnbpc = (5.13753557153007,), devloss = 3.5333343f0, devppl = 34.237934f0, devbpc = 5.097523807806837)
┣                    ┫ [0.21%, 35/16488, 03:16/25:41:29, 4.34s/i] (trnloss = (3.5498776f0,), trnppl = (34.80906f0,), trnbpc = (5.121390872162975,), devloss = 3.5233138f0, devppl = 33.89657f0, devbpc = 5.083067290140612)
┣                    ┫ [0.32%, 52/16488, 04:31/23:52:04, 4.39s/i] (trnloss = (3.547924f0,), trnppl = (34.741123f0,), trnbpc = (5.118572420480635,), devloss = 3.518152f0, devppl = 33.722054f0, devbpc = 5.075620441358252)
┣                    ┫ [0.42%, 70/16488, 05:45/22:33:34, 4.10s/i] (trnloss = (3.544971f0,), trnppl = (34.63868f0,),

┣▋                   ┫ [3.74%, 617/16488, 45:01/20:02:54, 4.38s/i] (trnloss = (3.1912284f0,), trnppl = (24.318281f0,), trnbpc = (4.603969372221957,), devloss = 3.177541f0, devppl = 23.987696f0, devbpc = 4.58422266821519)
┣▊                   ┫ [3.85%, 634/16488, 46:15/20:02:47, 4.36s/i] (trnloss = (3.1797414f0,), trnppl = (24.040535f0,), trnbpc = (4.587397123984816,), devloss = 3.1677105f0, devppl = 23.75304f0, devbpc = 4.57004029089444)
┣▊                   ┫ [3.95%, 651/16488, 47:28/20:02:19, 4.31s/i] (trnloss = (3.2154942f0,), trnppl = (24.915602f0,), trnbpc = (4.6389774727009865,), devloss = 3.202185f0, devppl = 24.58619f0, devbpc = 4.6197762976627565)
┣▊                   ┫ [4.05%, 668/16488, 48:42/20:02:13, 4.36s/i] (trnloss = (3.1564322f0,), trnppl = (23.486649f0,), trnbpc = (4.55376901229631,), devloss = 3.1461482f0, devppl = 23.246353f0, devbpc = 4.538932412971676)
┣▊                   ┫ [4.15%, 685/16488, 49:56/20:01:59, 4.34s/i] (trnloss = (3.1376014f0,), trnppl = (23.048515

┣█▍                  ┫ [7.48%, 1233/16488, 01:29:15/19:53:26, 4.32s/i] (trnloss = (2.669386f0,), trnppl = (14.431105f0,), trnbpc = (3.851109814625184,), devloss = 2.679348f0, devppl = 14.575586f0, devbpc = 3.865482060792487)
┣█▌                  ┫ [7.58%, 1250/16488, 01:30:29/19:53:28, 4.36s/i] (trnloss = (2.648749f0,), trnppl = (14.136345f0,), trnbpc = (3.8213372100037195,), devloss = 2.659688f0, devppl = 14.291829f0, devbpc = 3.8371186820121927)
┣█▌                  ┫ [7.68%, 1267/16488, 01:31:42/19:53:15, 4.28s/i] (trnloss = (2.635405f0,), trnppl = (13.948961f0,), trnbpc = (3.8020858160314384,), devloss = 2.6458824f0, devppl = 14.095878f0, devbpc = 3.817201371215771)
┣█▌                  ┫ [7.78%, 1283/16488, 01:32:54/19:53:58, 4.55s/i] (trnloss = (2.6332917f0,), trnppl = (13.919514f0,), trnbpc = (3.7990369075969426,), devloss = 2.644843f0, devppl = 14.081236f0, devbpc = 3.815702026465549)
┣█▌                  ┫ [7.89%, 1301/16488, 01:34:09/19:53:13, 4.15s/i] (trnloss = (2.612478f0,

┣██▏                 ┫ [11.08%, 1827/16488, 02:12:14/19:53:18, 4.13s/i] (trnloss = (2.3090749f0,), trnppl = (10.065109f0,), trnbpc = (3.331290876531138,), devloss = 2.3248134f0, devppl = 10.2247715f0, devbpc = 3.353996714028649)
┣██▏                 ┫ [11.18%, 1844/16488, 02:13:27/19:53:12, 4.30s/i] (trnloss = (2.292919f0,), trnppl = (9.903804f0,), trnbpc = (3.307982755790305,), devloss = 2.3085463f0, devppl = 10.05979f0, devbpc = 3.330528305457212)
┣██▎                 ┫ [11.29%, 1861/16488, 02:14:40/19:53:03, 4.28s/i] (trnloss = (2.2842479f0,), trnppl = (9.818298f0,), trnbpc = (3.2954730817318456,), devloss = 2.3012984f0, devppl = 9.987141f0, devbpc = 3.3200717602847534)
┣██▎                 ┫ [11.38%, 1877/16488, 02:15:52/19:53:33, 4.56s/i] (trnloss = (2.280037f0,), trnppl = (9.777041f0,), trnbpc = (3.2893979665727677,), devloss = 2.2971213f0, devppl = 9.945511f0, devbpc = 3.3140454881985204)
┣██▎                 ┫ [11.49%, 1894/16488, 02:17:06/19:53:30, 4.32s/i] (trnloss = (3.33328

┣██▉                 ┫ [14.90%, 2456/16488, 02:57:44/19:53:11, 4.09s/i] (trnloss = (2.2261589f0,), trnppl = (9.264213f0,), trnbpc = (3.2116683437235114,), devloss = 2.243287f0, devppl = 9.424259f0, devbpc = 3.2363791549647813)
┣██▉                 ┫ [15.00%, 2473/16488, 02:58:58/19:53:14, 4.37s/i] (trnloss = (2.210404f0,), trnppl = (9.119399f0,), trnbpc = (3.1889387726201806,), devloss = 2.2276196f0, devppl = 9.277756f0, devbpc = 3.213775819127184)
┣███                 ┫ [15.10%, 2490/16488, 03:00:12/19:53:11, 4.32s/i] (trnloss = (2.2091758f0,), trnppl = (9.108207f0,), trnbpc = (3.187167007350992,), devloss = 2.2250526f0, devppl = 9.253969f0, devbpc = 3.2100723447235042)
┣███                 ┫ [15.21%, 2508/16488, 03:01:26/19:52:44, 4.11s/i] (trnloss = (2.2000027f0,), trnppl = (9.025038f0,), trnbpc = (3.1739329423670988,), devloss = 2.2195256f0, devppl = 9.202964f0, devbpc = 3.202098541098901)
┣███                 ┫ [15.32%, 2526/16488, 03:02:40/19:52:17, 4.11s/i] (trnloss = (2.1899981

┣███▋                ┫ [18.60%, 3066/16488, 03:41:49/19:52:50, 4.53s/i] (trnloss = (1.9685208f0,), trnppl = (7.160077f0,), trnbpc = (2.8399751391125374,), devloss = 1.9750322f0, devppl = 7.206852f0, devbpc = 2.8493691754679658)
┣███▋                ┫ [18.69%, 3082/16488, 03:43:02/19:53:08, 4.55s/i] (trnloss = (1.9613196f0,), trnppl = (7.108701f0,), trnbpc = (2.829586011139217,), devloss = 1.9686288f0, devppl = 7.1608505f0, devbpc = 2.840130955394219)
┣███▊                ┫ [18.80%, 3099/16488, 03:44:16/19:53:11, 4.37s/i] (trnloss = (1.9516195f0,), trnppl = (7.04008f0,), trnbpc = (2.8155917828385104,), devloss = 1.9580419f0, devppl = 7.0854397f0, devbpc = 2.8248573481537442)
┣███▊                ┫ [18.90%, 3117/16488, 03:45:31/19:52:52, 4.15s/i] (trnloss = (1.9429457f0,), trnppl = (6.9792795f0,), trnbpc = (2.8030781531790816,), devloss = 1.948757f0, devppl = 7.0199566f0, devbpc = 2.8114621354259928)
┣███▊                ┫ [19.01%, 3134/16488, 03:46:44/19:52:49, 4.30s/i] (trnloss = (1.93

┣████▍               ┫ [22.20%, 3660/16488, 04:24:46/19:52:46, 4.28s/i] (trnloss = (1.7494001f0,), trnppl = (5.751152f0,), trnbpc = (2.5238509048565443,), devloss = 1.75054f0, devppl = 5.7577114f0, devbpc = 2.5254954029640944)
┣████▍               ┫ [22.30%, 3676/16488, 04:25:59/19:53:02, 4.55s/i] (trnloss = (1.7438271f0,), trnppl = (5.7191896f0,), trnbpc = (2.515810715928708,), devloss = 1.7431867f0, devppl = 5.7155285f0, devbpc = 2.514886825128273)
┣████▍               ┫ [22.40%, 3694/16488, 04:27:14/19:52:46, 4.15s/i] (trnloss = (1.7362455f0,), trnppl = (5.675993f0,), trnbpc = (2.504872791316484,), devloss = 1.7379022f0, devppl = 5.685404f0, devbpc = 2.507262834215525)
┣████▌               ┫ [22.51%, 3712/16488, 04:28:28/19:52:29, 4.13s/i] (trnloss = (1.7280636f0,), trnppl = (5.6297417f0,), trnbpc = (2.4930687620745156,), devloss = 1.7305932f0, devppl = 5.644001f0, devbpc = 2.496718233925824)
┣████▌               ┫ [22.62%, 3729/16488, 04:29:42/19:52:29, 4.33s/i] (trnloss = (1.72671

┣█████▏              ┫ [25.91%, 4272/16488, 05:08:56/19:52:21, 4.27s/i] (trnloss = (1.5813792f0,), trnppl = (4.861656f0,), trnbpc = (2.281447893806148,), devloss = 1.5858155f0, devppl = 4.883272f0, devbpc = 2.287848228158008)
┣█████▏              ┫ [26.01%, 4289/16488, 05:10:10/19:52:22, 4.35s/i] (trnloss = (1.5819095f0,), trnppl = (4.8642354f0,), trnbpc = (2.282213044619837,), devloss = 1.5827683f0, devppl = 4.8684144f0, devbpc = 2.2834520076366527)
┣█████▏              ┫ [26.12%, 4306/16488, 05:11:23/19:52:20, 4.31s/i] (trnloss = (1.5792463f0,), trnppl = (4.851298f0,), trnbpc = (2.278370780216911,), devloss = 1.5788472f0, devppl = 4.849362f0, devbpc = 2.277794982301822)
┣█████▏              ┫ [26.22%, 4323/16488, 05:12:38/19:52:22, 4.36s/i] (trnloss = (1.5739677f0,), trnppl = (4.8257575f0,), trnbpc = (2.2707553884367058,), devloss = 1.5746027f0, devppl = 4.828823f0, devbpc = 2.271671540017852)
┣█████▎              ┫ [26.32%, 4340/16488, 05:13:51/19:52:22, 4.35s/i] (trnloss = (1.56994

┣█████▉              ┫ [29.66%, 4890/16488, 05:53:06/19:50:35, 4.25s/i] (trnloss = (1.4732442f0,), trnppl = (4.363368f0,), trnbpc = (2.125442087243193,), devloss = 1.4781263f0, devppl = 4.384722f0, devbpc = 2.1324854647266283)
┣█████▉              ┫ [29.77%, 4908/16488, 05:54:20/19:50:23, 4.13s/i] (trnloss = (1.4674785f0,), trnppl = (4.3382826f0,), trnbpc = (2.1171239743515624,), devloss = 1.4723085f0, devppl = 4.3592873f0, devbpc = 2.1240921954165706)
┣█████▉              ┫ [29.86%, 4924/16488, 05:55:33/19:50:35, 4.55s/i] (trnloss = (1.46616f0,), trnppl = (4.3325663f0,), trnbpc = (2.1152218462330197,), devloss = 1.4709773f0, devppl = 4.353488f0, devbpc = 2.1221716651543856)
┣█████▉              ┫ [29.97%, 4941/16488, 05:56:46/19:50:33, 4.30s/i] (trnloss = (1.4631684f0,), trnppl = (4.319624f0,), trnbpc = (2.110905769626967,), devloss = 1.4695451f0, devppl = 4.347257f0, devbpc = 2.12010546558692)
┣██████              ┫ [30.07%, 4958/16488, 05:58:00/19:50:32, 4.32s/i] (trnloss = (1.46007

┣██████▋             ┫ [33.36%, 5501/16488, 06:37:18/19:50:48, 4.32s/i] (trnloss = (1.3857664f0,), trnppl = (3.9978886f0,), trnbpc = (1.9992382943349993,), devloss = 1.396789f0, devppl = 4.0421996f0, devbpc = 2.015140498164378)
┣██████▋             ┫ [33.47%, 5518/16488, 06:38:31/19:50:46, 4.30s/i] (trnloss = (1.383032f0,), trnppl = (3.9869716f0,), trnbpc = (1.995293356289508,), devloss = 1.3935599f0, devppl = 4.029168f0, devbpc = 2.010482004100457)
┣██████▋             ┫ [33.57%, 5535/16488, 06:39:45/19:50:47, 4.36s/i] (trnloss = (1.3811171f0,), trnppl = (3.9793446f0,), trnbpc = (1.992530798968699,), devloss = 1.3916662f0, devppl = 4.021545f0, devbpc = 2.0077498877088504)
┣██████▋             ┫ [33.67%, 5552/16488, 06:40:58/19:50:45, 4.29s/i] (trnloss = (1.3778826f0,), trnppl = (3.966494f0,), trnbpc = (1.9878643937028384,), devloss = 1.3891522f0, devppl = 4.0114474f0, devbpc = 2.004122945584805)
┣██████▊             ┫ [33.78%, 5569/16488, 06:42:12/19:50:47, 4.38s/i] (trnloss = (1.3783

┣███████▍            ┫ [37.01%, 6102/16488, 07:20:19/19:49:45, 4.29s/i] (trnloss = (1.3263782f0,), trnppl = (3.767374f0,), trnbpc = (1.913559289397562,), devloss = 1.3398496f0, devppl = 3.818469f0, devbpc = 1.9329943608409643)
┣███████▍            ┫ [37.12%, 6120/16488, 07:21:33/19:49:35, 4.13s/i] (trnloss = (1.323769f0,), trnppl = (3.757557f0,), trnbpc = (1.9097949331354767,), devloss = 1.3364981f0, devppl = 3.8056931f0, devbpc = 1.9281592405946073)
┣███████▍            ┫ [37.23%, 6138/16488, 07:22:47/19:49:25, 4.13s/i] (trnloss = (1.3238728f0,), trnppl = (3.757947f0,), trnbpc = (1.9099447300243781,), devloss = 1.3369112f0, devppl = 3.8072655f0, devbpc = 1.928755160479847)
┣███████▍            ┫ [37.33%, 6155/16488, 07:24:01/19:49:26, 4.34s/i] (trnloss = (1.3224992f0,), trnppl = (3.7527885f0,), trnbpc = (1.907962973938486,), devloss = 1.3355863f0, devppl = 3.8022246f0, devbpc = 1.9268437452981577)
┣███████▍            ┫ [37.43%, 6172/16488, 07:25:15/19:49:27, 4.36s/i] (trnloss = (1.32

┣████████▏           ┫ [40.65%, 6703/16488, 08:03:19/19:48:51, 4.30s/i] (trnloss = (1.2802805f0,), trnppl = (3.5976486f0,), trnbpc = (1.8470542862395172,), devloss = 1.2938073f0, devppl = 3.6466439f0, devbpc = 1.866569329615571)
┣████████▏           ┫ [40.75%, 6719/16488, 08:04:31/19:48:59, 4.53s/i] (trnloss = (1.2772784f0,), trnppl = (3.5868645f0,), trnbpc = (1.8427232471428392,), devloss = 1.293134f0, devppl = 3.6441894f0, devbpc = 1.8655979716034978)
┣████████▏           ┫ [40.85%, 6736/16488, 08:05:45/19:48:59, 4.32s/i] (trnloss = (1.2773902f0,), trnppl = (3.5872655f0,), trnbpc = (1.8428845668693483,), devloss = 1.2928644f0, devppl = 3.6432073f0, devbpc = 1.865209118829897)
┣████████▏           ┫ [40.96%, 6753/16488, 08:06:57/19:48:56, 4.27s/i] (trnloss = (1.2757728f0,), trnppl = (3.581468f0,), trnbpc = (1.8405511062624418,), devloss = 1.2904092f0, devppl = 3.6342735f0, devbpc = 1.8616669641527266)
┣████████▏           ┫ [41.06%, 6770/16488, 08:08:11/19:48:57, 4.35s/i] (trnloss = (

┣████████▊           ┫ [44.27%, 7299/16488, 08:46:12/19:48:39, 4.27s/i] (trnloss = (1.2409623f0,), trnppl = (3.4589403f0,), trnbpc = (1.7903301084186913,), devloss = 1.260585f0, devppl = 3.5274844f0, devbpc = 1.8186396566292657)
┣████████▊           ┫ [44.37%, 7316/16488, 08:47:25/19:48:38, 4.30s/i] (trnloss = (1.2388818f0,), trnppl = (3.4517517f0,), trnbpc = (1.7873286671958335,), devloss = 1.2578939f0, devppl = 3.5180044f0, devbpc = 1.8147573202686886)
┣████████▉           ┫ [44.47%, 7333/16488, 08:48:39/19:48:40, 4.37s/i] (trnloss = (1.2369509f0,), trnppl = (3.445093f0,), trnbpc = (1.7845428922171578,), devloss = 1.2559917f0, devppl = 3.5113187f0, devbpc = 1.8120129931088709)
┣████████▉           ┫ [44.58%, 7351/16488, 08:49:54/19:48:33, 4.15s/i] (trnloss = (1.2376323f0,), trnppl = (3.447441f0,), trnbpc = (1.7855259450494896,), devloss = 1.2575423f0, devppl = 3.5167675f0, devbpc = 1.8142499714486433)
┣████████▉           ┫ [44.69%, 7368/16488, 08:51:08/19:48:34, 4.36s/i] (trnloss = 

┣█████████▌          ┫ [47.88%, 7895/16488, 09:29:16/19:48:51, 4.36s/i] (trnloss = (1.2064091f0,), trnppl = (3.3414643f0,), trnbpc = (1.7404804211181537,), devloss = 1.2295617f0, devppl = 3.4197302f0, devbpc = 1.7738825476034266)
┣█████████▌          ┫ [47.99%, 7912/16488, 09:30:29/19:48:51, 4.34s/i] (trnloss = (1.2041615f0,), trnppl = (3.3339624f0,), trnbpc = (1.7372378602187863,), devloss = 1.2270541f0, devppl = 3.4111657f0, devbpc = 1.7702648925425275)
┣█████████▌          ┫ [48.09%, 7929/16488, 09:31:44/19:48:53, 4.38s/i] (trnloss = (1.2051309f0,), trnppl = (3.3371959f0,), trnbpc = (1.7386364231356026,), devloss = 1.2284862f0, devppl = 3.4160542f0, devbpc = 1.7723309201273423)
┣█████████▋          ┫ [48.19%, 7946/16488, 09:32:57/19:48:53, 4.32s/i] (trnloss = (1.2050037f0,), trnppl = (3.3367715f0,), trnbpc = (1.738452917647132,), devloss = 1.2282251f0, devppl = 3.4151626f0, devbpc = 1.7719542781219528)
┣█████████▋          ┫ [48.30%, 7963/16488, 09:34:12/19:48:54, 4.36s/i] (trnloss 

┣██████████▎         ┫ [51.52%, 8494/16488, 10:12:09/19:48:17, 4.36s/i] (trnloss = (1.181401f0,), trnppl = (3.258937f0,), trnbpc = (1.704401384672201,), devloss = 1.2069365f0, devppl = 3.343227f0, devbpc = 1.7412412723655708)
┣██████████▎         ┫ [51.62%, 8511/16488, 10:13:23/19:48:18, 4.35s/i] (trnloss = (1.1793242f0,), trnppl = (3.2521753f0,), trnbpc = (1.7014051029288693,), devloss = 1.2043457f0, devppl = 3.3345766f0, devbpc = 1.7375035734143693)
┣██████████▎         ┫ [51.72%, 8528/16488, 10:14:37/19:48:18, 4.35s/i] (trnloss = (1.1778789f0,), trnppl = (3.2474785f0,), trnbpc = (1.6993199852698087,), devloss = 1.2038643f0, devppl = 3.3329718f0, devbpc = 1.7368091074701852)
┣██████████▎         ┫ [51.83%, 8545/16488, 10:15:51/19:48:18, 4.32s/i] (trnloss = (1.177438f0,), trnppl = (3.2460473f0,), trnbpc = (1.6986839934269178,), devloss = 1.2037907f0, devppl = 3.3327262f0, devbpc = 1.736702822191952)
┣██████████▍         ┫ [51.93%, 8562/16488, 10:17:04/19:48:18, 4.31s/i] (trnloss = (1.

┣███████████         ┫ [55.15%, 9093/16488, 10:54:57/19:47:36, 4.10s/i] (trnloss = (1.1529763f0,), trnppl = (3.1676066f0,), trnbpc = (1.6633931534698696,), devloss = 1.1845189f0, devppl = 3.2691138f0, devbpc = 1.7088995909055178)
┣███████████         ┫ [55.25%, 9110/16488, 10:56:10/19:47:35, 4.29s/i] (trnloss = (1.1520771f0,), trnppl = (3.1647596f0,), trnbpc = (1.6620958883344115,), devloss = 1.1845136f0, devppl = 3.2690964f0, devbpc = 1.708891851686229)
┣███████████         ┫ [55.36%, 9127/16488, 10:57:24/19:47:36, 4.33s/i] (trnloss = (1.1518539f0,), trnppl = (3.1640534f0,), trnbpc = (1.6617739368119961,), devloss = 1.1841909f0, devppl = 3.2680416f0, devbpc = 1.7084262946503435)
┣███████████         ┫ [55.45%, 9143/16488, 10:58:37/19:47:42, 4.55s/i] (trnloss = (1.1505226f0,), trnppl = (3.159844f0,), trnbpc = (1.6598532345671604,), devloss = 1.1831709f0, devppl = 3.26471f0, devbpc = 1.7069548110895612)
┣███████████         ┫ [55.56%, 9160/16488, 10:59:51/19:47:44, 4.37s/i] (trnloss = (

┣███████████▊        ┫ [58.76%, 9688/16488, 11:37:56/19:47:48, 4.31s/i] (trnloss = (1.1372392f0,), trnppl = (3.1181479f0,), trnbpc = (1.6406893797641684,), devloss = 1.1676258f0, devppl = 3.2143521f0, devbpc = 1.6845279294517492)
┣███████████▊        ┫ [58.86%, 9705/16488, 11:39:08/19:47:47, 4.28s/i] (trnloss = (1.1380527f0,), trnppl = (3.1206856f0,), trnbpc = (1.6418629893736563,), devloss = 1.1688148f0, devppl = 3.2181761f0, devbpc = 1.686243284411455)
┣███████████▊        ┫ [58.97%, 9723/16488, 11:40:22/19:47:40, 4.10s/i] (trnloss = (1.1372324f0,), trnppl = (3.1181269f0,), trnbpc = (1.6406795767530693,), devloss = 1.1685576f0, devppl = 3.2173488f0, devbpc = 1.6858723178335437)
┣███████████▊        ┫ [59.08%, 9741/16488, 11:41:37/19:47:34, 4.13s/i] (trnloss = (1.1372364f0,), trnppl = (3.118139f0,), trnbpc = (1.6406852521805477,), devloss = 1.1679814f0, devppl = 3.215495f0, devbpc = 1.685040953699273)
┣███████████▊        ┫ [59.18%, 9758/16488, 11:42:50/19:47:34, 4.31s/i] (trnloss = (

┣████████████▍       ┫ [62.38%, 10285/16488, 12:20:59/19:47:53, 4.37s/i] (trnloss = (1.1183882f0,), trnppl = (3.0599182f0,), trnbpc = (1.613493075252629,), devloss = 1.1536171f0, devppl = 3.1696372f0, devbpc = 1.664317732200908)
┣████████████▍       ┫ [62.48%, 10302/16488, 12:22:14/19:47:55, 4.38s/i] (trnloss = (1.1175367f0,), trnppl = (3.0573137f0,), trnbpc = (1.612264603177516,), devloss = 1.152605f0, devppl = 3.166431f0, devbpc = 1.6628575994950827)
┣████████████▌       ┫ [62.58%, 10318/16488, 12:23:27/19:48:01, 4.56s/i] (trnloss = (1.11875f0,), trnppl = (3.0610256f0,), trnbpc = (1.6140150425979978,), devloss = 1.1532567f0, devppl = 3.1684947f0, devbpc = 1.6637976566646988)
┣████████████▌       ┫ [62.69%, 10336/16488, 12:24:41/19:47:55, 4.11s/i] (trnloss = (1.1194422f0,), trnppl = (3.0631452f0,), trnbpc = (1.615013745851558,), devloss = 1.1537734f0, devppl = 3.1701326f0, devbpc = 1.6645432014561892)
┣████████████▌       ┫ [62.80%, 10354/16488, 12:25:55/19:47:49, 4.11s/i] (trnloss = 

┣█████████████▏      ┫ [66.11%, 10901/16488, 13:05:11/19:47:36, 4.38s/i] (trnloss = (1.1028067f0,), trnppl = (3.0126095f0,), trnbpc = (1.591013738906304,), devloss = 1.1410197f0, devppl = 3.1299584f0, devbpc = 1.6461434655709808)
┣█████████████▏      ┫ [66.22%, 10918/16488, 13:06:25/19:47:37, 4.35s/i] (trnloss = (1.1040119f0,), trnppl = (3.0162425f0,), trnbpc = (1.592752483506527,), devloss = 1.141907f0, devppl = 3.1327367f0, devbpc = 1.6474235324413526)
┣█████████████▎      ┫ [66.31%, 10934/16488, 13:07:38/19:47:43, 4.57s/i] (trnloss = (1.1024f0,), trnppl = (3.0113845f0,), trnbpc = (1.59042693410156,), devloss = 1.1398882f0, devppl = 3.1264186f0, devbpc = 1.644511006248991)
┣█████████████▎      ┫ [66.42%, 10951/16488, 13:08:52/19:47:44, 4.38s/i] (trnloss = (1.1039406f0,), trnppl = (3.0160277f0,), trnbpc = (1.592649637881311,), devloss = 1.1409651f0, devppl = 3.1297874f0, devbpc = 1.6460646975168856)
┣█████████████▎      ┫ [66.52%, 10968/16488, 13:10:05/19:47:43, 4.29s/i] (trnloss = (1

┣█████████████▉      ┫ [69.81%, 11510/16488, 13:48:19/19:46:33, 4.33s/i] (trnloss = (1.0915304f0,), trnppl = (2.9788294f0,), trnbpc = (1.5747455559958892,), devloss = 1.1315305f0, devppl = 3.100398f0, devbpc = 1.6324534745796502)
┣█████████████▉      ┫ [69.91%, 11527/16488, 13:49:33/19:46:34, 4.37s/i] (trnloss = (1.0897369f0,), trnppl = (2.9734917f0,), trnbpc = (1.5721580770136583,), devloss = 1.1301976f0, devppl = 3.0962684f0, devbpc = 1.6305305365603533)
┣██████████████      ┫ [70.01%, 11544/16488, 13:50:46/19:46:33, 4.29s/i] (trnloss = (1.0897075f0,), trnppl = (2.9734042f0,), trnbpc = (1.5721155972988952,), devloss = 1.1290736f0, devppl = 3.0927901f0, devbpc = 1.6289089121453677)
┣██████████████      ┫ [70.12%, 11561/16488, 13:51:59/19:46:34, 4.33s/i] (trnloss = (1.0881555f0,), trnppl = (2.9687932f0,), trnbpc = (1.5698765551673122,), devloss = 1.1283164f0, devppl = 3.090449f0, devbpc = 1.6278164783470872)
┣██████████████      ┫ [70.23%, 11579/16488, 13:53:14/19:46:29, 4.13s/i] (trnl

┣██████████████▋     ┫ [73.52%, 12122/16488, 14:31:20/19:45:10, 4.14s/i] (trnloss = (1.0777643f0,), trnppl = (2.9381034f0,), trnbpc = (1.554885171456903,), devloss = 1.1205178f0, devppl = 3.0664418f0, devbpc = 1.6165655453102932)
┣██████████████▋     ┫ [73.63%, 12140/16488, 14:32:34/19:45:05, 4.12s/i] (trnloss = (1.0789423f0,), trnppl = (2.9415665f0,), trnbpc = (1.5565847040127292,), devloss = 1.12141f0, devppl = 3.0691786f0, devbpc = 1.6178526634693504)
┣██████████████▋     ┫ [73.74%, 12158/16488, 14:33:48/19:45:00, 4.12s/i] (trnloss = (1.0787199f0,), trnppl = (2.9409122f0,), trnbpc = (1.556263784386219,), devloss = 1.1203297f0, devppl = 3.065865f0, devbpc = 1.6162941566872315)
┣██████████████▊     ┫ [73.85%, 12176/16488, 14:35:03/19:44:56, 4.14s/i] (trnloss = (1.0787156f0,), trnppl = (2.9408998f0,), trnbpc = (1.556257593010788,), devloss = 1.1198676f0, devppl = 3.0644484f0, devbpc = 1.6156273799498366)
┣██████████████▊     ┫ [73.96%, 12194/16488, 14:36:17/19:44:51, 4.12s/i] (trnloss 

┣███████████████▍    ┫ [77.24%, 12735/16488, 15:14:28/19:43:57, 4.14s/i] (trnloss = (1.065639f0,), trnppl = (2.9026933f0,), trnbpc = (1.537392128107043,), devloss = 1.1124071f0, devppl = 3.0416713f0, devbpc = 1.6048641897108893)
┣███████████████▍    ┫ [77.34%, 12752/16488, 15:15:41/19:43:57, 4.33s/i] (trnloss = (1.0637273f0,), trnppl = (2.8971493f0,), trnbpc = (1.5346340423351563,), devloss = 1.1119312f0, devppl = 3.040224f0, devbpc = 1.604177634968645)
┣███████████████▍    ┫ [77.44%, 12769/16488, 15:16:55/19:43:58, 4.34s/i] (trnloss = (1.0649345f0,), trnppl = (2.900649f0,), trnbpc = (1.5363757106404439,), devloss = 1.1131703f0, devppl = 3.0439935f0, devbpc = 1.6059652226417132)
┣███████████████▌    ┫ [77.55%, 12786/16488, 15:18:08/19:43:57, 4.28s/i] (trnloss = (1.0649874f0,), trnppl = (2.9008026f0,), trnbpc = (1.536452070937427,), devloss = 1.1129047f0, devppl = 3.043185f0, devbpc = 1.6055820452955907)
┣███████████████▌    ┫ [77.65%, 12803/16488, 15:19:21/19:43:58, 4.34s/i] (trnloss =

In [None]:
# Persist the model under the key "model".
# NOTE(review): "1024-4" in the file name presumably mirrors the hidden size and
# layer count of this run; it is hard-coded, so keep it in sync by hand.
Knet.save("../jld2/baseline-1024-4.jld2", "model", model)

In [None]:
# Loss on the test data, plus the two metrics derived from it: perplexity
# (exp of the loss) and bits per character (loss divided by log 2).
testloss = loss(model, dtst)
let ppl = exp.(testloss), bpc = testloss ./ log(2)
    (testloss=testloss, testppl=ppl, testbpc=bpc)
end

In [None]:
# Loss on the dev data, plus the two metrics derived from it: perplexity
# (exp of the loss) and bits per character (loss divided by log 2).
devloss = loss(model, ddev)
let ppl = exp.(devloss), bpc = devloss ./ log(2)
    (devloss=devloss, devppl=ppl, devbpc=bpc)
end

In [None]:
# Generate text from the model starting from the given prompt and keep it in `s`.
# NOTE(review): `maxlength` presumably caps the output length; its unit is not
# visible here, confirm in generate's definition.
s = generate(model, start="Syrian people are", maxlength=1024)