Commit: example fixes
denizyuret committed Aug 20, 2018
1 parent b2f3a27 commit 78d3a96
Showing 39 changed files with 981 additions and 704 deletions.
File renamed without changes.
1 change: 1 addition & 0 deletions .gitignore
@@ -15,6 +15,7 @@ log
*.job
*.h5
*.jld
*.jld2
test1
test2
*.so
18 changes: 15 additions & 3 deletions Project.toml
@@ -1,12 +1,24 @@
name = "Knet"
uuid = "1902f260-5fb4-5aff-8c31-6271790ab950"
author = ["Deniz Yuret <denizyuret@gmail.com>"]
version = "1.0.0"

[deps]
AutoGrad = "6710c13c-97f1-543f-91c5-74e8f7d95b35"
CUDAapi = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3"
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
AutoGrad = "1.0.0"
CUDAapi = "0.5.0"
SpecialFunctions = "0.7.0"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
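The new [compat] section uses Pkg's caret semantics: AutoGrad = "1.0.0" admits any 1.x release at or above 1.0.0, while CUDAapi = "0.5.0" admits only 0.5.x, because 0.x versions treat the minor number as breaking. A minimal sketch of exercising these bounds from the REPL (illustrative, not part of the commit):

using Pkg
Pkg.add("Knet")      # resolver picks versions satisfying the [compat] bounds above
Pkg.status()         # show what was resolved
Pkg.test("Knet")     # runs the test suite with the [targets] extras, i.e. Test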
16 changes: 9 additions & 7 deletions data/cifar.jl
@@ -1,8 +1,10 @@
using Knet

"Where to download cifar from"
cifarurl = "http://www.cs.toronto.edu/~kriz"

"Where to download cifar to"
cifardir = Pkg.dir("Knet","data","cifar")
cifardir = Knet.dir("data", "cifar")

"cifar10() => (xtrn,ytrn,xtst,ytst,labels)"
function cifar10(;
@@ -13,7 +15,7 @@ function cifar10(;
lbl="batches.meta.txt",
)
global _cifar10_xtrn, _cifar10_ytrn, _cifar10_xtst, _cifar10_ytst, _cifar10_lbls
if !isdefined(:_cifar10_xtrn)
if !(@isdefined _cifar10_xtrn)
_cifar10_xtrn, _cifar10_ytrn, _cifar10_xtst, _cifar10_ytst, _cifar10_lbls = _cifar_read_tgz(tgz,dir,trn,tst,lbl)
end
return _cifar10_xtrn, _cifar10_ytrn, _cifar10_xtst, _cifar10_ytst, _cifar10_lbls
@@ -28,7 +30,7 @@ function cifar100(;
lbl="fine_label_names.txt",
)
global _cifar100_xtrn, _cifar100_ytrn, _cifar100_xtst, _cifar100_ytst, _cifar100_lbls
if !isdefined(:_cifar100_xtrn)
if !(@isdefined _cifar100_xtrn)
_cifar100_xtrn, _cifar100_ytrn, _cifar100_xtst, _cifar100_ytst, _cifar100_lbls = _cifar_read_tgz(tgz,dir,trn,tst,lbl)
end
return _cifar100_xtrn, _cifar100_ytrn, _cifar100_xtst, _cifar100_ytst, _cifar100_lbls
@@ -38,7 +40,7 @@ end
cifarview(x,i)=colorview(RGB,permutedims(x[:,:,:,i],(3,2,1)))

function _cifar_read_tgz(tgz,dir,trn,tst,labels)
info("Reading $tgz...")
@info("Reading $tgz...")
if !isdir(cifardir)
mkpath(cifardir)
end
@@ -63,14 +65,14 @@ function _cifar_read_files(dir,files)
x,y = _cifar_read_file(dir,file)
push!(xs,x); push!(ys,y)
end
return cat(4, xs...), vcat(ys...)
return cat(xs..., dims=4), vcat(ys...)
end

function _cifar_read_file(dir,file)
a = read(joinpath(dir,file))
d = contains(dir,"cifar-100") ? 1 : 0
d = occursin("cifar-100",dir) ? 1 : 0
a = reshape(a, (3073+d, div(length(a),3073+d)))
y = a[1+d,:] + 0x1 # first row (second for cifar100) is Int8 index representation of correct answers
y = a[1+d,:] .+ 0x1 # first row (second for cifar100) is Int8 index representation of correct answers
x = a[2+d:end,:] # rows 2:end (3:end for cifar100) give 32,32,3,N images
# y = full(sparse(y,1:length(y),1f0,10,length(y))) # one-hot vector representation
# maybe convert y to int?
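The cifar.jl hunks are the stock Julia 0.6 → 1.0 migrations that recur throughout this commit: isdefined(:x) becomes the macro @isdefined x, info(...) becomes the logging macro @info, the concatenation dimension moves to a keyword (cat(xs..., dims=4)), contains(haystack, needle) becomes occursin(needle, haystack) with the argument order swapped, and scalar-plus-array addition needs an explicit broadcast dot. A standalone sketch of the same idioms (illustrative, not part of the commit):

a, b = rand(UInt8, 32, 32, 3, 2), rand(UInt8, 32, 32, 3, 2)
x = cat(a, b, dims=4)                           # 0.6: cat(4, a, b)
@info "stacked into size $(size(x))"            # 0.6: info(...)
occursin("cifar-100", "data/cifar-100-binary")  # 0.6: contains(dir, "cifar-100")
y = x[1, 1, 1, :] .+ 0x1                        # broadcasting is explicit in 1.0
if !(@isdefined _cifar_cache)                   # 0.6: isdefined(:_cifar_cache)
    _cifar_cache = y
end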
14 changes: 5 additions & 9 deletions data/fashion-mnist.jl
@@ -1,14 +1,10 @@
for p in ("GZip",)
Pkg.installed(p) == nothing && Pkg.add(p)
end

using GZip
using GZip, Knet

"Where to download fmnist from"
fmnisturl = "https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion"

"Where to download fmnist to"
fmnistdir = Pkg.dir("Knet","data","fashion-mnist")
fmnistdir = Knet.dir("data","fashion-mnist")

"""
@@ -36,8 +32,8 @@ descriptions are shown below.

function fmnist()
global _fmnist_xtrn,_fmnist_ytrn,_fmnist_xtst,_fmnist_ytst,_fmnist_lbls
if !isdefined(:_fmnist_xtrn)
info("Loading FMNIST...")
if !(@isdefined _fmnist_xtrn)
@info("Loading FMNIST...")
_fmnist_xtrn = _fmnist_xdata("train-images-idx3-ubyte.gz")
_fmnist_xtst = _fmnist_xdata("t10k-images-idx3-ubyte.gz")
_fmnist_ytrn = _fmnist_ydata("train-labels-idx1-ubyte.gz")
@@ -56,7 +52,7 @@ function _fmnist_xdata(file)
end

function _fmnist_ydata(file)
_fmnist_gzload(file)[9:end] + 0x1
_fmnist_gzload(file)[9:end] .+ 0x1
end

function _fmnist_gzload(file)
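The deleted Pkg.installed(p) == nothing && Pkg.add(p) preamble (also dropped in imagenet.jl, imdb.jl, and treebank.jl below) relied on a one-argument Pkg.installed method that no longer exists in Julia 1.0; dependencies now belong in Project.toml. The loader itself reads gzipped IDX files: label files carry an 8-byte header (magic number plus count) before one byte per label, hence [9:end] to skip the header and .+ 0x1 to shift to 1-based labels. A minimal sketch of that idiom, assuming the file has already been downloaded:

using GZip
function gzload(path)
    fh = GZip.open(path)   # decompresses transparently on read
    data = read(fh)
    close(fh)
    return data
end
labels = gzload("t10k-labels-idx1-ubyte.gz")[9:end] .+ 0x1  # skip 8-byte IDX header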
4 changes: 2 additions & 2 deletions data/gutenberg.jl
@@ -1,11 +1,11 @@
using Random
using Random, Knet

"Where to download gutenberg from"
gutenbergurl = "http://www.gutenberg.org/files"


"Where to download gutenberg to"
gutenbergdir = joinpath(dirname(pathof(Knet)), "..", "data", "gutenberg")
gutenbergdir = Knet.dir("data", "gutenberg")


"Download text from Project Gutenberg and return contents as String."
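Both sides of this hunk solve the same problem: Pkg.dir was removed in Julia 1.0, so a package must locate its data relative to its own loaded module, and Knet.dir presumably wraps the pathof pattern visible on the replaced line. The pattern in isolation:

using Knet
# pathof(Knet) points at src/Knet.jl inside the installed package,
# so the package root is one level above that file's directory:
datadir = abspath(joinpath(dirname(pathof(Knet)), "..", "data", "gutenberg"))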
8 changes: 5 additions & 3 deletions data/housing.jl
@@ -1,3 +1,5 @@
using DelimitedFiles, Statistics, Knet

"""
housing([test]; [url, file])
@@ -18,18 +20,18 @@ deviation.
"""
function housing(test=0.0;
file=Pkg.dir("Knet","data","housing","housing.data"),
file=Knet.dir("data", "housing", "housing.data"),
url="https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data")
if !isfile(file)
isdir(dirname(file)) || mkpath(dirname(file))
info("Downloading $url to $file")
@info("Downloading $url to $file")
download(url, file)
end
data = readdlm(file)'
# @show size(data) # (14,506)
x = data[1:13,:]
y = data[14:14,:]
x = (x .- mean(x,2)) ./ std(x,2) # Data normalization
x = (x .- mean(x,dims=2)) ./ std(x,dims=2) # Data normalization
if test == 0
xtrn = xtst = x
ytrn = ytst = y
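The normalization change reflects Statistics leaving Base: mean and std must be imported, and the reduction dimension becomes the dims keyword. With features in rows and samples in columns, dims=2 yields one mean and one standard deviation per feature. A short sketch (illustrative data):

using Statistics
x = rand(13, 506)       # 13 features × 506 samples, as in housing()
μ = mean(x, dims=2)     # 13×1 per-feature means (0.6: mean(x, 2))
σ = std(x, dims=2)      # 13×1 per-feature standard deviations
xn = (x .- μ) ./ σ      # broadcasting preserves the 13×506 shape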
4 changes: 0 additions & 4 deletions data/imagenet.jl
@@ -1,7 +1,3 @@
for p in ("MAT","Images")
Pkg.installed(p) == nothing && Pkg.add(p)
end

using MAT,Images

_mcnurl = "http://www.vlfeat.org/matconvnet/models"
28 changes: 12 additions & 16 deletions data/imdb.jl
@@ -2,11 +2,8 @@
# Also see https://github.com/fchollet/keras/raw/master/examples/imdb_lstm.py
# Also see https://github.com/ilkarman/DeepLearningFrameworks/raw/master/common/utils.py

for p in ("PyCall","JSON","JLD")
Pkg.installed(p) == nothing && Pkg.add(p)
end

using PyCall,JSON,JLD
using PyCall,JSON,JLD2,Random,Knet
@pyimport numpy as np

"""
@@ -28,37 +25,36 @@ https://keras.io/datasets and return (xtrn,ytrn,xtst,ytst,dict) tuple.
"""
function imdb(;
url = "https://s3.amazonaws.com/text-datasets",
dir = Pkg.dir("Knet","data","imdb"),
dir = Knet.dir("data", "imdb"),
data="imdb.npz",
dict="imdb_word_index.json",
jld="imdb.jld",
jld2="imdb.jld2",
maxval=nothing,
maxlen=nothing,
seed=0, oov=true, stoken=true, pad=true
)
global _imdb_xtrn,_imdb_ytrn,_imdb_xtst,_imdb_ytst,_imdb_dict
if !isdefined(:_imdb_xtrn)
if !(@isdefined _imdb_xtrn)
isdir(dir) || mkpath(dir)
jldpath = joinpath(dir,jld)
if !isfile(jldpath)
info("Downloading IMDB...")
jld2path = joinpath(dir,jld2)
if !isfile(jld2path)
@info("Downloading IMDB...")
datapath = joinpath(dir,data)
dictpath = joinpath(dir,dict)
isfile(datapath) || download("$url/$data",datapath)
isfile(dictpath) || download("$url/$dict",dictpath)
@pyimport numpy as np
d = np.load(datapath)
_imdb_xtrn = map(a->np.asarray(a,dtype=np.int32), get(d, "x_train"))
_imdb_ytrn = Array{Int8}(get(d, "y_train") .+ 1)
_imdb_xtst = map(a->np.asarray(a,dtype=np.int32), get(d, "x_test"))
_imdb_ytst = Array{Int8}(get(d, "y_test") .+ 1)
_imdb_dict = Dict{String,Int32}(JSON.parsefile(dictpath))
JLD.@save jldpath _imdb_xtrn _imdb_ytrn _imdb_xtst _imdb_ytst _imdb_dict
JLD2.@save jld2path _imdb_xtrn _imdb_ytrn _imdb_xtst _imdb_ytst _imdb_dict
#rm(datapath)
#rm(dictpath)
end
info("Loading IMDB...")
JLD.@load jldpath _imdb_xtrn _imdb_ytrn _imdb_xtst _imdb_ytst _imdb_dict
@info("Loading IMDB...")
JLD2.@load jld2path _imdb_xtrn _imdb_ytrn _imdb_xtst _imdb_ytst _imdb_dict
end
if seed != 0; srand(seed); end
xs = [_imdb_xtrn;_imdb_xtst]
@@ -85,7 +81,7 @@ function imdb(;
xi = xi[end-maxlen+1:end]
end
if pad && length(xi) < maxlen
xi = append!(repmat([pad_token], maxlen-length(xi)), xi)
xi = append!(repeat([pad_token], maxlen-length(xi)), xi)
end
newx[i] = xi
end
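The cache format moves from JLD to JLD2, whose @save/@load macros are drop-in replacements writing *.jld2 files (matching the new .gitignore entry above); repmat is likewise renamed repeat. A minimal round-trip sketch with hypothetical variables:

using JLD2
xtrn = rand(Int32, 5); ytrn = rand(Int8, 5)
JLD2.@save "cache.jld2" xtrn ytrn   # store variables under their own names
JLD2.@load "cache.jld2" xtrn ytrn   # restore them into the current scope
padded = append!(repeat([0], 3), [1, 2])  # 0.6: repmat([0], 3); result: [0, 0, 0, 1, 2]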
9 changes: 5 additions & 4 deletions data/mikolovptb.jl
@@ -1,5 +1,6 @@
using Knet
mikolovptburl = "http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz"
mikolovptbdir = Pkg.dir("Knet","data","mikolovptb")
mikolovptbdir = Knet.dir("data","mikolovptb")
mikolovptbtrn = "ptb.train.txt"
mikolovptbdev = "ptb.valid.txt"
mikolovptbtst = "ptb.test.txt"
@@ -21,10 +22,10 @@ been lowercased and reduced to a 10K vocabulary size. Return a tuple
"""
function mikolovptb()
global _mptb_trn, _mptb_dev, _mptb_tst, _mptb_vocab
if !isdefined(:_mptb_trn)
if !(@isdefined _mptb_trn)
isdir(mikolovptbdir) || mkpath(mikolovptbdir)
if !isfile(joinpath(mikolovptbdir, "ptb.train.txt"))
info("Downloading $mikolovptburl")
@info("Downloading $mikolovptburl")
tgz = download(mikolovptburl)
run(`tar --strip-components 3 -C $mikolovptbdir -xzf $tgz ./simple-examples/data/ptb.train.txt ./simple-examples/data/ptb.valid.txt ./simple-examples/data/ptb.test.txt`)
end
@@ -43,7 +44,7 @@ function mikolovptb()
push!(data, sentences)
end
_mptb_trn, _mptb_dev, _mptb_tst = data
_mptb_vocab = Array{String}(length(dict))
_mptb_vocab = Array{String}(undef, length(dict))
for (k,v) in dict; _mptb_vocab[v] = k; end
end
return _mptb_trn, _mptb_dev, _mptb_tst, _mptb_vocab
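Uninitialized array construction now requires the explicit undef sentinel, which is what the vocabulary inversion above depends on. The same pattern in isolation:

dict = Dict("the" => 1, "cat" => 2)         # word => id
vocab = Array{String}(undef, length(dict))  # 0.6: Array{String}(length(dict))
for (k, v) in dict; vocab[v] = k; end       # invert to id => word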
9 changes: 2 additions & 7 deletions data/treebank.jl
@@ -1,14 +1,10 @@
for p in ("ZipFile",)
Pkg.installed(p) == nothing && Pkg.add(p)
end

using ZipFile
using ZipFile, Knet

"Where to download dataset from"
const TREEBANK_URL = "https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip"

"Where to download dataset to"
const TREEBANK_DIR = Pkg.dir("Knet","data","treebank")
const TREEBANK_DIR = Knet.dir("data","treebank")

const TREEBANK_ZIPNAME = "trainDevTestTrees_PTB.zip"
const TREEBANK_SPLITS = ("train", "dev", "test")
@@ -39,7 +35,6 @@ make_data!(trn, w2i, l2i)
```
"""

mutable struct SentimentTree
label
children
10 changes: 5 additions & 5 deletions data/wikiner.jl
@@ -1,6 +1,7 @@
using Knet
const WIKINER_DOWNLOAD_PREFIX =
"https://github.com/neulab/dynet-benchmark/raw/master/data/tags/"
const WIKINER_DIR = Pkg.dir("Knet","data","wikiner")
const WIKINER_DIR = Knet.dir("data","wikiner")
const WIKINER_FILES = ("train.txt","dev.txt")
const UNK = "_UNK_"
const PAD = "<*>"
@@ -35,7 +36,6 @@ data = WikiNERData()
```
"""

mutable struct WikiNERData
trn
dev
@@ -100,7 +100,7 @@ let
end

function parse_line(line)
return map(x->split(x,"|"), split(replace(line,"\n",""), " "))
return map(x->split(x,"|"), split(replace(line,"\n"=>""), " "))
end

function get_words_tags_chars(trn)
@@ -109,15 +109,15 @@ let
for (word,tag) in sample
push!(words, word)
push!(tags, tag)
push!(chars, convert(Array{UInt8,1}, word)...)
push!(chars, convert(Array{UInt8,1}, codeunits(word))...)
end
end
push!(chars, PAD)
return words, tags, chars
end

function filter_words(wordcounts,minoccur)
filtered_words = filter((w,c)-> c >= minoccur, wordcounts)
filtered_words = filter(x-> x[2] >= minoccur, wordcounts)
filtered_words = collect(keys(filtered_words))
!in(UNK, filtered_words) && push!(filtered_words, UNK)
return filtered_words
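Three further 1.0 changes surface in wikiner.jl: replace takes a pattern => replacement Pair, a String no longer converts directly to a byte array (it goes through codeunits), and filter over a Dict passes each entry as a single key-value pair rather than as two arguments. All three together (hypothetical data):

line = "West|NNP Virginia|NNP\n"
tokens = map(x -> split(x, "|"), split(replace(line, "\n" => ""), " "))
bytes = convert(Array{UInt8,1}, codeunits("word"))  # 0.6: convert(Array{UInt8,1}, "word")
wordcounts = Dict("the" => 5, "rare" => 1)
frequent = filter(x -> x[2] >= 2, wordcounts)       # 0.6: filter((w,c) -> c >= 2, wordcounts)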
